Diffstat (limited to 'arch/arm64/kernel'): 55 files changed, 14287 insertions, 0 deletions
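For reference, a path-limited summary like the one above can be reproduced with git itself. This is only an illustrative sketch; the commit references below are placeholders for the merge that introduced arch/arm64/kernel, not values taken from this page:

    # Diffstat of a single commit, restricted to arch/arm64/kernel
    git show --stat <commit> -- arch/arm64/kernel

    # Or compare two arbitrary revisions, limited to the same path
    git diff --stat <base>..<tip> -- arch/arm64/kernel

Both forms summarize only files under arch/arm64/kernel, which is what the "limited to" note in the heading refers to.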
diff --git a/arch/arm64/kernel/.gitignore b/arch/arm64/kernel/.gitignore new file mode 100644 index 00000000000..c5f676c3c22 --- /dev/null +++ b/arch/arm64/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile new file mode 100644 index 00000000000..cdaedad3afe --- /dev/null +++ b/arch/arm64/kernel/Makefile @@ -0,0 +1,40 @@ +# +# Makefile for the linux kernel. +# + +CPPFLAGS_vmlinux.lds	:= -DTEXT_OFFSET=$(TEXT_OFFSET) +AFLAGS_head.o		:= -DTEXT_OFFSET=$(TEXT_OFFSET) +CFLAGS_efi-stub.o 	:= -DTEXT_OFFSET=$(TEXT_OFFSET) \ +			   -I$(src)/../../../scripts/dtc/libfdt + +CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_insn.o = -pg +CFLAGS_REMOVE_return_address.o = -pg + +# Object file lists. +arm64-obj-y		:= cputable.o debug-monitors.o entry.o irq.o fpsimd.o	\ +			   entry-fpsimd.o process.o ptrace.o setup.o signal.o	\ +			   sys.o stacktrace.o time.o traps.o io.o vdso.o	\ +			   hyp-stub.o psci.o cpu_ops.o insn.o return_address.o + +arm64-obj-$(CONFIG_COMPAT)		+= sys32.o kuser32.o signal32.o 	\ +					   sys_compat.o +arm64-obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace.o entry-ftrace.o +arm64-obj-$(CONFIG_MODULES)		+= arm64ksyms.o module.o +arm64-obj-$(CONFIG_SMP)			+= smp.o smp_spin_table.o topology.o +arm64-obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o +arm64-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o +arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o +arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND)	+= sleep.o suspend.o +arm64-obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o +arm64-obj-$(CONFIG_KGDB)		+= kgdb.o +arm64-obj-$(CONFIG_EFI)			+= efi.o efi-stub.o efi-entry.o + +obj-y					+= $(arm64-obj-y) vdso/ +obj-m					+= $(arm64-obj-m) +head-y					:= head.o +extra-y					:= $(head-y) vmlinux.lds + +# vDSO - this must be built first to generate the symbol offsets +$(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h +$(obj)/vdso/vdso-offsets.h: $(obj)/vdso diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c new file mode 100644 index 00000000000..a85843ddbde --- /dev/null +++ b/arch/arm64/kernel/arm64ksyms.c @@ -0,0 +1,67 @@ +/* + * Based on arch/arm/kernel/armksyms.c + * + * Copyright (C) 2000 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/cryptohash.h> +#include <linux/delay.h> +#include <linux/in6.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> +#include <linux/io.h> + +#include <asm/checksum.h> + +EXPORT_SYMBOL(copy_page); +EXPORT_SYMBOL(clear_page); + +	/* user mem (segment) */ +EXPORT_SYMBOL(__copy_from_user); +EXPORT_SYMBOL(__copy_to_user); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(__copy_in_user); + +	/* physical memory */ +EXPORT_SYMBOL(memstart_addr); + +	/* string / mem functions */ +EXPORT_SYMBOL(strchr); +EXPORT_SYMBOL(strrchr); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strnlen); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memchr); +EXPORT_SYMBOL(memcmp); + +	/* atomic bitops */ +EXPORT_SYMBOL(set_bit); +EXPORT_SYMBOL(test_and_set_bit); +EXPORT_SYMBOL(clear_bit); +EXPORT_SYMBOL(test_and_clear_bit); +EXPORT_SYMBOL(change_bit); +EXPORT_SYMBOL(test_and_change_bit); + +#ifdef CONFIG_FUNCTION_TRACER +EXPORT_SYMBOL(_mcount); +#endif diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c new file mode 100644 index 00000000000..646f888387c --- /dev/null +++ b/arch/arm64/kernel/asm-offsets.c @@ -0,0 +1,153 @@ +/* + * Based on arch/arm/kernel/asm-offsets.c + * + * Copyright (C) 1995-2003 Russell King + *               2001-2002 Keith Owens + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/dma-mapping.h> +#include <linux/kvm_host.h> +#include <asm/thread_info.h> +#include <asm/memory.h> +#include <asm/cputable.h> +#include <asm/smp_plat.h> +#include <asm/suspend.h> +#include <asm/vdso_datapage.h> +#include <linux/kbuild.h> + +int main(void) +{ +  DEFINE(TSK_ACTIVE_MM,		offsetof(struct task_struct, active_mm)); +  BLANK(); +  DEFINE(TI_FLAGS,		offsetof(struct thread_info, flags)); +  DEFINE(TI_PREEMPT,		offsetof(struct thread_info, preempt_count)); +  DEFINE(TI_ADDR_LIMIT,		offsetof(struct thread_info, addr_limit)); +  DEFINE(TI_TASK,		offsetof(struct thread_info, task)); +  DEFINE(TI_EXEC_DOMAIN,	offsetof(struct thread_info, exec_domain)); +  DEFINE(TI_CPU,		offsetof(struct thread_info, cpu)); +  BLANK(); +  DEFINE(THREAD_CPU_CONTEXT,	offsetof(struct task_struct, thread.cpu_context)); +  BLANK(); +  DEFINE(S_X0,			offsetof(struct pt_regs, regs[0])); +  DEFINE(S_X1,			offsetof(struct pt_regs, regs[1])); +  DEFINE(S_X2,			offsetof(struct pt_regs, regs[2])); +  DEFINE(S_X3,			offsetof(struct pt_regs, regs[3])); +  DEFINE(S_X4,			offsetof(struct pt_regs, regs[4])); +  DEFINE(S_X5,			offsetof(struct pt_regs, regs[5])); +  DEFINE(S_X6,			offsetof(struct pt_regs, regs[6])); +  DEFINE(S_X7,			offsetof(struct pt_regs, regs[7])); +  DEFINE(S_LR,			offsetof(struct pt_regs, regs[30])); +  DEFINE(S_SP,			offsetof(struct pt_regs, sp)); +#ifdef CONFIG_COMPAT +  DEFINE(S_COMPAT_SP,		offsetof(struct pt_regs, compat_sp)); +#endif +  DEFINE(S_PSTATE,		offsetof(struct pt_regs, pstate)); +  DEFINE(S_PC,			offsetof(struct pt_regs, pc)); +  DEFINE(S_ORIG_X0,		offsetof(struct pt_regs, orig_x0)); +  DEFINE(S_SYSCALLNO,		offsetof(struct pt_regs, syscallno)); +  DEFINE(S_FRAME_SIZE,		sizeof(struct pt_regs)); +  BLANK(); +  DEFINE(MM_CONTEXT_ID,		offsetof(struct mm_struct, context.id)); +  BLANK(); +  DEFINE(VMA_VM_MM,		offsetof(struct vm_area_struct, vm_mm)); +  DEFINE(VMA_VM_FLAGS,		offsetof(struct vm_area_struct, vm_flags)); +  BLANK(); +  DEFINE(VM_EXEC,	       	VM_EXEC); +  BLANK(); +  DEFINE(PAGE_SZ,	       	PAGE_SIZE); +  BLANK(); +  DEFINE(CPU_INFO_SZ,		sizeof(struct cpu_info)); +  DEFINE(CPU_INFO_SETUP,	offsetof(struct cpu_info, cpu_setup)); +  BLANK(); +  DEFINE(DMA_BIDIRECTIONAL,	DMA_BIDIRECTIONAL); +  DEFINE(DMA_TO_DEVICE,		DMA_TO_DEVICE); +  DEFINE(DMA_FROM_DEVICE,	DMA_FROM_DEVICE); +  BLANK(); +  DEFINE(CLOCK_REALTIME,	CLOCK_REALTIME); +  DEFINE(CLOCK_MONOTONIC,	CLOCK_MONOTONIC); +  DEFINE(CLOCK_REALTIME_RES,	MONOTONIC_RES_NSEC); +  DEFINE(CLOCK_REALTIME_COARSE,	CLOCK_REALTIME_COARSE); +  DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE); +  DEFINE(CLOCK_COARSE_RES,	LOW_RES_NSEC); +  DEFINE(NSEC_PER_SEC,		NSEC_PER_SEC); +  BLANK(); +  DEFINE(VDSO_CS_CYCLE_LAST,	offsetof(struct vdso_data, cs_cycle_last)); +  DEFINE(VDSO_XTIME_CLK_SEC,	offsetof(struct vdso_data, xtime_clock_sec)); +  DEFINE(VDSO_XTIME_CLK_NSEC,	offsetof(struct vdso_data, xtime_clock_nsec)); +  DEFINE(VDSO_XTIME_CRS_SEC,	offsetof(struct vdso_data, xtime_coarse_sec)); +  DEFINE(VDSO_XTIME_CRS_NSEC,	offsetof(struct vdso_data, xtime_coarse_nsec)); +  DEFINE(VDSO_WTM_CLK_SEC,	offsetof(struct vdso_data, wtm_clock_sec)); +  DEFINE(VDSO_WTM_CLK_NSEC,	offsetof(struct vdso_data, wtm_clock_nsec)); +  DEFINE(VDSO_TB_SEQ_COUNT,	offsetof(struct vdso_data, tb_seq_count)); +  DEFINE(VDSO_CS_MULT,		offsetof(struct vdso_data, cs_mult)); +  DEFINE(VDSO_CS_SHIFT,		offsetof(struct vdso_data, cs_shift)); +  DEFINE(VDSO_TZ_MINWEST,	offsetof(struct vdso_data, 
tz_minuteswest)); +  DEFINE(VDSO_TZ_DSTTIME,	offsetof(struct vdso_data, tz_dsttime)); +  DEFINE(VDSO_USE_SYSCALL,	offsetof(struct vdso_data, use_syscall)); +  BLANK(); +  DEFINE(TVAL_TV_SEC,		offsetof(struct timeval, tv_sec)); +  DEFINE(TVAL_TV_USEC,		offsetof(struct timeval, tv_usec)); +  DEFINE(TSPEC_TV_SEC,		offsetof(struct timespec, tv_sec)); +  DEFINE(TSPEC_TV_NSEC,		offsetof(struct timespec, tv_nsec)); +  BLANK(); +  DEFINE(TZ_MINWEST,		offsetof(struct timezone, tz_minuteswest)); +  DEFINE(TZ_DSTTIME,		offsetof(struct timezone, tz_dsttime)); +  BLANK(); +#ifdef CONFIG_KVM_ARM_HOST +  DEFINE(VCPU_CONTEXT,		offsetof(struct kvm_vcpu, arch.ctxt)); +  DEFINE(CPU_GP_REGS,		offsetof(struct kvm_cpu_context, gp_regs)); +  DEFINE(CPU_USER_PT_REGS,	offsetof(struct kvm_regs, regs)); +  DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs)); +  DEFINE(CPU_SP_EL1,		offsetof(struct kvm_regs, sp_el1)); +  DEFINE(CPU_ELR_EL1,		offsetof(struct kvm_regs, elr_el1)); +  DEFINE(CPU_SPSR,		offsetof(struct kvm_regs, spsr)); +  DEFINE(CPU_SYSREGS,		offsetof(struct kvm_cpu_context, sys_regs)); +  DEFINE(VCPU_ESR_EL2,		offsetof(struct kvm_vcpu, arch.fault.esr_el2)); +  DEFINE(VCPU_FAR_EL2,		offsetof(struct kvm_vcpu, arch.fault.far_el2)); +  DEFINE(VCPU_HPFAR_EL2,	offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); +  DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2)); +  DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines)); +  DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context)); +  DEFINE(VCPU_TIMER_CNTV_CTL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); +  DEFINE(VCPU_TIMER_CNTV_CVAL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); +  DEFINE(KVM_TIMER_CNTVOFF,	offsetof(struct kvm, arch.timer.cntvoff)); +  DEFINE(KVM_TIMER_ENABLED,	offsetof(struct kvm, arch.timer.enabled)); +  DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm)); +  DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu)); +  DEFINE(VGIC_CPU_HCR,		offsetof(struct vgic_cpu, vgic_hcr)); +  DEFINE(VGIC_CPU_VMCR,		offsetof(struct vgic_cpu, vgic_vmcr)); +  DEFINE(VGIC_CPU_MISR,		offsetof(struct vgic_cpu, vgic_misr)); +  DEFINE(VGIC_CPU_EISR,		offsetof(struct vgic_cpu, vgic_eisr)); +  DEFINE(VGIC_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_elrsr)); +  DEFINE(VGIC_CPU_APR,		offsetof(struct vgic_cpu, vgic_apr)); +  DEFINE(VGIC_CPU_LR,		offsetof(struct vgic_cpu, vgic_lr)); +  DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr)); +  DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr)); +  DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base)); +#endif +#ifdef CONFIG_ARM64_CPU_SUSPEND +  DEFINE(CPU_SUSPEND_SZ,	sizeof(struct cpu_suspend_ctx)); +  DEFINE(CPU_CTX_SP,		offsetof(struct cpu_suspend_ctx, sp)); +  DEFINE(MPIDR_HASH_MASK,	offsetof(struct mpidr_hash, mask)); +  DEFINE(MPIDR_HASH_SHIFTS,	offsetof(struct mpidr_hash, shift_aff)); +  DEFINE(SLEEP_SAVE_SP_SZ,	sizeof(struct sleep_save_sp)); +  DEFINE(SLEEP_SAVE_SP_PHYS,	offsetof(struct sleep_save_sp, save_ptr_stash_phys)); +  DEFINE(SLEEP_SAVE_SP_VIRT,	offsetof(struct sleep_save_sp, save_ptr_stash)); +#endif +  return 0; +} diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c new file mode 100644 index 00000000000..d62d12fb36c --- /dev/null +++ b/arch/arm64/kernel/cpu_ops.c @@ -0,0 +1,87 @@ +/* + * CPU kernel entry/exit control + * + * Copyright (C) 2013 ARM Ltd. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ + +#include <asm/cpu_ops.h> +#include <asm/smp_plat.h> +#include <linux/errno.h> +#include <linux/of.h> +#include <linux/string.h> + +extern const struct cpu_operations smp_spin_table_ops; +extern const struct cpu_operations cpu_psci_ops; + +const struct cpu_operations *cpu_ops[NR_CPUS]; + +static const struct cpu_operations *supported_cpu_ops[] __initconst = { +#ifdef CONFIG_SMP +	&smp_spin_table_ops, +	&cpu_psci_ops, +#endif +	NULL, +}; + +static const struct cpu_operations * __init cpu_get_ops(const char *name) +{ +	const struct cpu_operations **ops = supported_cpu_ops; + +	while (*ops) { +		if (!strcmp(name, (*ops)->name)) +			return *ops; + +		ops++; +	} + +	return NULL; +} + +/* + * Read a cpu's enable method from the device tree and record it in cpu_ops. + */ +int __init cpu_read_ops(struct device_node *dn, int cpu) +{ +	const char *enable_method = of_get_property(dn, "enable-method", NULL); +	if (!enable_method) { +		/* +		 * The boot CPU may not have an enable method (e.g. when +		 * spin-table is used for secondaries). Don't warn spuriously. +		 */ +		if (cpu != 0) +			pr_err("%s: missing enable-method property\n", +				dn->full_name); +		return -ENOENT; +	} + +	cpu_ops[cpu] = cpu_get_ops(enable_method); +	if (!cpu_ops[cpu]) { +		pr_warn("%s: unsupported enable-method property: %s\n", +			dn->full_name, enable_method); +		return -EOPNOTSUPP; +	} + +	return 0; +} + +void __init cpu_read_bootcpu_ops(void) +{ +	struct device_node *dn = of_get_cpu_node(0, NULL); +	if (!dn) { +		pr_err("Failed to find device node for boot cpu\n"); +		return; +	} +	cpu_read_ops(dn, 0); +} diff --git a/arch/arm64/kernel/cputable.c b/arch/arm64/kernel/cputable.c new file mode 100644 index 00000000000..fd3993cb060 --- /dev/null +++ b/arch/arm64/kernel/cputable.c @@ -0,0 +1,33 @@ +/* + * arch/arm64/kernel/cputable.c + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/init.h> + +#include <asm/cputable.h> + +extern unsigned long __cpu_setup(void); + +struct cpu_info cpu_table[] = { +	{ +		.cpu_id_val	= 0x000f0000, +		.cpu_id_mask	= 0x000f0000, +		.cpu_name	= "AArch64 Processor", +		.cpu_setup	= __cpu_setup, +	}, +	{ /* Empty */ }, +}; diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c new file mode 100644 index 00000000000..a7fb874b595 --- /dev/null +++ b/arch/arm64/kernel/debug-monitors.c @@ -0,0 +1,438 @@ +/* + * ARMv8 single-step debug support and mdscr context switching. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/cpu.h> +#include <linux/debugfs.h> +#include <linux/hardirq.h> +#include <linux/init.h> +#include <linux/ptrace.h> +#include <linux/stat.h> +#include <linux/uaccess.h> + +#include <asm/debug-monitors.h> +#include <asm/cputype.h> +#include <asm/system_misc.h> + +/* Low-level stepping controls. */ +#define DBG_MDSCR_SS		(1 << 0) +#define DBG_SPSR_SS		(1 << 21) + +/* MDSCR_EL1 enabling bits */ +#define DBG_MDSCR_KDE		(1 << 13) +#define DBG_MDSCR_MDE		(1 << 15) +#define DBG_MDSCR_MASK		~(DBG_MDSCR_KDE | DBG_MDSCR_MDE) + +/* Determine debug architecture. */ +u8 debug_monitors_arch(void) +{ +	return read_cpuid(ID_AA64DFR0_EL1) & 0xf; +} + +/* + * MDSCR access routines. + */ +static void mdscr_write(u32 mdscr) +{ +	unsigned long flags; +	local_dbg_save(flags); +	asm volatile("msr mdscr_el1, %0" :: "r" (mdscr)); +	local_dbg_restore(flags); +} + +static u32 mdscr_read(void) +{ +	u32 mdscr; +	asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr)); +	return mdscr; +} + +/* + * Allow root to disable self-hosted debug from userspace. + * This is useful if you want to connect an external JTAG debugger. + */ +static u32 debug_enabled = 1; + +static int create_debug_debugfs_entry(void) +{ +	debugfs_create_bool("debug_enabled", 0644, NULL, &debug_enabled); +	return 0; +} +fs_initcall(create_debug_debugfs_entry); + +static int __init early_debug_disable(char *buf) +{ +	debug_enabled = 0; +	return 0; +} + +early_param("nodebugmon", early_debug_disable); + +/* + * Keep track of debug users on each core. + * The ref counts are per-cpu so we use a local_t type. 
+ */ +static DEFINE_PER_CPU(int, mde_ref_count); +static DEFINE_PER_CPU(int, kde_ref_count); + +void enable_debug_monitors(enum debug_el el) +{ +	u32 mdscr, enable = 0; + +	WARN_ON(preemptible()); + +	if (this_cpu_inc_return(mde_ref_count) == 1) +		enable = DBG_MDSCR_MDE; + +	if (el == DBG_ACTIVE_EL1 && +	    this_cpu_inc_return(kde_ref_count) == 1) +		enable |= DBG_MDSCR_KDE; + +	if (enable && debug_enabled) { +		mdscr = mdscr_read(); +		mdscr |= enable; +		mdscr_write(mdscr); +	} +} + +void disable_debug_monitors(enum debug_el el) +{ +	u32 mdscr, disable = 0; + +	WARN_ON(preemptible()); + +	if (this_cpu_dec_return(mde_ref_count) == 0) +		disable = ~DBG_MDSCR_MDE; + +	if (el == DBG_ACTIVE_EL1 && +	    this_cpu_dec_return(kde_ref_count) == 0) +		disable &= ~DBG_MDSCR_KDE; + +	if (disable) { +		mdscr = mdscr_read(); +		mdscr &= disable; +		mdscr_write(mdscr); +	} +} + +/* + * OS lock clearing. + */ +static void clear_os_lock(void *unused) +{ +	asm volatile("msr oslar_el1, %0" : : "r" (0)); +} + +static int os_lock_notify(struct notifier_block *self, +				    unsigned long action, void *data) +{ +	int cpu = (unsigned long)data; +	if (action == CPU_ONLINE) +		smp_call_function_single(cpu, clear_os_lock, NULL, 1); +	return NOTIFY_OK; +} + +static struct notifier_block os_lock_nb = { +	.notifier_call = os_lock_notify, +}; + +static int debug_monitors_init(void) +{ +	cpu_notifier_register_begin(); + +	/* Clear the OS lock. */ +	on_each_cpu(clear_os_lock, NULL, 1); +	isb(); +	local_dbg_enable(); + +	/* Register hotplug handler. */ +	__register_cpu_notifier(&os_lock_nb); + +	cpu_notifier_register_done(); +	return 0; +} +postcore_initcall(debug_monitors_init); + +/* + * Single step API and exception handling. + */ +static void set_regs_spsr_ss(struct pt_regs *regs) +{ +	unsigned long spsr; + +	spsr = regs->pstate; +	spsr &= ~DBG_SPSR_SS; +	spsr |= DBG_SPSR_SS; +	regs->pstate = spsr; +} + +static void clear_regs_spsr_ss(struct pt_regs *regs) +{ +	unsigned long spsr; + +	spsr = regs->pstate; +	spsr &= ~DBG_SPSR_SS; +	regs->pstate = spsr; +} + +/* EL1 Single Step Handler hooks */ +static LIST_HEAD(step_hook); +static DEFINE_RWLOCK(step_hook_lock); + +void register_step_hook(struct step_hook *hook) +{ +	write_lock(&step_hook_lock); +	list_add(&hook->node, &step_hook); +	write_unlock(&step_hook_lock); +} + +void unregister_step_hook(struct step_hook *hook) +{ +	write_lock(&step_hook_lock); +	list_del(&hook->node); +	write_unlock(&step_hook_lock); +} + +/* + * Call registered single step handers + * There is no Syndrome info to check for determining the handler. + * So we call all the registered handlers, until the right handler is + * found which returns zero. + */ +static int call_step_hook(struct pt_regs *regs, unsigned int esr) +{ +	struct step_hook *hook; +	int retval = DBG_HOOK_ERROR; + +	read_lock(&step_hook_lock); + +	list_for_each_entry(hook, &step_hook, node)	{ +		retval = hook->fn(regs, esr); +		if (retval == DBG_HOOK_HANDLED) +			break; +	} + +	read_unlock(&step_hook_lock); + +	return retval; +} + +static int single_step_handler(unsigned long addr, unsigned int esr, +			       struct pt_regs *regs) +{ +	siginfo_t info; + +	/* +	 * If we are stepping a pending breakpoint, call the hw_breakpoint +	 * handler first. 
+	 */ +	if (!reinstall_suspended_bps(regs)) +		return 0; + +	if (user_mode(regs)) { +		info.si_signo = SIGTRAP; +		info.si_errno = 0; +		info.si_code  = TRAP_HWBKPT; +		info.si_addr  = (void __user *)instruction_pointer(regs); +		force_sig_info(SIGTRAP, &info, current); + +		/* +		 * ptrace will disable single step unless explicitly +		 * asked to re-enable it. For other clients, it makes +		 * sense to leave it enabled (i.e. rewind the controls +		 * to the active-not-pending state). +		 */ +		user_rewind_single_step(current); +	} else { +		if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED) +			return 0; + +		pr_warning("Unexpected kernel single-step exception at EL1\n"); +		/* +		 * Re-enable stepping since we know that we will be +		 * returning to regs. +		 */ +		set_regs_spsr_ss(regs); +	} + +	return 0; +} + +/* + * Breakpoint handler is re-entrant as another breakpoint can + * hit within breakpoint handler, especically in kprobes. + * Use reader/writer locks instead of plain spinlock. + */ +static LIST_HEAD(break_hook); +static DEFINE_RWLOCK(break_hook_lock); + +void register_break_hook(struct break_hook *hook) +{ +	write_lock(&break_hook_lock); +	list_add(&hook->node, &break_hook); +	write_unlock(&break_hook_lock); +} + +void unregister_break_hook(struct break_hook *hook) +{ +	write_lock(&break_hook_lock); +	list_del(&hook->node); +	write_unlock(&break_hook_lock); +} + +static int call_break_hook(struct pt_regs *regs, unsigned int esr) +{ +	struct break_hook *hook; +	int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL; + +	read_lock(&break_hook_lock); +	list_for_each_entry(hook, &break_hook, node) +		if ((esr & hook->esr_mask) == hook->esr_val) +			fn = hook->fn; +	read_unlock(&break_hook_lock); + +	return fn ? fn(regs, esr) : DBG_HOOK_ERROR; +} + +static int brk_handler(unsigned long addr, unsigned int esr, +		       struct pt_regs *regs) +{ +	siginfo_t info; + +	if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED) +		return 0; + +	if (!user_mode(regs)) +		return -EFAULT; + +	info = (siginfo_t) { +		.si_signo = SIGTRAP, +		.si_errno = 0, +		.si_code  = TRAP_BRKPT, +		.si_addr  = (void __user *)instruction_pointer(regs), +	}; + +	force_sig_info(SIGTRAP, &info, current); +	return 0; +} + +int aarch32_break_handler(struct pt_regs *regs) +{ +	siginfo_t info; +	u32 arm_instr; +	u16 thumb_instr; +	bool bp = false; +	void __user *pc = (void __user *)instruction_pointer(regs); + +	if (!compat_user_mode(regs)) +		return -EFAULT; + +	if (compat_thumb_mode(regs)) { +		/* get 16-bit Thumb instruction */ +		get_user(thumb_instr, (u16 __user *)pc); +		thumb_instr = le16_to_cpu(thumb_instr); +		if (thumb_instr == AARCH32_BREAK_THUMB2_LO) { +			/* get second half of 32-bit Thumb-2 instruction */ +			get_user(thumb_instr, (u16 __user *)(pc + 2)); +			thumb_instr = le16_to_cpu(thumb_instr); +			bp = thumb_instr == AARCH32_BREAK_THUMB2_HI; +		} else { +			bp = thumb_instr == AARCH32_BREAK_THUMB; +		} +	} else { +		/* 32-bit ARM instruction */ +		get_user(arm_instr, (u32 __user *)pc); +		arm_instr = le32_to_cpu(arm_instr); +		bp = (arm_instr & ~0xf0000000) == AARCH32_BREAK_ARM; +	} + +	if (!bp) +		return -EFAULT; + +	info = (siginfo_t) { +		.si_signo = SIGTRAP, +		.si_errno = 0, +		.si_code  = TRAP_BRKPT, +		.si_addr  = pc, +	}; + +	force_sig_info(SIGTRAP, &info, current); +	return 0; +} + +static int __init debug_traps_init(void) +{ +	hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, +			      TRAP_HWBKPT, "single-step handler"); +	hook_debug_fault_code(DBG_ESR_EVT_BRK, 
brk_handler, SIGTRAP, +			      TRAP_BRKPT, "ptrace BRK handler"); +	return 0; +} +arch_initcall(debug_traps_init); + +/* Re-enable single step for syscall restarting. */ +void user_rewind_single_step(struct task_struct *task) +{ +	/* +	 * If single step is active for this thread, then set SPSR.SS +	 * to 1 to avoid returning to the active-pending state. +	 */ +	if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) +		set_regs_spsr_ss(task_pt_regs(task)); +} + +void user_fastforward_single_step(struct task_struct *task) +{ +	if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) +		clear_regs_spsr_ss(task_pt_regs(task)); +} + +/* Kernel API */ +void kernel_enable_single_step(struct pt_regs *regs) +{ +	WARN_ON(!irqs_disabled()); +	set_regs_spsr_ss(regs); +	mdscr_write(mdscr_read() | DBG_MDSCR_SS); +	enable_debug_monitors(DBG_ACTIVE_EL1); +} + +void kernel_disable_single_step(void) +{ +	WARN_ON(!irqs_disabled()); +	mdscr_write(mdscr_read() & ~DBG_MDSCR_SS); +	disable_debug_monitors(DBG_ACTIVE_EL1); +} + +int kernel_active_single_step(void) +{ +	WARN_ON(!irqs_disabled()); +	return mdscr_read() & DBG_MDSCR_SS; +} + +/* ptrace API */ +void user_enable_single_step(struct task_struct *task) +{ +	set_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); +	set_regs_spsr_ss(task_pt_regs(task)); +} + +void user_disable_single_step(struct task_struct *task) +{ +	clear_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); +} diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S new file mode 100644 index 00000000000..619b1dd7bcd --- /dev/null +++ b/arch/arm64/kernel/efi-entry.S @@ -0,0 +1,108 @@ +/* + * EFI entry point. + * + * Copyright (C) 2013, 2014 Red Hat, Inc. + * Author: Mark Salter <msalter@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/linkage.h> +#include <linux/init.h> + +#include <asm/assembler.h> + +#define EFI_LOAD_ERROR 0x8000000000000001 + +	__INIT + +	/* +	 * We arrive here from the EFI boot manager with: +	 * +	 *    * CPU in little-endian mode +	 *    * MMU on with identity-mapped RAM +	 *    * Icache and Dcache on +	 * +	 * We will most likely be running from some place other than where +	 * we want to be. The kernel image wants to be placed at TEXT_OFFSET +	 * from start of RAM. +	 */ +ENTRY(efi_stub_entry) +	/* +	 * Create a stack frame to save FP/LR with extra space +	 * for image_addr variable passed to efi_entry(). +	 */ +	stp	x29, x30, [sp, #-32]! + +	/* +	 * Call efi_entry to do the real work. +	 * x0 and x1 are already set up by firmware. Current runtime +	 * address of image is calculated and passed via *image_addr. +	 * +	 * unsigned long efi_entry(void *handle, +	 *                         efi_system_table_t *sys_table, +	 *                         unsigned long *image_addr) ; +	 */ +	adrp	x8, _text +	add	x8, x8, #:lo12:_text +	add	x2, sp, 16 +	str	x8, [x2] +	bl	efi_entry +	cmn	x0, #1 +	b.eq	efi_load_fail + +	/* +	 * efi_entry() will have relocated the kernel image if necessary +	 * and we return here with device tree address in x0 and the kernel +	 * entry point stored at *image_addr. Save those values in registers +	 * which are callee preserved. +	 */ +	mov	x20, x0		// DTB address +	ldr	x0, [sp, #16]	// relocated _text address +	mov	x21, x0 + +	/* +	 * Flush dcache covering current runtime addresses +	 * of kernel text/data. 
Then flush all of icache. +	 */ +	adrp	x1, _text +	add	x1, x1, #:lo12:_text +	adrp	x2, _edata +	add	x2, x2, #:lo12:_edata +	sub	x1, x2, x1 + +	bl	__flush_dcache_area +	ic	ialluis + +	/* Turn off Dcache and MMU */ +	mrs	x0, CurrentEL +	cmp	x0, #CurrentEL_EL2 +	b.ne	1f +	mrs	x0, sctlr_el2 +	bic	x0, x0, #1 << 0	// clear SCTLR.M +	bic	x0, x0, #1 << 2	// clear SCTLR.C +	msr	sctlr_el2, x0 +	isb +	b	2f +1: +	mrs	x0, sctlr_el1 +	bic	x0, x0, #1 << 0	// clear SCTLR.M +	bic	x0, x0, #1 << 2	// clear SCTLR.C +	msr	sctlr_el1, x0 +	isb +2: +	/* Jump to kernel entry point */ +	mov	x0, x20 +	mov	x1, xzr +	mov	x2, xzr +	mov	x3, xzr +	br	x21 + +efi_load_fail: +	mov	x0, #EFI_LOAD_ERROR +	ldp	x29, x30, [sp], #32 +	ret + +ENDPROC(efi_stub_entry) diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c new file mode 100644 index 00000000000..e786e6cdc40 --- /dev/null +++ b/arch/arm64/kernel/efi-stub.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2013, 2014 Linaro Ltd;  <roy.franz@linaro.org> + * + * This file implements the EFI boot stub for the arm64 kernel. + * Adapted from ARM version by Mark Salter <msalter@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/efi.h> +#include <linux/libfdt.h> +#include <asm/sections.h> + +/* + * AArch64 requires the DTB to be 8-byte aligned in the first 512MiB from + * start of kernel and may not cross a 2MiB boundary. We set alignment to + * 2MiB so we know it won't cross a 2MiB boundary. + */ +#define EFI_FDT_ALIGN	SZ_2M   /* used by allocate_new_fdt_and_exit_boot() */ +#define MAX_FDT_OFFSET	SZ_512M + +#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) + +static void efi_char16_printk(efi_system_table_t *sys_table_arg, +			      efi_char16_t *str); + +static efi_status_t efi_open_volume(efi_system_table_t *sys_table, +				    void *__image, void **__fh); +static efi_status_t efi_file_close(void *handle); + +static efi_status_t +efi_file_read(void *handle, unsigned long *size, void *addr); + +static efi_status_t +efi_file_size(efi_system_table_t *sys_table, void *__fh, +	      efi_char16_t *filename_16, void **handle, u64 *file_sz); + +/* Include shared EFI stub code */ +#include "../../../drivers/firmware/efi/efi-stub-helper.c" +#include "../../../drivers/firmware/efi/fdt.c" +#include "../../../drivers/firmware/efi/arm-stub.c" + + +static efi_status_t handle_kernel_image(efi_system_table_t *sys_table, +					unsigned long *image_addr, +					unsigned long *image_size, +					unsigned long *reserve_addr, +					unsigned long *reserve_size, +					unsigned long dram_base, +					efi_loaded_image_t *image) +{ +	efi_status_t status; +	unsigned long kernel_size, kernel_memsize = 0; + +	/* Relocate the image, if required. 
*/ +	kernel_size = _edata - _text; +	if (*image_addr != (dram_base + TEXT_OFFSET)) { +		kernel_memsize = kernel_size + (_end - _edata); +		status = efi_relocate_kernel(sys_table, image_addr, +					     kernel_size, kernel_memsize, +					     dram_base + TEXT_OFFSET, +					     PAGE_SIZE); +		if (status != EFI_SUCCESS) { +			pr_efi_err(sys_table, "Failed to relocate kernel\n"); +			return status; +		} +		if (*image_addr != (dram_base + TEXT_OFFSET)) { +			pr_efi_err(sys_table, "Failed to alloc kernel memory\n"); +			efi_free(sys_table, kernel_memsize, *image_addr); +			return EFI_ERROR; +		} +		*image_size = kernel_memsize; +	} + + +	return EFI_SUCCESS; +} diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c new file mode 100644 index 00000000000..14db1f6e8d7 --- /dev/null +++ b/arch/arm64/kernel/efi.c @@ -0,0 +1,469 @@ +/* + * Extensible Firmware Interface + * + * Based on Extensible Firmware Interface Specification version 2.4 + * + * Copyright (C) 2013, 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/efi.h> +#include <linux/export.h> +#include <linux/memblock.h> +#include <linux/bootmem.h> +#include <linux/of.h> +#include <linux/of_fdt.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <asm/cacheflush.h> +#include <asm/efi.h> +#include <asm/tlbflush.h> +#include <asm/mmu_context.h> + +struct efi_memory_map memmap; + +static efi_runtime_services_t *runtime; + +static u64 efi_system_table; + +static int uefi_debug __initdata; +static int __init uefi_debug_setup(char *str) +{ +	uefi_debug = 1; + +	return 0; +} +early_param("uefi_debug", uefi_debug_setup); + +static int __init is_normal_ram(efi_memory_desc_t *md) +{ +	if (md->attribute & EFI_MEMORY_WB) +		return 1; +	return 0; +} + +static void __init efi_setup_idmap(void) +{ +	struct memblock_region *r; +	efi_memory_desc_t *md; +	u64 paddr, npages, size; + +	for_each_memblock(memory, r) +		create_id_mapping(r->base, r->size, 0); + +	/* map runtime io spaces */ +	for_each_efi_memory_desc(&memmap, md) { +		if (!(md->attribute & EFI_MEMORY_RUNTIME) || is_normal_ram(md)) +			continue; +		paddr = md->phys_addr; +		npages = md->num_pages; +		memrange_efi_to_native(&paddr, &npages); +		size = npages << PAGE_SHIFT; +		create_id_mapping(paddr, size, 1); +	} +} + +static int __init uefi_init(void) +{ +	efi_char16_t *c16; +	char vendor[100] = "unknown"; +	int i, retval; + +	efi.systab = early_memremap(efi_system_table, +				    sizeof(efi_system_table_t)); +	if (efi.systab == NULL) { +		pr_warn("Unable to map EFI system table.\n"); +		return -ENOMEM; +	} + +	set_bit(EFI_BOOT, &efi.flags); +	set_bit(EFI_64BIT, &efi.flags); + +	/* +	 * Verify the EFI Table +	 */ +	if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) { +		pr_err("System table signature incorrect\n"); +		return -EINVAL; +	} +	if ((efi.systab->hdr.revision >> 16) < 2) +		pr_warn("Warning: EFI system table version %d.%02d, expected 2.00 or greater\n", +			efi.systab->hdr.revision >> 16, +			efi.systab->hdr.revision & 0xffff); + +	/* Show what we know for posterity */ +	c16 = early_memremap(efi.systab->fw_vendor, +			     sizeof(vendor)); +	if (c16) { +		for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) +			vendor[i] = c16[i]; +		vendor[i] = '\0'; +	} + +	pr_info("EFI v%u.%.02u by %s\n", +		efi.systab->hdr.revision >> 16, +		efi.systab->hdr.revision & 0xffff, 
vendor); + +	retval = efi_config_init(NULL); +	if (retval == 0) +		set_bit(EFI_CONFIG_TABLES, &efi.flags); + +	early_memunmap(c16, sizeof(vendor)); +	early_memunmap(efi.systab,  sizeof(efi_system_table_t)); + +	return retval; +} + +static __initdata char memory_type_name[][32] = { +	{"Reserved"}, +	{"Loader Code"}, +	{"Loader Data"}, +	{"Boot Code"}, +	{"Boot Data"}, +	{"Runtime Code"}, +	{"Runtime Data"}, +	{"Conventional Memory"}, +	{"Unusable Memory"}, +	{"ACPI Reclaim Memory"}, +	{"ACPI Memory NVS"}, +	{"Memory Mapped I/O"}, +	{"MMIO Port Space"}, +	{"PAL Code"}, +}; + +/* + * Return true for RAM regions we want to permanently reserve. + */ +static __init int is_reserve_region(efi_memory_desc_t *md) +{ +	if (!is_normal_ram(md)) +		return 0; + +	if (md->attribute & EFI_MEMORY_RUNTIME) +		return 1; + +	if (md->type == EFI_ACPI_RECLAIM_MEMORY || +	    md->type == EFI_RESERVED_TYPE) +		return 1; + +	return 0; +} + +static __init void reserve_regions(void) +{ +	efi_memory_desc_t *md; +	u64 paddr, npages, size; + +	if (uefi_debug) +		pr_info("Processing EFI memory map:\n"); + +	for_each_efi_memory_desc(&memmap, md) { +		paddr = md->phys_addr; +		npages = md->num_pages; + +		if (uefi_debug) +			pr_info("  0x%012llx-0x%012llx [%s]", +				paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1, +				memory_type_name[md->type]); + +		memrange_efi_to_native(&paddr, &npages); +		size = npages << PAGE_SHIFT; + +		if (is_normal_ram(md)) +			early_init_dt_add_memory_arch(paddr, size); + +		if (is_reserve_region(md) || +		    md->type == EFI_BOOT_SERVICES_CODE || +		    md->type == EFI_BOOT_SERVICES_DATA) { +			memblock_reserve(paddr, size); +			if (uefi_debug) +				pr_cont("*"); +		} + +		if (uefi_debug) +			pr_cont("\n"); +	} +} + + +static u64 __init free_one_region(u64 start, u64 end) +{ +	u64 size = end - start; + +	if (uefi_debug) +		pr_info("  EFI freeing: 0x%012llx-0x%012llx\n",	start, end - 1); + +	free_bootmem_late(start, size); +	return size; +} + +static u64 __init free_region(u64 start, u64 end) +{ +	u64 map_start, map_end, total = 0; + +	if (end <= start) +		return total; + +	map_start = (u64)memmap.phys_map; +	map_end = PAGE_ALIGN(map_start + (memmap.map_end - memmap.map)); +	map_start &= PAGE_MASK; + +	if (start < map_end && end > map_start) { +		/* region overlaps UEFI memmap */ +		if (start < map_start) +			total += free_one_region(start, map_start); + +		if (map_end < end) +			total += free_one_region(map_end, end); +	} else +		total += free_one_region(start, end); + +	return total; +} + +static void __init free_boot_services(void) +{ +	u64 total_freed = 0; +	u64 keep_end, free_start, free_end; +	efi_memory_desc_t *md; + +	/* +	 * If kernel uses larger pages than UEFI, we have to be careful +	 * not to inadvertantly free memory we want to keep if there is +	 * overlap at the kernel page size alignment. We do not want to +	 * free is_reserve_region() memory nor the UEFI memmap itself. +	 * +	 * The memory map is sorted, so we keep track of the end of +	 * any previous region we want to keep, remember any region +	 * we want to free and defer freeing it until we encounter +	 * the next region we want to keep. This way, before freeing +	 * it, we can clip it as needed to avoid freeing memory we +	 * want to keep for UEFI. +	 */ + +	keep_end = 0; +	free_start = 0; + +	for_each_efi_memory_desc(&memmap, md) { +		u64 paddr, npages, size; + +		if (is_reserve_region(md)) { +			/* +			 * We don't want to free any memory from this region. 
+			 */ +			if (free_start) { +				/* adjust free_end then free region */ +				if (free_end > md->phys_addr) +					free_end -= PAGE_SIZE; +				total_freed += free_region(free_start, free_end); +				free_start = 0; +			} +			keep_end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); +			continue; +		} + +		if (md->type != EFI_BOOT_SERVICES_CODE && +		    md->type != EFI_BOOT_SERVICES_DATA) { +			/* no need to free this region */ +			continue; +		} + +		/* +		 * We want to free memory from this region. +		 */ +		paddr = md->phys_addr; +		npages = md->num_pages; +		memrange_efi_to_native(&paddr, &npages); +		size = npages << PAGE_SHIFT; + +		if (free_start) { +			if (paddr <= free_end) +				free_end = paddr + size; +			else { +				total_freed += free_region(free_start, free_end); +				free_start = paddr; +				free_end = paddr + size; +			} +		} else { +			free_start = paddr; +			free_end = paddr + size; +		} +		if (free_start < keep_end) { +			free_start += PAGE_SIZE; +			if (free_start >= free_end) +				free_start = 0; +		} +	} +	if (free_start) +		total_freed += free_region(free_start, free_end); + +	if (total_freed) +		pr_info("Freed 0x%llx bytes of EFI boot services memory", +			total_freed); +} + +void __init efi_init(void) +{ +	struct efi_fdt_params params; + +	/* Grab UEFI information placed in FDT by stub */ +	if (!efi_get_fdt_params(¶ms, uefi_debug)) +		return; + +	efi_system_table = params.system_table; + +	memblock_reserve(params.mmap & PAGE_MASK, +			 PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK))); +	memmap.phys_map = (void *)params.mmap; +	memmap.map = early_memremap(params.mmap, params.mmap_size); +	memmap.map_end = memmap.map + params.mmap_size; +	memmap.desc_size = params.desc_size; +	memmap.desc_version = params.desc_ver; + +	if (uefi_init() < 0) +		return; + +	reserve_regions(); +} + +void __init efi_idmap_init(void) +{ +	if (!efi_enabled(EFI_BOOT)) +		return; + +	/* boot time idmap_pg_dir is incomplete, so fill in missing parts */ +	efi_setup_idmap(); +} + +static int __init remap_region(efi_memory_desc_t *md, void **new) +{ +	u64 paddr, vaddr, npages, size; + +	paddr = md->phys_addr; +	npages = md->num_pages; +	memrange_efi_to_native(&paddr, &npages); +	size = npages << PAGE_SHIFT; + +	if (is_normal_ram(md)) +		vaddr = (__force u64)ioremap_cache(paddr, size); +	else +		vaddr = (__force u64)ioremap(paddr, size); + +	if (!vaddr) { +		pr_err("Unable to remap 0x%llx pages @ %p\n", +		       npages, (void *)paddr); +		return 0; +	} + +	/* adjust for any rounding when EFI and system pagesize differs */ +	md->virt_addr = vaddr + (md->phys_addr - paddr); + +	if (uefi_debug) +		pr_info("  EFI remap 0x%012llx => %p\n", +			md->phys_addr, (void *)md->virt_addr); + +	memcpy(*new, md, memmap.desc_size); +	*new += memmap.desc_size; + +	return 1; +} + +/* + * Switch UEFI from an identity map to a kernel virtual map + */ +static int __init arm64_enter_virtual_mode(void) +{ +	efi_memory_desc_t *md; +	phys_addr_t virtmap_phys; +	void *virtmap, *virt_md; +	efi_status_t status; +	u64 mapsize; +	int count = 0; +	unsigned long flags; + +	if (!efi_enabled(EFI_BOOT)) { +		pr_info("EFI services will not be available.\n"); +		return -1; +	} + +	pr_info("Remapping and enabling EFI services.\n"); + +	/* replace early memmap mapping with permanent mapping */ +	mapsize = memmap.map_end - memmap.map; +	early_memunmap(memmap.map, mapsize); +	memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map, +						   mapsize); +	memmap.map_end = memmap.map + mapsize; + +	efi.memmap = 
&memmap; + +	/* Map the runtime regions */ +	virtmap = kmalloc(mapsize, GFP_KERNEL); +	if (!virtmap) { +		pr_err("Failed to allocate EFI virtual memmap\n"); +		return -1; +	} +	virtmap_phys = virt_to_phys(virtmap); +	virt_md = virtmap; + +	for_each_efi_memory_desc(&memmap, md) { +		if (!(md->attribute & EFI_MEMORY_RUNTIME)) +			continue; +		if (remap_region(md, &virt_md)) +			++count; +	} + +	efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table); +	if (efi.systab) +		set_bit(EFI_SYSTEM_TABLES, &efi.flags); + +	local_irq_save(flags); +	cpu_switch_mm(idmap_pg_dir, &init_mm); + +	/* Call SetVirtualAddressMap with the physical address of the map */ +	runtime = efi.systab->runtime; +	efi.set_virtual_address_map = runtime->set_virtual_address_map; + +	status = efi.set_virtual_address_map(count * memmap.desc_size, +					     memmap.desc_size, +					     memmap.desc_version, +					     (efi_memory_desc_t *)virtmap_phys); +	cpu_set_reserved_ttbr0(); +	flush_tlb_all(); +	local_irq_restore(flags); + +	kfree(virtmap); + +	free_boot_services(); + +	if (status != EFI_SUCCESS) { +		pr_err("Failed to set EFI virtual address map! [%lx]\n", +			status); +		return -1; +	} + +	/* Set up runtime services function pointers */ +	runtime = efi.systab->runtime; +	efi.get_time = runtime->get_time; +	efi.set_time = runtime->set_time; +	efi.get_wakeup_time = runtime->get_wakeup_time; +	efi.set_wakeup_time = runtime->set_wakeup_time; +	efi.get_variable = runtime->get_variable; +	efi.get_next_variable = runtime->get_next_variable; +	efi.set_variable = runtime->set_variable; +	efi.query_variable_info = runtime->query_variable_info; +	efi.update_capsule = runtime->update_capsule; +	efi.query_capsule_caps = runtime->query_capsule_caps; +	efi.get_next_high_mono_count = runtime->get_next_high_mono_count; +	efi.reset_system = runtime->reset_system; + +	set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + +	return 0; +} +early_initcall(arm64_enter_virtual_mode); diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S new file mode 100644 index 00000000000..d358ccacfc0 --- /dev/null +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -0,0 +1,67 @@ +/* + * FP/SIMD state saving and restoring + * + * Copyright (C) 2012 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> + +#include <asm/assembler.h> +#include <asm/fpsimdmacros.h> + +/* + * Save the FP registers. + * + * x0 - pointer to struct fpsimd_state + */ +ENTRY(fpsimd_save_state) +	fpsimd_save x0, 8 +	ret +ENDPROC(fpsimd_save_state) + +/* + * Load the FP registers. + * + * x0 - pointer to struct fpsimd_state + */ +ENTRY(fpsimd_load_state) +	fpsimd_restore x0, 8 +	ret +ENDPROC(fpsimd_load_state) + +#ifdef CONFIG_KERNEL_MODE_NEON + +/* + * Save the bottom n FP registers. 
+ * + * x0 - pointer to struct fpsimd_partial_state + */ +ENTRY(fpsimd_save_partial_state) +	fpsimd_save_partial x0, 1, 8, 9 +	ret +ENDPROC(fpsimd_load_partial_state) + +/* + * Load the bottom n FP registers. + * + * x0 - pointer to struct fpsimd_partial_state + */ +ENTRY(fpsimd_load_partial_state) +	fpsimd_restore_partial x0, 8, 9 +	ret +ENDPROC(fpsimd_load_partial_state) + +#endif diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S new file mode 100644 index 00000000000..aa5f9fcbf9e --- /dev/null +++ b/arch/arm64/kernel/entry-ftrace.S @@ -0,0 +1,218 @@ +/* + * arch/arm64/kernel/entry-ftrace.S + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/ftrace.h> +#include <asm/insn.h> + +/* + * Gcc with -pg will put the following code in the beginning of each function: + *      mov x0, x30 + *      bl _mcount + *	[function's body ...] + * "bl _mcount" may be replaced to "bl ftrace_caller" or NOP if dynamic + * ftrace is enabled. + * + * Please note that x0 as an argument will not be used here because we can + * get lr(x30) of instrumented function at any time by winding up call stack + * as long as the kernel is compiled without -fomit-frame-pointer. + * (or CONFIG_FRAME_POINTER, this is forced on arm64) + * + * stack layout after mcount_enter in _mcount(): + * + * current sp/fp =>  0:+-----+ + * in _mcount()        | x29 | -> instrumented function's fp + *                     +-----+ + *                     | x30 | -> _mcount()'s lr (= instrumented function's pc) + * old sp       => +16:+-----+ + * when instrumented   |     | + * function calls      | ... | + * _mcount()           |     | + *                     |     | + * instrumented => +xx:+-----+ + * function's fp       | x29 | -> parent's fp + *                     +-----+ + *                     | x30 | -> instrumented function's lr (= parent's pc) + *                     +-----+ + *                     | ... | + */ + +	.macro mcount_enter +	stp	x29, x30, [sp, #-16]! 
+	mov	x29, sp +	.endm + +	.macro mcount_exit +	ldp	x29, x30, [sp], #16 +	ret +	.endm + +	.macro mcount_adjust_addr rd, rn +	sub	\rd, \rn, #AARCH64_INSN_SIZE +	.endm + +	/* for instrumented function's parent */ +	.macro mcount_get_parent_fp reg +	ldr	\reg, [x29] +	ldr	\reg, [\reg] +	.endm + +	/* for instrumented function */ +	.macro mcount_get_pc0 reg +	mcount_adjust_addr	\reg, x30 +	.endm + +	.macro mcount_get_pc reg +	ldr	\reg, [x29, #8] +	mcount_adjust_addr	\reg, \reg +	.endm + +	.macro mcount_get_lr reg +	ldr	\reg, [x29] +	ldr	\reg, [\reg, #8] +	mcount_adjust_addr	\reg, \reg +	.endm + +	.macro mcount_get_lr_addr reg +	ldr	\reg, [x29] +	add	\reg, \reg, #8 +	.endm + +#ifndef CONFIG_DYNAMIC_FTRACE +/* + * void _mcount(unsigned long return_address) + * @return_address: return address to instrumented function + * + * This function makes calls, if enabled, to: + *     - tracer function to probe instrumented function's entry, + *     - ftrace_graph_caller to set up an exit hook + */ +ENTRY(_mcount) +#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST +	ldr	x0, =ftrace_trace_stop +	ldr	x0, [x0]		// if ftrace_trace_stop +	ret				//   return; +#endif +	mcount_enter + +	ldr	x0, =ftrace_trace_function +	ldr	x2, [x0] +	adr	x0, ftrace_stub +	cmp	x0, x2			// if (ftrace_trace_function +	b.eq	skip_ftrace_call	//     != ftrace_stub) { + +	mcount_get_pc	x0		//       function's pc +	mcount_get_lr	x1		//       function's lr (= parent's pc) +	blr	x2			//   (*ftrace_trace_function)(pc, lr); + +#ifndef CONFIG_FUNCTION_GRAPH_TRACER +skip_ftrace_call:			//   return; +	mcount_exit			// } +#else +	mcount_exit			//   return; +					// } +skip_ftrace_call: +	ldr	x1, =ftrace_graph_return +	ldr	x2, [x1]		//   if ((ftrace_graph_return +	cmp	x0, x2			//        != ftrace_stub) +	b.ne	ftrace_graph_caller + +	ldr	x1, =ftrace_graph_entry	//     || (ftrace_graph_entry +	ldr	x2, [x1]		//        != ftrace_graph_entry_stub)) +	ldr	x0, =ftrace_graph_entry_stub +	cmp	x0, x2 +	b.ne	ftrace_graph_caller	//     ftrace_graph_caller(); + +	mcount_exit +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +ENDPROC(_mcount) + +#else /* CONFIG_DYNAMIC_FTRACE */ +/* + * _mcount() is used to build the kernel with -pg option, but all the branch + * instructions to _mcount() are replaced to NOP initially at kernel start up, + * and later on, NOP to branch to ftrace_caller() when enabled or branch to + * NOP when disabled per-function base. + */ +ENTRY(_mcount) +	ret +ENDPROC(_mcount) + +/* + * void ftrace_caller(unsigned long return_address) + * @return_address: return address to instrumented function + * + * This function is a counterpart of _mcount() in 'static' ftrace, and + * makes calls to: + *     - tracer function to probe instrumented function's entry, + *     - ftrace_graph_caller to set up an exit hook + */ +ENTRY(ftrace_caller) +	mcount_enter + +	mcount_get_pc0	x0		//     function's pc +	mcount_get_lr	x1		//     function's lr + +	.global ftrace_call +ftrace_call:				// tracer(pc, lr); +	nop				// This will be replaced with "bl xxx" +					// where xxx can be any kind of tracer. 
+ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +	.global ftrace_graph_call +ftrace_graph_call:			// ftrace_graph_caller(); +	nop				// If enabled, this will be replaced +					// "b ftrace_graph_caller" +#endif + +	mcount_exit +ENDPROC(ftrace_caller) +#endif /* CONFIG_DYNAMIC_FTRACE */ + +ENTRY(ftrace_stub) +	ret +ENDPROC(ftrace_stub) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * void ftrace_graph_caller(void) + * + * Called from _mcount() or ftrace_caller() when function_graph tracer is + * selected. + * This function w/ prepare_ftrace_return() fakes link register's value on + * the call stack in order to intercept instrumented function's return path + * and run return_to_handler() later on its exit. + */ +ENTRY(ftrace_graph_caller) +	mcount_get_lr_addr	  x0	//     pointer to function's saved lr +	mcount_get_pc		  x1	//     function's pc +	mcount_get_parent_fp	  x2	//     parent's fp +	bl	prepare_ftrace_return	// prepare_ftrace_return(&lr, pc, fp) + +	mcount_exit +ENDPROC(ftrace_graph_caller) + +/* + * void return_to_handler(void) + * + * Run ftrace_return_to_handler() before going back to parent. + * @fp is checked against the value passed by ftrace_graph_caller() + * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled. + */ +ENTRY(return_to_handler) +	str	x0, [sp, #-16]! +	mov	x0, x29			//     parent's fp +	bl	ftrace_return_to_handler// addr = ftrace_return_to_hander(fp); +	mov	x30, x0			// restore the original return address +	ldr	x0, [sp], #16 +	ret +END(return_to_handler) +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S new file mode 100644 index 00000000000..9ce04ba6bcb --- /dev/null +++ b/arch/arm64/kernel/entry.S @@ -0,0 +1,659 @@ +/* + * Low-level exception handling code + * + * Copyright (C) 2012 ARM Ltd. + * Authors:	Catalin Marinas <catalin.marinas@arm.com> + *		Will Deacon <will.deacon@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/init.h> +#include <linux/linkage.h> + +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/errno.h> +#include <asm/esr.h> +#include <asm/thread_info.h> +#include <asm/unistd.h> +#include <asm/unistd32.h> + +/* + * Bad Abort numbers + *----------------- + */ +#define BAD_SYNC	0 +#define BAD_IRQ		1 +#define BAD_FIQ		2 +#define BAD_ERROR	3 + +	.macro	kernel_entry, el, regsize = 64 +	sub	sp, sp, #S_FRAME_SIZE - S_LR	// room for LR, SP, SPSR, ELR +	.if	\regsize == 32 +	mov	w0, w0				// zero upper 32 bits of x0 +	.endif +	push	x28, x29 +	push	x26, x27 +	push	x24, x25 +	push	x22, x23 +	push	x20, x21 +	push	x18, x19 +	push	x16, x17 +	push	x14, x15 +	push	x12, x13 +	push	x10, x11 +	push	x8, x9 +	push	x6, x7 +	push	x4, x5 +	push	x2, x3 +	push	x0, x1 +	.if	\el == 0 +	mrs	x21, sp_el0 +	get_thread_info tsk			// Ensure MDSCR_EL1.SS is clear, +	ldr	x19, [tsk, #TI_FLAGS]		// since we can unmask debug +	disable_step_tsk x19, x20		// exceptions when scheduling. 
+	.else +	add	x21, sp, #S_FRAME_SIZE +	.endif +	mrs	x22, elr_el1 +	mrs	x23, spsr_el1 +	stp	lr, x21, [sp, #S_LR] +	stp	x22, x23, [sp, #S_PC] + +	/* +	 * Set syscallno to -1 by default (overridden later if real syscall). +	 */ +	.if	\el == 0 +	mvn	x21, xzr +	str	x21, [sp, #S_SYSCALLNO] +	.endif + +	/* +	 * Registers that may be useful after this macro is invoked: +	 * +	 * x21 - aborted SP +	 * x22 - aborted PC +	 * x23 - aborted PSTATE +	*/ +	.endm + +	.macro	kernel_exit, el, ret = 0 +	ldp	x21, x22, [sp, #S_PC]		// load ELR, SPSR +	.if	\el == 0 +	ldr	x23, [sp, #S_SP]		// load return stack pointer +	.endif +	.if	\ret +	ldr	x1, [sp, #S_X1]			// preserve x0 (syscall return) +	add	sp, sp, S_X2 +	.else +	pop	x0, x1 +	.endif +	pop	x2, x3				// load the rest of the registers +	pop	x4, x5 +	pop	x6, x7 +	pop	x8, x9 +	msr	elr_el1, x21			// set up the return data +	msr	spsr_el1, x22 +	.if	\el == 0 +	msr	sp_el0, x23 +	.endif +	pop	x10, x11 +	pop	x12, x13 +	pop	x14, x15 +	pop	x16, x17 +	pop	x18, x19 +	pop	x20, x21 +	pop	x22, x23 +	pop	x24, x25 +	pop	x26, x27 +	pop	x28, x29 +	ldr	lr, [sp], #S_FRAME_SIZE - S_LR	// load LR and restore SP +	eret					// return to kernel +	.endm + +	.macro	get_thread_info, rd +	mov	\rd, sp +	and	\rd, \rd, #~(THREAD_SIZE - 1)	// top of stack +	.endm + +/* + * These are the registers used in the syscall handler, and allow us to + * have in theory up to 7 arguments to a function - x0 to x6. + * + * x7 is reserved for the system call number in 32-bit mode. + */ +sc_nr	.req	x25		// number of system calls +scno	.req	x26		// syscall number +stbl	.req	x27		// syscall table pointer +tsk	.req	x28		// current thread_info + +/* + * Interrupt handling. + */ +	.macro	irq_handler +	ldr	x1, handle_arch_irq +	mov	x0, sp +	blr	x1 +	.endm + +	.text + +/* + * Exception vectors. 
+ */ + +	.align	11 +ENTRY(vectors) +	ventry	el1_sync_invalid		// Synchronous EL1t +	ventry	el1_irq_invalid			// IRQ EL1t +	ventry	el1_fiq_invalid			// FIQ EL1t +	ventry	el1_error_invalid		// Error EL1t + +	ventry	el1_sync			// Synchronous EL1h +	ventry	el1_irq				// IRQ EL1h +	ventry	el1_fiq_invalid			// FIQ EL1h +	ventry	el1_error_invalid		// Error EL1h + +	ventry	el0_sync			// Synchronous 64-bit EL0 +	ventry	el0_irq				// IRQ 64-bit EL0 +	ventry	el0_fiq_invalid			// FIQ 64-bit EL0 +	ventry	el0_error_invalid		// Error 64-bit EL0 + +#ifdef CONFIG_COMPAT +	ventry	el0_sync_compat			// Synchronous 32-bit EL0 +	ventry	el0_irq_compat			// IRQ 32-bit EL0 +	ventry	el0_fiq_invalid_compat		// FIQ 32-bit EL0 +	ventry	el0_error_invalid_compat	// Error 32-bit EL0 +#else +	ventry	el0_sync_invalid		// Synchronous 32-bit EL0 +	ventry	el0_irq_invalid			// IRQ 32-bit EL0 +	ventry	el0_fiq_invalid			// FIQ 32-bit EL0 +	ventry	el0_error_invalid		// Error 32-bit EL0 +#endif +END(vectors) + +/* + * Invalid mode handlers + */ +	.macro	inv_entry, el, reason, regsize = 64 +	kernel_entry el, \regsize +	mov	x0, sp +	mov	x1, #\reason +	mrs	x2, esr_el1 +	b	bad_mode +	.endm + +el0_sync_invalid: +	inv_entry 0, BAD_SYNC +ENDPROC(el0_sync_invalid) + +el0_irq_invalid: +	inv_entry 0, BAD_IRQ +ENDPROC(el0_irq_invalid) + +el0_fiq_invalid: +	inv_entry 0, BAD_FIQ +ENDPROC(el0_fiq_invalid) + +el0_error_invalid: +	inv_entry 0, BAD_ERROR +ENDPROC(el0_error_invalid) + +#ifdef CONFIG_COMPAT +el0_fiq_invalid_compat: +	inv_entry 0, BAD_FIQ, 32 +ENDPROC(el0_fiq_invalid_compat) + +el0_error_invalid_compat: +	inv_entry 0, BAD_ERROR, 32 +ENDPROC(el0_error_invalid_compat) +#endif + +el1_sync_invalid: +	inv_entry 1, BAD_SYNC +ENDPROC(el1_sync_invalid) + +el1_irq_invalid: +	inv_entry 1, BAD_IRQ +ENDPROC(el1_irq_invalid) + +el1_fiq_invalid: +	inv_entry 1, BAD_FIQ +ENDPROC(el1_fiq_invalid) + +el1_error_invalid: +	inv_entry 1, BAD_ERROR +ENDPROC(el1_error_invalid) + +/* + * EL1 mode handlers. 
+ */ +	.align	6 +el1_sync: +	kernel_entry 1 +	mrs	x1, esr_el1			// read the syndrome register +	lsr	x24, x1, #ESR_EL1_EC_SHIFT	// exception class +	cmp	x24, #ESR_EL1_EC_DABT_EL1	// data abort in EL1 +	b.eq	el1_da +	cmp	x24, #ESR_EL1_EC_SYS64		// configurable trap +	b.eq	el1_undef +	cmp	x24, #ESR_EL1_EC_SP_ALIGN	// stack alignment exception +	b.eq	el1_sp_pc +	cmp	x24, #ESR_EL1_EC_PC_ALIGN	// pc alignment exception +	b.eq	el1_sp_pc +	cmp	x24, #ESR_EL1_EC_UNKNOWN	// unknown exception in EL1 +	b.eq	el1_undef +	cmp	x24, #ESR_EL1_EC_BREAKPT_EL1	// debug exception in EL1 +	b.ge	el1_dbg +	b	el1_inv +el1_da: +	/* +	 * Data abort handling +	 */ +	mrs	x0, far_el1 +	enable_dbg +	// re-enable interrupts if they were enabled in the aborted context +	tbnz	x23, #7, 1f			// PSR_I_BIT +	enable_irq +1: +	mov	x2, sp				// struct pt_regs +	bl	do_mem_abort + +	// disable interrupts before pulling preserved data off the stack +	disable_irq +	kernel_exit 1 +el1_sp_pc: +	/* +	 * Stack or PC alignment exception handling +	 */ +	mrs	x0, far_el1 +	enable_dbg +	mov	x2, sp +	b	do_sp_pc_abort +el1_undef: +	/* +	 * Undefined instruction +	 */ +	enable_dbg +	mov	x0, sp +	b	do_undefinstr +el1_dbg: +	/* +	 * Debug exception handling +	 */ +	cmp	x24, #ESR_EL1_EC_BRK64		// if BRK64 +	cinc	x24, x24, eq			// set bit '0' +	tbz	x24, #0, el1_inv		// EL1 only +	mrs	x0, far_el1 +	mov	x2, sp				// struct pt_regs +	bl	do_debug_exception +	enable_dbg +	kernel_exit 1 +el1_inv: +	// TODO: add support for undefined instructions in kernel mode +	enable_dbg +	mov	x0, sp +	mov	x1, #BAD_SYNC +	mrs	x2, esr_el1 +	b	bad_mode +ENDPROC(el1_sync) + +	.align	6 +el1_irq: +	kernel_entry 1 +	enable_dbg +#ifdef CONFIG_TRACE_IRQFLAGS +	bl	trace_hardirqs_off +#endif + +	irq_handler + +#ifdef CONFIG_PREEMPT +	get_thread_info tsk +	ldr	w24, [tsk, #TI_PREEMPT]		// get preempt count +	cbnz	w24, 1f				// preempt count != 0 +	ldr	x0, [tsk, #TI_FLAGS]		// get flags +	tbz	x0, #TIF_NEED_RESCHED, 1f	// needs rescheduling? +	bl	el1_preempt +1: +#endif +#ifdef CONFIG_TRACE_IRQFLAGS +	bl	trace_hardirqs_on +#endif +	kernel_exit 1 +ENDPROC(el1_irq) + +#ifdef CONFIG_PREEMPT +el1_preempt: +	mov	x24, lr +1:	bl	preempt_schedule_irq		// irq en/disable is done inside +	ldr	x0, [tsk, #TI_FLAGS]		// get new tasks TI_FLAGS +	tbnz	x0, #TIF_NEED_RESCHED, 1b	// needs rescheduling? +	ret	x24 +#endif + +/* + * EL0 mode handlers. 
+ */ +	.align	6 +el0_sync: +	kernel_entry 0 +	mrs	x25, esr_el1			// read the syndrome register +	lsr	x24, x25, #ESR_EL1_EC_SHIFT	// exception class +	cmp	x24, #ESR_EL1_EC_SVC64		// SVC in 64-bit state +	b.eq	el0_svc +	adr	lr, ret_to_user +	cmp	x24, #ESR_EL1_EC_DABT_EL0	// data abort in EL0 +	b.eq	el0_da +	cmp	x24, #ESR_EL1_EC_IABT_EL0	// instruction abort in EL0 +	b.eq	el0_ia +	cmp	x24, #ESR_EL1_EC_FP_ASIMD	// FP/ASIMD access +	b.eq	el0_fpsimd_acc +	cmp	x24, #ESR_EL1_EC_FP_EXC64	// FP/ASIMD exception +	b.eq	el0_fpsimd_exc +	cmp	x24, #ESR_EL1_EC_SYS64		// configurable trap +	b.eq	el0_undef +	cmp	x24, #ESR_EL1_EC_SP_ALIGN	// stack alignment exception +	b.eq	el0_sp_pc +	cmp	x24, #ESR_EL1_EC_PC_ALIGN	// pc alignment exception +	b.eq	el0_sp_pc +	cmp	x24, #ESR_EL1_EC_UNKNOWN	// unknown exception in EL0 +	b.eq	el0_undef +	cmp	x24, #ESR_EL1_EC_BREAKPT_EL0	// debug exception in EL0 +	b.ge	el0_dbg +	b	el0_inv + +#ifdef CONFIG_COMPAT +	.align	6 +el0_sync_compat: +	kernel_entry 0, 32 +	mrs	x25, esr_el1			// read the syndrome register +	lsr	x24, x25, #ESR_EL1_EC_SHIFT	// exception class +	cmp	x24, #ESR_EL1_EC_SVC32		// SVC in 32-bit state +	b.eq	el0_svc_compat +	adr	lr, ret_to_user +	cmp	x24, #ESR_EL1_EC_DABT_EL0	// data abort in EL0 +	b.eq	el0_da +	cmp	x24, #ESR_EL1_EC_IABT_EL0	// instruction abort in EL0 +	b.eq	el0_ia +	cmp	x24, #ESR_EL1_EC_FP_ASIMD	// FP/ASIMD access +	b.eq	el0_fpsimd_acc +	cmp	x24, #ESR_EL1_EC_FP_EXC32	// FP/ASIMD exception +	b.eq	el0_fpsimd_exc +	cmp	x24, #ESR_EL1_EC_UNKNOWN	// unknown exception in EL0 +	b.eq	el0_undef +	cmp	x24, #ESR_EL1_EC_CP15_32	// CP15 MRC/MCR trap +	b.eq	el0_undef +	cmp	x24, #ESR_EL1_EC_CP15_64	// CP15 MRRC/MCRR trap +	b.eq	el0_undef +	cmp	x24, #ESR_EL1_EC_CP14_MR	// CP14 MRC/MCR trap +	b.eq	el0_undef +	cmp	x24, #ESR_EL1_EC_CP14_LS	// CP14 LDC/STC trap +	b.eq	el0_undef +	cmp	x24, #ESR_EL1_EC_CP14_64	// CP14 MRRC/MCRR trap +	b.eq	el0_undef +	cmp	x24, #ESR_EL1_EC_BREAKPT_EL0	// debug exception in EL0 +	b.ge	el0_dbg +	b	el0_inv +el0_svc_compat: +	/* +	 * AArch32 syscall handling +	 */ +	adr	stbl, compat_sys_call_table	// load compat syscall table pointer +	uxtw	scno, w7			// syscall number in w7 (r7) +	mov     sc_nr, #__NR_compat_syscalls +	b	el0_svc_naked + +	.align	6 +el0_irq_compat: +	kernel_entry 0, 32 +	b	el0_irq_naked +#endif + +el0_da: +	/* +	 * Data abort handling +	 */ +	mrs	x0, far_el1 +	bic	x0, x0, #(0xff << 56) +	// enable interrupts before calling the main handler +	enable_dbg_and_irq +	mov	x1, x25 +	mov	x2, sp +	b	do_mem_abort +el0_ia: +	/* +	 * Instruction abort handling +	 */ +	mrs	x0, far_el1 +	// enable interrupts before calling the main handler +	enable_dbg_and_irq +	orr	x1, x25, #1 << 24		// use reserved ISS bit for instruction aborts +	mov	x2, sp +	b	do_mem_abort +el0_fpsimd_acc: +	/* +	 * Floating Point or Advanced SIMD access +	 */ +	enable_dbg +	mov	x0, x25 +	mov	x1, sp +	b	do_fpsimd_acc +el0_fpsimd_exc: +	/* +	 * Floating Point or Advanced SIMD exception +	 */ +	enable_dbg +	mov	x0, x25 +	mov	x1, sp +	b	do_fpsimd_exc +el0_sp_pc: +	/* +	 * Stack or PC alignment exception handling +	 */ +	mrs	x0, far_el1 +	// enable interrupts before calling the main handler +	enable_dbg_and_irq +	mov	x1, x25 +	mov	x2, sp +	b	do_sp_pc_abort +el0_undef: +	/* +	 * Undefined instruction +	 */ +	// enable interrupts before calling the main handler +	enable_dbg_and_irq +	mov	x0, sp +	b	do_undefinstr +el0_dbg: +	/* +	 * Debug exception handling +	 */ +	tbnz	x24, #0, el0_inv		// EL0 only +	mrs	x0, far_el1 +	mov	x1, x25 +	mov	x2, sp +	bl	do_debug_exception +	
enable_dbg +	b	ret_to_user +el0_inv: +	enable_dbg +	mov	x0, sp +	mov	x1, #BAD_SYNC +	mrs	x2, esr_el1 +	b	bad_mode +ENDPROC(el0_sync) + +	.align	6 +el0_irq: +	kernel_entry 0 +el0_irq_naked: +	enable_dbg +#ifdef CONFIG_TRACE_IRQFLAGS +	bl	trace_hardirqs_off +#endif + +	irq_handler + +#ifdef CONFIG_TRACE_IRQFLAGS +	bl	trace_hardirqs_on +#endif +	b	ret_to_user +ENDPROC(el0_irq) + +/* + * Register switch for AArch64. The callee-saved registers need to be saved + * and restored. On entry: + *   x0 = previous task_struct (must be preserved across the switch) + *   x1 = next task_struct + * Previous and next are guaranteed not to be the same. + * + */ +ENTRY(cpu_switch_to) +	add	x8, x0, #THREAD_CPU_CONTEXT +	mov	x9, sp +	stp	x19, x20, [x8], #16		// store callee-saved registers +	stp	x21, x22, [x8], #16 +	stp	x23, x24, [x8], #16 +	stp	x25, x26, [x8], #16 +	stp	x27, x28, [x8], #16 +	stp	x29, x9, [x8], #16 +	str	lr, [x8] +	add	x8, x1, #THREAD_CPU_CONTEXT +	ldp	x19, x20, [x8], #16		// restore callee-saved registers +	ldp	x21, x22, [x8], #16 +	ldp	x23, x24, [x8], #16 +	ldp	x25, x26, [x8], #16 +	ldp	x27, x28, [x8], #16 +	ldp	x29, x9, [x8], #16 +	ldr	lr, [x8] +	mov	sp, x9 +	ret +ENDPROC(cpu_switch_to) + +/* + * This is the fast syscall return path.  We do as little as possible here, + * and this includes saving x0 back into the kernel stack. + */ +ret_fast_syscall: +	disable_irq				// disable interrupts +	ldr	x1, [tsk, #TI_FLAGS] +	and	x2, x1, #_TIF_WORK_MASK +	cbnz	x2, fast_work_pending +	enable_step_tsk x1, x2 +	kernel_exit 0, ret = 1 + +/* + * Ok, we need to do extra processing, enter the slow path. + */ +fast_work_pending: +	str	x0, [sp, #S_X0]			// returned x0 +work_pending: +	tbnz	x1, #TIF_NEED_RESCHED, work_resched +	/* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ +	ldr	x2, [sp, #S_PSTATE] +	mov	x0, sp				// 'regs' +	tst	x2, #PSR_MODE_MASK		// user mode regs? +	b.ne	no_work_pending			// returning to kernel +	enable_irq				// enable interrupts for do_notify_resume() +	bl	do_notify_resume +	b	ret_to_user +work_resched: +	bl	schedule + +/* + * "slow" syscall return path. + */ +ret_to_user: +	disable_irq				// disable interrupts +	ldr	x1, [tsk, #TI_FLAGS] +	and	x2, x1, #_TIF_WORK_MASK +	cbnz	x2, work_pending +	enable_step_tsk x1, x2 +no_work_pending: +	kernel_exit 0, ret = 0 +ENDPROC(ret_to_user) + +/* + * This is how we return from a fork. + */ +ENTRY(ret_from_fork) +	bl	schedule_tail +	cbz	x19, 1f				// not a kernel thread +	mov	x0, x20 +	blr	x19 +1:	get_thread_info tsk +	b	ret_to_user +ENDPROC(ret_from_fork) + +/* + * SVC handler. + */ +	.align	6 +el0_svc: +	adrp	stbl, sys_call_table		// load syscall table pointer +	uxtw	scno, w8			// syscall number in w8 +	mov	sc_nr, #__NR_syscalls +el0_svc_naked:					// compat entry point +	stp	x0, scno, [sp, #S_ORIG_X0]	// save the original x0 and syscall number +	enable_dbg_and_irq + +	ldr	x16, [tsk, #TI_FLAGS]		// check for syscall hooks +	tst	x16, #_TIF_SYSCALL_WORK +	b.ne	__sys_trace +	adr	lr, ret_fast_syscall		// return address +	cmp     scno, sc_nr                     // check upper syscall limit +	b.hs	ni_sys +	ldr	x16, [stbl, scno, lsl #3]	// address in the syscall table +	br	x16				// call sys_* routine +ni_sys: +	mov	x0, sp +	b	do_ni_syscall +ENDPROC(el0_svc) + +	/* +	 * This is the really slow path.  We're going to be doing context +	 * switches, and waiting for our parent to respond. 
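
Both the fast path above and the traced path that follows funnel into the same bounds-checked indirect call through the syscall table: compare scno against sc_nr, bail out to do_ni_syscall if it is out of range, otherwise load the pointer at scno * 8 and branch to it. A minimal C sketch of that dispatch, with a toy table standing in for sys_call_table:

#include <stdio.h>

typedef long (*syscall_fn_t)(long, long, long, long, long, long);

static long toy_getpid(long a, long b, long c, long d, long e, long f)
{
	return 42;			/* stand-in for a real sys_* routine */
}

/* Toy stand-in for sys_call_table; the real table is indexed the same
 * way, scaled by the pointer size ("lsl #3" in the assembly above). */
static const syscall_fn_t toy_table[] = { toy_getpid };
#define TOY_NR_SYSCALLS (sizeof(toy_table) / sizeof(toy_table[0]))

static long dispatch(unsigned long scno, long *args)
{
	if (scno >= TOY_NR_SYSCALLS)	/* cmp scno, sc_nr; b.hs ni_sys */
		return -38;		/* -ENOSYS */
	return toy_table[scno](args[0], args[1], args[2],
			       args[3], args[4], args[5]);
}

int main(void)
{
	long args[6] = { 0 };
	printf("%ld\n", dispatch(0, args));
	return 0;
}
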
+	 */ +__sys_trace: +	mov	x0, sp +	bl	syscall_trace_enter +	adr	lr, __sys_trace_return		// return address +	uxtw	scno, w0			// syscall number (possibly new) +	mov	x1, sp				// pointer to regs +	cmp	scno, sc_nr			// check upper syscall limit +	b.hs	ni_sys +	ldp	x0, x1, [sp]			// restore the syscall args +	ldp	x2, x3, [sp, #S_X2] +	ldp	x4, x5, [sp, #S_X4] +	ldp	x6, x7, [sp, #S_X6] +	ldr	x16, [stbl, scno, lsl #3]	// address in the syscall table +	br	x16				// call sys_* routine + +__sys_trace_return: +	str	x0, [sp]			// save returned x0 +	mov	x0, sp +	bl	syscall_trace_exit +	b	ret_to_user + +/* + * Special system call wrappers. + */ +ENTRY(sys_rt_sigreturn_wrapper) +	mov	x0, sp +	b	sys_rt_sigreturn +ENDPROC(sys_rt_sigreturn_wrapper) + +ENTRY(handle_arch_irq) +	.quad	0 diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c new file mode 100644 index 00000000000..ad8aebb1cde --- /dev/null +++ b/arch/arm64/kernel/fpsimd.c @@ -0,0 +1,320 @@ +/* + * FP/SIMD context switching and fault handling + * + * Copyright (C) 2012 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/cpu_pm.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/hardirq.h> + +#include <asm/fpsimd.h> +#include <asm/cputype.h> + +#define FPEXC_IOF	(1 << 0) +#define FPEXC_DZF	(1 << 1) +#define FPEXC_OFF	(1 << 2) +#define FPEXC_UFF	(1 << 3) +#define FPEXC_IXF	(1 << 4) +#define FPEXC_IDF	(1 << 7) + +/* + * In order to reduce the number of times the FPSIMD state is needlessly saved + * and restored, we need to keep track of two things: + * (a) for each task, we need to remember which CPU was the last one to have + *     the task's FPSIMD state loaded into its FPSIMD registers; + * (b) for each CPU, we need to remember which task's userland FPSIMD state has + *     been loaded into its FPSIMD registers most recently, or whether it has + *     been used to perform kernel mode NEON in the meantime. + * + * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to + * the id of the current CPU everytime the state is loaded onto a CPU. For (b), + * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the + * address of the userland FPSIMD state of the task that was loaded onto the CPU + * the most recently, or NULL if kernel mode NEON has been performed after that. + * + * With this in place, we no longer have to restore the next FPSIMD state right + * when switching between tasks. Instead, we can defer this check to userland + * resume, at which time we verify whether the CPU's fpsimd_last_state and the + * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we + * can omit the FPSIMD restore. + * + * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to + * indicate whether or not the userland FPSIMD state of the current task is + * present in the registers. 
The flag is set unless the FPSIMD registers of this + * CPU currently contain the most recent userland FPSIMD state of the current + * task. + * + * For a certain task, the sequence may look something like this: + * - the task gets scheduled in; if both the task's fpsimd_state.cpu field + *   contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu + *   variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is + *   cleared, otherwise it is set; + * + * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's + *   userland FPSIMD state is copied from memory to the registers, the task's + *   fpsimd_state.cpu field is set to the id of the current CPU, the current + *   CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the + *   TIF_FOREIGN_FPSTATE flag is cleared; + * + * - the task executes an ordinary syscall; upon return to userland, the + *   TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is + *   restored; + * + * - the task executes a syscall which executes some NEON instructions; this is + *   preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD + *   register contents to memory, clears the fpsimd_last_state per-cpu variable + *   and sets the TIF_FOREIGN_FPSTATE flag; + * + * - the task gets preempted after kernel_neon_end() is called; as we have not + *   returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so + *   whatever is in the FPSIMD registers is not saved to memory, but discarded. + */ +static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state); + +/* + * Trapped FP/ASIMD access. + */ +void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) +{ +	/* TODO: implement lazy context saving/restoring */ +	WARN_ON(1); +} + +/* + * Raise a SIGFPE for the current process. + */ +void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) +{ +	siginfo_t info; +	unsigned int si_code = 0; + +	if (esr & FPEXC_IOF) +		si_code = FPE_FLTINV; +	else if (esr & FPEXC_DZF) +		si_code = FPE_FLTDIV; +	else if (esr & FPEXC_OFF) +		si_code = FPE_FLTOVF; +	else if (esr & FPEXC_UFF) +		si_code = FPE_FLTUND; +	else if (esr & FPEXC_IXF) +		si_code = FPE_FLTRES; + +	memset(&info, 0, sizeof(info)); +	info.si_signo = SIGFPE; +	info.si_code = si_code; +	info.si_addr = (void __user *)instruction_pointer(regs); + +	send_sig_info(SIGFPE, &info, current); +} + +void fpsimd_thread_switch(struct task_struct *next) +{ +	/* +	 * Save the current FPSIMD state to memory, but only if whatever is in +	 * the registers is in fact the most recent userland FPSIMD state of +	 * 'current'. +	 */ +	if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) +		fpsimd_save_state(¤t->thread.fpsimd_state); + +	if (next->mm) { +		/* +		 * If we are switching to a task whose most recent userland +		 * FPSIMD state is already in the registers of *this* cpu, +		 * we can skip loading the state from memory. Otherwise, set +		 * the TIF_FOREIGN_FPSTATE flag so the state will be loaded +		 * upon the next return to userland. 
+		 */ +		struct fpsimd_state *st = &next->thread.fpsimd_state; + +		if (__this_cpu_read(fpsimd_last_state) == st +		    && st->cpu == smp_processor_id()) +			clear_ti_thread_flag(task_thread_info(next), +					     TIF_FOREIGN_FPSTATE); +		else +			set_ti_thread_flag(task_thread_info(next), +					   TIF_FOREIGN_FPSTATE); +	} +} + +void fpsimd_flush_thread(void) +{ +	memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); +	set_thread_flag(TIF_FOREIGN_FPSTATE); +} + +/* + * Save the userland FPSIMD state of 'current' to memory, but only if the state + * currently held in the registers does in fact belong to 'current' + */ +void fpsimd_preserve_current_state(void) +{ +	preempt_disable(); +	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) +		fpsimd_save_state(¤t->thread.fpsimd_state); +	preempt_enable(); +} + +/* + * Load the userland FPSIMD state of 'current' from memory, but only if the + * FPSIMD state already held in the registers is /not/ the most recent FPSIMD + * state of 'current' + */ +void fpsimd_restore_current_state(void) +{ +	preempt_disable(); +	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { +		struct fpsimd_state *st = ¤t->thread.fpsimd_state; + +		fpsimd_load_state(st); +		this_cpu_write(fpsimd_last_state, st); +		st->cpu = smp_processor_id(); +	} +	preempt_enable(); +} + +/* + * Load an updated userland FPSIMD state for 'current' from memory and set the + * flag that indicates that the FPSIMD register contents are the most recent + * FPSIMD state of 'current' + */ +void fpsimd_update_current_state(struct fpsimd_state *state) +{ +	preempt_disable(); +	fpsimd_load_state(state); +	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { +		struct fpsimd_state *st = ¤t->thread.fpsimd_state; + +		this_cpu_write(fpsimd_last_state, st); +		st->cpu = smp_processor_id(); +	} +	preempt_enable(); +} + +/* + * Invalidate live CPU copies of task t's FPSIMD state + */ +void fpsimd_flush_task_state(struct task_struct *t) +{ +	t->thread.fpsimd_state.cpu = NR_CPUS; +} + +#ifdef CONFIG_KERNEL_MODE_NEON + +static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate); +static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate); + +/* + * Kernel-side NEON support functions + */ +void kernel_neon_begin_partial(u32 num_regs) +{ +	if (in_interrupt()) { +		struct fpsimd_partial_state *s = this_cpu_ptr( +			in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); + +		BUG_ON(num_regs > 32); +		fpsimd_save_partial_state(s, roundup(num_regs, 2)); +	} else { +		/* +		 * Save the userland FPSIMD state if we have one and if we +		 * haven't done so already. Clear fpsimd_last_state to indicate +		 * that there is no longer userland FPSIMD state in the +		 * registers. +		 */ +		preempt_disable(); +		if (current->mm && +		    !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) +			fpsimd_save_state(¤t->thread.fpsimd_state); +		this_cpu_write(fpsimd_last_state, NULL); +	} +} +EXPORT_SYMBOL(kernel_neon_begin_partial); + +void kernel_neon_end(void) +{ +	if (in_interrupt()) { +		struct fpsimd_partial_state *s = this_cpu_ptr( +			in_irq() ? 
&hardirq_fpsimdstate : &softirq_fpsimdstate); +		fpsimd_load_partial_state(s); +	} else { +		preempt_enable(); +	} +} +EXPORT_SYMBOL(kernel_neon_end); + +#endif /* CONFIG_KERNEL_MODE_NEON */ + +#ifdef CONFIG_CPU_PM +static int fpsimd_cpu_pm_notifier(struct notifier_block *self, +				  unsigned long cmd, void *v) +{ +	switch (cmd) { +	case CPU_PM_ENTER: +		if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) +			fpsimd_save_state(¤t->thread.fpsimd_state); +		break; +	case CPU_PM_EXIT: +		if (current->mm) +			set_thread_flag(TIF_FOREIGN_FPSTATE); +		break; +	case CPU_PM_ENTER_FAILED: +	default: +		return NOTIFY_DONE; +	} +	return NOTIFY_OK; +} + +static struct notifier_block fpsimd_cpu_pm_notifier_block = { +	.notifier_call = fpsimd_cpu_pm_notifier, +}; + +static void fpsimd_pm_init(void) +{ +	cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block); +} + +#else +static inline void fpsimd_pm_init(void) { } +#endif /* CONFIG_CPU_PM */ + +/* + * FP/SIMD support code initialisation. + */ +static int __init fpsimd_init(void) +{ +	u64 pfr = read_cpuid(ID_AA64PFR0_EL1); + +	if (pfr & (0xf << 16)) { +		pr_notice("Floating-point is not implemented\n"); +		return 0; +	} +	elf_hwcap |= HWCAP_FP; + +	if (pfr & (0xf << 20)) +		pr_notice("Advanced SIMD is not implemented\n"); +	else +		elf_hwcap |= HWCAP_ASIMD; + +	fpsimd_pm_init(); + +	return 0; +} +late_initcall(fpsimd_init); diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c new file mode 100644 index 00000000000..7924d73b647 --- /dev/null +++ b/arch/arm64/kernel/ftrace.c @@ -0,0 +1,176 @@ +/* + * arch/arm64/kernel/ftrace.c + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ftrace.h> +#include <linux/swab.h> +#include <linux/uaccess.h> + +#include <asm/cacheflush.h> +#include <asm/ftrace.h> +#include <asm/insn.h> + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Replace a single instruction, which may be a branch or NOP. + * If @validate == true, a replaced instruction is checked against 'old'. + */ +static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, +			      bool validate) +{ +	u32 replaced; + +	/* +	 * Note: +	 * Due to modules and __init, code can disappear and change, +	 * we need to protect against faulting as well as code changing. +	 * We do this by aarch64_insn_*() which use the probe_kernel_*(). +	 * +	 * No lock is held here because all the modifications are run +	 * through stop_machine(). 
+	 */ +	if (validate) { +		if (aarch64_insn_read((void *)pc, &replaced)) +			return -EFAULT; + +		if (replaced != old) +			return -EINVAL; +	} +	if (aarch64_insn_patch_text_nosync((void *)pc, new)) +		return -EPERM; + +	return 0; +} + +/* + * Replace tracer function in ftrace_caller() + */ +int ftrace_update_ftrace_func(ftrace_func_t func) +{ +	unsigned long pc; +	u32 new; + +	pc = (unsigned long)&ftrace_call; +	new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func, true); + +	return ftrace_modify_code(pc, 0, new, false); +} + +/* + * Turn on the call to ftrace_caller() in instrumented function + */ +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ +	unsigned long pc = rec->ip; +	u32 old, new; + +	old = aarch64_insn_gen_nop(); +	new = aarch64_insn_gen_branch_imm(pc, addr, true); + +	return ftrace_modify_code(pc, old, new, true); +} + +/* + * Turn off the call to ftrace_caller() in instrumented function + */ +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, +		    unsigned long addr) +{ +	unsigned long pc = rec->ip; +	u32 old, new; + +	old = aarch64_insn_gen_branch_imm(pc, addr, true); +	new = aarch64_insn_gen_nop(); + +	return ftrace_modify_code(pc, old, new, true); +} + +int __init ftrace_dyn_arch_init(void) +{ +	return 0; +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * function_graph tracer expects ftrace_return_to_handler() to be called + * on the way back to parent. For this purpose, this function is called + * in _mcount() or ftrace_caller() to replace return address (*parent) on + * the call stack to return_to_handler. + * + * Note that @frame_pointer is used only for sanity check later. + */ +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, +			   unsigned long frame_pointer) +{ +	unsigned long return_hooker = (unsigned long)&return_to_handler; +	unsigned long old; +	struct ftrace_graph_ent trace; +	int err; + +	if (unlikely(atomic_read(¤t->tracing_graph_pause))) +		return; + +	/* +	 * Note: +	 * No protection against faulting at *parent, which may be seen +	 * on other archs. It's unlikely on AArch64. +	 */ +	old = *parent; +	*parent = return_hooker; + +	trace.func = self_addr; +	trace.depth = current->curr_ret_stack + 1; + +	/* Only trace if the calling function expects to */ +	if (!ftrace_graph_entry(&trace)) { +		*parent = old; +		return; +	} + +	err = ftrace_push_return_trace(old, self_addr, &trace.depth, +				       frame_pointer); +	if (err == -EBUSY) { +		*parent = old; +		return; +	} +} + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Turn on/off the call to ftrace_graph_caller() in ftrace_caller() + * depending on @enable. 
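
All of these helpers rely on aarch64_insn_gen_nop() and aarch64_insn_gen_branch_imm(), defined elsewhere in this series, to produce the 32-bit opcodes that get patched over the call sites. As a rough illustration of what such an encoder computes, here is a sketch of the architectural B/BL encoding (not the kernel's implementation); the NOP value is the AArch64 HINT #0 opcode:

#include <stdint.h>
#include <stdio.h>

#define AARCH64_NOP	0xd503201fu		/* HINT #0 */

/* Sketch of an unconditional branch encoder: B/BL carry a signed 26-bit
 * word offset, so the target must lie within +/-128MB of the call site. */
static uint32_t gen_branch(uint64_t pc, uint64_t target, int link)
{
	int64_t offset = (int64_t)(target - pc);
	uint32_t base = link ? 0x94000000u : 0x14000000u;	/* BL : B */

	return base | (uint32_t)(((uint64_t)offset >> 2) & 0x03ffffffu);
}

int main(void)
{
	/* hypothetical addresses for a call site and its trampoline */
	printf("bl: 0x%08x, nop: 0x%08x\n",
	       gen_branch(0xffff000000090000ull, 0xffff000000091000ull, 1),
	       AARCH64_NOP);
	return 0;
}
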
+ */ +static int ftrace_modify_graph_caller(bool enable) +{ +	unsigned long pc = (unsigned long)&ftrace_graph_call; +	u32 branch, nop; + +	branch = aarch64_insn_gen_branch_imm(pc, +			(unsigned long)ftrace_graph_caller, false); +	nop = aarch64_insn_gen_nop(); + +	if (enable) +		return ftrace_modify_code(pc, nop, branch, true); +	else +		return ftrace_modify_code(pc, branch, nop, true); +} + +int ftrace_enable_ftrace_graph_caller(void) +{ +	return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ +	return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S new file mode 100644 index 00000000000..a2c1195abb7 --- /dev/null +++ b/arch/arm64/kernel/head.S @@ -0,0 +1,709 @@ +/* + * Low-level CPU initialisation + * Based on arch/arm/kernel/head.S + * + * Copyright (C) 1994-2002 Russell King + * Copyright (C) 2003-2012 ARM Ltd. + * Authors:	Catalin Marinas <catalin.marinas@arm.com> + *		Will Deacon <will.deacon@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/linkage.h> +#include <linux/init.h> + +#include <asm/assembler.h> +#include <asm/ptrace.h> +#include <asm/asm-offsets.h> +#include <asm/cache.h> +#include <asm/cputype.h> +#include <asm/memory.h> +#include <asm/thread_info.h> +#include <asm/pgtable-hwdef.h> +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/virt.h> + +/* + * swapper_pg_dir is the virtual address of the initial page table. We place + * the page tables 3 * PAGE_SIZE below KERNEL_RAM_VADDR. The idmap_pg_dir has + * 2 pages and is placed below swapper_pg_dir. + */ +#define KERNEL_RAM_VADDR	(PAGE_OFFSET + TEXT_OFFSET) + +#if (KERNEL_RAM_VADDR & 0xfffff) != 0x80000 +#error KERNEL_RAM_VADDR must start at 0xXXX80000 +#endif + +#define SWAPPER_DIR_SIZE	(3 * PAGE_SIZE) +#define IDMAP_DIR_SIZE		(2 * PAGE_SIZE) + +	.globl	swapper_pg_dir +	.equ	swapper_pg_dir, KERNEL_RAM_VADDR - SWAPPER_DIR_SIZE + +	.globl	idmap_pg_dir +	.equ	idmap_pg_dir, swapper_pg_dir - IDMAP_DIR_SIZE + +	.macro	pgtbl, ttb0, ttb1, phys +	add	\ttb1, \phys, #TEXT_OFFSET - SWAPPER_DIR_SIZE +	sub	\ttb0, \ttb1, #IDMAP_DIR_SIZE +	.endm + +#ifdef CONFIG_ARM64_64K_PAGES +#define BLOCK_SHIFT	PAGE_SHIFT +#define BLOCK_SIZE	PAGE_SIZE +#else +#define BLOCK_SHIFT	SECTION_SHIFT +#define BLOCK_SIZE	SECTION_SIZE +#endif + +#define KERNEL_START	KERNEL_RAM_VADDR +#define KERNEL_END	_end + +/* + * Initial memory map attributes. + */ +#ifndef CONFIG_SMP +#define PTE_FLAGS	PTE_TYPE_PAGE | PTE_AF +#define PMD_FLAGS	PMD_TYPE_SECT | PMD_SECT_AF +#else +#define PTE_FLAGS	PTE_TYPE_PAGE | PTE_AF | PTE_SHARED +#define PMD_FLAGS	PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S +#endif + +#ifdef CONFIG_ARM64_64K_PAGES +#define MM_MMUFLAGS	PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS +#else +#define MM_MMUFLAGS	PMD_ATTRINDX(MT_NORMAL) | PMD_FLAGS +#endif + +/* + * Kernel startup entry point. 
+ * --------------------------- + * + * The requirements are: + *   MMU = off, D-cache = off, I-cache = on or off, + *   x0 = physical address to the FDT blob. + * + * This code is mostly position independent so you call this at + * __pa(PAGE_OFFSET + TEXT_OFFSET). + * + * Note that the callee-saved registers are used for storing variables + * that are useful before the MMU is enabled. The allocations are described + * in the entry routines. + */ +	__HEAD + +	/* +	 * DO NOT MODIFY. Image header expected by Linux boot-loaders. +	 */ +#ifdef CONFIG_EFI +efi_head: +	/* +	 * This add instruction has no meaningful effect except that +	 * its opcode forms the magic "MZ" signature required by UEFI. +	 */ +	add	x13, x18, #0x16 +	b	stext +#else +	b	stext				// branch to kernel start, magic +	.long	0				// reserved +#endif +	.quad	TEXT_OFFSET			// Image load offset from start of RAM +	.quad	0				// reserved +	.quad	0				// reserved +	.quad	0				// reserved +	.quad	0				// reserved +	.quad	0				// reserved +	.byte	0x41				// Magic number, "ARM\x64" +	.byte	0x52 +	.byte	0x4d +	.byte	0x64 +#ifdef CONFIG_EFI +	.long	pe_header - efi_head		// Offset to the PE header. +#else +	.word	0				// reserved +#endif + +#ifdef CONFIG_EFI +	.align 3 +pe_header: +	.ascii	"PE" +	.short 	0 +coff_header: +	.short	0xaa64				// AArch64 +	.short	2				// nr_sections +	.long	0 				// TimeDateStamp +	.long	0				// PointerToSymbolTable +	.long	1				// NumberOfSymbols +	.short	section_table - optional_header	// SizeOfOptionalHeader +	.short	0x206				// Characteristics. +						// IMAGE_FILE_DEBUG_STRIPPED | +						// IMAGE_FILE_EXECUTABLE_IMAGE | +						// IMAGE_FILE_LINE_NUMS_STRIPPED +optional_header: +	.short	0x20b				// PE32+ format +	.byte	0x02				// MajorLinkerVersion +	.byte	0x14				// MinorLinkerVersion +	.long	_edata - stext			// SizeOfCode +	.long	0				// SizeOfInitializedData +	.long	0				// SizeOfUninitializedData +	.long	efi_stub_entry - efi_head	// AddressOfEntryPoint +	.long	stext - efi_head		// BaseOfCode + +extra_header_fields: +	.quad	0				// ImageBase +	.long	0x20				// SectionAlignment +	.long	0x8				// FileAlignment +	.short	0				// MajorOperatingSystemVersion +	.short	0				// MinorOperatingSystemVersion +	.short	0				// MajorImageVersion +	.short	0				// MinorImageVersion +	.short	0				// MajorSubsystemVersion +	.short	0				// MinorSubsystemVersion +	.long	0				// Win32VersionValue + +	.long	_edata - efi_head		// SizeOfImage + +	// Everything before the kernel image is considered part of the header +	.long	stext - efi_head		// SizeOfHeaders +	.long	0				// CheckSum +	.short	0xa				// Subsystem (EFI application) +	.short	0				// DllCharacteristics +	.quad	0				// SizeOfStackReserve +	.quad	0				// SizeOfStackCommit +	.quad	0				// SizeOfHeapReserve +	.quad	0				// SizeOfHeapCommit +	.long	0				// LoaderFlags +	.long	0x6				// NumberOfRvaAndSizes + +	.quad	0				// ExportTable +	.quad	0				// ImportTable +	.quad	0				// ResourceTable +	.quad	0				// ExceptionTable +	.quad	0				// CertificationTable +	.quad	0				// BaseRelocationTable + +	// Section table +section_table: + +	/* +	 * The EFI application loader requires a relocation section +	 * because EFI applications must be relocatable.  This is a +	 * dummy section as far as we are concerned. 
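
Setting the PE/COFF bookkeeping aside for a moment, the bare Image header emitted at the top of this file (the entry branch, the load offset, the reserved quads and the "ARM\x64" magic) adds up to a fixed 64-byte layout that boot loaders parse. Sketched as a C struct for clarity; the field names are illustrative, not the kernel's:

#include <stdint.h>

struct arm64_image_header {
	uint32_t code0;		/* "add x13, x18, #0x16" (EFI) or "b stext" */
	uint32_t code1;		/* "b stext" or the reserved .long 0        */
	uint64_t text_offset;	/* image load offset from start of RAM      */
	uint64_t reserved[5];
	uint8_t  magic[4];	/* 0x41 0x52 0x4d 0x64, "ARM\x64"           */
	uint32_t pe_offset;	/* offset to the PE header, or reserved 0   */
};

_Static_assert(sizeof(struct arm64_image_header) == 64, "header is 64 bytes");
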
+	 */ +	.ascii	".reloc" +	.byte	0 +	.byte	0			// end of 0 padding of section name +	.long	0 +	.long	0 +	.long	0			// SizeOfRawData +	.long	0			// PointerToRawData +	.long	0			// PointerToRelocations +	.long	0			// PointerToLineNumbers +	.short	0			// NumberOfRelocations +	.short	0			// NumberOfLineNumbers +	.long	0x42100040		// Characteristics (section flags) + + +	.ascii	".text" +	.byte	0 +	.byte	0 +	.byte	0        		// end of 0 padding of section name +	.long	_edata - stext		// VirtualSize +	.long	stext - efi_head	// VirtualAddress +	.long	_edata - stext		// SizeOfRawData +	.long	stext - efi_head	// PointerToRawData + +	.long	0		// PointerToRelocations (0 for executables) +	.long	0		// PointerToLineNumbers (0 for executables) +	.short	0		// NumberOfRelocations  (0 for executables) +	.short	0		// NumberOfLineNumbers  (0 for executables) +	.long	0xe0500020	// Characteristics (section flags) +	.align 5 +#endif + +ENTRY(stext) +	mov	x21, x0				// x21=FDT +	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode +	bl	__calc_phys_offset		// x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET +	bl	set_cpu_boot_mode_flag +	mrs	x22, midr_el1			// x22=cpuid +	mov	x0, x22 +	bl	lookup_processor_type +	mov	x23, x0				// x23=current cpu_table +	cbz	x23, __error_p			// invalid processor (x23=0)? +	bl	__vet_fdt +	bl	__create_page_tables		// x25=TTBR0, x26=TTBR1 +	/* +	 * The following calls CPU specific code in a position independent +	 * manner. See arch/arm64/mm/proc.S for details. x23 = base of +	 * cpu_info structure selected by lookup_processor_type above. +	 * On return, the CPU will be ready for the MMU to be turned on and +	 * the TCR will have been set. +	 */ +	ldr	x27, __switch_data		// address to jump to after +						// MMU has been enabled +	adr	lr, __enable_mmu		// return (PIC) address +	ldr	x12, [x23, #CPU_INFO_SETUP] +	add	x12, x12, x28			// __virt_to_phys +	br	x12				// initialise processor +ENDPROC(stext) + +/* + * If we're fortunate enough to boot at EL2, ensure that the world is + * sane before dropping to EL1. + * + * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if + * booted in EL1 or EL2 respectively. + */ +ENTRY(el2_setup) +	mrs	x0, CurrentEL +	cmp	x0, #CurrentEL_EL2 +	b.ne	1f +	mrs	x0, sctlr_el2 +CPU_BE(	orr	x0, x0, #(1 << 25)	)	// Set the EE bit for EL2 +CPU_LE(	bic	x0, x0, #(1 << 25)	)	// Clear the EE bit for EL2 +	msr	sctlr_el2, x0 +	b	2f +1:	mrs	x0, sctlr_el1 +CPU_BE(	orr	x0, x0, #(3 << 24)	)	// Set the EE and E0E bits for EL1 +CPU_LE(	bic	x0, x0, #(3 << 24)	)	// Clear the EE and E0E bits for EL1 +	msr	sctlr_el1, x0 +	mov	w20, #BOOT_CPU_MODE_EL1		// This cpu booted in EL1 +	isb +	ret + +	/* Hyp configuration. */ +2:	mov	x0, #(1 << 31)			// 64-bit EL1 +	msr	hcr_el2, x0 + +	/* Generic timers. */ +	mrs	x0, cnthctl_el2 +	orr	x0, x0, #3			// Enable EL1 physical timers +	msr	cnthctl_el2, x0 +	msr	cntvoff_el2, xzr		// Clear virtual offset + +	/* Populate ID registers. */ +	mrs	x0, midr_el1 +	mrs	x1, mpidr_el1 +	msr	vpidr_el2, x0 +	msr	vmpidr_el2, x1 + +	/* sctlr_el1 */ +	mov	x0, #0x0800			// Set/clear RES{1,0} bits +CPU_BE(	movk	x0, #0x33d0, lsl #16	)	// Set EE and E0E on BE systems +CPU_LE(	movk	x0, #0x30d0, lsl #16	)	// Clear EE and E0E on LE systems +	msr	sctlr_el1, x0 + +	/* Coprocessor traps. */ +	mov	x0, #0x33ff +	msr	cptr_el2, x0			// Disable copro. 
traps to EL2 + +#ifdef CONFIG_COMPAT +	msr	hstr_el2, xzr			// Disable CP15 traps to EL2 +#endif + +	/* Stage-2 translation */ +	msr	vttbr_el2, xzr + +	/* Hypervisor stub */ +	adr	x0, __hyp_stub_vectors +	msr	vbar_el2, x0 + +	/* spsr */ +	mov	x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ +		      PSR_MODE_EL1h) +	msr	spsr_el2, x0 +	msr	elr_el2, lr +	mov	w20, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2 +	eret +ENDPROC(el2_setup) + +/* + * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed + * in x20. See arch/arm64/include/asm/virt.h for more info. + */ +ENTRY(set_cpu_boot_mode_flag) +	ldr	x1, =__boot_cpu_mode		// Compute __boot_cpu_mode +	add	x1, x1, x28 +	cmp	w20, #BOOT_CPU_MODE_EL2 +	b.ne	1f +	add	x1, x1, #4 +1:	str	w20, [x1]			// This CPU has booted in EL1 +	dmb	sy +	dc	ivac, x1			// Invalidate potentially stale cache line +	ret +ENDPROC(set_cpu_boot_mode_flag) + +/* + * We need to find out the CPU boot mode long after boot, so we need to + * store it in a writable variable. + * + * This is not in .bss, because we set it sufficiently early that the boot-time + * zeroing of .bss would clobber it. + */ +	.pushsection	.data..cacheline_aligned +ENTRY(__boot_cpu_mode) +	.align	L1_CACHE_SHIFT +	.long	BOOT_CPU_MODE_EL2 +	.long	0 +	.popsection + +	.align	3 +2:	.quad	. +	.quad	PAGE_OFFSET + +#ifdef CONFIG_SMP +	.align	3 +1:	.quad	. +	.quad	secondary_holding_pen_release + +	/* +	 * This provides a "holding pen" for platforms to hold all secondary +	 * cores are held until we're ready for them to initialise. +	 */ +ENTRY(secondary_holding_pen) +	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode +	bl	__calc_phys_offset		// x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET +	bl	set_cpu_boot_mode_flag +	mrs	x0, mpidr_el1 +	ldr     x1, =MPIDR_HWID_BITMASK +	and	x0, x0, x1 +	adr	x1, 1b +	ldp	x2, x3, [x1] +	sub	x1, x1, x2 +	add	x3, x3, x1 +pen:	ldr	x4, [x3] +	cmp	x4, x0 +	b.eq	secondary_startup +	wfe +	b	pen +ENDPROC(secondary_holding_pen) + +	/* +	 * Secondary entry point that jumps straight into the kernel. Only to +	 * be used where CPUs are brought online dynamically by the kernel. +	 */ +ENTRY(secondary_entry) +	bl	el2_setup			// Drop to EL1 +	bl	__calc_phys_offset		// x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET +	bl	set_cpu_boot_mode_flag +	b	secondary_startup +ENDPROC(secondary_entry) + +ENTRY(secondary_startup) +	/* +	 * Common entry point for secondary CPUs. +	 */ +	mrs	x22, midr_el1			// x22=cpuid +	mov	x0, x22 +	bl	lookup_processor_type +	mov	x23, x0				// x23=current cpu_table +	cbz	x23, __error_p			// invalid processor (x23=0)? + +	pgtbl	x25, x26, x24			// x25=TTBR0, x26=TTBR1 +	ldr	x12, [x23, #CPU_INFO_SETUP] +	add	x12, x12, x28			// __virt_to_phys +	blr	x12				// initialise processor + +	ldr	x21, =secondary_data +	ldr	x27, =__secondary_switched	// address to jump to after enabling the MMU +	b	__enable_mmu +ENDPROC(secondary_startup) + +ENTRY(__secondary_switched) +	ldr	x0, [x21]			// get secondary_data.stack +	mov	sp, x0 +	mov	x29, #0 +	b	secondary_start_kernel +ENDPROC(__secondary_switched) +#endif	/* CONFIG_SMP */ + +/* + * Setup common bits before finally enabling the MMU. Essentially this is just + * loading the page table pointer and vector base registers. + * + * On entry to this code, x0 must contain the SCTLR_EL1 value for turning on + * the MMU. + */ +__enable_mmu: +	ldr	x5, =vectors +	msr	vbar_el1, x5 +	msr	ttbr0_el1, x25			// load TTBR0 +	msr	ttbr1_el1, x26			// load TTBR1 +	isb +	b	__turn_mmu_on +ENDPROC(__enable_mmu) + +/* + * Enable the MMU. 
This completely changes the structure of the visible memory + * space. You will not be able to trace execution through this. + * + *  x0  = system control register + *  x27 = *virtual* address to jump to upon completion + * + * other registers depend on the function called upon completion + */ +	.align	6 +__turn_mmu_on: +	msr	sctlr_el1, x0 +	isb +	br	x27 +ENDPROC(__turn_mmu_on) + +/* + * Calculate the start of physical memory. + */ +__calc_phys_offset: +	adr	x0, 1f +	ldp	x1, x2, [x0] +	sub	x28, x0, x1			// x28 = PHYS_OFFSET - PAGE_OFFSET +	add	x24, x2, x28			// x24 = PHYS_OFFSET +	ret +ENDPROC(__calc_phys_offset) + +	.align 3 +1:	.quad	. +	.quad	PAGE_OFFSET + +/* + * Macro to populate the PGD for the corresponding block entry in the next + * level (tbl) for the given virtual address. + * + * Preserves:	pgd, tbl, virt + * Corrupts:	tmp1, tmp2 + */ +	.macro	create_pgd_entry, pgd, tbl, virt, tmp1, tmp2 +	lsr	\tmp1, \virt, #PGDIR_SHIFT +	and	\tmp1, \tmp1, #PTRS_PER_PGD - 1	// PGD index +	orr	\tmp2, \tbl, #3			// PGD entry table type +	str	\tmp2, [\pgd, \tmp1, lsl #3] +	.endm + +/* + * Macro to populate block entries in the page table for the start..end + * virtual range (inclusive). + * + * Preserves:	tbl, flags + * Corrupts:	phys, start, end, pstate + */ +	.macro	create_block_map, tbl, flags, phys, start, end +	lsr	\phys, \phys, #BLOCK_SHIFT +	lsr	\start, \start, #BLOCK_SHIFT +	and	\start, \start, #PTRS_PER_PTE - 1	// table index +	orr	\phys, \flags, \phys, lsl #BLOCK_SHIFT	// table entry +	lsr	\end, \end, #BLOCK_SHIFT +	and	\end, \end, #PTRS_PER_PTE - 1		// table end index +9999:	str	\phys, [\tbl, \start, lsl #3]		// store the entry +	add	\start, \start, #1			// next entry +	add	\phys, \phys, #BLOCK_SIZE		// next block +	cmp	\start, \end +	b.ls	9999b +	.endm + +/* + * Setup the initial page tables. We only setup the barest amount which is + * required to get the kernel running. The following sections are required: + *   - identity mapping to enable the MMU (low address, TTBR0) + *   - first few MB of the kernel linear mapping to jump to once the MMU has + *     been enabled, including the FDT blob (TTBR1) + *   - pgd entry for fixed mappings (TTBR1) + */ +__create_page_tables: +	pgtbl	x25, x26, x24			// idmap_pg_dir and swapper_pg_dir addresses +	mov	x27, lr + +	/* +	 * Invalidate the idmap and swapper page tables to avoid potential +	 * dirty cache lines being evicted. +	 */ +	mov	x0, x25 +	add	x1, x26, #SWAPPER_DIR_SIZE +	bl	__inval_cache_range + +	/* +	 * Clear the idmap and swapper page tables. +	 */ +	mov	x0, x25 +	add	x6, x26, #SWAPPER_DIR_SIZE +1:	stp	xzr, xzr, [x0], #16 +	stp	xzr, xzr, [x0], #16 +	stp	xzr, xzr, [x0], #16 +	stp	xzr, xzr, [x0], #16 +	cmp	x0, x6 +	b.lo	1b + +	ldr	x7, =MM_MMUFLAGS + +	/* +	 * Create the identity mapping. +	 */ +	add	x0, x25, #PAGE_SIZE		// section table address +	ldr	x3, =KERNEL_START +	add	x3, x3, x28			// __pa(KERNEL_START) +	create_pgd_entry x25, x0, x3, x5, x6 +	ldr	x6, =KERNEL_END +	mov	x5, x3				// __pa(KERNEL_START) +	add	x6, x6, x28			// __pa(KERNEL_END) +	create_block_map x0, x7, x3, x5, x6 + +	/* +	 * Map the kernel image (starting with PHYS_OFFSET). +	 */ +	add	x0, x26, #PAGE_SIZE		// section table address +	mov	x5, #PAGE_OFFSET +	create_pgd_entry x26, x0, x5, x3, x6 +	ldr	x6, =KERNEL_END +	mov	x3, x24				// phys offset +	create_block_map x0, x7, x3, x5, x6 + +	/* +	 * Map the FDT blob (maximum 2MB; must be within 512MB of +	 * PHYS_OFFSET). 
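
The FDT mapping below reuses the same create_pgd_entry/create_block_map macros as the identity and kernel mappings above. Their index arithmetic is easier to follow in C; a minimal sketch, using illustrative 4KB-page shift values (the real ones come from pgtable-hwdef.h and differ under CONFIG_ARM64_64K_PAGES):

#include <stdint.h>
#include <stdio.h>

/* Assumed values, for illustration only (4KB pages, 2MB section blocks). */
#define PGDIR_SHIFT	30
#define PTRS_PER_PGD	512
#define BLOCK_SHIFT	21
#define PTRS_PER_PTE	512

/* create_pgd_entry: which PGD slot covers 'virt' (the slot is then pointed
 * at the next-level table, with the low bits set to mark a table entry). */
static uint64_t pgd_index(uint64_t virt)
{
	return (virt >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
}

/* create_block_map: index of the block entry covering 'virt' within the
 * next-level table. */
static uint64_t block_index(uint64_t virt)
{
	return (virt >> BLOCK_SHIFT) & (PTRS_PER_PTE - 1);
}

int main(void)
{
	uint64_t va = 0xffffffc000080000ull;	/* hypothetical kernel VA */

	printf("pgd index %llu, block index %llu\n",
	       (unsigned long long)pgd_index(va),
	       (unsigned long long)block_index(va));
	return 0;
}
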
+	 */ +	mov	x3, x21				// FDT phys address +	and	x3, x3, #~((1 << 21) - 1)	// 2MB aligned +	mov	x6, #PAGE_OFFSET +	sub	x5, x3, x24			// subtract PHYS_OFFSET +	tst	x5, #~((1 << 29) - 1)		// within 512MB? +	csel	x21, xzr, x21, ne		// zero the FDT pointer +	b.ne	1f +	add	x5, x5, x6			// __va(FDT blob) +	add	x6, x5, #1 << 21		// 2MB for the FDT blob +	sub	x6, x6, #1			// inclusive range +	create_block_map x0, x7, x3, x5, x6 +1: +	/* +	 * Create the pgd entry for the fixed mappings. +	 */ +	ldr	x5, =FIXADDR_TOP		// Fixed mapping virtual address +	add	x0, x26, #2 * PAGE_SIZE		// section table address +	create_pgd_entry x26, x0, x5, x6, x7 + +	/* +	 * Since the page tables have been populated with non-cacheable +	 * accesses (MMU disabled), invalidate the idmap and swapper page +	 * tables again to remove any speculatively loaded cache lines. +	 */ +	mov	x0, x25 +	add	x1, x26, #SWAPPER_DIR_SIZE +	bl	__inval_cache_range + +	mov	lr, x27 +	ret +ENDPROC(__create_page_tables) +	.ltorg + +	.align	3 +	.type	__switch_data, %object +__switch_data: +	.quad	__mmap_switched +	.quad	__bss_start			// x6 +	.quad	_end				// x7 +	.quad	processor_id			// x4 +	.quad	__fdt_pointer			// x5 +	.quad	memstart_addr			// x6 +	.quad	init_thread_union + THREAD_START_SP // sp + +/* + * The following fragment of code is executed with the MMU on in MMU mode, and + * uses absolute addresses; this is not position independent. + */ +__mmap_switched: +	adr	x3, __switch_data + 8 + +	ldp	x6, x7, [x3], #16 +1:	cmp	x6, x7 +	b.hs	2f +	str	xzr, [x6], #8			// Clear BSS +	b	1b +2: +	ldp	x4, x5, [x3], #16 +	ldr	x6, [x3], #8 +	ldr	x16, [x3] +	mov	sp, x16 +	str	x22, [x4]			// Save processor ID +	str	x21, [x5]			// Save FDT pointer +	str	x24, [x6]			// Save PHYS_OFFSET +	mov	x29, #0 +	b	start_kernel +ENDPROC(__mmap_switched) + +/* + * Exception handling. Something went wrong and we can't proceed. We ought to + * tell the user, but since we don't have any guarantee that we're even + * running on the right architecture, we do virtually nothing. + */ +__error_p: +ENDPROC(__error_p) + +__error: +1:	nop +	b	1b +ENDPROC(__error) + +/* + * This function gets the processor ID in w0 and searches the cpu_table[] for + * a match. It returns a pointer to the struct cpu_info it found. The + * cpu_table[] must end with an empty (all zeros) structure. + * + * This routine can be called via C code and it needs to work with the MMU + * both disabled and enabled (the offset is calculated automatically). + */ +ENTRY(lookup_processor_type) +	adr	x1, __lookup_processor_type_data +	ldp	x2, x3, [x1] +	sub	x1, x1, x2			// get offset between VA and PA +	add	x3, x3, x1			// convert VA to PA +1: +	ldp	w5, w6, [x3]			// load cpu_id_val and cpu_id_mask +	cbz	w5, 2f				// end of list? +	and	w6, w6, w0 +	cmp	w5, w6 +	b.eq	3f +	add	x3, x3, #CPU_INFO_SZ +	b	1b +2: +	mov	x3, #0				// unknown processor +3: +	mov	x0, x3 +	ret +ENDPROC(lookup_processor_type) + +	.align	3 +	.type	__lookup_processor_type_data, %object +__lookup_processor_type_data: +	.quad	. +	.quad	cpu_table +	.size	__lookup_processor_type_data, . - __lookup_processor_type_data + +/* + * Determine validity of the x21 FDT pointer. + * The dtb must be 8-byte aligned and live in the first 512M of memory. 
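
Expressed in C, the check that __vet_fdt performs below amounts to the following (a sketch; the 512MB limit written out as 1 << 29, matching the code):

#include <stdbool.h>
#include <stdint.h>

/* Returns true if the device tree pointer may be kept; otherwise the
 * caller zeroes it, just as __vet_fdt clears x21. */
bool fdt_pointer_ok(uint64_t fdt, uint64_t phys_offset)
{
	return !(fdt & 0x7) &&				/* 8-byte aligned         */
	       fdt >= phys_offset &&			/* not below start of RAM */
	       fdt < phys_offset + (1ull << 29);	/* within the first 512MB */
}
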
+ */ +__vet_fdt: +	tst	x21, #0x7 +	b.ne	1f +	cmp	x21, x24 +	b.lt	1f +	mov	x0, #(1 << 29) +	add	x0, x0, x24 +	cmp	x21, x0 +	b.ge	1f +	ret +1: +	mov	x21, #0 +	ret +ENDPROC(__vet_fdt) diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c new file mode 100644 index 00000000000..df1cf15377b --- /dev/null +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -0,0 +1,954 @@ +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. + * + * Copyright (C) 2012 ARM Limited + * Author: Will Deacon <will.deacon@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ + +#define pr_fmt(fmt) "hw-breakpoint: " fmt + +#include <linux/compat.h> +#include <linux/cpu_pm.h> +#include <linux/errno.h> +#include <linux/hw_breakpoint.h> +#include <linux/perf_event.h> +#include <linux/ptrace.h> +#include <linux/smp.h> + +#include <asm/current.h> +#include <asm/debug-monitors.h> +#include <asm/hw_breakpoint.h> +#include <asm/kdebug.h> +#include <asm/traps.h> +#include <asm/cputype.h> +#include <asm/system_misc.h> + +/* Breakpoint currently in use for each BRP. */ +static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]); + +/* Watchpoint currently in use for each WRP. */ +static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]); + +/* Currently stepping a per-CPU kernel breakpoint. */ +static DEFINE_PER_CPU(int, stepping_kernel_bp); + +/* Number of BRP/WRP registers on this CPU. */ +static int core_num_brps; +static int core_num_wrps; + +/* Determine number of BRP registers available. */ +static int get_num_brps(void) +{ +	return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1; +} + +/* Determine number of WRP registers available. */ +static int get_num_wrps(void) +{ +	return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1; +} + +int hw_breakpoint_slots(int type) +{ +	/* +	 * We can be called early, so don't rely on +	 * our static variables being initialised. 
+	 */ +	switch (type) { +	case TYPE_INST: +		return get_num_brps(); +	case TYPE_DATA: +		return get_num_wrps(); +	default: +		pr_warning("unknown slot type: %d\n", type); +		return 0; +	} +} + +#define READ_WB_REG_CASE(OFF, N, REG, VAL)	\ +	case (OFF + N):				\ +		AARCH64_DBG_READ(N, REG, VAL);	\ +		break + +#define WRITE_WB_REG_CASE(OFF, N, REG, VAL)	\ +	case (OFF + N):				\ +		AARCH64_DBG_WRITE(N, REG, VAL);	\ +		break + +#define GEN_READ_WB_REG_CASES(OFF, REG, VAL)	\ +	READ_WB_REG_CASE(OFF,  0, REG, VAL);	\ +	READ_WB_REG_CASE(OFF,  1, REG, VAL);	\ +	READ_WB_REG_CASE(OFF,  2, REG, VAL);	\ +	READ_WB_REG_CASE(OFF,  3, REG, VAL);	\ +	READ_WB_REG_CASE(OFF,  4, REG, VAL);	\ +	READ_WB_REG_CASE(OFF,  5, REG, VAL);	\ +	READ_WB_REG_CASE(OFF,  6, REG, VAL);	\ +	READ_WB_REG_CASE(OFF,  7, REG, VAL);	\ +	READ_WB_REG_CASE(OFF,  8, REG, VAL);	\ +	READ_WB_REG_CASE(OFF,  9, REG, VAL);	\ +	READ_WB_REG_CASE(OFF, 10, REG, VAL);	\ +	READ_WB_REG_CASE(OFF, 11, REG, VAL);	\ +	READ_WB_REG_CASE(OFF, 12, REG, VAL);	\ +	READ_WB_REG_CASE(OFF, 13, REG, VAL);	\ +	READ_WB_REG_CASE(OFF, 14, REG, VAL);	\ +	READ_WB_REG_CASE(OFF, 15, REG, VAL) + +#define GEN_WRITE_WB_REG_CASES(OFF, REG, VAL)	\ +	WRITE_WB_REG_CASE(OFF,  0, REG, VAL);	\ +	WRITE_WB_REG_CASE(OFF,  1, REG, VAL);	\ +	WRITE_WB_REG_CASE(OFF,  2, REG, VAL);	\ +	WRITE_WB_REG_CASE(OFF,  3, REG, VAL);	\ +	WRITE_WB_REG_CASE(OFF,  4, REG, VAL);	\ +	WRITE_WB_REG_CASE(OFF,  5, REG, VAL);	\ +	WRITE_WB_REG_CASE(OFF,  6, REG, VAL);	\ +	WRITE_WB_REG_CASE(OFF,  7, REG, VAL);	\ +	WRITE_WB_REG_CASE(OFF,  8, REG, VAL);	\ +	WRITE_WB_REG_CASE(OFF,  9, REG, VAL);	\ +	WRITE_WB_REG_CASE(OFF, 10, REG, VAL);	\ +	WRITE_WB_REG_CASE(OFF, 11, REG, VAL);	\ +	WRITE_WB_REG_CASE(OFF, 12, REG, VAL);	\ +	WRITE_WB_REG_CASE(OFF, 13, REG, VAL);	\ +	WRITE_WB_REG_CASE(OFF, 14, REG, VAL);	\ +	WRITE_WB_REG_CASE(OFF, 15, REG, VAL) + +static u64 read_wb_reg(int reg, int n) +{ +	u64 val = 0; + +	switch (reg + n) { +	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val); +	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val); +	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val); +	GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val); +	default: +		pr_warning("attempt to read from unknown breakpoint register %d\n", n); +	} + +	return val; +} + +static void write_wb_reg(int reg, int n, u64 val) +{ +	switch (reg + n) { +	GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BVR, AARCH64_DBG_REG_NAME_BVR, val); +	GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_BCR, AARCH64_DBG_REG_NAME_BCR, val); +	GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val); +	GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val); +	default: +		pr_warning("attempt to write to unknown breakpoint register %d\n", n); +	} +	isb(); +} + +/* + * Convert a breakpoint privilege level to the corresponding exception + * level. 
+ */ +static enum debug_el debug_exception_level(int privilege) +{ +	switch (privilege) { +	case AARCH64_BREAKPOINT_EL0: +		return DBG_ACTIVE_EL0; +	case AARCH64_BREAKPOINT_EL1: +		return DBG_ACTIVE_EL1; +	default: +		pr_warning("invalid breakpoint privilege level %d\n", privilege); +		return -EINVAL; +	} +} + +enum hw_breakpoint_ops { +	HW_BREAKPOINT_INSTALL, +	HW_BREAKPOINT_UNINSTALL, +	HW_BREAKPOINT_RESTORE +}; + +/** + * hw_breakpoint_slot_setup - Find and setup a perf slot according to + *			      operations + * + * @slots: pointer to array of slots + * @max_slots: max number of slots + * @bp: perf_event to setup + * @ops: operation to be carried out on the slot + * + * Return: + *	slot index on success + *	-ENOSPC if no slot is available/matches + *	-EINVAL on wrong operations parameter + */ +static int hw_breakpoint_slot_setup(struct perf_event **slots, int max_slots, +				    struct perf_event *bp, +				    enum hw_breakpoint_ops ops) +{ +	int i; +	struct perf_event **slot; + +	for (i = 0; i < max_slots; ++i) { +		slot = &slots[i]; +		switch (ops) { +		case HW_BREAKPOINT_INSTALL: +			if (!*slot) { +				*slot = bp; +				return i; +			} +			break; +		case HW_BREAKPOINT_UNINSTALL: +			if (*slot == bp) { +				*slot = NULL; +				return i; +			} +			break; +		case HW_BREAKPOINT_RESTORE: +			if (*slot == bp) +				return i; +			break; +		default: +			pr_warn_once("Unhandled hw breakpoint ops %d\n", ops); +			return -EINVAL; +		} +	} +	return -ENOSPC; +} + +static int hw_breakpoint_control(struct perf_event *bp, +				 enum hw_breakpoint_ops ops) +{ +	struct arch_hw_breakpoint *info = counter_arch_bp(bp); +	struct perf_event **slots; +	struct debug_info *debug_info = ¤t->thread.debug; +	int i, max_slots, ctrl_reg, val_reg, reg_enable; +	enum debug_el dbg_el = debug_exception_level(info->ctrl.privilege); +	u32 ctrl; + +	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { +		/* Breakpoint */ +		ctrl_reg = AARCH64_DBG_REG_BCR; +		val_reg = AARCH64_DBG_REG_BVR; +		slots = this_cpu_ptr(bp_on_reg); +		max_slots = core_num_brps; +		reg_enable = !debug_info->bps_disabled; +	} else { +		/* Watchpoint */ +		ctrl_reg = AARCH64_DBG_REG_WCR; +		val_reg = AARCH64_DBG_REG_WVR; +		slots = this_cpu_ptr(wp_on_reg); +		max_slots = core_num_wrps; +		reg_enable = !debug_info->wps_disabled; +	} + +	i = hw_breakpoint_slot_setup(slots, max_slots, bp, ops); + +	if (WARN_ONCE(i < 0, "Can't find any breakpoint slot")) +		return i; + +	switch (ops) { +	case HW_BREAKPOINT_INSTALL: +		/* +		 * Ensure debug monitors are enabled at the correct exception +		 * level. +		 */ +		enable_debug_monitors(dbg_el); +		/* Fall through */ +	case HW_BREAKPOINT_RESTORE: +		/* Setup the address register. */ +		write_wb_reg(val_reg, i, info->address); + +		/* Setup the control register. */ +		ctrl = encode_ctrl_reg(info->ctrl); +		write_wb_reg(ctrl_reg, i, +			     reg_enable ? ctrl | 0x1 : ctrl & ~0x1); +		break; +	case HW_BREAKPOINT_UNINSTALL: +		/* Reset the control register. */ +		write_wb_reg(ctrl_reg, i, 0); + +		/* +		 * Release the debug monitors for the correct exception +		 * level. +		 */ +		disable_debug_monitors(dbg_el); +		break; +	} + +	return 0; +} + +/* + * Install a perf counter breakpoint. 
+ */ +int arch_install_hw_breakpoint(struct perf_event *bp) +{ +	return hw_breakpoint_control(bp, HW_BREAKPOINT_INSTALL); +} + +void arch_uninstall_hw_breakpoint(struct perf_event *bp) +{ +	hw_breakpoint_control(bp, HW_BREAKPOINT_UNINSTALL); +} + +static int get_hbp_len(u8 hbp_len) +{ +	unsigned int len_in_bytes = 0; + +	switch (hbp_len) { +	case ARM_BREAKPOINT_LEN_1: +		len_in_bytes = 1; +		break; +	case ARM_BREAKPOINT_LEN_2: +		len_in_bytes = 2; +		break; +	case ARM_BREAKPOINT_LEN_4: +		len_in_bytes = 4; +		break; +	case ARM_BREAKPOINT_LEN_8: +		len_in_bytes = 8; +		break; +	} + +	return len_in_bytes; +} + +/* + * Check whether bp virtual address is in kernel space. + */ +int arch_check_bp_in_kernelspace(struct perf_event *bp) +{ +	unsigned int len; +	unsigned long va; +	struct arch_hw_breakpoint *info = counter_arch_bp(bp); + +	va = info->address; +	len = get_hbp_len(info->ctrl.len); + +	return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); +} + +/* + * Extract generic type and length encodings from an arch_hw_breakpoint_ctrl. + * Hopefully this will disappear when ptrace can bypass the conversion + * to generic breakpoint descriptions. + */ +int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, +			   int *gen_len, int *gen_type) +{ +	/* Type */ +	switch (ctrl.type) { +	case ARM_BREAKPOINT_EXECUTE: +		*gen_type = HW_BREAKPOINT_X; +		break; +	case ARM_BREAKPOINT_LOAD: +		*gen_type = HW_BREAKPOINT_R; +		break; +	case ARM_BREAKPOINT_STORE: +		*gen_type = HW_BREAKPOINT_W; +		break; +	case ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE: +		*gen_type = HW_BREAKPOINT_RW; +		break; +	default: +		return -EINVAL; +	} + +	/* Len */ +	switch (ctrl.len) { +	case ARM_BREAKPOINT_LEN_1: +		*gen_len = HW_BREAKPOINT_LEN_1; +		break; +	case ARM_BREAKPOINT_LEN_2: +		*gen_len = HW_BREAKPOINT_LEN_2; +		break; +	case ARM_BREAKPOINT_LEN_4: +		*gen_len = HW_BREAKPOINT_LEN_4; +		break; +	case ARM_BREAKPOINT_LEN_8: +		*gen_len = HW_BREAKPOINT_LEN_8; +		break; +	default: +		return -EINVAL; +	} + +	return 0; +} + +/* + * Construct an arch_hw_breakpoint from a perf_event. + */ +static int arch_build_bp_info(struct perf_event *bp) +{ +	struct arch_hw_breakpoint *info = counter_arch_bp(bp); + +	/* Type */ +	switch (bp->attr.bp_type) { +	case HW_BREAKPOINT_X: +		info->ctrl.type = ARM_BREAKPOINT_EXECUTE; +		break; +	case HW_BREAKPOINT_R: +		info->ctrl.type = ARM_BREAKPOINT_LOAD; +		break; +	case HW_BREAKPOINT_W: +		info->ctrl.type = ARM_BREAKPOINT_STORE; +		break; +	case HW_BREAKPOINT_RW: +		info->ctrl.type = ARM_BREAKPOINT_LOAD | ARM_BREAKPOINT_STORE; +		break; +	default: +		return -EINVAL; +	} + +	/* Len */ +	switch (bp->attr.bp_len) { +	case HW_BREAKPOINT_LEN_1: +		info->ctrl.len = ARM_BREAKPOINT_LEN_1; +		break; +	case HW_BREAKPOINT_LEN_2: +		info->ctrl.len = ARM_BREAKPOINT_LEN_2; +		break; +	case HW_BREAKPOINT_LEN_4: +		info->ctrl.len = ARM_BREAKPOINT_LEN_4; +		break; +	case HW_BREAKPOINT_LEN_8: +		info->ctrl.len = ARM_BREAKPOINT_LEN_8; +		break; +	default: +		return -EINVAL; +	} + +	/* +	 * On AArch64, we only permit breakpoints of length 4, whereas +	 * AArch32 also requires breakpoints of length 2 for Thumb. +	 * Watchpoints can be of length 1, 2, 4 or 8 bytes. 
+	 */ +	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { +		if (is_compat_task()) { +			if (info->ctrl.len != ARM_BREAKPOINT_LEN_2 && +			    info->ctrl.len != ARM_BREAKPOINT_LEN_4) +				return -EINVAL; +		} else if (info->ctrl.len != ARM_BREAKPOINT_LEN_4) { +			/* +			 * FIXME: Some tools (I'm looking at you perf) assume +			 *	  that breakpoints should be sizeof(long). This +			 *	  is nonsense. For now, we fix up the parameter +			 *	  but we should probably return -EINVAL instead. +			 */ +			info->ctrl.len = ARM_BREAKPOINT_LEN_4; +		} +	} + +	/* Address */ +	info->address = bp->attr.bp_addr; + +	/* +	 * Privilege +	 * Note that we disallow combined EL0/EL1 breakpoints because +	 * that would complicate the stepping code. +	 */ +	if (arch_check_bp_in_kernelspace(bp)) +		info->ctrl.privilege = AARCH64_BREAKPOINT_EL1; +	else +		info->ctrl.privilege = AARCH64_BREAKPOINT_EL0; + +	/* Enabled? */ +	info->ctrl.enabled = !bp->attr.disabled; + +	return 0; +} + +/* + * Validate the arch-specific HW Breakpoint register settings. + */ +int arch_validate_hwbkpt_settings(struct perf_event *bp) +{ +	struct arch_hw_breakpoint *info = counter_arch_bp(bp); +	int ret; +	u64 alignment_mask, offset; + +	/* Build the arch_hw_breakpoint. */ +	ret = arch_build_bp_info(bp); +	if (ret) +		return ret; + +	/* +	 * Check address alignment. +	 * We don't do any clever alignment correction for watchpoints +	 * because using 64-bit unaligned addresses is deprecated for +	 * AArch64. +	 * +	 * AArch32 tasks expect some simple alignment fixups, so emulate +	 * that here. +	 */ +	if (is_compat_task()) { +		if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) +			alignment_mask = 0x7; +		else +			alignment_mask = 0x3; +		offset = info->address & alignment_mask; +		switch (offset) { +		case 0: +			/* Aligned */ +			break; +		case 1: +			/* Allow single byte watchpoint. */ +			if (info->ctrl.len == ARM_BREAKPOINT_LEN_1) +				break; +		case 2: +			/* Allow halfword watchpoints and breakpoints. */ +			if (info->ctrl.len == ARM_BREAKPOINT_LEN_2) +				break; +		default: +			return -EINVAL; +		} + +		info->address &= ~alignment_mask; +		info->ctrl.len <<= offset; +	} else { +		if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) +			alignment_mask = 0x3; +		else +			alignment_mask = 0x7; +		if (info->address & alignment_mask) +			return -EINVAL; +	} + +	/* +	 * Disallow per-task kernel breakpoints since these would +	 * complicate the stepping code. +	 */ +	if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.bp_target) +		return -EINVAL; + +	return 0; +} + +/* + * Enable/disable all of the breakpoints active at the specified + * exception level at the register level. + * This is used when single-stepping after a breakpoint exception. + */ +static void toggle_bp_registers(int reg, enum debug_el el, int enable) +{ +	int i, max_slots, privilege; +	u32 ctrl; +	struct perf_event **slots; + +	switch (reg) { +	case AARCH64_DBG_REG_BCR: +		slots = this_cpu_ptr(bp_on_reg); +		max_slots = core_num_brps; +		break; +	case AARCH64_DBG_REG_WCR: +		slots = this_cpu_ptr(wp_on_reg); +		max_slots = core_num_wrps; +		break; +	default: +		return; +	} + +	for (i = 0; i < max_slots; ++i) { +		if (!slots[i]) +			continue; + +		privilege = counter_arch_bp(slots[i])->ctrl.privilege; +		if (debug_exception_level(privilege) != el) +			continue; + +		ctrl = read_wb_reg(reg, i); +		if (enable) +			ctrl |= 0x1; +		else +			ctrl &= ~0x1; +		write_wb_reg(reg, i, ctrl); +	} +} + +/* + * Debug exception handlers. 
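
The handlers below re-derive a match from the value and control registers: the programmed address must agree with the faulting address once both are truncated to the alignment mask, and the byte-address-select (BAS) bits kept in ctrl.len must cover the faulting byte. A worked sketch of that test, assuming the usual BAS-style length encodings (ARM_BREAKPOINT_LEN_2 == 0b11 and so on):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define LEN_2	0x3u		/* assumed encoding of ARM_BREAKPOINT_LEN_2 */

/* Mirrors the match test in breakpoint_handler/watchpoint_handler. */
static bool wp_matches(uint64_t wvr, unsigned int bas, uint64_t mask,
		       uint64_t fault_addr)
{
	if ((fault_addr & ~mask) != wvr)	/* aligned addresses must agree */
		return false;
	return bas & (1u << (fault_addr & mask));	/* byte covered by BAS? */
}

int main(void)
{
	/*
	 * arch_validate_hwbkpt_settings() above turns a hypothetical 2-byte
	 * compat watchpoint at 0x1002 into base 0x1000 with BAS 0b1100
	 * (address &= ~0x3, len <<= 2).
	 */
	uint64_t wvr = 0x1000;
	unsigned int bas = LEN_2 << 2;			/* 0b1100 */

	printf("fault at 0x1003: %s\n",
	       wp_matches(wvr, bas, 0x3, 0x1003) ? "match" : "no match");
	printf("fault at 0x1001: %s\n",
	       wp_matches(wvr, bas, 0x3, 0x1001) ? "match" : "no match");
	return 0;
}
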
+ */
+static int breakpoint_handler(unsigned long unused, unsigned int esr,
+			      struct pt_regs *regs)
+{
+	int i, step = 0, *kernel_step;
+	u32 ctrl_reg;
+	u64 addr, val;
+	struct perf_event *bp, **slots;
+	struct debug_info *debug_info;
+	struct arch_hw_breakpoint_ctrl ctrl;
+
+	slots = this_cpu_ptr(bp_on_reg);
+	addr = instruction_pointer(regs);
+	debug_info = &current->thread.debug;
+
+	for (i = 0; i < core_num_brps; ++i) {
+		rcu_read_lock();
+
+		bp = slots[i];
+
+		if (bp == NULL)
+			goto unlock;
+
+		/* Check if the breakpoint value matches. */
+		val = read_wb_reg(AARCH64_DBG_REG_BVR, i);
+		if (val != (addr & ~0x3))
+			goto unlock;
+
+		/* Possible match, check the byte address select to confirm. */
+		ctrl_reg = read_wb_reg(AARCH64_DBG_REG_BCR, i);
+		decode_ctrl_reg(ctrl_reg, &ctrl);
+		if (!((1 << (addr & 0x3)) & ctrl.len))
+			goto unlock;
+
+		counter_arch_bp(bp)->trigger = addr;
+		perf_bp_event(bp, regs);
+
+		/* Do we need to handle the stepping? */
+		if (!bp->overflow_handler)
+			step = 1;
+unlock:
+		rcu_read_unlock();
+	}
+
+	if (!step)
+		return 0;
+
+	if (user_mode(regs)) {
+		debug_info->bps_disabled = 1;
+		toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 0);
+
+		/* If we're already stepping a watchpoint, just return. */
+		if (debug_info->wps_disabled)
+			return 0;
+
+		if (test_thread_flag(TIF_SINGLESTEP))
+			debug_info->suspended_step = 1;
+		else
+			user_enable_single_step(current);
+	} else {
+		toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL1, 0);
+		kernel_step = this_cpu_ptr(&stepping_kernel_bp);
+
+		if (*kernel_step != ARM_KERNEL_STEP_NONE)
+			return 0;
+
+		if (kernel_active_single_step()) {
+			*kernel_step = ARM_KERNEL_STEP_SUSPEND;
+		} else {
+			*kernel_step = ARM_KERNEL_STEP_ACTIVE;
+			kernel_enable_single_step(regs);
+		}
+	}
+
+	return 0;
+}
+
+static int watchpoint_handler(unsigned long addr, unsigned int esr,
+			      struct pt_regs *regs)
+{
+	int i, step = 0, *kernel_step, access;
+	u32 ctrl_reg;
+	u64 val, alignment_mask;
+	struct perf_event *wp, **slots;
+	struct debug_info *debug_info;
+	struct arch_hw_breakpoint *info;
+	struct arch_hw_breakpoint_ctrl ctrl;
+
+	slots = this_cpu_ptr(wp_on_reg);
+	debug_info = &current->thread.debug;
+
+	for (i = 0; i < core_num_wrps; ++i) {
+		rcu_read_lock();
+
+		wp = slots[i];
+
+		if (wp == NULL)
+			goto unlock;
+
+		info = counter_arch_bp(wp);
+		/* AArch32 watchpoints are either 4 or 8 bytes aligned. */
+		if (is_compat_task()) {
+			if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
+				alignment_mask = 0x7;
+			else
+				alignment_mask = 0x3;
+		} else {
+			alignment_mask = 0x7;
+		}
+
+		/* Check if the watchpoint value matches. */
+		val = read_wb_reg(AARCH64_DBG_REG_WVR, i);
+		if (val != (addr & ~alignment_mask))
+			goto unlock;
+
+		/* Possible match, check the byte address select to confirm. */
+		ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i);
+		decode_ctrl_reg(ctrl_reg, &ctrl);
+		if (!((1 << (addr & alignment_mask)) & ctrl.len))
+			goto unlock;
+
+		/*
+		 * Check that the access type matches.
+		 * 0 => load, otherwise => store
+		 */
+		access = (esr & AARCH64_ESR_ACCESS_MASK) ? HW_BREAKPOINT_W :
+			 HW_BREAKPOINT_R;
+		if (!(access & hw_breakpoint_type(wp)))
+			goto unlock;
+
+		info->trigger = addr;
+		perf_bp_event(wp, regs);
+
+		/* Do we need to handle the stepping? */
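		/*
		 * As in the breakpoint case above: when no overflow handler
		 * is attached, the code below masks the watchpoint registers
		 * for the triggering exception level and enables single-step,
		 * which reinstall_suspended_bps() undoes once the access has
		 * completed.
		 */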
+		if (!wp->overflow_handler)
+			step = 1;
+
+unlock:
+		rcu_read_unlock();
+	}
+
+	if (!step)
+		return 0;
+
+	/*
+	 * We always disable EL0 watchpoints because the kernel can
+	 * cause these to fire via an unprivileged access.
+	 */
+	toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 0);
+
+	if (user_mode(regs)) {
+		debug_info->wps_disabled = 1;
+
+		/* If we're already stepping a breakpoint, just return. */
+		if (debug_info->bps_disabled)
+			return 0;
+
+		if (test_thread_flag(TIF_SINGLESTEP))
+			debug_info->suspended_step = 1;
+		else
+			user_enable_single_step(current);
+	} else {
+		toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL1, 0);
+		kernel_step = this_cpu_ptr(&stepping_kernel_bp);
+
+		if (*kernel_step != ARM_KERNEL_STEP_NONE)
+			return 0;
+
+		if (kernel_active_single_step()) {
+			*kernel_step = ARM_KERNEL_STEP_SUSPEND;
+		} else {
+			*kernel_step = ARM_KERNEL_STEP_ACTIVE;
+			kernel_enable_single_step(regs);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Handle single-step exception.
+ */
+int reinstall_suspended_bps(struct pt_regs *regs)
+{
+	struct debug_info *debug_info = &current->thread.debug;
+	int handled_exception = 0, *kernel_step;
+
+	kernel_step = this_cpu_ptr(&stepping_kernel_bp);
+
+	/*
+	 * Called from single-step exception handler.
+	 * Return 0 if execution can resume, 1 if a SIGTRAP should be
+	 * reported.
+	 */
+	if (user_mode(regs)) {
+		if (debug_info->bps_disabled) {
+			debug_info->bps_disabled = 0;
+			toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL0, 1);
+			handled_exception = 1;
+		}
+
+		if (debug_info->wps_disabled) {
+			debug_info->wps_disabled = 0;
+			toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1);
+			handled_exception = 1;
+		}
+
+		if (handled_exception) {
+			if (debug_info->suspended_step) {
+				debug_info->suspended_step = 0;
+				/* Allow exception handling to fall-through. */
+				handled_exception = 0;
+			} else {
+				user_disable_single_step(current);
+			}
+		}
+	} else if (*kernel_step != ARM_KERNEL_STEP_NONE) {
+		toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL1, 1);
+		toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL1, 1);
+
+		if (!debug_info->wps_disabled)
+			toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL0, 1);
+
+		if (*kernel_step != ARM_KERNEL_STEP_SUSPEND) {
+			kernel_disable_single_step();
+			handled_exception = 1;
+		} else {
+			handled_exception = 0;
+		}
+
+		*kernel_step = ARM_KERNEL_STEP_NONE;
+	}
+
+	return !handled_exception;
+}
+
+/*
+ * Context-switcher for restoring suspended breakpoints.
+ */
+void hw_breakpoint_thread_switch(struct task_struct *next)
+{
+	/*
+	 *           current        next
+	 * disabled: 0              0     => The usual case, NOTIFY_DONE
+	 *           0              1     => Disable the registers
+	 *           1              0     => Enable the registers
+	 *           1              1     => NOTIFY_DONE. per-task bps will
+	 *                                   get taken care of by perf.
+	 */
+
+	struct debug_info *current_debug_info, *next_debug_info;
+
+	current_debug_info = &current->thread.debug;
+	next_debug_info = &next->thread.debug;
+
+	/* Update breakpoints. */
+	if (current_debug_info->bps_disabled != next_debug_info->bps_disabled)
+		toggle_bp_registers(AARCH64_DBG_REG_BCR,
+				    DBG_ACTIVE_EL0,
+				    !next_debug_info->bps_disabled);
+
+	/* Update watchpoints.
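	 * Same rule as for the breakpoints above: only the EL0 enable bits
	 * are toggled, based on the wps_disabled state recorded in each
	 * task's debug_info.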
*/ +	if (current_debug_info->wps_disabled != next_debug_info->wps_disabled) +		toggle_bp_registers(AARCH64_DBG_REG_WCR, +				    DBG_ACTIVE_EL0, +				    !next_debug_info->wps_disabled); +} + +/* + * CPU initialisation. + */ +static void hw_breakpoint_reset(void *unused) +{ +	int i; +	struct perf_event **slots; +	/* +	 * When a CPU goes through cold-boot, it does not have any installed +	 * slot, so it is safe to share the same function for restoring and +	 * resetting breakpoints; when a CPU is hotplugged in, it goes +	 * through the slots, which are all empty, hence it just resets control +	 * and value for debug registers. +	 * When this function is triggered on warm-boot through a CPU PM +	 * notifier some slots might be initialized; if so they are +	 * reprogrammed according to the debug slots content. +	 */ +	for (slots = this_cpu_ptr(bp_on_reg), i = 0; i < core_num_brps; ++i) { +		if (slots[i]) { +			hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE); +		} else { +			write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL); +			write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL); +		} +	} + +	for (slots = this_cpu_ptr(wp_on_reg), i = 0; i < core_num_wrps; ++i) { +		if (slots[i]) { +			hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE); +		} else { +			write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL); +			write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL); +		} +	} +} + +static int hw_breakpoint_reset_notify(struct notifier_block *self, +						unsigned long action, +						void *hcpu) +{ +	int cpu = (long)hcpu; +	if (action == CPU_ONLINE) +		smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1); +	return NOTIFY_OK; +} + +static struct notifier_block hw_breakpoint_reset_nb = { +	.notifier_call = hw_breakpoint_reset_notify, +}; + +#ifdef CONFIG_ARM64_CPU_SUSPEND +extern void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)); +#else +static inline void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) +{ +} +#endif + +/* + * One-time initialisation. + */ +static int __init arch_hw_breakpoint_init(void) +{ +	core_num_brps = get_num_brps(); +	core_num_wrps = get_num_wrps(); + +	pr_info("found %d breakpoint and %d watchpoint registers.\n", +		core_num_brps, core_num_wrps); + +	cpu_notifier_register_begin(); + +	/* +	 * Reset the breakpoint resources. We assume that a halting +	 * debugger will leave the world in a nice state for us. +	 */ +	smp_call_function(hw_breakpoint_reset, NULL, 1); +	hw_breakpoint_reset(NULL); + +	/* Register debug fault handlers. */ +	hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP, +			      TRAP_HWBKPT, "hw-breakpoint handler"); +	hook_debug_fault_code(DBG_ESR_EVT_HWWP, watchpoint_handler, SIGTRAP, +			      TRAP_HWBKPT, "hw-watchpoint handler"); + +	/* Register hotplug notifier. */ +	__register_cpu_notifier(&hw_breakpoint_reset_nb); + +	cpu_notifier_register_done(); + +	/* Register cpu_suspend hw breakpoint restore hook */ +	cpu_suspend_set_dbg_restorer(hw_breakpoint_reset); + +	return 0; +} +arch_initcall(arch_hw_breakpoint_init); + +void hw_breakpoint_pmu_read(struct perf_event *bp) +{ +} + +/* + * Dummy function to register with die_notifier. + */ +int hw_breakpoint_exceptions_notify(struct notifier_block *unused, +				    unsigned long val, void *data) +{ +	return NOTIFY_DONE; +} diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S new file mode 100644 index 00000000000..0959611d9ff --- /dev/null +++ b/arch/arm64/kernel/hyp-stub.S @@ -0,0 +1,109 @@ +/* + * Hypervisor stub + * + * Copyright (C) 2012 ARM Ltd. 
+ * Author:	Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/init.h> +#include <linux/linkage.h> + +#include <asm/assembler.h> +#include <asm/ptrace.h> +#include <asm/virt.h> + +	.text +	.align 11 + +ENTRY(__hyp_stub_vectors) +	ventry	el2_sync_invalid		// Synchronous EL2t +	ventry	el2_irq_invalid			// IRQ EL2t +	ventry	el2_fiq_invalid			// FIQ EL2t +	ventry	el2_error_invalid		// Error EL2t + +	ventry	el2_sync_invalid		// Synchronous EL2h +	ventry	el2_irq_invalid			// IRQ EL2h +	ventry	el2_fiq_invalid			// FIQ EL2h +	ventry	el2_error_invalid		// Error EL2h + +	ventry	el1_sync			// Synchronous 64-bit EL1 +	ventry	el1_irq_invalid			// IRQ 64-bit EL1 +	ventry	el1_fiq_invalid			// FIQ 64-bit EL1 +	ventry	el1_error_invalid		// Error 64-bit EL1 + +	ventry	el1_sync_invalid		// Synchronous 32-bit EL1 +	ventry	el1_irq_invalid			// IRQ 32-bit EL1 +	ventry	el1_fiq_invalid			// FIQ 32-bit EL1 +	ventry	el1_error_invalid		// Error 32-bit EL1 +ENDPROC(__hyp_stub_vectors) + +	.align 11 + +el1_sync: +	mrs	x1, esr_el2 +	lsr	x1, x1, #26 +	cmp	x1, #0x16 +	b.ne	2f				// Not an HVC trap +	cbz	x0, 1f +	msr	vbar_el2, x0			// Set vbar_el2 +	b	2f +1:	mrs	x0, vbar_el2			// Return vbar_el2 +2:	eret +ENDPROC(el1_sync) + +.macro invalid_vector	label +\label: +	b \label +ENDPROC(\label) +.endm + +	invalid_vector	el2_sync_invalid +	invalid_vector	el2_irq_invalid +	invalid_vector	el2_fiq_invalid +	invalid_vector	el2_error_invalid +	invalid_vector	el1_sync_invalid +	invalid_vector	el1_irq_invalid +	invalid_vector	el1_fiq_invalid +	invalid_vector	el1_error_invalid + +/* + * __hyp_set_vectors: Call this after boot to set the initial hypervisor + * vectors as part of hypervisor installation.  On an SMP system, this should + * be called on each CPU. + * + * x0 must be the physical address of the new vector table, and must be + * 2KB aligned. + * + * Before calling this, you must check that the stub hypervisor is installed + * everywhere, by waiting for any secondary CPUs to be brought up and then + * checking that is_hyp_mode_available() is true. + * + * If not, there is a pre-existing hypervisor, some CPUs failed to boot, or + * something else went wrong... in such cases, trying to install a new + * hypervisor is unlikely to work as desired. + * + * When you call into your shiny new hypervisor, sp_el2 will contain junk, + * so you will need to set that to something sensible at the new hypervisor's + * initialisation entry point. + */ + +ENTRY(__hyp_get_vectors) +	mov	x0, xzr +	// fall through +ENTRY(__hyp_set_vectors) +	hvc	#0 +	ret +ENDPROC(__hyp_get_vectors) +ENDPROC(__hyp_set_vectors) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c new file mode 100644 index 00000000000..92f36835486 --- /dev/null +++ b/arch/arm64/kernel/insn.c @@ -0,0 +1,304 @@ +/* + * Copyright (C) 2013 Huawei Ltd. 
+ * Author: Jiang Liu <liuj97@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/bitops.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/stop_machine.h> +#include <linux/uaccess.h> +#include <asm/cacheflush.h> +#include <asm/insn.h> + +static int aarch64_insn_encoding_class[] = { +	AARCH64_INSN_CLS_UNKNOWN, +	AARCH64_INSN_CLS_UNKNOWN, +	AARCH64_INSN_CLS_UNKNOWN, +	AARCH64_INSN_CLS_UNKNOWN, +	AARCH64_INSN_CLS_LDST, +	AARCH64_INSN_CLS_DP_REG, +	AARCH64_INSN_CLS_LDST, +	AARCH64_INSN_CLS_DP_FPSIMD, +	AARCH64_INSN_CLS_DP_IMM, +	AARCH64_INSN_CLS_DP_IMM, +	AARCH64_INSN_CLS_BR_SYS, +	AARCH64_INSN_CLS_BR_SYS, +	AARCH64_INSN_CLS_LDST, +	AARCH64_INSN_CLS_DP_REG, +	AARCH64_INSN_CLS_LDST, +	AARCH64_INSN_CLS_DP_FPSIMD, +}; + +enum aarch64_insn_encoding_class __kprobes aarch64_get_insn_class(u32 insn) +{ +	return aarch64_insn_encoding_class[(insn >> 25) & 0xf]; +} + +/* NOP is an alias of HINT */ +bool __kprobes aarch64_insn_is_nop(u32 insn) +{ +	if (!aarch64_insn_is_hint(insn)) +		return false; + +	switch (insn & 0xFE0) { +	case AARCH64_INSN_HINT_YIELD: +	case AARCH64_INSN_HINT_WFE: +	case AARCH64_INSN_HINT_WFI: +	case AARCH64_INSN_HINT_SEV: +	case AARCH64_INSN_HINT_SEVL: +		return false; +	default: +		return true; +	} +} + +/* + * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always + * little-endian. + */ +int __kprobes aarch64_insn_read(void *addr, u32 *insnp) +{ +	int ret; +	u32 val; + +	ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE); +	if (!ret) +		*insnp = le32_to_cpu(val); + +	return ret; +} + +int __kprobes aarch64_insn_write(void *addr, u32 insn) +{ +	insn = cpu_to_le32(insn); +	return probe_kernel_write(addr, &insn, AARCH64_INSN_SIZE); +} + +static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) +{ +	if (aarch64_get_insn_class(insn) != AARCH64_INSN_CLS_BR_SYS) +		return false; + +	return	aarch64_insn_is_b(insn) || +		aarch64_insn_is_bl(insn) || +		aarch64_insn_is_svc(insn) || +		aarch64_insn_is_hvc(insn) || +		aarch64_insn_is_smc(insn) || +		aarch64_insn_is_brk(insn) || +		aarch64_insn_is_nop(insn); +} + +/* + * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a + * Section B2.6.5 "Concurrent modification and execution of instructions": + * Concurrent modification and execution of instructions can lead to the + * resulting instruction performing any behavior that can be achieved by + * executing any sequence of instructions that can be executed from the + * same Exception level, except where the instruction before modification + * and the instruction after modification is a B, BL, NOP, BKPT, SVC, HVC, + * or SMC instruction. 
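 *
 * In practice this is why, for example, the jump label code can turn a
 * NOP into a branch with aarch64_insn_patch_text_nosync() plus cache
 * maintenance, while rewriting an arbitrary instruction is routed
 * through aarch64_insn_patch_text_sync() and stop_machine() below.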
+ */ +bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn) +{ +	return __aarch64_insn_hotpatch_safe(old_insn) && +	       __aarch64_insn_hotpatch_safe(new_insn); +} + +int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) +{ +	u32 *tp = addr; +	int ret; + +	/* A64 instructions must be word aligned */ +	if ((uintptr_t)tp & 0x3) +		return -EINVAL; + +	ret = aarch64_insn_write(tp, insn); +	if (ret == 0) +		flush_icache_range((uintptr_t)tp, +				   (uintptr_t)tp + AARCH64_INSN_SIZE); + +	return ret; +} + +struct aarch64_insn_patch { +	void		**text_addrs; +	u32		*new_insns; +	int		insn_cnt; +	atomic_t	cpu_count; +}; + +static int __kprobes aarch64_insn_patch_text_cb(void *arg) +{ +	int i, ret = 0; +	struct aarch64_insn_patch *pp = arg; + +	/* The first CPU becomes master */ +	if (atomic_inc_return(&pp->cpu_count) == 1) { +		for (i = 0; ret == 0 && i < pp->insn_cnt; i++) +			ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], +							     pp->new_insns[i]); +		/* +		 * aarch64_insn_patch_text_nosync() calls flush_icache_range(), +		 * which ends with "dsb; isb" pair guaranteeing global +		 * visibility. +		 */ +		atomic_set(&pp->cpu_count, -1); +	} else { +		while (atomic_read(&pp->cpu_count) != -1) +			cpu_relax(); +		isb(); +	} + +	return ret; +} + +int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt) +{ +	struct aarch64_insn_patch patch = { +		.text_addrs = addrs, +		.new_insns = insns, +		.insn_cnt = cnt, +		.cpu_count = ATOMIC_INIT(0), +	}; + +	if (cnt <= 0) +		return -EINVAL; + +	return stop_machine(aarch64_insn_patch_text_cb, &patch, +			    cpu_online_mask); +} + +int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt) +{ +	int ret; +	u32 insn; + +	/* Unsafe to patch multiple instructions without synchronizaiton */ +	if (cnt == 1) { +		ret = aarch64_insn_read(addrs[0], &insn); +		if (ret) +			return ret; + +		if (aarch64_insn_hotpatch_safe(insn, insns[0])) { +			/* +			 * ARMv8 architecture doesn't guarantee all CPUs see +			 * the new instruction after returning from function +			 * aarch64_insn_patch_text_nosync(). So send IPIs to +			 * all other CPUs to achieve instruction +			 * synchronization. +			 */ +			ret = aarch64_insn_patch_text_nosync(addrs[0], insns[0]); +			kick_all_cpus_sync(); +			return ret; +		} +	} + +	return aarch64_insn_patch_text_sync(addrs, insns, cnt); +} + +u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, +				  u32 insn, u64 imm) +{ +	u32 immlo, immhi, lomask, himask, mask; +	int shift; + +	switch (type) { +	case AARCH64_INSN_IMM_ADR: +		lomask = 0x3; +		himask = 0x7ffff; +		immlo = imm & lomask; +		imm >>= 2; +		immhi = imm & himask; +		imm = (immlo << 24) | (immhi); +		mask = (lomask << 24) | (himask); +		shift = 5; +		break; +	case AARCH64_INSN_IMM_26: +		mask = BIT(26) - 1; +		shift = 0; +		break; +	case AARCH64_INSN_IMM_19: +		mask = BIT(19) - 1; +		shift = 5; +		break; +	case AARCH64_INSN_IMM_16: +		mask = BIT(16) - 1; +		shift = 5; +		break; +	case AARCH64_INSN_IMM_14: +		mask = BIT(14) - 1; +		shift = 5; +		break; +	case AARCH64_INSN_IMM_12: +		mask = BIT(12) - 1; +		shift = 10; +		break; +	case AARCH64_INSN_IMM_9: +		mask = BIT(9) - 1; +		shift = 12; +		break; +	default: +		pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n", +			type); +		return 0; +	} + +	/* Update the immediate field. 
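	 * As a worked example (offset assumed): a forward branch of 0x1000
	 * bytes is encoded with AARCH64_INSN_IMM_26, i.e. mask = BIT(26) - 1
	 * and shift = 0, and the caller passes imm = 0x1000 >> 2 = 0x400, so
	 * only the low 26 bits of the instruction word are rewritten below.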
*/ +	insn &= ~(mask << shift); +	insn |= (imm & mask) << shift; + +	return insn; +} + +u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, +					  enum aarch64_insn_branch_type type) +{ +	u32 insn; +	long offset; + +	/* +	 * PC: A 64-bit Program Counter holding the address of the current +	 * instruction. A64 instructions must be word-aligned. +	 */ +	BUG_ON((pc & 0x3) || (addr & 0x3)); + +	/* +	 * B/BL support [-128M, 128M) offset +	 * ARM64 virtual address arrangement guarantees all kernel and module +	 * texts are within +/-128M. +	 */ +	offset = ((long)addr - (long)pc); +	BUG_ON(offset < -SZ_128M || offset >= SZ_128M); + +	if (type == AARCH64_INSN_BRANCH_LINK) +		insn = aarch64_insn_get_bl_value(); +	else +		insn = aarch64_insn_get_b_value(); + +	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn, +					     offset >> 2); +} + +u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_op op) +{ +	return aarch64_insn_get_hint_value() | op; +} + +u32 __kprobes aarch64_insn_gen_nop(void) +{ +	return aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP); +} diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c new file mode 100644 index 00000000000..7d37ead4d19 --- /dev/null +++ b/arch/arm64/kernel/io.c @@ -0,0 +1,64 @@ +/* + * Based on arch/arm/kernel/io.c + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/export.h> +#include <linux/types.h> +#include <linux/io.h> + +/* + * Copy data from IO memory space to "real" memory space. + */ +void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) +{ +	unsigned char *t = to; +	while (count) { +		count--; +		*t = readb(from); +		t++; +		from++; +	} +} +EXPORT_SYMBOL(__memcpy_fromio); + +/* + * Copy data from "real" memory space to IO memory space. + */ +void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count) +{ +	const unsigned char *f = from; +	while (count) { +		count--; +		writeb(*f, to); +		f++; +		to++; +	} +} +EXPORT_SYMBOL(__memcpy_toio); + +/* + * "memset" on IO memory space. + */ +void __memset_io(volatile void __iomem *dst, int c, size_t count) +{ +	while (count) { +		count--; +		writeb(c, dst); +		dst++; +	} +} +EXPORT_SYMBOL(__memset_io); diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c new file mode 100644 index 00000000000..0f08dfd69eb --- /dev/null +++ b/arch/arm64/kernel/irq.c @@ -0,0 +1,148 @@ +/* + * Based on arch/arm/kernel/irq.c + * + * Copyright (C) 1992 Linus Torvalds + * Modifications for ARM processor Copyright (C) 1995-2000 Russell King. + * Support for Dynamic Tick Timer Copyright (C) 2004-2005 Nokia Corporation. + * Dynamic Tick Timer written by Tony Lindgren <tony@atomide.com> and + * Tuukka Tikkanen <tuukka.tikkanen@elektrobit.com>. + * Copyright (C) 2012 ARM Ltd. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/kernel_stat.h> +#include <linux/irq.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/irqchip.h> +#include <linux/seq_file.h> +#include <linux/ratelimit.h> + +unsigned long irq_err_count; + +int arch_show_interrupts(struct seq_file *p, int prec) +{ +#ifdef CONFIG_SMP +	show_ipi_list(p, prec); +#endif +	seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count); +	return 0; +} + +/* + * handle_IRQ handles all hardware IRQ's.  Decoded IRQs should + * not come via this function.  Instead, they should provide their + * own 'handler'.  Used by platform code implementing C-based 1st + * level decoding. + */ +void handle_IRQ(unsigned int irq, struct pt_regs *regs) +{ +	struct pt_regs *old_regs = set_irq_regs(regs); + +	irq_enter(); + +	/* +	 * Some hardware gives randomly wrong interrupts.  Rather +	 * than crashing, do something sensible. +	 */ +	if (unlikely(irq >= nr_irqs)) { +		pr_warn_ratelimited("Bad IRQ%u\n", irq); +		ack_bad_irq(irq); +	} else { +		generic_handle_irq(irq); +	} + +	irq_exit(); +	set_irq_regs(old_regs); +} + +void __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) +{ +	if (handle_arch_irq) +		return; + +	handle_arch_irq = handle_irq; +} + +void __init init_IRQ(void) +{ +	irqchip_init(); +	if (!handle_arch_irq) +		panic("No interrupt controller found."); +} + +#ifdef CONFIG_HOTPLUG_CPU +static bool migrate_one_irq(struct irq_desc *desc) +{ +	struct irq_data *d = irq_desc_get_irq_data(desc); +	const struct cpumask *affinity = d->affinity; +	struct irq_chip *c; +	bool ret = false; + +	/* +	 * If this is a per-CPU interrupt, or the affinity does not +	 * include this CPU, then we have nothing to do. +	 */ +	if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) +		return false; + +	if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) +		ret = true; + +	/* +	 * when using forced irq_set_affinity we must ensure that the cpu +	 * being offlined is not present in the affinity mask, it may be +	 * selected as the target CPU otherwise +	 */ +	affinity = cpu_online_mask; +	c = irq_data_get_irq_chip(d); +	if (!c->irq_set_affinity) +		pr_debug("IRQ%u: unable to set affinity\n", d->irq); +	else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret) +		cpumask_copy(d->affinity, affinity); + +	return ret; +} + +/* + * The current CPU has been marked offline.  Migrate IRQs off this CPU. + * If the affinity settings do not allow other CPUs, force them onto any + * available CPU. + * + * Note: we must iterate over all IRQs, whether they have an attached + * action structure or not, as we need to get chained interrupts too. 
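 *
 * migrate_one_irq() above does the per-descriptor work: interrupts
 * still targeting the CPU going down are forced onto cpu_online_mask,
 * and the warning below fires when the original affinity mask had no
 * online CPU left in it, i.e. the requested placement could not be
 * preserved.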
+ */ +void migrate_irqs(void) +{ +	unsigned int i; +	struct irq_desc *desc; +	unsigned long flags; + +	local_irq_save(flags); + +	for_each_irq_desc(i, desc) { +		bool affinity_broken; + +		raw_spin_lock(&desc->lock); +		affinity_broken = migrate_one_irq(desc); +		raw_spin_unlock(&desc->lock); + +		if (affinity_broken) +			pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n", +					    i, smp_processor_id()); +	} + +	local_irq_restore(flags); +} +#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c new file mode 100644 index 00000000000..263a166291f --- /dev/null +++ b/arch/arm64/kernel/jump_label.c @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2013 Huawei Ltd. + * Author: Jiang Liu <liuj97@gmail.com> + * + * Based on arch/arm/kernel/jump_label.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/kernel.h> +#include <linux/jump_label.h> +#include <asm/insn.h> + +#ifdef HAVE_JUMP_LABEL + +static void __arch_jump_label_transform(struct jump_entry *entry, +					enum jump_label_type type, +					bool is_static) +{ +	void *addr = (void *)entry->code; +	u32 insn; + +	if (type == JUMP_LABEL_ENABLE) { +		insn = aarch64_insn_gen_branch_imm(entry->code, +						   entry->target, +						   AARCH64_INSN_BRANCH_NOLINK); +	} else { +		insn = aarch64_insn_gen_nop(); +	} + +	if (is_static) +		aarch64_insn_patch_text_nosync(addr, insn); +	else +		aarch64_insn_patch_text(&addr, &insn, 1); +} + +void arch_jump_label_transform(struct jump_entry *entry, +			       enum jump_label_type type) +{ +	__arch_jump_label_transform(entry, type, false); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, +				      enum jump_label_type type) +{ +	__arch_jump_label_transform(entry, type, true); +} + +#endif	/* HAVE_JUMP_LABEL */ diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c new file mode 100644 index 00000000000..75c9cf1aafe --- /dev/null +++ b/arch/arm64/kernel/kgdb.c @@ -0,0 +1,336 @@ +/* + * AArch64 KGDB support + * + * Based on arch/arm/kernel/kgdb.c + * + * Copyright (C) 2013 Cavium Inc. + * Author: Vijaya Kumar K <vijaya.kumar@caviumnetworks.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/irq.h> +#include <linux/kdebug.h> +#include <linux/kgdb.h> +#include <asm/traps.h> + +struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { +	{ "x0", 8, offsetof(struct pt_regs, regs[0])}, +	{ "x1", 8, offsetof(struct pt_regs, regs[1])}, +	{ "x2", 8, offsetof(struct pt_regs, regs[2])}, +	{ "x3", 8, offsetof(struct pt_regs, regs[3])}, +	{ "x4", 8, offsetof(struct pt_regs, regs[4])}, +	{ "x5", 8, offsetof(struct pt_regs, regs[5])}, +	{ "x6", 8, offsetof(struct pt_regs, regs[6])}, +	{ "x7", 8, offsetof(struct pt_regs, regs[7])}, +	{ "x8", 8, offsetof(struct pt_regs, regs[8])}, +	{ "x9", 8, offsetof(struct pt_regs, regs[9])}, +	{ "x10", 8, offsetof(struct pt_regs, regs[10])}, +	{ "x11", 8, offsetof(struct pt_regs, regs[11])}, +	{ "x12", 8, offsetof(struct pt_regs, regs[12])}, +	{ "x13", 8, offsetof(struct pt_regs, regs[13])}, +	{ "x14", 8, offsetof(struct pt_regs, regs[14])}, +	{ "x15", 8, offsetof(struct pt_regs, regs[15])}, +	{ "x16", 8, offsetof(struct pt_regs, regs[16])}, +	{ "x17", 8, offsetof(struct pt_regs, regs[17])}, +	{ "x18", 8, offsetof(struct pt_regs, regs[18])}, +	{ "x19", 8, offsetof(struct pt_regs, regs[19])}, +	{ "x20", 8, offsetof(struct pt_regs, regs[20])}, +	{ "x21", 8, offsetof(struct pt_regs, regs[21])}, +	{ "x22", 8, offsetof(struct pt_regs, regs[22])}, +	{ "x23", 8, offsetof(struct pt_regs, regs[23])}, +	{ "x24", 8, offsetof(struct pt_regs, regs[24])}, +	{ "x25", 8, offsetof(struct pt_regs, regs[25])}, +	{ "x26", 8, offsetof(struct pt_regs, regs[26])}, +	{ "x27", 8, offsetof(struct pt_regs, regs[27])}, +	{ "x28", 8, offsetof(struct pt_regs, regs[28])}, +	{ "x29", 8, offsetof(struct pt_regs, regs[29])}, +	{ "x30", 8, offsetof(struct pt_regs, regs[30])}, +	{ "sp", 8, offsetof(struct pt_regs, sp)}, +	{ "pc", 8, offsetof(struct pt_regs, pc)}, +	{ "pstate", 8, offsetof(struct pt_regs, pstate)}, +	{ "v0", 16, -1 }, +	{ "v1", 16, -1 }, +	{ "v2", 16, -1 }, +	{ "v3", 16, -1 }, +	{ "v4", 16, -1 }, +	{ "v5", 16, -1 }, +	{ "v6", 16, -1 }, +	{ "v7", 16, -1 }, +	{ "v8", 16, -1 }, +	{ "v9", 16, -1 }, +	{ "v10", 16, -1 }, +	{ "v11", 16, -1 }, +	{ "v12", 16, -1 }, +	{ "v13", 16, -1 }, +	{ "v14", 16, -1 }, +	{ "v15", 16, -1 }, +	{ "v16", 16, -1 }, +	{ "v17", 16, -1 }, +	{ "v18", 16, -1 }, +	{ "v19", 16, -1 }, +	{ "v20", 16, -1 }, +	{ "v21", 16, -1 }, +	{ "v22", 16, -1 }, +	{ "v23", 16, -1 }, +	{ "v24", 16, -1 }, +	{ "v25", 16, -1 }, +	{ "v26", 16, -1 }, +	{ "v27", 16, -1 }, +	{ "v28", 16, -1 }, +	{ "v29", 16, -1 }, +	{ "v30", 16, -1 }, +	{ "v31", 16, -1 }, +	{ "fpsr", 4, -1 }, +	{ "fpcr", 4, -1 }, +}; + +char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) +{ +	if (regno >= DBG_MAX_REG_NUM || regno < 0) +		return NULL; + +	if (dbg_reg_def[regno].offset != -1) +		memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, +		       dbg_reg_def[regno].size); +	else +		memset(mem, 0, dbg_reg_def[regno].size); +	return dbg_reg_def[regno].name; +} + +int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) +{ +	if (regno >= DBG_MAX_REG_NUM || regno < 0) +		return -EINVAL; + +	if (dbg_reg_def[regno].offset != -1) +		memcpy((void *)regs + dbg_reg_def[regno].offset, mem, +		       dbg_reg_def[regno].size); +	return 0; +} + +void +sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) +{ +	struct pt_regs *thread_regs; + +	/* Initialize to zero */ +	memset((char *)gdb_regs, 0, NUMREGBYTES); +	thread_regs = task_pt_regs(task); +	memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES); +} + +void kgdb_arch_set_pc(struct pt_regs *regs, 
unsigned long pc) +{ +	regs->pc = pc; +} + +static int compiled_break; + +static void kgdb_arch_update_addr(struct pt_regs *regs, +				char *remcom_in_buffer) +{ +	unsigned long addr; +	char *ptr; + +	ptr = &remcom_in_buffer[1]; +	if (kgdb_hex2long(&ptr, &addr)) +		kgdb_arch_set_pc(regs, addr); +	else if (compiled_break == 1) +		kgdb_arch_set_pc(regs, regs->pc + 4); + +	compiled_break = 0; +} + +int kgdb_arch_handle_exception(int exception_vector, int signo, +			       int err_code, char *remcom_in_buffer, +			       char *remcom_out_buffer, +			       struct pt_regs *linux_regs) +{ +	int err; + +	switch (remcom_in_buffer[0]) { +	case 'D': +	case 'k': +		/* +		 * Packet D (Detach), k (kill). No special handling +		 * is required here. Handle same as c packet. +		 */ +	case 'c': +		/* +		 * Packet c (Continue) to continue executing. +		 * Set pc to required address. +		 * Try to read optional parameter and set pc. +		 * If this was a compiled breakpoint, we need to move +		 * to the next instruction else we will just breakpoint +		 * over and over again. +		 */ +		kgdb_arch_update_addr(linux_regs, remcom_in_buffer); +		atomic_set(&kgdb_cpu_doing_single_step, -1); +		kgdb_single_step =  0; + +		/* +		 * Received continue command, disable single step +		 */ +		if (kernel_active_single_step()) +			kernel_disable_single_step(); + +		err = 0; +		break; +	case 's': +		/* +		 * Update step address value with address passed +		 * with step packet. +		 * On debug exception return PC is copied to ELR +		 * So just update PC. +		 * If no step address is passed, resume from the address +		 * pointed by PC. Do not update PC +		 */ +		kgdb_arch_update_addr(linux_regs, remcom_in_buffer); +		atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); +		kgdb_single_step =  1; + +		/* +		 * Enable single step handling +		 */ +		if (!kernel_active_single_step()) +			kernel_enable_single_step(linux_regs); +		err = 0; +		break; +	default: +		err = -1; +	} +	return err; +} + +static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) +{ +	kgdb_handle_exception(1, SIGTRAP, 0, regs); +	return 0; +} + +static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) +{ +	compiled_break = 1; +	kgdb_handle_exception(1, SIGTRAP, 0, regs); + +	return 0; +} + +static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) +{ +	kgdb_handle_exception(1, SIGTRAP, 0, regs); +	return 0; +} + +static struct break_hook kgdb_brkpt_hook = { +	.esr_mask	= 0xffffffff, +	.esr_val	= DBG_ESR_VAL_BRK(KGDB_DYN_DGB_BRK_IMM), +	.fn		= kgdb_brk_fn +}; + +static struct break_hook kgdb_compiled_brkpt_hook = { +	.esr_mask	= 0xffffffff, +	.esr_val	= DBG_ESR_VAL_BRK(KDBG_COMPILED_DBG_BRK_IMM), +	.fn		= kgdb_compiled_brk_fn +}; + +static struct step_hook kgdb_step_hook = { +	.fn		= kgdb_step_brk_fn +}; + +static void kgdb_call_nmi_hook(void *ignored) +{ +	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); +} + +void kgdb_roundup_cpus(unsigned long flags) +{ +	local_irq_enable(); +	smp_call_function(kgdb_call_nmi_hook, NULL, 0); +	local_irq_disable(); +} + +static int __kgdb_notify(struct die_args *args, unsigned long cmd) +{ +	struct pt_regs *regs = args->regs; + +	if (kgdb_handle_exception(1, args->signr, cmd, regs)) +		return NOTIFY_DONE; +	return NOTIFY_STOP; +} + +static int +kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ +	unsigned long flags; +	int ret; + +	local_irq_save(flags); +	ret = __kgdb_notify(ptr, cmd); +	local_irq_restore(flags); + +	return ret; +} + +static struct 
notifier_block kgdb_notifier = { +	.notifier_call	= kgdb_notify, +	/* +	 * Want to be lowest priority +	 */ +	.priority	= -INT_MAX, +}; + +/* + * kgdb_arch_init - Perform any architecture specific initalization. + * This function will handle the initalization of any architecture + * specific callbacks. + */ +int kgdb_arch_init(void) +{ +	int ret = register_die_notifier(&kgdb_notifier); + +	if (ret != 0) +		return ret; + +	register_break_hook(&kgdb_brkpt_hook); +	register_break_hook(&kgdb_compiled_brkpt_hook); +	register_step_hook(&kgdb_step_hook); +	return 0; +} + +/* + * kgdb_arch_exit - Perform any architecture specific uninitalization. + * This function will handle the uninitalization of any architecture + * specific callbacks, for dynamic registration and unregistration. + */ +void kgdb_arch_exit(void) +{ +	unregister_break_hook(&kgdb_brkpt_hook); +	unregister_break_hook(&kgdb_compiled_brkpt_hook); +	unregister_step_hook(&kgdb_step_hook); +	unregister_die_notifier(&kgdb_notifier); +} + +/* + * ARM instructions are always in LE. + * Break instruction is encoded in LE format + */ +struct kgdb_arch arch_kgdb_ops = { +	.gdb_bpt_instr = { +		KGDB_DYN_BRK_INS_BYTE0, +		KGDB_DYN_BRK_INS_BYTE1, +		KGDB_DYN_BRK_INS_BYTE2, +		KGDB_DYN_BRK_INS_BYTE3, +	} +}; diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S new file mode 100644 index 00000000000..7787208e8cc --- /dev/null +++ b/arch/arm64/kernel/kuser32.S @@ -0,0 +1,118 @@ +/* + * Low-level user helpers placed in the vectors page for AArch32. + * Based on the kuser helpers in arch/arm/kernel/entry-armv.S. + * + * Copyright (C) 2005-2011 Nicolas Pitre <nico@fluxnic.net> + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + * + * + * AArch32 user helpers. + * + * Each segment is 32-byte aligned and will be moved to the top of the high + * vector page.  New segments (if ever needed) must be added in front of + * existing ones.  This mechanism should be used only for things that are + * really small and justified, and not be abused freely. + * + * See Documentation/arm/kernel_user_helpers.txt for formal definitions. 
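 *
 * As a rough usage sketch (the typedef and cast are the caller's, not
 * something provided by this file), a 32-bit process reaches the
 * cmpxchg helper through its fixed address and gets 0 back when *ptr
 * was updated:
 *
 *	typedef int (*kuser_cmpxchg_fn)(int oldval, int newval,
 *					volatile int *ptr);
 *	kuser_cmpxchg_fn kuser_cmpxchg = (kuser_cmpxchg_fn)0xffff0fc0;
 *
 *	if (kuser_cmpxchg(oldval, newval, ptr) == 0)
 *		succeeded();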
+ */ + +#include <asm/unistd32.h> + +	.align	5 +	.globl	__kuser_helper_start +__kuser_helper_start: + +__kuser_cmpxchg64:			// 0xffff0f60 +	.inst	0xe92d00f0		//	push		{r4, r5, r6, r7} +	.inst	0xe1c040d0		//	ldrd		r4, r5, [r0] +	.inst	0xe1c160d0		//	ldrd		r6, r7, [r1] +	.inst	0xe1b20f9f		// 1:	ldrexd		r0, r1, [r2] +	.inst	0xe0303004		//	eors		r3, r0, r4 +	.inst	0x00313005		//	eoreqs		r3, r1, r5 +	.inst	0x01a23e96		//	stlexdeq	r3, r6, [r2] +	.inst	0x03330001		//	teqeq		r3, #1 +	.inst	0x0afffff9		//	beq		1b +	.inst	0xf57ff05b		//	dmb		ish +	.inst	0xe2730000		//	rsbs		r0, r3, #0 +	.inst	0xe8bd00f0		//	pop		{r4, r5, r6, r7} +	.inst	0xe12fff1e		//	bx		lr + +	.align	5 +__kuser_memory_barrier:			// 0xffff0fa0 +	.inst	0xf57ff05b		//	dmb		ish +	.inst	0xe12fff1e		//	bx		lr + +	.align	5 +__kuser_cmpxchg:			// 0xffff0fc0 +	.inst	0xe1923f9f		// 1:	ldrex		r3, [r2] +	.inst	0xe0533000		//	subs		r3, r3, r0 +	.inst	0x01823e91		//	stlexeq		r3, r1, [r2] +	.inst	0x03330001		//	teqeq		r3, #1 +	.inst	0x0afffffa		//	beq		1b +	.inst	0xf57ff05b		//	dmb		ish +	.inst	0xe2730000		//	rsbs		r0, r3, #0 +	.inst	0xe12fff1e		//	bx		lr + +	.align	5 +__kuser_get_tls:			// 0xffff0fe0 +	.inst	0xee1d0f70		//	mrc		p15, 0, r0, c13, c0, 3 +	.inst	0xe12fff1e		//	bx		lr +	.rep	5 +	.word	0 +	.endr + +__kuser_helper_version:			// 0xffff0ffc +	.word	((__kuser_helper_end - __kuser_helper_start) >> 5) +	.globl	__kuser_helper_end +__kuser_helper_end: + +/* + * AArch32 sigreturn code + * + * For ARM syscalls, the syscall number has to be loaded into r7. + * We do not support an OABI userspace. + * + * For Thumb syscalls, we also pass the syscall number via r7. We therefore + * need two 16-bit instructions. + */ +	.globl __aarch32_sigret_code_start +__aarch32_sigret_code_start: + +	/* +	 * ARM Code +	 */ +	.byte	__NR_compat_sigreturn, 0x70, 0xa0, 0xe3	// mov	r7, #__NR_compat_sigreturn +	.byte	__NR_compat_sigreturn, 0x00, 0x00, 0xef	// svc	#__NR_compat_sigreturn + +	/* +	 * Thumb code +	 */ +	.byte	__NR_compat_sigreturn, 0x27			// svc	#__NR_compat_sigreturn +	.byte	__NR_compat_sigreturn, 0xdf			// mov	r7, #__NR_compat_sigreturn + +	/* +	 * ARM code +	 */ +	.byte	__NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3	// mov	r7, #__NR_compat_rt_sigreturn +	.byte	__NR_compat_rt_sigreturn, 0x00, 0x00, 0xef	// svc	#__NR_compat_rt_sigreturn + +	/* +	 * Thumb code +	 */ +	.byte	__NR_compat_rt_sigreturn, 0x27			// svc	#__NR_compat_rt_sigreturn +	.byte	__NR_compat_rt_sigreturn, 0xdf			// mov	r7, #__NR_compat_rt_sigreturn + +        .globl __aarch32_sigret_code_end +__aarch32_sigret_code_end: diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c new file mode 100644 index 00000000000..1eb1cc95513 --- /dev/null +++ b/arch/arm64/kernel/module.c @@ -0,0 +1,396 @@ +/* + * AArch64 loadable module support. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/bitops.h> +#include <linux/elf.h> +#include <linux/gfp.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/moduleloader.h> +#include <linux/vmalloc.h> +#include <asm/insn.h> + +#define	AARCH64_INSN_IMM_MOVNZ		AARCH64_INSN_IMM_MAX +#define	AARCH64_INSN_IMM_MOVK		AARCH64_INSN_IMM_16 + +void *module_alloc(unsigned long size) +{ +	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, +				    GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE, +				    __builtin_return_address(0)); +} + +enum aarch64_reloc_op { +	RELOC_OP_NONE, +	RELOC_OP_ABS, +	RELOC_OP_PREL, +	RELOC_OP_PAGE, +}; + +static u64 do_reloc(enum aarch64_reloc_op reloc_op, void *place, u64 val) +{ +	switch (reloc_op) { +	case RELOC_OP_ABS: +		return val; +	case RELOC_OP_PREL: +		return val - (u64)place; +	case RELOC_OP_PAGE: +		return (val & ~0xfff) - ((u64)place & ~0xfff); +	case RELOC_OP_NONE: +		return 0; +	} + +	pr_err("do_reloc: unknown relocation operation %d\n", reloc_op); +	return 0; +} + +static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) +{ +	u64 imm_mask = (1 << len) - 1; +	s64 sval = do_reloc(op, place, val); + +	switch (len) { +	case 16: +		*(s16 *)place = sval; +		break; +	case 32: +		*(s32 *)place = sval; +		break; +	case 64: +		*(s64 *)place = sval; +		break; +	default: +		pr_err("Invalid length (%d) for data relocation\n", len); +		return 0; +	} + +	/* +	 * Extract the upper value bits (including the sign bit) and +	 * shift them to bit 0. +	 */ +	sval = (s64)(sval & ~(imm_mask >> 1)) >> (len - 1); + +	/* +	 * Overflow has occurred if the value is not representable in +	 * len bits (i.e the bottom len bits are not sign-extended and +	 * the top bits are not all zero). +	 */ +	if ((u64)(sval + 1) > 2) +		return -ERANGE; + +	return 0; +} + +static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, +			   int lsb, enum aarch64_insn_imm_type imm_type) +{ +	u64 imm, limit = 0; +	s64 sval; +	u32 insn = le32_to_cpu(*(u32 *)place); + +	sval = do_reloc(op, place, val); +	sval >>= lsb; +	imm = sval & 0xffff; + +	if (imm_type == AARCH64_INSN_IMM_MOVNZ) { +		/* +		 * For signed MOVW relocations, we have to manipulate the +		 * instruction encoding depending on whether or not the +		 * immediate is less than zero. +		 */ +		insn &= ~(3 << 29); +		if ((s64)imm >= 0) { +			/* >=0: Set the instruction to MOVZ (opcode 10b). */ +			insn |= 2 << 29; +		} else { +			/* +			 * <0: Set the instruction to MOVN (opcode 00b). +			 *     Since we've masked the opcode already, we +			 *     don't need to do anything other than +			 *     inverting the new immediate field. +			 */ +			imm = ~imm; +		} +		imm_type = AARCH64_INSN_IMM_MOVK; +	} + +	/* Update the instruction with the new encoding. */ +	insn = aarch64_insn_encode_immediate(imm_type, insn, imm); +	*(u32 *)place = cpu_to_le32(insn); + +	/* Shift out the immediate field. */ +	sval >>= 16; + +	/* +	 * For unsigned immediates, the overflow check is straightforward. +	 * For signed immediates, the sign bit is actually the bit past the +	 * most significant bit of the field. +	 * The AARCH64_INSN_IMM_16 immediate type is unsigned. +	 */ +	if (imm_type != AARCH64_INSN_IMM_16) { +		sval++; +		limit++; +	} + +	/* Check the upper bits depending on the sign of the immediate. 
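	 * In other words: for the unsigned AARCH64_INSN_IMM_16 groups the
	 * bits above the 16-bit field must all be zero, while for the signed
	 * MOVNZ groups they may be all zeros or all ones (sign extension);
	 * the sval++/limit++ adjustment above folds both cases into the
	 * single unsigned comparison below (an sval of -1 becomes 0, which
	 * is within limit == 1).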
*/ +	if ((u64)sval > limit) +		return -ERANGE; + +	return 0; +} + +static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val, +			  int lsb, int len, enum aarch64_insn_imm_type imm_type) +{ +	u64 imm, imm_mask; +	s64 sval; +	u32 insn = le32_to_cpu(*(u32 *)place); + +	/* Calculate the relocation value. */ +	sval = do_reloc(op, place, val); +	sval >>= lsb; + +	/* Extract the value bits and shift them to bit 0. */ +	imm_mask = (BIT(lsb + len) - 1) >> lsb; +	imm = sval & imm_mask; + +	/* Update the instruction's immediate field. */ +	insn = aarch64_insn_encode_immediate(imm_type, insn, imm); +	*(u32 *)place = cpu_to_le32(insn); + +	/* +	 * Extract the upper value bits (including the sign bit) and +	 * shift them to bit 0. +	 */ +	sval = (s64)(sval & ~(imm_mask >> 1)) >> (len - 1); + +	/* +	 * Overflow has occurred if the upper bits are not all equal to +	 * the sign bit of the value. +	 */ +	if ((u64)(sval + 1) >= 2) +		return -ERANGE; + +	return 0; +} + +int apply_relocate_add(Elf64_Shdr *sechdrs, +		       const char *strtab, +		       unsigned int symindex, +		       unsigned int relsec, +		       struct module *me) +{ +	unsigned int i; +	int ovf; +	bool overflow_check; +	Elf64_Sym *sym; +	void *loc; +	u64 val; +	Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; + +	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { +		/* loc corresponds to P in the AArch64 ELF document. */ +		loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr +			+ rel[i].r_offset; + +		/* sym is the ELF symbol we're referring to. */ +		sym = (Elf64_Sym *)sechdrs[symindex].sh_addr +			+ ELF64_R_SYM(rel[i].r_info); + +		/* val corresponds to (S + A) in the AArch64 ELF document. */ +		val = sym->st_value + rel[i].r_addend; + +		/* Check for overflow by default. */ +		overflow_check = true; + +		/* Perform the static relocation. */ +		switch (ELF64_R_TYPE(rel[i].r_info)) { +		/* Null relocations. */ +		case R_ARM_NONE: +		case R_AARCH64_NONE: +			ovf = 0; +			break; + +		/* Data relocations. */ +		case R_AARCH64_ABS64: +			overflow_check = false; +			ovf = reloc_data(RELOC_OP_ABS, loc, val, 64); +			break; +		case R_AARCH64_ABS32: +			ovf = reloc_data(RELOC_OP_ABS, loc, val, 32); +			break; +		case R_AARCH64_ABS16: +			ovf = reloc_data(RELOC_OP_ABS, loc, val, 16); +			break; +		case R_AARCH64_PREL64: +			overflow_check = false; +			ovf = reloc_data(RELOC_OP_PREL, loc, val, 64); +			break; +		case R_AARCH64_PREL32: +			ovf = reloc_data(RELOC_OP_PREL, loc, val, 32); +			break; +		case R_AARCH64_PREL16: +			ovf = reloc_data(RELOC_OP_PREL, loc, val, 16); +			break; + +		/* MOVW instruction relocations. */ +		case R_AARCH64_MOVW_UABS_G0_NC: +			overflow_check = false; +		case R_AARCH64_MOVW_UABS_G0: +			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, +					      AARCH64_INSN_IMM_16); +			break; +		case R_AARCH64_MOVW_UABS_G1_NC: +			overflow_check = false; +		case R_AARCH64_MOVW_UABS_G1: +			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, +					      AARCH64_INSN_IMM_16); +			break; +		case R_AARCH64_MOVW_UABS_G2_NC: +			overflow_check = false; +		case R_AARCH64_MOVW_UABS_G2: +			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, +					      AARCH64_INSN_IMM_16); +			break; +		case R_AARCH64_MOVW_UABS_G3: +			/* We're using the top bits so we can't overflow. 
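			 * The UABS_G0..G3 relocations typically come from a
			 * movz/movk sequence that builds up a 64-bit absolute
			 * address 16 bits at a time, along the lines of
			 * (illustrative only):
			 *
			 *	movz	x0, #:abs_g3:sym
			 *	movk	x0, #:abs_g2_nc:sym
			 *	movk	x0, #:abs_g1_nc:sym
			 *	movk	x0, #:abs_g0_nc:sym
			 *
			 * G3 fills bits [63:48], so once they are shifted out
			 * there is nothing left that could overflow.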
*/ +			overflow_check = false; +			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48, +					      AARCH64_INSN_IMM_16); +			break; +		case R_AARCH64_MOVW_SABS_G0: +			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, +					      AARCH64_INSN_IMM_MOVNZ); +			break; +		case R_AARCH64_MOVW_SABS_G1: +			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, +					      AARCH64_INSN_IMM_MOVNZ); +			break; +		case R_AARCH64_MOVW_SABS_G2: +			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, +					      AARCH64_INSN_IMM_MOVNZ); +			break; +		case R_AARCH64_MOVW_PREL_G0_NC: +			overflow_check = false; +			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, +					      AARCH64_INSN_IMM_MOVK); +			break; +		case R_AARCH64_MOVW_PREL_G0: +			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, +					      AARCH64_INSN_IMM_MOVNZ); +			break; +		case R_AARCH64_MOVW_PREL_G1_NC: +			overflow_check = false; +			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, +					      AARCH64_INSN_IMM_MOVK); +			break; +		case R_AARCH64_MOVW_PREL_G1: +			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, +					      AARCH64_INSN_IMM_MOVNZ); +			break; +		case R_AARCH64_MOVW_PREL_G2_NC: +			overflow_check = false; +			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, +					      AARCH64_INSN_IMM_MOVK); +			break; +		case R_AARCH64_MOVW_PREL_G2: +			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, +					      AARCH64_INSN_IMM_MOVNZ); +			break; +		case R_AARCH64_MOVW_PREL_G3: +			/* We're using the top bits so we can't overflow. */ +			overflow_check = false; +			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48, +					      AARCH64_INSN_IMM_MOVNZ); +			break; + +		/* Immediate instruction relocations. */ +		case R_AARCH64_LD_PREL_LO19: +			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, +					     AARCH64_INSN_IMM_19); +			break; +		case R_AARCH64_ADR_PREL_LO21: +			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, +					     AARCH64_INSN_IMM_ADR); +			break; +		case R_AARCH64_ADR_PREL_PG_HI21_NC: +			overflow_check = false; +		case R_AARCH64_ADR_PREL_PG_HI21: +			ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, +					     AARCH64_INSN_IMM_ADR); +			break; +		case R_AARCH64_ADD_ABS_LO12_NC: +		case R_AARCH64_LDST8_ABS_LO12_NC: +			overflow_check = false; +			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12, +					     AARCH64_INSN_IMM_12); +			break; +		case R_AARCH64_LDST16_ABS_LO12_NC: +			overflow_check = false; +			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11, +					     AARCH64_INSN_IMM_12); +			break; +		case R_AARCH64_LDST32_ABS_LO12_NC: +			overflow_check = false; +			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10, +					     AARCH64_INSN_IMM_12); +			break; +		case R_AARCH64_LDST64_ABS_LO12_NC: +			overflow_check = false; +			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9, +					     AARCH64_INSN_IMM_12); +			break; +		case R_AARCH64_LDST128_ABS_LO12_NC: +			overflow_check = false; +			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8, +					     AARCH64_INSN_IMM_12); +			break; +		case R_AARCH64_TSTBR14: +			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14, +					     AARCH64_INSN_IMM_14); +			break; +		case R_AARCH64_CONDBR19: +			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, +					     AARCH64_INSN_IMM_19); +			break; +		case R_AARCH64_JUMP26: +		case R_AARCH64_CALL26: +			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, +					     AARCH64_INSN_IMM_26); +			break; + +		default: +			pr_err("module %s: unsupported RELA relocation: %llu\n", +			       me->name, 
ELF64_R_TYPE(rel[i].r_info)); +			return -ENOEXEC; +		} + +		if (overflow_check && ovf == -ERANGE) +			goto overflow; + +	} + +	return 0; + +overflow: +	pr_err("module %s: overflow in relocation type %d val %Lx\n", +	       me->name, (int)ELF64_R_TYPE(rel[i].r_info), val); +	return -ENOEXEC; +} diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c new file mode 100644 index 00000000000..baf5afb7e6a --- /dev/null +++ b/arch/arm64/kernel/perf_event.c @@ -0,0 +1,1523 @@ +/* + * PMU support + * + * Copyright (C) 2012 ARM Limited + * Author: Will Deacon <will.deacon@arm.com> + * + * This code is based heavily on the ARMv7 perf event code. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ +#define pr_fmt(fmt) "hw perfevents: " fmt + +#include <linux/bitmap.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/perf_event.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> +#include <linux/uaccess.h> + +#include <asm/cputype.h> +#include <asm/irq.h> +#include <asm/irq_regs.h> +#include <asm/pmu.h> +#include <asm/stacktrace.h> + +/* + * ARMv8 supports a maximum of 32 events. + * The cycle counter is included in this total. + */ +#define ARMPMU_MAX_HWEVENTS		32 + +static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events); +static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask); +static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); + +#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) + +/* Set at runtime when we know what CPU type we are. 
*/ +static struct arm_pmu *cpu_pmu; + +int +armpmu_get_max_events(void) +{ +	int max_events = 0; + +	if (cpu_pmu != NULL) +		max_events = cpu_pmu->num_events; + +	return max_events; +} +EXPORT_SYMBOL_GPL(armpmu_get_max_events); + +int perf_num_counters(void) +{ +	return armpmu_get_max_events(); +} +EXPORT_SYMBOL_GPL(perf_num_counters); + +#define HW_OP_UNSUPPORTED		0xFFFF + +#define C(_x) \ +	PERF_COUNT_HW_CACHE_##_x + +#define CACHE_OP_UNSUPPORTED		0xFFFF + +static int +armpmu_map_cache_event(const unsigned (*cache_map) +				      [PERF_COUNT_HW_CACHE_MAX] +				      [PERF_COUNT_HW_CACHE_OP_MAX] +				      [PERF_COUNT_HW_CACHE_RESULT_MAX], +		       u64 config) +{ +	unsigned int cache_type, cache_op, cache_result, ret; + +	cache_type = (config >>  0) & 0xff; +	if (cache_type >= PERF_COUNT_HW_CACHE_MAX) +		return -EINVAL; + +	cache_op = (config >>  8) & 0xff; +	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) +		return -EINVAL; + +	cache_result = (config >> 16) & 0xff; +	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) +		return -EINVAL; + +	ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; + +	if (ret == CACHE_OP_UNSUPPORTED) +		return -ENOENT; + +	return ret; +} + +static int +armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) +{ +	int mapping; + +	if (config >= PERF_COUNT_HW_MAX) +		return -EINVAL; + +	mapping = (*event_map)[config]; +	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping; +} + +static int +armpmu_map_raw_event(u32 raw_event_mask, u64 config) +{ +	return (int)(config & raw_event_mask); +} + +static int map_cpu_event(struct perf_event *event, +			 const unsigned (*event_map)[PERF_COUNT_HW_MAX], +			 const unsigned (*cache_map) +					[PERF_COUNT_HW_CACHE_MAX] +					[PERF_COUNT_HW_CACHE_OP_MAX] +					[PERF_COUNT_HW_CACHE_RESULT_MAX], +			 u32 raw_event_mask) +{ +	u64 config = event->attr.config; + +	switch (event->attr.type) { +	case PERF_TYPE_HARDWARE: +		return armpmu_map_event(event_map, config); +	case PERF_TYPE_HW_CACHE: +		return armpmu_map_cache_event(cache_map, config); +	case PERF_TYPE_RAW: +		return armpmu_map_raw_event(raw_event_mask, config); +	} + +	return -ENOENT; +} + +int +armpmu_event_set_period(struct perf_event *event, +			struct hw_perf_event *hwc, +			int idx) +{ +	struct arm_pmu *armpmu = to_arm_pmu(event->pmu); +	s64 left = local64_read(&hwc->period_left); +	s64 period = hwc->sample_period; +	int ret = 0; + +	if (unlikely(left <= -period)) { +		left = period; +		local64_set(&hwc->period_left, left); +		hwc->last_period = period; +		ret = 1; +	} + +	if (unlikely(left <= 0)) { +		left += period; +		local64_set(&hwc->period_left, left); +		hwc->last_period = period; +		ret = 1; +	} + +	if (left > (s64)armpmu->max_period) +		left = armpmu->max_period; + +	local64_set(&hwc->prev_count, (u64)-left); + +	armpmu->write_counter(idx, (u64)(-left) & 0xffffffff); + +	perf_event_update_userpage(event); + +	return ret; +} + +u64 +armpmu_event_update(struct perf_event *event, +		    struct hw_perf_event *hwc, +		    int idx) +{ +	struct arm_pmu *armpmu = to_arm_pmu(event->pmu); +	u64 delta, prev_raw_count, new_raw_count; + +again: +	prev_raw_count = local64_read(&hwc->prev_count); +	new_raw_count = armpmu->read_counter(idx); + +	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, +			     new_raw_count) != prev_raw_count) +		goto again; + +	delta = (new_raw_count - prev_raw_count) & armpmu->max_period; + +	local64_add(delta, &event->count); +	local64_sub(delta, &hwc->period_left); + +	return new_raw_count; +} + +static void 
+armpmu_read(struct perf_event *event) +{ +	struct hw_perf_event *hwc = &event->hw; + +	/* Don't read disabled counters! */ +	if (hwc->idx < 0) +		return; + +	armpmu_event_update(event, hwc, hwc->idx); +} + +static void +armpmu_stop(struct perf_event *event, int flags) +{ +	struct arm_pmu *armpmu = to_arm_pmu(event->pmu); +	struct hw_perf_event *hwc = &event->hw; + +	/* +	 * ARM pmu always has to update the counter, so ignore +	 * PERF_EF_UPDATE, see comments in armpmu_start(). +	 */ +	if (!(hwc->state & PERF_HES_STOPPED)) { +		armpmu->disable(hwc, hwc->idx); +		barrier(); /* why? */ +		armpmu_event_update(event, hwc, hwc->idx); +		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; +	} +} + +static void +armpmu_start(struct perf_event *event, int flags) +{ +	struct arm_pmu *armpmu = to_arm_pmu(event->pmu); +	struct hw_perf_event *hwc = &event->hw; + +	/* +	 * ARM pmu always has to reprogram the period, so ignore +	 * PERF_EF_RELOAD, see the comment below. +	 */ +	if (flags & PERF_EF_RELOAD) +		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + +	hwc->state = 0; +	/* +	 * Set the period again. Some counters can't be stopped, so when we +	 * were stopped we simply disabled the IRQ source and the counter +	 * may have been left counting. If we don't do this step then we may +	 * get an interrupt too soon or *way* too late if the overflow has +	 * happened since disabling. +	 */ +	armpmu_event_set_period(event, hwc, hwc->idx); +	armpmu->enable(hwc, hwc->idx); +} + +static void +armpmu_del(struct perf_event *event, int flags) +{ +	struct arm_pmu *armpmu = to_arm_pmu(event->pmu); +	struct pmu_hw_events *hw_events = armpmu->get_hw_events(); +	struct hw_perf_event *hwc = &event->hw; +	int idx = hwc->idx; + +	WARN_ON(idx < 0); + +	armpmu_stop(event, PERF_EF_UPDATE); +	hw_events->events[idx] = NULL; +	clear_bit(idx, hw_events->used_mask); + +	perf_event_update_userpage(event); +} + +static int +armpmu_add(struct perf_event *event, int flags) +{ +	struct arm_pmu *armpmu = to_arm_pmu(event->pmu); +	struct pmu_hw_events *hw_events = armpmu->get_hw_events(); +	struct hw_perf_event *hwc = &event->hw; +	int idx; +	int err = 0; + +	perf_pmu_disable(event->pmu); + +	/* If we don't have a space for the counter then finish early. */ +	idx = armpmu->get_event_idx(hw_events, hwc); +	if (idx < 0) { +		err = idx; +		goto out; +	} + +	/* +	 * If there is an event in the counter we are going to use then make +	 * sure it is disabled. +	 */ +	event->hw.idx = idx; +	armpmu->disable(hwc, idx); +	hw_events->events[idx] = event; + +	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; +	if (flags & PERF_EF_START) +		armpmu_start(event, PERF_EF_RELOAD); + +	/* Propagate our changes to the userspace mapping. 
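
armpmu_event_set_period() above programs the hardware with the negated period so that the counter overflows after exactly `left` increments, and armpmu_event_update() masks the difference against max_period so the delta survives the wrap. A standalone sketch of that arithmetic (not part of this patch), assuming the 32-bit counter and 2^32 - 1 max_period that armv8pmu declares later in this file:

/* Standalone sketch of the period/delta arithmetic used above, assuming
 * a 32-bit counter and max_period = 2^32 - 1. */
#include <stdint.h>
#include <stdio.h>

#define MAX_PERIOD 0xffffffffULL

int main(void)
{
        uint64_t period = 1000;                 /* hwc->sample_period */
        uint32_t hw = (uint32_t)-period;        /* counter programmed with -left */
        uint64_t prev = (uint64_t)-period & MAX_PERIOD; /* hwc->prev_count */

        /* The hardware ticks 1000 times and wraps through zero, which is
         * what raises the overflow interrupt. */
        hw += 1000;

        /* armpmu_event_update(): the mask makes the delta survive the
         * 32-bit wrap-around. */
        uint64_t delta = ((uint64_t)hw - prev) & MAX_PERIOD;
        printf("delta = %llu\n", (unsigned long long)delta);   /* 1000 */
        return 0;
}
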
*/ +	perf_event_update_userpage(event); + +out: +	perf_pmu_enable(event->pmu); +	return err; +} + +static int +validate_event(struct pmu_hw_events *hw_events, +	       struct perf_event *event) +{ +	struct arm_pmu *armpmu = to_arm_pmu(event->pmu); +	struct hw_perf_event fake_event = event->hw; +	struct pmu *leader_pmu = event->group_leader->pmu; + +	if (is_software_event(event)) +		return 1; + +	if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF) +		return 1; + +	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) +		return 1; + +	return armpmu->get_event_idx(hw_events, &fake_event) >= 0; +} + +static int +validate_group(struct perf_event *event) +{ +	struct perf_event *sibling, *leader = event->group_leader; +	struct pmu_hw_events fake_pmu; +	DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS); + +	/* +	 * Initialise the fake PMU. We only need to populate the +	 * used_mask for the purposes of validation. +	 */ +	memset(fake_used_mask, 0, sizeof(fake_used_mask)); +	fake_pmu.used_mask = fake_used_mask; + +	if (!validate_event(&fake_pmu, leader)) +		return -EINVAL; + +	list_for_each_entry(sibling, &leader->sibling_list, group_entry) { +		if (!validate_event(&fake_pmu, sibling)) +			return -EINVAL; +	} + +	if (!validate_event(&fake_pmu, event)) +		return -EINVAL; + +	return 0; +} + +static void +armpmu_disable_percpu_irq(void *data) +{ +	unsigned int irq = *(unsigned int *)data; +	disable_percpu_irq(irq); +} + +static void +armpmu_release_hardware(struct arm_pmu *armpmu) +{ +	int irq; +	unsigned int i, irqs; +	struct platform_device *pmu_device = armpmu->plat_device; + +	irqs = min(pmu_device->num_resources, num_possible_cpus()); +	if (!irqs) +		return; + +	irq = platform_get_irq(pmu_device, 0); +	if (irq <= 0) +		return; + +	if (irq_is_percpu(irq)) { +		on_each_cpu(armpmu_disable_percpu_irq, &irq, 1); +		free_percpu_irq(irq, &cpu_hw_events); +	} else { +		for (i = 0; i < irqs; ++i) { +			if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs)) +				continue; +			irq = platform_get_irq(pmu_device, i); +			if (irq > 0) +				free_irq(irq, armpmu); +		} +	} +} + +static void +armpmu_enable_percpu_irq(void *data) +{ +	unsigned int irq = *(unsigned int *)data; +	enable_percpu_irq(irq, IRQ_TYPE_NONE); +} + +static int +armpmu_reserve_hardware(struct arm_pmu *armpmu) +{ +	int err, irq; +	unsigned int i, irqs; +	struct platform_device *pmu_device = armpmu->plat_device; + +	if (!pmu_device) { +		pr_err("no PMU device registered\n"); +		return -ENODEV; +	} + +	irqs = min(pmu_device->num_resources, num_possible_cpus()); +	if (!irqs) { +		pr_err("no irqs for PMUs defined\n"); +		return -ENODEV; +	} + +	irq = platform_get_irq(pmu_device, 0); +	if (irq <= 0) { +		pr_err("failed to get valid irq for PMU device\n"); +		return -ENODEV; +	} + +	if (irq_is_percpu(irq)) { +		err = request_percpu_irq(irq, armpmu->handle_irq, +				"arm-pmu", &cpu_hw_events); + +		if (err) { +			pr_err("unable to request percpu IRQ%d for ARM PMU counters\n", +					irq); +			armpmu_release_hardware(armpmu); +			return err; +		} + +		on_each_cpu(armpmu_enable_percpu_irq, &irq, 1); +	} else { +		for (i = 0; i < irqs; ++i) { +			err = 0; +			irq = platform_get_irq(pmu_device, i); +			if (irq <= 0) +				continue; + +			/* +			 * If we have a single PMU interrupt that we can't shift, +			 * assume that we're running on a uniprocessor machine and +			 * continue. Otherwise, continue without this interrupt. 
+			 */ +			if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { +				pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", +						irq, i); +				continue; +			} + +			err = request_irq(irq, armpmu->handle_irq, +					IRQF_NOBALANCING, +					"arm-pmu", armpmu); +			if (err) { +				pr_err("unable to request IRQ%d for ARM PMU counters\n", +						irq); +				armpmu_release_hardware(armpmu); +				return err; +			} + +			cpumask_set_cpu(i, &armpmu->active_irqs); +		} +	} + +	return 0; +} + +static void +hw_perf_event_destroy(struct perf_event *event) +{ +	struct arm_pmu *armpmu = to_arm_pmu(event->pmu); +	atomic_t *active_events	 = &armpmu->active_events; +	struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex; + +	if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) { +		armpmu_release_hardware(armpmu); +		mutex_unlock(pmu_reserve_mutex); +	} +} + +static int +event_requires_mode_exclusion(struct perf_event_attr *attr) +{ +	return attr->exclude_idle || attr->exclude_user || +	       attr->exclude_kernel || attr->exclude_hv; +} + +static int +__hw_perf_event_init(struct perf_event *event) +{ +	struct arm_pmu *armpmu = to_arm_pmu(event->pmu); +	struct hw_perf_event *hwc = &event->hw; +	int mapping, err; + +	mapping = armpmu->map_event(event); + +	if (mapping < 0) { +		pr_debug("event %x:%llx not supported\n", event->attr.type, +			 event->attr.config); +		return mapping; +	} + +	/* +	 * We don't assign an index until we actually place the event onto +	 * hardware. Use -1 to signify that we haven't decided where to put it +	 * yet. For SMP systems, each core has it's own PMU so we can't do any +	 * clever allocation or constraints checking at this point. +	 */ +	hwc->idx		= -1; +	hwc->config_base	= 0; +	hwc->config		= 0; +	hwc->event_base		= 0; + +	/* +	 * Check whether we need to exclude the counter from certain modes. +	 */ +	if ((!armpmu->set_event_filter || +	     armpmu->set_event_filter(hwc, &event->attr)) && +	     event_requires_mode_exclusion(&event->attr)) { +		pr_debug("ARM performance counters do not support mode exclusion\n"); +		return -EPERM; +	} + +	/* +	 * Store the event encoding into the config_base field. +	 */ +	hwc->config_base	    |= (unsigned long)mapping; + +	if (!hwc->sample_period) { +		/* +		 * For non-sampling runs, limit the sample_period to half +		 * of the counter width. That way, the new counter value +		 * is far less likely to overtake the previous one unless +		 * you have some serious IRQ latency issues. 
+		 */ +		hwc->sample_period  = armpmu->max_period >> 1; +		hwc->last_period    = hwc->sample_period; +		local64_set(&hwc->period_left, hwc->sample_period); +	} + +	err = 0; +	if (event->group_leader != event) { +		err = validate_group(event); +		if (err) +			return -EINVAL; +	} + +	return err; +} + +static int armpmu_event_init(struct perf_event *event) +{ +	struct arm_pmu *armpmu = to_arm_pmu(event->pmu); +	int err = 0; +	atomic_t *active_events = &armpmu->active_events; + +	if (armpmu->map_event(event) == -ENOENT) +		return -ENOENT; + +	event->destroy = hw_perf_event_destroy; + +	if (!atomic_inc_not_zero(active_events)) { +		mutex_lock(&armpmu->reserve_mutex); +		if (atomic_read(active_events) == 0) +			err = armpmu_reserve_hardware(armpmu); + +		if (!err) +			atomic_inc(active_events); +		mutex_unlock(&armpmu->reserve_mutex); +	} + +	if (err) +		return err; + +	err = __hw_perf_event_init(event); +	if (err) +		hw_perf_event_destroy(event); + +	return err; +} + +static void armpmu_enable(struct pmu *pmu) +{ +	struct arm_pmu *armpmu = to_arm_pmu(pmu); +	struct pmu_hw_events *hw_events = armpmu->get_hw_events(); +	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + +	if (enabled) +		armpmu->start(); +} + +static void armpmu_disable(struct pmu *pmu) +{ +	struct arm_pmu *armpmu = to_arm_pmu(pmu); +	armpmu->stop(); +} + +static void __init armpmu_init(struct arm_pmu *armpmu) +{ +	atomic_set(&armpmu->active_events, 0); +	mutex_init(&armpmu->reserve_mutex); + +	armpmu->pmu = (struct pmu) { +		.pmu_enable	= armpmu_enable, +		.pmu_disable	= armpmu_disable, +		.event_init	= armpmu_event_init, +		.add		= armpmu_add, +		.del		= armpmu_del, +		.start		= armpmu_start, +		.stop		= armpmu_stop, +		.read		= armpmu_read, +	}; +} + +int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type) +{ +	armpmu_init(armpmu); +	return perf_pmu_register(&armpmu->pmu, name, type); +} + +/* + * ARMv8 PMUv3 Performance Events handling code. + * Common event types. + */ +enum armv8_pmuv3_perf_types { +	/* Required events. */ +	ARMV8_PMUV3_PERFCTR_PMNC_SW_INCR			= 0x00, +	ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL			= 0x03, +	ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS			= 0x04, +	ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED			= 0x10, +	ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES			= 0x11, +	ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED			= 0x12, + +	/* At least one of the following is required. */ +	ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED			= 0x08, +	ARMV8_PMUV3_PERFCTR_OP_SPEC				= 0x1B, + +	/* Common architectural events. */ +	ARMV8_PMUV3_PERFCTR_MEM_READ				= 0x06, +	ARMV8_PMUV3_PERFCTR_MEM_WRITE				= 0x07, +	ARMV8_PMUV3_PERFCTR_EXC_TAKEN				= 0x09, +	ARMV8_PMUV3_PERFCTR_EXC_EXECUTED			= 0x0A, +	ARMV8_PMUV3_PERFCTR_CID_WRITE				= 0x0B, +	ARMV8_PMUV3_PERFCTR_PC_WRITE				= 0x0C, +	ARMV8_PMUV3_PERFCTR_PC_IMM_BRANCH			= 0x0D, +	ARMV8_PMUV3_PERFCTR_PC_PROC_RETURN			= 0x0E, +	ARMV8_PMUV3_PERFCTR_MEM_UNALIGNED_ACCESS		= 0x0F, +	ARMV8_PMUV3_PERFCTR_TTBR_WRITE				= 0x1C, + +	/* Common microarchitectural events. 
*/ +	ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL			= 0x01, +	ARMV8_PMUV3_PERFCTR_ITLB_REFILL				= 0x02, +	ARMV8_PMUV3_PERFCTR_DTLB_REFILL				= 0x05, +	ARMV8_PMUV3_PERFCTR_MEM_ACCESS				= 0x13, +	ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS			= 0x14, +	ARMV8_PMUV3_PERFCTR_L1_DCACHE_WB			= 0x15, +	ARMV8_PMUV3_PERFCTR_L2_CACHE_ACCESS			= 0x16, +	ARMV8_PMUV3_PERFCTR_L2_CACHE_REFILL			= 0x17, +	ARMV8_PMUV3_PERFCTR_L2_CACHE_WB				= 0x18, +	ARMV8_PMUV3_PERFCTR_BUS_ACCESS				= 0x19, +	ARMV8_PMUV3_PERFCTR_MEM_ERROR				= 0x1A, +	ARMV8_PMUV3_PERFCTR_BUS_CYCLES				= 0x1D, +}; + +/* PMUv3 HW events mapping. */ +static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { +	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES, +	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED, +	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, +	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, +	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= HW_OP_UNSUPPORTED, +	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, +	[PERF_COUNT_HW_BUS_CYCLES]		= HW_OP_UNSUPPORTED, +	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= HW_OP_UNSUPPORTED, +	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= HW_OP_UNSUPPORTED, +}; + +static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] +						[PERF_COUNT_HW_CACHE_OP_MAX] +						[PERF_COUNT_HW_CACHE_RESULT_MAX] = { +	[C(L1D)] = { +		[C(OP_READ)] = { +			[C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, +			[C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, +		}, +		[C(OP_WRITE)] = { +			[C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, +			[C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, +		}, +		[C(OP_PREFETCH)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +	}, +	[C(L1I)] = { +		[C(OP_READ)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_WRITE)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_PREFETCH)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +	}, +	[C(LL)] = { +		[C(OP_READ)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_WRITE)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_PREFETCH)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +	}, +	[C(DTLB)] = { +		[C(OP_READ)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_WRITE)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_PREFETCH)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +	}, +	[C(ITLB)] = { +		[C(OP_READ)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_WRITE)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_PREFETCH)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +	}, +	[C(BPU)] = { +		[C(OP_READ)] = { +			[C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, +			[C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, +		}, +		[C(OP_WRITE)] = 
{ +			[C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, +			[C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, +		}, +		[C(OP_PREFETCH)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +	}, +	[C(NODE)] = { +		[C(OP_READ)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_WRITE)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +		[C(OP_PREFETCH)] = { +			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED, +			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED, +		}, +	}, +}; + +/* + * Perf Events' indices + */ +#define	ARMV8_IDX_CYCLE_COUNTER	0 +#define	ARMV8_IDX_COUNTER0	1 +#define	ARMV8_IDX_COUNTER_LAST	(ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) + +#define	ARMV8_MAX_COUNTERS	32 +#define	ARMV8_COUNTER_MASK	(ARMV8_MAX_COUNTERS - 1) + +/* + * ARMv8 low level PMU access + */ + +/* + * Perf Event to low level counters mapping + */ +#define	ARMV8_IDX_TO_COUNTER(x)	\ +	(((x) - ARMV8_IDX_COUNTER0) & ARMV8_COUNTER_MASK) + +/* + * Per-CPU PMCR: config reg + */ +#define ARMV8_PMCR_E		(1 << 0) /* Enable all counters */ +#define ARMV8_PMCR_P		(1 << 1) /* Reset all counters */ +#define ARMV8_PMCR_C		(1 << 2) /* Cycle counter reset */ +#define ARMV8_PMCR_D		(1 << 3) /* CCNT counts every 64th cpu cycle */ +#define ARMV8_PMCR_X		(1 << 4) /* Export to ETM */ +#define ARMV8_PMCR_DP		(1 << 5) /* Disable CCNT if non-invasive debug*/ +#define	ARMV8_PMCR_N_SHIFT	11	 /* Number of counters supported */ +#define	ARMV8_PMCR_N_MASK	0x1f +#define	ARMV8_PMCR_MASK		0x3f	 /* Mask for writable bits */ + +/* + * PMOVSR: counters overflow flag status reg + */ +#define	ARMV8_OVSR_MASK		0xffffffff	/* Mask for writable bits */ +#define	ARMV8_OVERFLOWED_MASK	ARMV8_OVSR_MASK + +/* + * PMXEVTYPER: Event selection reg + */ +#define	ARMV8_EVTYPE_MASK	0xc80003ff	/* Mask for writable bits */ +#define	ARMV8_EVTYPE_EVENT	0x3ff		/* Mask for EVENT bits */ + +/* + * Event filters for PMUv3 + */ +#define	ARMV8_EXCLUDE_EL1	(1 << 31) +#define	ARMV8_EXCLUDE_EL0	(1 << 30) +#define	ARMV8_INCLUDE_EL2	(1 << 27) + +static inline u32 armv8pmu_pmcr_read(void) +{ +	u32 val; +	asm volatile("mrs %0, pmcr_el0" : "=r" (val)); +	return val; +} + +static inline void armv8pmu_pmcr_write(u32 val) +{ +	val &= ARMV8_PMCR_MASK; +	isb(); +	asm volatile("msr pmcr_el0, %0" :: "r" (val)); +} + +static inline int armv8pmu_has_overflowed(u32 pmovsr) +{ +	return pmovsr & ARMV8_OVERFLOWED_MASK; +} + +static inline int armv8pmu_counter_valid(int idx) +{ +	return idx >= ARMV8_IDX_CYCLE_COUNTER && idx <= ARMV8_IDX_COUNTER_LAST; +} + +static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) +{ +	int ret = 0; +	u32 counter; + +	if (!armv8pmu_counter_valid(idx)) { +		pr_err("CPU%u checking wrong counter %d overflow status\n", +			smp_processor_id(), idx); +	} else { +		counter = ARMV8_IDX_TO_COUNTER(idx); +		ret = pmnc & BIT(counter); +	} + +	return ret; +} + +static inline int armv8pmu_select_counter(int idx) +{ +	u32 counter; + +	if (!armv8pmu_counter_valid(idx)) { +		pr_err("CPU%u selecting wrong PMNC counter %d\n", +			smp_processor_id(), idx); +		return -EINVAL; +	} + +	counter = ARMV8_IDX_TO_COUNTER(idx); +	asm volatile("msr pmselr_el0, %0" :: "r" (counter)); +	isb(); + +	return idx; +} + +static inline u32 armv8pmu_read_counter(int idx) +{ +	u32 value = 0; + +	if (!armv8pmu_counter_valid(idx)) +		pr_err("CPU%u reading wrong counter %d\n", +			smp_processor_id(), idx); +	else if (idx == 
ARMV8_IDX_CYCLE_COUNTER) +		asm volatile("mrs %0, pmccntr_el0" : "=r" (value)); +	else if (armv8pmu_select_counter(idx) == idx) +		asm volatile("mrs %0, pmxevcntr_el0" : "=r" (value)); + +	return value; +} + +static inline void armv8pmu_write_counter(int idx, u32 value) +{ +	if (!armv8pmu_counter_valid(idx)) +		pr_err("CPU%u writing wrong counter %d\n", +			smp_processor_id(), idx); +	else if (idx == ARMV8_IDX_CYCLE_COUNTER) +		asm volatile("msr pmccntr_el0, %0" :: "r" (value)); +	else if (armv8pmu_select_counter(idx) == idx) +		asm volatile("msr pmxevcntr_el0, %0" :: "r" (value)); +} + +static inline void armv8pmu_write_evtype(int idx, u32 val) +{ +	if (armv8pmu_select_counter(idx) == idx) { +		val &= ARMV8_EVTYPE_MASK; +		asm volatile("msr pmxevtyper_el0, %0" :: "r" (val)); +	} +} + +static inline int armv8pmu_enable_counter(int idx) +{ +	u32 counter; + +	if (!armv8pmu_counter_valid(idx)) { +		pr_err("CPU%u enabling wrong PMNC counter %d\n", +			smp_processor_id(), idx); +		return -EINVAL; +	} + +	counter = ARMV8_IDX_TO_COUNTER(idx); +	asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter))); +	return idx; +} + +static inline int armv8pmu_disable_counter(int idx) +{ +	u32 counter; + +	if (!armv8pmu_counter_valid(idx)) { +		pr_err("CPU%u disabling wrong PMNC counter %d\n", +			smp_processor_id(), idx); +		return -EINVAL; +	} + +	counter = ARMV8_IDX_TO_COUNTER(idx); +	asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter))); +	return idx; +} + +static inline int armv8pmu_enable_intens(int idx) +{ +	u32 counter; + +	if (!armv8pmu_counter_valid(idx)) { +		pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n", +			smp_processor_id(), idx); +		return -EINVAL; +	} + +	counter = ARMV8_IDX_TO_COUNTER(idx); +	asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter))); +	return idx; +} + +static inline int armv8pmu_disable_intens(int idx) +{ +	u32 counter; + +	if (!armv8pmu_counter_valid(idx)) { +		pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n", +			smp_processor_id(), idx); +		return -EINVAL; +	} + +	counter = ARMV8_IDX_TO_COUNTER(idx); +	asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter))); +	isb(); +	/* Clear the overflow flag in case an interrupt is pending. */ +	asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter))); +	isb(); +	return idx; +} + +static inline u32 armv8pmu_getreset_flags(void) +{ +	u32 value; + +	/* Read */ +	asm volatile("mrs %0, pmovsclr_el0" : "=r" (value)); + +	/* Write to clear flags */ +	value &= ARMV8_OVSR_MASK; +	asm volatile("msr pmovsclr_el0, %0" :: "r" (value)); + +	return value; +} + +static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ +	unsigned long flags; +	struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + +	/* +	 * Enable counter and interrupt, and set the counter to count +	 * the event that we're interested in. +	 */ +	raw_spin_lock_irqsave(&events->pmu_lock, flags); + +	/* +	 * Disable counter +	 */ +	armv8pmu_disable_counter(idx); + +	/* +	 * Set event (if destined for PMNx counters). 
+	 */ +	armv8pmu_write_evtype(idx, hwc->config_base); + +	/* +	 * Enable interrupt for this counter +	 */ +	armv8pmu_enable_intens(idx); + +	/* +	 * Enable counter +	 */ +	armv8pmu_enable_counter(idx); + +	raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void armv8pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ +	unsigned long flags; +	struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + +	/* +	 * Disable counter and interrupt +	 */ +	raw_spin_lock_irqsave(&events->pmu_lock, flags); + +	/* +	 * Disable counter +	 */ +	armv8pmu_disable_counter(idx); + +	/* +	 * Disable interrupt for this counter +	 */ +	armv8pmu_disable_intens(idx); + +	raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) +{ +	u32 pmovsr; +	struct perf_sample_data data; +	struct pmu_hw_events *cpuc; +	struct pt_regs *regs; +	int idx; + +	/* +	 * Get and reset the IRQ flags +	 */ +	pmovsr = armv8pmu_getreset_flags(); + +	/* +	 * Did an overflow occur? +	 */ +	if (!armv8pmu_has_overflowed(pmovsr)) +		return IRQ_NONE; + +	/* +	 * Handle the counter(s) overflow(s) +	 */ +	regs = get_irq_regs(); + +	cpuc = this_cpu_ptr(&cpu_hw_events); +	for (idx = 0; idx < cpu_pmu->num_events; ++idx) { +		struct perf_event *event = cpuc->events[idx]; +		struct hw_perf_event *hwc; + +		/* Ignore if we don't have an event. */ +		if (!event) +			continue; + +		/* +		 * We have a single interrupt for all counters. Check that +		 * each counter has overflowed before we process it. +		 */ +		if (!armv8pmu_counter_has_overflowed(pmovsr, idx)) +			continue; + +		hwc = &event->hw; +		armpmu_event_update(event, hwc, idx); +		perf_sample_data_init(&data, 0, hwc->last_period); +		if (!armpmu_event_set_period(event, hwc, idx)) +			continue; + +		if (perf_event_overflow(event, &data, regs)) +			cpu_pmu->disable(hwc, idx); +	} + +	/* +	 * Handle the pending perf events. +	 * +	 * Note: this call *must* be run with interrupts disabled. For +	 * platforms that can have the PMU interrupts raised as an NMI, this +	 * will not work. +	 */ +	irq_work_run(); + +	return IRQ_HANDLED; +} + +static void armv8pmu_start(void) +{ +	unsigned long flags; +	struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + +	raw_spin_lock_irqsave(&events->pmu_lock, flags); +	/* Enable all counters */ +	armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMCR_E); +	raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void armv8pmu_stop(void) +{ +	unsigned long flags; +	struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + +	raw_spin_lock_irqsave(&events->pmu_lock, flags); +	/* Disable all counters */ +	armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMCR_E); +	raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, +				  struct hw_perf_event *event) +{ +	int idx; +	unsigned long evtype = event->config_base & ARMV8_EVTYPE_EVENT; + +	/* Always place a cycle counter into the cycle counter. */ +	if (evtype == ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES) { +		if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) +			return -EAGAIN; + +		return ARMV8_IDX_CYCLE_COUNTER; +	} + +	/* +	 * For anything other than a cycle counter, try and use +	 * the events counters +	 */ +	for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { +		if (!test_and_set_bit(idx, cpuc->used_mask)) +			return idx; +	} + +	/* The counters are all in use. */ +	return -EAGAIN; +} + +/* + * Add an event filter to a given event. 
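
From userspace, the counters this driver exposes are reached through the perf_event_open() syscall; a PERF_TYPE_RAW config passes straight through armpmu_map_raw_event() and lands on one of the programmable counters chosen by armv8pmu_get_event_idx() above. A minimal sketch (not part of this patch) counting the raw ARMv8 MEM_ACCESS event, 0x13 in the enum above, assuming an ARMv8 PMU and a permissive perf_event_paranoid setting; error handling is kept to the bare minimum:

/* Minimal sketch: count the raw ARMv8 MEM_ACCESS event (0x13) for this
 * task using perf_event_open(2). */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        struct perf_event_attr attr;
        uint64_t count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_RAW;      /* routed through armpmu_map_raw_event() */
        attr.config = 0x13;             /* ARMV8_PMUV3_PERFCTR_MEM_ACCESS */
        attr.disabled = 1;

        fd = syscall(__NR_perf_event_open, &attr, 0 /* self */,
                     -1 /* any cpu */, -1 /* no group */, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }

        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        /* ... the workload being measured would run here ... */
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("memory accesses: %llu\n", (unsigned long long)count);
        close(fd);
        return 0;
}
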
This will only work for PMUv2 PMUs. + */ +static int armv8pmu_set_event_filter(struct hw_perf_event *event, +				     struct perf_event_attr *attr) +{ +	unsigned long config_base = 0; + +	if (attr->exclude_idle) +		return -EPERM; +	if (attr->exclude_user) +		config_base |= ARMV8_EXCLUDE_EL0; +	if (attr->exclude_kernel) +		config_base |= ARMV8_EXCLUDE_EL1; +	if (!attr->exclude_hv) +		config_base |= ARMV8_INCLUDE_EL2; + +	/* +	 * Install the filter into config_base as this is used to +	 * construct the event type. +	 */ +	event->config_base = config_base; + +	return 0; +} + +static void armv8pmu_reset(void *info) +{ +	u32 idx, nb_cnt = cpu_pmu->num_events; + +	/* The counter and interrupt enable registers are unknown at reset. */ +	for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) +		armv8pmu_disable_event(NULL, idx); + +	/* Initialize & Reset PMNC: C and P bits. */ +	armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C); + +	/* Disable access from userspace. */ +	asm volatile("msr pmuserenr_el0, %0" :: "r" (0)); +} + +static int armv8_pmuv3_map_event(struct perf_event *event) +{ +	return map_cpu_event(event, &armv8_pmuv3_perf_map, +				&armv8_pmuv3_perf_cache_map, +				ARMV8_EVTYPE_EVENT); +} + +static struct arm_pmu armv8pmu = { +	.handle_irq		= armv8pmu_handle_irq, +	.enable			= armv8pmu_enable_event, +	.disable		= armv8pmu_disable_event, +	.read_counter		= armv8pmu_read_counter, +	.write_counter		= armv8pmu_write_counter, +	.get_event_idx		= armv8pmu_get_event_idx, +	.start			= armv8pmu_start, +	.stop			= armv8pmu_stop, +	.reset			= armv8pmu_reset, +	.max_period		= (1LLU << 32) - 1, +}; + +static u32 __init armv8pmu_read_num_pmnc_events(void) +{ +	u32 nb_cnt; + +	/* Read the nb of CNTx counters supported from PMNC */ +	nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK; + +	/* Add the CPU cycles counter and return */ +	return nb_cnt + 1; +} + +static struct arm_pmu *__init armv8_pmuv3_pmu_init(void) +{ +	armv8pmu.name			= "arm/armv8-pmuv3"; +	armv8pmu.map_event		= armv8_pmuv3_map_event; +	armv8pmu.num_events		= armv8pmu_read_num_pmnc_events(); +	armv8pmu.set_event_filter	= armv8pmu_set_event_filter; +	return &armv8pmu; +} + +/* + * Ensure the PMU has sane values out of reset. + * This requires SMP to be available, so exists as a separate initcall. + */ +static int __init +cpu_pmu_reset(void) +{ +	if (cpu_pmu && cpu_pmu->reset) +		return on_each_cpu(cpu_pmu->reset, NULL, 1); +	return 0; +} +arch_initcall(cpu_pmu_reset); + +/* + * PMU platform driver and devicetree bindings. 
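
armv8pmu_set_event_filter() above turns the perf_event_attr exclude_* bits into EL filter bits, and __hw_perf_event_init() ORs the mapped event number into the same config_base before armv8pmu_write_evtype() programs it. A standalone worked example (not part of this patch) of the PMXEVTYPER value that results for a kernel-only cycles event, with constants copied from the #defines above:

/* Worked example: PMXEVTYPER for a cycles event with exclude_user = 1
 * and exclude_hv = 1. Constants mirror the #defines above. */
#include <stdint.h>
#include <stdio.h>

#define ARMV8_EXCLUDE_EL1       (1U << 31)
#define ARMV8_EXCLUDE_EL0       (1U << 30)
#define ARMV8_INCLUDE_EL2       (1U << 27)
#define ARMV8_EVTYPE_MASK       0xc80003ffU
#define PERFCTR_CLOCK_CYCLES    0x11U

int main(void)
{
        /* armv8pmu_set_event_filter(): exclude_user sets ARMV8_EXCLUDE_EL0;
         * exclude_hv being set means ARMV8_INCLUDE_EL2 stays clear. */
        uint32_t config_base = ARMV8_EXCLUDE_EL0;

        /* __hw_perf_event_init() ORs the mapped event number in, and the
         * writable-bits mask is applied when the register is written. */
        uint32_t evtype = (config_base | PERFCTR_CLOCK_CYCLES) & ARMV8_EVTYPE_MASK;

        printf("PMXEVTYPER_EL0 = %#010x\n", evtype);    /* 0x40000011 */
        return 0;
}
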
+ */ +static struct of_device_id armpmu_of_device_ids[] = { +	{.compatible = "arm,armv8-pmuv3"}, +	{}, +}; + +static int armpmu_device_probe(struct platform_device *pdev) +{ +	if (!cpu_pmu) +		return -ENODEV; + +	cpu_pmu->plat_device = pdev; +	return 0; +} + +static struct platform_driver armpmu_driver = { +	.driver		= { +		.name	= "arm-pmu", +		.of_match_table = armpmu_of_device_ids, +	}, +	.probe		= armpmu_device_probe, +}; + +static int __init register_pmu_driver(void) +{ +	return platform_driver_register(&armpmu_driver); +} +device_initcall(register_pmu_driver); + +static struct pmu_hw_events *armpmu_get_cpu_events(void) +{ +	return this_cpu_ptr(&cpu_hw_events); +} + +static void __init cpu_pmu_init(struct arm_pmu *armpmu) +{ +	int cpu; +	for_each_possible_cpu(cpu) { +		struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu); +		events->events = per_cpu(hw_events, cpu); +		events->used_mask = per_cpu(used_mask, cpu); +		raw_spin_lock_init(&events->pmu_lock); +	} +	armpmu->get_hw_events = armpmu_get_cpu_events; +} + +static int __init init_hw_perf_events(void) +{ +	u64 dfr = read_cpuid(ID_AA64DFR0_EL1); + +	switch ((dfr >> 8) & 0xf) { +	case 0x1:	/* PMUv3 */ +		cpu_pmu = armv8_pmuv3_pmu_init(); +		break; +	} + +	if (cpu_pmu) { +		pr_info("enabled with %s PMU driver, %d counters available\n", +			cpu_pmu->name, cpu_pmu->num_events); +		cpu_pmu_init(cpu_pmu); +		armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW); +	} else { +		pr_info("no hardware support available\n"); +	} + +	return 0; +} +early_initcall(init_hw_perf_events); + +/* + * Callchain handling code. + */ +struct frame_tail { +	struct frame_tail	__user *fp; +	unsigned long		lr; +} __attribute__((packed)); + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static struct frame_tail __user * +user_backtrace(struct frame_tail __user *tail, +	       struct perf_callchain_entry *entry) +{ +	struct frame_tail buftail; +	unsigned long err; + +	/* Also check accessibility of one struct frame_tail beyond */ +	if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) +		return NULL; + +	pagefault_disable(); +	err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); +	pagefault_enable(); + +	if (err) +		return NULL; + +	perf_callchain_store(entry, buftail.lr); + +	/* +	 * Frame pointers should strictly progress back up the stack +	 * (towards higher addresses). +	 */ +	if (tail >= buftail.fp) +		return NULL; + +	return buftail.fp; +} + +#ifdef CONFIG_COMPAT +/* + * The registers we're interested in are at the end of the variable + * length saved register structure. The fp points at the end of this + * structure so the address of this struct is: + * (struct compat_frame_tail *)(xxx->fp)-1 + * + * This code has been adapted from the ARM OProfile support. 
+ */ +struct compat_frame_tail { +	compat_uptr_t	fp; /* a (struct compat_frame_tail *) in compat mode */ +	u32		sp; +	u32		lr; +} __attribute__((packed)); + +static struct compat_frame_tail __user * +compat_user_backtrace(struct compat_frame_tail __user *tail, +		      struct perf_callchain_entry *entry) +{ +	struct compat_frame_tail buftail; +	unsigned long err; + +	/* Also check accessibility of one struct frame_tail beyond */ +	if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) +		return NULL; + +	pagefault_disable(); +	err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); +	pagefault_enable(); + +	if (err) +		return NULL; + +	perf_callchain_store(entry, buftail.lr); + +	/* +	 * Frame pointers should strictly progress back up the stack +	 * (towards higher addresses). +	 */ +	if (tail + 1 >= (struct compat_frame_tail __user *) +			compat_ptr(buftail.fp)) +		return NULL; + +	return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1; +} +#endif /* CONFIG_COMPAT */ + +void perf_callchain_user(struct perf_callchain_entry *entry, +			 struct pt_regs *regs) +{ +	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { +		/* We don't support guest os callchain now */ +		return; +	} + +	perf_callchain_store(entry, regs->pc); + +	if (!compat_user_mode(regs)) { +		/* AARCH64 mode */ +		struct frame_tail __user *tail; + +		tail = (struct frame_tail __user *)regs->regs[29]; + +		while (entry->nr < PERF_MAX_STACK_DEPTH && +		       tail && !((unsigned long)tail & 0xf)) +			tail = user_backtrace(tail, entry); +	} else { +#ifdef CONFIG_COMPAT +		/* AARCH32 compat mode */ +		struct compat_frame_tail __user *tail; + +		tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; + +		while ((entry->nr < PERF_MAX_STACK_DEPTH) && +			tail && !((unsigned long)tail & 0x3)) +			tail = compat_user_backtrace(tail, entry); +#endif +	} +} + +/* + * Gets called by walk_stackframe() for every stackframe. This will be called + * whist unwinding the stackframe and is like a subroutine return so we use + * the PC. 
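
user_backtrace() above follows AArch64 frame records, the {previous fp, lr} pairs reached from x29, through the user stack. The same walk can be done by a process on its own stack; a standalone sketch (not part of this patch), assuming an AArch64 build with frame pointers (-fno-omit-frame-pointer) and the record layout that struct frame_tail describes:

/* Standalone sketch of the frame-record walk, applied by a process to
 * its own stack. */
#include <stdio.h>

struct frame_tail {
        struct frame_tail *fp;  /* previous frame record */
        unsigned long lr;       /* return address of this frame */
};

static void __attribute__((noinline)) backtrace(void)
{
        struct frame_tail *tail = __builtin_frame_address(0);
        int depth = 0;

        while (tail && depth++ < 16) {
                printf("  pc %#lx\n", tail->lr);
                /* Frame pointers must strictly move up the stack, the
                 * same check user_backtrace() applies. */
                if (tail->fp <= tail)
                        break;
                tail = tail->fp;
        }
}

static void __attribute__((noinline)) leaf(void) { backtrace(); }

int main(void)
{
        leaf();
        return 0;
}
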
+ */ +static int callchain_trace(struct stackframe *frame, void *data) +{ +	struct perf_callchain_entry *entry = data; +	perf_callchain_store(entry, frame->pc); +	return 0; +} + +void perf_callchain_kernel(struct perf_callchain_entry *entry, +			   struct pt_regs *regs) +{ +	struct stackframe frame; + +	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { +		/* We don't support guest os callchain now */ +		return; +	} + +	frame.fp = regs->regs[29]; +	frame.sp = regs->sp; +	frame.pc = regs->pc; + +	walk_stackframe(&frame, callchain_trace, entry); +} + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ +	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) +		return perf_guest_cbs->get_guest_ip(); + +	return instruction_pointer(regs); +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ +	int misc = 0; + +	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { +		if (perf_guest_cbs->is_user_mode()) +			misc |= PERF_RECORD_MISC_GUEST_USER; +		else +			misc |= PERF_RECORD_MISC_GUEST_KERNEL; +	} else { +		if (user_mode(regs)) +			misc |= PERF_RECORD_MISC_USER; +		else +			misc |= PERF_RECORD_MISC_KERNEL; +	} + +	return misc; +} diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c new file mode 100644 index 00000000000..422ebd63b61 --- /dev/null +++ b/arch/arm64/kernel/perf_regs.c @@ -0,0 +1,46 @@ +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/bug.h> + +#include <asm/compat.h> +#include <asm/perf_regs.h> +#include <asm/ptrace.h> + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ +	if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_MAX)) +		return 0; + +	/* +	 * Compat (i.e. 32 bit) mode: +	 * - PC has been set in the pt_regs struct in kernel_entry, +	 * - Handle SP and LR here. +	 */ +	if (compat_user_mode(regs)) { +		if ((u32)idx == PERF_REG_ARM64_SP) +			return regs->compat_sp; +		if ((u32)idx == PERF_REG_ARM64_LR) +			return regs->compat_lr; +	} + +	return regs->regs[idx]; +} + +#define REG_RESERVED (~((1ULL << PERF_REG_ARM64_MAX) - 1)) + +int perf_reg_validate(u64 mask) +{ +	if (!mask || mask & REG_RESERVED) +		return -EINVAL; + +	return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ +	if (is_compat_thread(task_thread_info(task))) +		return PERF_SAMPLE_REGS_ABI_32; +	else +		return PERF_SAMPLE_REGS_ABI_64; +} diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c new file mode 100644 index 00000000000..43b7c34f92c --- /dev/null +++ b/arch/arm64/kernel/process.c @@ -0,0 +1,386 @@ +/* + * Based on arch/arm/kernel/process.c + * + * Original Copyright (C) 1995  Linus Torvalds + * Copyright (C) 1996-2000 Russell King - Converted to ARM. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
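
perf_reg_validate() in perf_regs.c above rejects any sample_regs_user mask that is empty or names a register index at or beyond PERF_REG_ARM64_MAX. A standalone sketch of the same check (not part of this patch), with register indices mirroring the arm64 uapi perf_regs.h layout (x0..x29, lr, sp, pc):

/* Standalone sketch of the sample_regs_user mask check. */
#include <stdint.h>
#include <stdio.h>

enum { REG_X0 = 0, REG_LR = 30, REG_SP = 31, REG_PC = 32, REG_MAX = 33 };

#define REG_RESERVED (~((1ULL << REG_MAX) - 1))

static int validate(uint64_t mask)
{
        return (mask && !(mask & REG_RESERVED)) ? 0 : -1;
}

int main(void)
{
        uint64_t mask = (1ULL << REG_X0) | (1ULL << REG_LR) |
                        (1ULL << REG_SP) | (1ULL << REG_PC);

        printf("x0|lr|sp|pc (%#llx): %s\n", (unsigned long long)mask,
               validate(mask) ? "rejected" : "ok");
        printf("bit 40 set: %s\n",
               validate(1ULL << 40) ? "rejected" : "ok");
        return 0;
}
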
+ */ + +#include <stdarg.h> + +#include <linux/compat.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/user.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/interrupt.h> +#include <linux/kallsyms.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/elfcore.h> +#include <linux/pm.h> +#include <linux/tick.h> +#include <linux/utsname.h> +#include <linux/uaccess.h> +#include <linux/random.h> +#include <linux/hw_breakpoint.h> +#include <linux/personality.h> +#include <linux/notifier.h> + +#include <asm/compat.h> +#include <asm/cacheflush.h> +#include <asm/fpsimd.h> +#include <asm/mmu_context.h> +#include <asm/processor.h> +#include <asm/stacktrace.h> + +static void setup_restart(void) +{ +	/* +	 * Tell the mm system that we are going to reboot - +	 * we may need it to insert some 1:1 mappings so that +	 * soft boot works. +	 */ +	setup_mm_for_reboot(); + +	/* Clean and invalidate caches */ +	flush_cache_all(); + +	/* Turn D-cache off */ +	cpu_cache_off(); + +	/* Push out any further dirty data, and ensure cache is empty */ +	flush_cache_all(); +} + +void soft_restart(unsigned long addr) +{ +	typedef void (*phys_reset_t)(unsigned long); +	phys_reset_t phys_reset; + +	setup_restart(); + +	/* Switch to the identity mapping */ +	phys_reset = (phys_reset_t)virt_to_phys(cpu_reset); +	phys_reset(addr); + +	/* Should never get here */ +	BUG(); +} + +/* + * Function pointers to optional machine specific functions + */ +void (*pm_power_off)(void); +EXPORT_SYMBOL_GPL(pm_power_off); + +void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); +EXPORT_SYMBOL_GPL(arm_pm_restart); + +/* + * This is our default idle handler. + */ +void arch_cpu_idle(void) +{ +	/* +	 * This should do all the clock switching and wait for interrupt +	 * tricks +	 */ +	cpu_do_idle(); +	local_irq_enable(); +} + +#ifdef CONFIG_HOTPLUG_CPU +void arch_cpu_idle_dead(void) +{ +       cpu_die(); +} +#endif + +/* + * Called by kexec, immediately prior to machine_kexec(). + * + * This must completely disable all secondary CPUs; simply causing those CPUs + * to execute e.g. a RAM-based pin loop is not sufficient. This allows the + * kexec'd kernel to use any and all RAM as it sees fit, without having to + * avoid any code or data used by any SW CPU pin loop. The CPU hotplug + * functionality embodied in disable_nonboot_cpus() to achieve this. + */ +void machine_shutdown(void) +{ +	disable_nonboot_cpus(); +} + +/* + * Halting simply requires that the secondary CPUs stop performing any + * activity (executing tasks, handling interrupts). smp_send_stop() + * achieves this. + */ +void machine_halt(void) +{ +	local_irq_disable(); +	smp_send_stop(); +	while (1); +} + +/* + * Power-off simply requires that the secondary CPUs stop performing any + * activity (executing tasks, handling interrupts). smp_send_stop() + * achieves this. When the system power is turned off, it will take all CPUs + * with it. + */ +void machine_power_off(void) +{ +	local_irq_disable(); +	smp_send_stop(); +	if (pm_power_off) +		pm_power_off(); +} + +/* + * Restart requires that the secondary CPUs stop performing any activity + * while the primary CPU resets the system. Systems with a single CPU can + * use soft_restart() as their machine descriptor's .restart hook, since that + * will cause the only available CPU to reset. 
Systems with multiple CPUs must + * provide a HW restart implementation, to ensure that all CPUs reset at once. + * This is required so that any code running after reset on the primary CPU + * doesn't have to co-ordinate with other CPUs to ensure they aren't still + * executing pre-reset code, and using RAM that the primary CPU's code wishes + * to use. Implementing such co-ordination would be essentially impossible. + */ +void machine_restart(char *cmd) +{ +	/* Disable interrupts first */ +	local_irq_disable(); +	smp_send_stop(); + +	/* Now call the architecture specific reboot code. */ +	if (arm_pm_restart) +		arm_pm_restart(reboot_mode, cmd); + +	/* +	 * Whoops - the architecture was unable to reboot. +	 */ +	printk("Reboot failed -- System halted\n"); +	while (1); +} + +void __show_regs(struct pt_regs *regs) +{ +	int i, top_reg; +	u64 lr, sp; + +	if (compat_user_mode(regs)) { +		lr = regs->compat_lr; +		sp = regs->compat_sp; +		top_reg = 12; +	} else { +		lr = regs->regs[30]; +		sp = regs->sp; +		top_reg = 29; +	} + +	show_regs_print_info(KERN_DEFAULT); +	print_symbol("PC is at %s\n", instruction_pointer(regs)); +	print_symbol("LR is at %s\n", lr); +	printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n", +	       regs->pc, lr, regs->pstate); +	printk("sp : %016llx\n", sp); +	for (i = top_reg; i >= 0; i--) { +		printk("x%-2d: %016llx ", i, regs->regs[i]); +		if (i % 2 == 0) +			printk("\n"); +	} +	printk("\n"); +} + +void show_regs(struct pt_regs * regs) +{ +	printk("\n"); +	__show_regs(regs); +} + +/* + * Free current thread data structures etc.. + */ +void exit_thread(void) +{ +} + +void flush_thread(void) +{ +	fpsimd_flush_thread(); +	flush_ptrace_hw_breakpoint(current); +} + +void release_thread(struct task_struct *dead_task) +{ +} + +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) +{ +	fpsimd_preserve_current_state(); +	*dst = *src; +	return 0; +} + +asmlinkage void ret_from_fork(void) asm("ret_from_fork"); + +int copy_thread(unsigned long clone_flags, unsigned long stack_start, +		unsigned long stk_sz, struct task_struct *p) +{ +	struct pt_regs *childregs = task_pt_regs(p); +	unsigned long tls = p->thread.tp_value; + +	memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context)); + +	if (likely(!(p->flags & PF_KTHREAD))) { +		*childregs = *current_pt_regs(); +		childregs->regs[0] = 0; +		if (is_compat_thread(task_thread_info(p))) { +			if (stack_start) +				childregs->compat_sp = stack_start; +		} else { +			/* +			 * Read the current TLS pointer from tpidr_el0 as it may be +			 * out-of-sync with the saved value. +			 */ +			asm("mrs %0, tpidr_el0" : "=r" (tls)); +			if (stack_start) { +				/* 16-byte aligned stack mandatory on AArch64 */ +				if (stack_start & 15) +					return -EINVAL; +				childregs->sp = stack_start; +			} +		} +		/* +		 * If a TLS pointer was passed to clone (4th argument), use it +		 * for the new thread. 
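
copy_thread() above reads tpidr_el0 with an mrs rather than trusting the value saved at the last context switch, because the register is writable from EL0 and may have been updated since. The same read works from a plain userspace program; a minimal sketch (not part of this patch), assuming an AArch64 toolchain. Note that on a glibc system tpidr_el0 is the TLS pointer, so it is only read here, never written:

/* Standalone sketch: read tpidr_el0 directly from EL0. */
#include <stdio.h>

int main(void)
{
        unsigned long tls;

        /* The same register access copy_thread() uses above. */
        asm("mrs %0, tpidr_el0" : "=r" (tls));

        printf("tpidr_el0 = %#lx\n", tls);
        return 0;
}
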
+		 */ +		if (clone_flags & CLONE_SETTLS) +			tls = childregs->regs[3]; +	} else { +		memset(childregs, 0, sizeof(struct pt_regs)); +		childregs->pstate = PSR_MODE_EL1h; +		p->thread.cpu_context.x19 = stack_start; +		p->thread.cpu_context.x20 = stk_sz; +	} +	p->thread.cpu_context.pc = (unsigned long)ret_from_fork; +	p->thread.cpu_context.sp = (unsigned long)childregs; +	p->thread.tp_value = tls; + +	ptrace_hw_copy_thread(p); + +	return 0; +} + +static void tls_thread_switch(struct task_struct *next) +{ +	unsigned long tpidr, tpidrro; + +	if (!is_compat_task()) { +		asm("mrs %0, tpidr_el0" : "=r" (tpidr)); +		current->thread.tp_value = tpidr; +	} + +	if (is_compat_thread(task_thread_info(next))) { +		tpidr = 0; +		tpidrro = next->thread.tp_value; +	} else { +		tpidr = next->thread.tp_value; +		tpidrro = 0; +	} + +	asm( +	"	msr	tpidr_el0, %0\n" +	"	msr	tpidrro_el0, %1" +	: : "r" (tpidr), "r" (tpidrro)); +} + +/* + * Thread switching. + */ +struct task_struct *__switch_to(struct task_struct *prev, +				struct task_struct *next) +{ +	struct task_struct *last; + +	fpsimd_thread_switch(next); +	tls_thread_switch(next); +	hw_breakpoint_thread_switch(next); +	contextidr_thread_switch(next); + +	/* +	 * Complete any pending TLB or cache maintenance on this CPU in case +	 * the thread migrates to a different CPU. +	 */ +	dsb(ish); + +	/* the actual thread switch */ +	last = cpu_switch_to(prev, next); + +	return last; +} + +unsigned long get_wchan(struct task_struct *p) +{ +	struct stackframe frame; +	unsigned long stack_page; +	int count = 0; +	if (!p || p == current || p->state == TASK_RUNNING) +		return 0; + +	frame.fp = thread_saved_fp(p); +	frame.sp = thread_saved_sp(p); +	frame.pc = thread_saved_pc(p); +	stack_page = (unsigned long)task_stack_page(p); +	do { +		if (frame.sp < stack_page || +		    frame.sp >= stack_page + THREAD_SIZE || +		    unwind_frame(&frame)) +			return 0; +		if (!in_sched_functions(frame.pc)) +			return frame.pc; +	} while (count ++ < 16); +	return 0; +} + +unsigned long arch_align_stack(unsigned long sp) +{ +	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) +		sp -= get_random_int() & ~PAGE_MASK; +	return sp & ~0xf; +} + +static unsigned long randomize_base(unsigned long base) +{ +	unsigned long range_end = base + (STACK_RND_MASK << PAGE_SHIFT) + 1; +	return randomize_range(base, range_end, 0) ? : base; +} + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ +	return randomize_base(mm->brk); +} + +unsigned long randomize_et_dyn(unsigned long base) +{ +	return randomize_base(base); +} diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c new file mode 100644 index 00000000000..9e9798f9117 --- /dev/null +++ b/arch/arm64/kernel/psci.c @@ -0,0 +1,450 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. 
+ * + * Copyright (C) 2013 ARM Limited + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#define pr_fmt(fmt) "psci: " fmt + +#include <linux/init.h> +#include <linux/of.h> +#include <linux/smp.h> +#include <linux/reboot.h> +#include <linux/pm.h> +#include <linux/delay.h> +#include <uapi/linux/psci.h> + +#include <asm/compiler.h> +#include <asm/cpu_ops.h> +#include <asm/errno.h> +#include <asm/psci.h> +#include <asm/smp_plat.h> +#include <asm/system_misc.h> + +#define PSCI_POWER_STATE_TYPE_STANDBY		0 +#define PSCI_POWER_STATE_TYPE_POWER_DOWN	1 + +struct psci_power_state { +	u16	id; +	u8	type; +	u8	affinity_level; +}; + +struct psci_operations { +	int (*cpu_suspend)(struct psci_power_state state, +			   unsigned long entry_point); +	int (*cpu_off)(struct psci_power_state state); +	int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); +	int (*migrate)(unsigned long cpuid); +	int (*affinity_info)(unsigned long target_affinity, +			unsigned long lowest_affinity_level); +	int (*migrate_info_type)(void); +}; + +static struct psci_operations psci_ops; + +static int (*invoke_psci_fn)(u64, u64, u64, u64); +typedef int (*psci_initcall_t)(const struct device_node *); + +enum psci_function { +	PSCI_FN_CPU_SUSPEND, +	PSCI_FN_CPU_ON, +	PSCI_FN_CPU_OFF, +	PSCI_FN_MIGRATE, +	PSCI_FN_AFFINITY_INFO, +	PSCI_FN_MIGRATE_INFO_TYPE, +	PSCI_FN_MAX, +}; + +static u32 psci_function_id[PSCI_FN_MAX]; + +static int psci_to_linux_errno(int errno) +{ +	switch (errno) { +	case PSCI_RET_SUCCESS: +		return 0; +	case PSCI_RET_NOT_SUPPORTED: +		return -EOPNOTSUPP; +	case PSCI_RET_INVALID_PARAMS: +		return -EINVAL; +	case PSCI_RET_DENIED: +		return -EPERM; +	}; + +	return -EINVAL; +} + +static u32 psci_power_state_pack(struct psci_power_state state) +{ +	return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT) +			& PSCI_0_2_POWER_STATE_ID_MASK) | +		((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) +		 & PSCI_0_2_POWER_STATE_TYPE_MASK) | +		((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) +		 & PSCI_0_2_POWER_STATE_AFFL_MASK); +} + +/* + * The following two functions are invoked via the invoke_psci_fn pointer + * and will not be inlined, allowing us to piggyback on the AAPCS. 
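
psci_power_state_pack() above folds the id/type/affinity_level triple into the 32-bit power_state argument defined by PSCI 0.2. A standalone worked example (not part of this patch), with shift/mask values mirroring the uapi <linux/psci.h> definitions: state ID in bits [15:0], type in bit 16, affinity level in bits [25:24]:

/* Standalone worked example of the PSCI 0.2 power_state encoding. */
#include <stdint.h>
#include <stdio.h>

#define PSCI_0_2_POWER_STATE_ID_SHIFT   0
#define PSCI_0_2_POWER_STATE_ID_MASK    0xffffU
#define PSCI_0_2_POWER_STATE_TYPE_SHIFT 16
#define PSCI_0_2_POWER_STATE_TYPE_MASK  (0x1U << 16)
#define PSCI_0_2_POWER_STATE_AFFL_SHIFT 24
#define PSCI_0_2_POWER_STATE_AFFL_MASK  (0x3U << 24)

struct psci_power_state {
        uint16_t id;
        uint8_t type;           /* 0 = standby, 1 = power down */
        uint8_t affinity_level;
};

static uint32_t pack(struct psci_power_state s)
{
        return ((s.id << PSCI_0_2_POWER_STATE_ID_SHIFT) &
                        PSCI_0_2_POWER_STATE_ID_MASK) |
               ((s.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) &
                        PSCI_0_2_POWER_STATE_TYPE_MASK) |
               ((s.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) &
                        PSCI_0_2_POWER_STATE_AFFL_MASK);
}

int main(void)
{
        /* A power-down request like the one cpu_psci_cpu_die() builds
         * later in this file. */
        struct psci_power_state off = { .type = 1 };

        printf("power_state = %#x\n", pack(off));       /* 0x10000 */
        return 0;
}
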
+ */ +static noinline int __invoke_psci_fn_hvc(u64 function_id, u64 arg0, u64 arg1, +					 u64 arg2) +{ +	asm volatile( +			__asmeq("%0", "x0") +			__asmeq("%1", "x1") +			__asmeq("%2", "x2") +			__asmeq("%3", "x3") +			"hvc	#0\n" +		: "+r" (function_id) +		: "r" (arg0), "r" (arg1), "r" (arg2)); + +	return function_id; +} + +static noinline int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1, +					 u64 arg2) +{ +	asm volatile( +			__asmeq("%0", "x0") +			__asmeq("%1", "x1") +			__asmeq("%2", "x2") +			__asmeq("%3", "x3") +			"smc	#0\n" +		: "+r" (function_id) +		: "r" (arg0), "r" (arg1), "r" (arg2)); + +	return function_id; +} + +static int psci_get_version(void) +{ +	int err; + +	err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); +	return err; +} + +static int psci_cpu_suspend(struct psci_power_state state, +			    unsigned long entry_point) +{ +	int err; +	u32 fn, power_state; + +	fn = psci_function_id[PSCI_FN_CPU_SUSPEND]; +	power_state = psci_power_state_pack(state); +	err = invoke_psci_fn(fn, power_state, entry_point, 0); +	return psci_to_linux_errno(err); +} + +static int psci_cpu_off(struct psci_power_state state) +{ +	int err; +	u32 fn, power_state; + +	fn = psci_function_id[PSCI_FN_CPU_OFF]; +	power_state = psci_power_state_pack(state); +	err = invoke_psci_fn(fn, power_state, 0, 0); +	return psci_to_linux_errno(err); +} + +static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point) +{ +	int err; +	u32 fn; + +	fn = psci_function_id[PSCI_FN_CPU_ON]; +	err = invoke_psci_fn(fn, cpuid, entry_point, 0); +	return psci_to_linux_errno(err); +} + +static int psci_migrate(unsigned long cpuid) +{ +	int err; +	u32 fn; + +	fn = psci_function_id[PSCI_FN_MIGRATE]; +	err = invoke_psci_fn(fn, cpuid, 0, 0); +	return psci_to_linux_errno(err); +} + +static int psci_affinity_info(unsigned long target_affinity, +		unsigned long lowest_affinity_level) +{ +	int err; +	u32 fn; + +	fn = psci_function_id[PSCI_FN_AFFINITY_INFO]; +	err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0); +	return err; +} + +static int psci_migrate_info_type(void) +{ +	int err; +	u32 fn; + +	fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE]; +	err = invoke_psci_fn(fn, 0, 0, 0); +	return err; +} + +static int get_set_conduit_method(struct device_node *np) +{ +	const char *method; + +	pr_info("probing for conduit method from DT.\n"); + +	if (of_property_read_string(np, "method", &method)) { +		pr_warn("missing \"method\" property\n"); +		return -ENXIO; +	} + +	if (!strcmp("hvc", method)) { +		invoke_psci_fn = __invoke_psci_fn_hvc; +	} else if (!strcmp("smc", method)) { +		invoke_psci_fn = __invoke_psci_fn_smc; +	} else { +		pr_warn("invalid \"method\" property: %s\n", method); +		return -EINVAL; +	} +	return 0; +} + +static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) +{ +	invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); +} + +static void psci_sys_poweroff(void) +{ +	invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); +} + +/* + * PSCI Function IDs for v0.2+ are well defined so use + * standard values. + */ +static int psci_0_2_init(struct device_node *np) +{ +	int err, ver; + +	err = get_set_conduit_method(np); + +	if (err) +		goto out_put_node; + +	ver = psci_get_version(); + +	if (ver == PSCI_RET_NOT_SUPPORTED) { +		/* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. 
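
psci_get_version() above returns the raw PSCI_VERSION word; the v0.2 probe that follows splits it into major and minor halves. A standalone sketch of that decoding (not part of this patch), with the field layout mirroring the uapi <linux/psci.h> definitions, major in bits [31:16] and minor in bits [15:0]:

/* Standalone sketch: decode a PSCI_VERSION word. */
#include <stdint.h>
#include <stdio.h>

#define PSCI_VERSION_MAJOR(v)   (((v) >> 16) & 0xffff)
#define PSCI_VERSION_MINOR(v)   ((v) & 0xffff)

int main(void)
{
        uint32_t ver = 0x00000002;      /* what a PSCI v0.2 firmware reports */

        printf("PSCIv%u.%u detected\n",
               PSCI_VERSION_MAJOR(ver), PSCI_VERSION_MINOR(ver));
        return 0;
}
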
*/ +		pr_err("PSCI firmware does not comply with the v0.2 spec.\n"); +		err = -EOPNOTSUPP; +		goto out_put_node; +	} else { +		pr_info("PSCIv%d.%d detected in firmware.\n", +				PSCI_VERSION_MAJOR(ver), +				PSCI_VERSION_MINOR(ver)); + +		if (PSCI_VERSION_MAJOR(ver) == 0 && +				PSCI_VERSION_MINOR(ver) < 2) { +			err = -EINVAL; +			pr_err("Conflicting PSCI version detected.\n"); +			goto out_put_node; +		} +	} + +	pr_info("Using standard PSCI v0.2 function IDs\n"); +	psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND; +	psci_ops.cpu_suspend = psci_cpu_suspend; + +	psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; +	psci_ops.cpu_off = psci_cpu_off; + +	psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN64_CPU_ON; +	psci_ops.cpu_on = psci_cpu_on; + +	psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN64_MIGRATE; +	psci_ops.migrate = psci_migrate; + +	psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN64_AFFINITY_INFO; +	psci_ops.affinity_info = psci_affinity_info; + +	psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] = +		PSCI_0_2_FN_MIGRATE_INFO_TYPE; +	psci_ops.migrate_info_type = psci_migrate_info_type; + +	arm_pm_restart = psci_sys_reset; + +	pm_power_off = psci_sys_poweroff; + +out_put_node: +	of_node_put(np); +	return err; +} + +/* + * PSCI < v0.2 get PSCI Function IDs via DT. + */ +static int psci_0_1_init(struct device_node *np) +{ +	u32 id; +	int err; + +	err = get_set_conduit_method(np); + +	if (err) +		goto out_put_node; + +	pr_info("Using PSCI v0.1 Function IDs from DT\n"); + +	if (!of_property_read_u32(np, "cpu_suspend", &id)) { +		psci_function_id[PSCI_FN_CPU_SUSPEND] = id; +		psci_ops.cpu_suspend = psci_cpu_suspend; +	} + +	if (!of_property_read_u32(np, "cpu_off", &id)) { +		psci_function_id[PSCI_FN_CPU_OFF] = id; +		psci_ops.cpu_off = psci_cpu_off; +	} + +	if (!of_property_read_u32(np, "cpu_on", &id)) { +		psci_function_id[PSCI_FN_CPU_ON] = id; +		psci_ops.cpu_on = psci_cpu_on; +	} + +	if (!of_property_read_u32(np, "migrate", &id)) { +		psci_function_id[PSCI_FN_MIGRATE] = id; +		psci_ops.migrate = psci_migrate; +	} + +out_put_node: +	of_node_put(np); +	return err; +} + +static const struct of_device_id psci_of_match[] __initconst = { +	{ .compatible = "arm,psci",	.data = psci_0_1_init}, +	{ .compatible = "arm,psci-0.2",	.data = psci_0_2_init}, +	{}, +}; + +int __init psci_init(void) +{ +	struct device_node *np; +	const struct of_device_id *matched_np; +	psci_initcall_t init_fn; + +	np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); + +	if (!np) +		return -ENODEV; + +	init_fn = (psci_initcall_t)matched_np->data; +	return init_fn(np); +} + +#ifdef CONFIG_SMP + +static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu) +{ +	return 0; +} + +static int __init cpu_psci_cpu_prepare(unsigned int cpu) +{ +	if (!psci_ops.cpu_on) { +		pr_err("no cpu_on method, not booting CPU%d\n", cpu); +		return -ENODEV; +	} + +	return 0; +} + +static int cpu_psci_cpu_boot(unsigned int cpu) +{ +	int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry)); +	if (err) +		pr_err("failed to boot CPU%d (%d)\n", cpu, err); + +	return err; +} + +#ifdef CONFIG_HOTPLUG_CPU +static int cpu_psci_cpu_disable(unsigned int cpu) +{ +	/* Fail early if we don't have CPU_OFF support */ +	if (!psci_ops.cpu_off) +		return -EOPNOTSUPP; +	return 0; +} + +static void cpu_psci_cpu_die(unsigned int cpu) +{ +	int ret; +	/* +	 * There are no known implementations of PSCI actually using the +	 * power state field, pass a sensible default for now. 
+	 */ +	struct psci_power_state state = { +		.type = PSCI_POWER_STATE_TYPE_POWER_DOWN, +	}; + +	ret = psci_ops.cpu_off(state); + +	pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); +} + +static int cpu_psci_cpu_kill(unsigned int cpu) +{ +	int err, i; + +	if (!psci_ops.affinity_info) +		return 1; +	/* +	 * cpu_kill could race with cpu_die and we can +	 * potentially end up declaring this cpu undead +	 * while it is dying. So, try again a few times. +	 */ + +	for (i = 0; i < 10; i++) { +		err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); +		if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { +			pr_info("CPU%d killed.\n", cpu); +			return 1; +		} + +		msleep(10); +		pr_info("Retrying again to check for CPU kill\n"); +	} + +	pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", +			cpu, err); +	/* Make op_cpu_kill() fail. */ +	return 0; +} +#endif + +const struct cpu_operations cpu_psci_ops = { +	.name		= "psci", +	.cpu_init	= cpu_psci_cpu_init, +	.cpu_prepare	= cpu_psci_cpu_prepare, +	.cpu_boot	= cpu_psci_cpu_boot, +#ifdef CONFIG_HOTPLUG_CPU +	.cpu_disable	= cpu_psci_cpu_disable, +	.cpu_die	= cpu_psci_cpu_die, +	.cpu_kill	= cpu_psci_cpu_kill, +#endif +}; + +#endif diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c new file mode 100644 index 00000000000..9fde010c945 --- /dev/null +++ b/arch/arm64/kernel/ptrace.c @@ -0,0 +1,1126 @@ +/* + * Based on arch/arm/kernel/ptrace.c + * + * By Ross Biro 1/23/92 + * edited by Linus Torvalds + * ARM modifications Copyright (C) 2000 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compat.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/security.h> +#include <linux/init.h> +#include <linux/signal.h> +#include <linux/uaccess.h> +#include <linux/perf_event.h> +#include <linux/hw_breakpoint.h> +#include <linux/regset.h> +#include <linux/tracehook.h> +#include <linux/elf.h> + +#include <asm/compat.h> +#include <asm/debug-monitors.h> +#include <asm/pgtable.h> +#include <asm/traps.h> +#include <asm/system_misc.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + +/* + * TODO: does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + +/* + * Called by kernel/ptrace.c when detaching.. + */ +void ptrace_disable(struct task_struct *child) +{ +} + +#ifdef CONFIG_HAVE_HW_BREAKPOINT +/* + * Handle hitting a HW-breakpoint. 
+ */ +static void ptrace_hbptriggered(struct perf_event *bp, +				struct perf_sample_data *data, +				struct pt_regs *regs) +{ +	struct arch_hw_breakpoint *bkpt = counter_arch_bp(bp); +	siginfo_t info = { +		.si_signo	= SIGTRAP, +		.si_errno	= 0, +		.si_code	= TRAP_HWBKPT, +		.si_addr	= (void __user *)(bkpt->trigger), +	}; + +#ifdef CONFIG_COMPAT +	int i; + +	if (!is_compat_task()) +		goto send_sig; + +	for (i = 0; i < ARM_MAX_BRP; ++i) { +		if (current->thread.debug.hbp_break[i] == bp) { +			info.si_errno = (i << 1) + 1; +			break; +		} +	} +	for (i = ARM_MAX_BRP; i < ARM_MAX_HBP_SLOTS && !bp; ++i) { +		if (current->thread.debug.hbp_watch[i] == bp) { +			info.si_errno = -((i << 1) + 1); +			break; +		} +	} + +send_sig: +#endif +	force_sig_info(SIGTRAP, &info, current); +} + +/* + * Unregister breakpoints from this task and reset the pointers in + * the thread_struct. + */ +void flush_ptrace_hw_breakpoint(struct task_struct *tsk) +{ +	int i; +	struct thread_struct *t = &tsk->thread; + +	for (i = 0; i < ARM_MAX_BRP; i++) { +		if (t->debug.hbp_break[i]) { +			unregister_hw_breakpoint(t->debug.hbp_break[i]); +			t->debug.hbp_break[i] = NULL; +		} +	} + +	for (i = 0; i < ARM_MAX_WRP; i++) { +		if (t->debug.hbp_watch[i]) { +			unregister_hw_breakpoint(t->debug.hbp_watch[i]); +			t->debug.hbp_watch[i] = NULL; +		} +	} +} + +void ptrace_hw_copy_thread(struct task_struct *tsk) +{ +	memset(&tsk->thread.debug, 0, sizeof(struct debug_info)); +} + +static struct perf_event *ptrace_hbp_get_event(unsigned int note_type, +					       struct task_struct *tsk, +					       unsigned long idx) +{ +	struct perf_event *bp = ERR_PTR(-EINVAL); + +	switch (note_type) { +	case NT_ARM_HW_BREAK: +		if (idx < ARM_MAX_BRP) +			bp = tsk->thread.debug.hbp_break[idx]; +		break; +	case NT_ARM_HW_WATCH: +		if (idx < ARM_MAX_WRP) +			bp = tsk->thread.debug.hbp_watch[idx]; +		break; +	} + +	return bp; +} + +static int ptrace_hbp_set_event(unsigned int note_type, +				struct task_struct *tsk, +				unsigned long idx, +				struct perf_event *bp) +{ +	int err = -EINVAL; + +	switch (note_type) { +	case NT_ARM_HW_BREAK: +		if (idx < ARM_MAX_BRP) { +			tsk->thread.debug.hbp_break[idx] = bp; +			err = 0; +		} +		break; +	case NT_ARM_HW_WATCH: +		if (idx < ARM_MAX_WRP) { +			tsk->thread.debug.hbp_watch[idx] = bp; +			err = 0; +		} +		break; +	} + +	return err; +} + +static struct perf_event *ptrace_hbp_create(unsigned int note_type, +					    struct task_struct *tsk, +					    unsigned long idx) +{ +	struct perf_event *bp; +	struct perf_event_attr attr; +	int err, type; + +	switch (note_type) { +	case NT_ARM_HW_BREAK: +		type = HW_BREAKPOINT_X; +		break; +	case NT_ARM_HW_WATCH: +		type = HW_BREAKPOINT_RW; +		break; +	default: +		return ERR_PTR(-EINVAL); +	} + +	ptrace_breakpoint_init(&attr); + +	/* +	 * Initialise fields to sane defaults +	 * (i.e. values that will pass validation). 
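+	 * The slot is created disabled, at address 0 and with a 4-byte
+	 * length, purely so that register_user_hw_breakpoint() accepts it;
+	 * the callers (ptrace_hbp_set_addr()/ptrace_hbp_set_ctrl()) then
+	 * install the real values via modify_user_hw_breakpoint().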
+	 */ +	attr.bp_addr	= 0; +	attr.bp_len	= HW_BREAKPOINT_LEN_4; +	attr.bp_type	= type; +	attr.disabled	= 1; + +	bp = register_user_hw_breakpoint(&attr, ptrace_hbptriggered, NULL, tsk); +	if (IS_ERR(bp)) +		return bp; + +	err = ptrace_hbp_set_event(note_type, tsk, idx, bp); +	if (err) +		return ERR_PTR(err); + +	return bp; +} + +static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type, +				     struct arch_hw_breakpoint_ctrl ctrl, +				     struct perf_event_attr *attr) +{ +	int err, len, type, disabled = !ctrl.enabled; + +	attr->disabled = disabled; +	if (disabled) +		return 0; + +	err = arch_bp_generic_fields(ctrl, &len, &type); +	if (err) +		return err; + +	switch (note_type) { +	case NT_ARM_HW_BREAK: +		if ((type & HW_BREAKPOINT_X) != type) +			return -EINVAL; +		break; +	case NT_ARM_HW_WATCH: +		if ((type & HW_BREAKPOINT_RW) != type) +			return -EINVAL; +		break; +	default: +		return -EINVAL; +	} + +	attr->bp_len	= len; +	attr->bp_type	= type; + +	return 0; +} + +static int ptrace_hbp_get_resource_info(unsigned int note_type, u32 *info) +{ +	u8 num; +	u32 reg = 0; + +	switch (note_type) { +	case NT_ARM_HW_BREAK: +		num = hw_breakpoint_slots(TYPE_INST); +		break; +	case NT_ARM_HW_WATCH: +		num = hw_breakpoint_slots(TYPE_DATA); +		break; +	default: +		return -EINVAL; +	} + +	reg |= debug_monitors_arch(); +	reg <<= 8; +	reg |= num; + +	*info = reg; +	return 0; +} + +static int ptrace_hbp_get_ctrl(unsigned int note_type, +			       struct task_struct *tsk, +			       unsigned long idx, +			       u32 *ctrl) +{ +	struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx); + +	if (IS_ERR(bp)) +		return PTR_ERR(bp); + +	*ctrl = bp ? encode_ctrl_reg(counter_arch_bp(bp)->ctrl) : 0; +	return 0; +} + +static int ptrace_hbp_get_addr(unsigned int note_type, +			       struct task_struct *tsk, +			       unsigned long idx, +			       u64 *addr) +{ +	struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx); + +	if (IS_ERR(bp)) +		return PTR_ERR(bp); + +	*addr = bp ? 
bp->attr.bp_addr : 0; +	return 0; +} + +static struct perf_event *ptrace_hbp_get_initialised_bp(unsigned int note_type, +							struct task_struct *tsk, +							unsigned long idx) +{ +	struct perf_event *bp = ptrace_hbp_get_event(note_type, tsk, idx); + +	if (!bp) +		bp = ptrace_hbp_create(note_type, tsk, idx); + +	return bp; +} + +static int ptrace_hbp_set_ctrl(unsigned int note_type, +			       struct task_struct *tsk, +			       unsigned long idx, +			       u32 uctrl) +{ +	int err; +	struct perf_event *bp; +	struct perf_event_attr attr; +	struct arch_hw_breakpoint_ctrl ctrl; + +	bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx); +	if (IS_ERR(bp)) { +		err = PTR_ERR(bp); +		return err; +	} + +	attr = bp->attr; +	decode_ctrl_reg(uctrl, &ctrl); +	err = ptrace_hbp_fill_attr_ctrl(note_type, ctrl, &attr); +	if (err) +		return err; + +	return modify_user_hw_breakpoint(bp, &attr); +} + +static int ptrace_hbp_set_addr(unsigned int note_type, +			       struct task_struct *tsk, +			       unsigned long idx, +			       u64 addr) +{ +	int err; +	struct perf_event *bp; +	struct perf_event_attr attr; + +	bp = ptrace_hbp_get_initialised_bp(note_type, tsk, idx); +	if (IS_ERR(bp)) { +		err = PTR_ERR(bp); +		return err; +	} + +	attr = bp->attr; +	attr.bp_addr = addr; +	err = modify_user_hw_breakpoint(bp, &attr); +	return err; +} + +#define PTRACE_HBP_ADDR_SZ	sizeof(u64) +#define PTRACE_HBP_CTRL_SZ	sizeof(u32) +#define PTRACE_HBP_PAD_SZ	sizeof(u32) + +static int hw_break_get(struct task_struct *target, +			const struct user_regset *regset, +			unsigned int pos, unsigned int count, +			void *kbuf, void __user *ubuf) +{ +	unsigned int note_type = regset->core_note_type; +	int ret, idx = 0, offset, limit; +	u32 info, ctrl; +	u64 addr; + +	/* Resource info */ +	ret = ptrace_hbp_get_resource_info(note_type, &info); +	if (ret) +		return ret; + +	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &info, 0, +				  sizeof(info)); +	if (ret) +		return ret; + +	/* Pad */ +	offset = offsetof(struct user_hwdebug_state, pad); +	ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, offset, +				       offset + PTRACE_HBP_PAD_SZ); +	if (ret) +		return ret; + +	/* (address, ctrl) registers */ +	offset = offsetof(struct user_hwdebug_state, dbg_regs); +	limit = regset->n * regset->size; +	while (count && offset < limit) { +		ret = ptrace_hbp_get_addr(note_type, target, idx, &addr); +		if (ret) +			return ret; +		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &addr, +					  offset, offset + PTRACE_HBP_ADDR_SZ); +		if (ret) +			return ret; +		offset += PTRACE_HBP_ADDR_SZ; + +		ret = ptrace_hbp_get_ctrl(note_type, target, idx, &ctrl); +		if (ret) +			return ret; +		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &ctrl, +					  offset, offset + PTRACE_HBP_CTRL_SZ); +		if (ret) +			return ret; +		offset += PTRACE_HBP_CTRL_SZ; + +		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, +					       offset, +					       offset + PTRACE_HBP_PAD_SZ); +		if (ret) +			return ret; +		offset += PTRACE_HBP_PAD_SZ; +		idx++; +	} + +	return 0; +} + +static int hw_break_set(struct task_struct *target, +			const struct user_regset *regset, +			unsigned int pos, unsigned int count, +			const void *kbuf, const void __user *ubuf) +{ +	unsigned int note_type = regset->core_note_type; +	int ret, idx = 0, offset, limit; +	u32 ctrl; +	u64 addr; + +	/* Resource info and pad */ +	offset = offsetof(struct user_hwdebug_state, dbg_regs); +	ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, 0, offset); +	if (ret) 
+		return ret; + +	/* (address, ctrl) registers */ +	limit = regset->n * regset->size; +	while (count && offset < limit) { +		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &addr, +					 offset, offset + PTRACE_HBP_ADDR_SZ); +		if (ret) +			return ret; +		ret = ptrace_hbp_set_addr(note_type, target, idx, addr); +		if (ret) +			return ret; +		offset += PTRACE_HBP_ADDR_SZ; + +		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, +					 offset, offset + PTRACE_HBP_CTRL_SZ); +		if (ret) +			return ret; +		ret = ptrace_hbp_set_ctrl(note_type, target, idx, ctrl); +		if (ret) +			return ret; +		offset += PTRACE_HBP_CTRL_SZ; + +		ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, +						offset, +						offset + PTRACE_HBP_PAD_SZ); +		if (ret) +			return ret; +		offset += PTRACE_HBP_PAD_SZ; +		idx++; +	} + +	return 0; +} +#endif	/* CONFIG_HAVE_HW_BREAKPOINT */ + +static int gpr_get(struct task_struct *target, +		   const struct user_regset *regset, +		   unsigned int pos, unsigned int count, +		   void *kbuf, void __user *ubuf) +{ +	struct user_pt_regs *uregs = &task_pt_regs(target)->user_regs; +	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1); +} + +static int gpr_set(struct task_struct *target, const struct user_regset *regset, +		   unsigned int pos, unsigned int count, +		   const void *kbuf, const void __user *ubuf) +{ +	int ret; +	struct user_pt_regs newregs; + +	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newregs, 0, -1); +	if (ret) +		return ret; + +	if (!valid_user_regs(&newregs)) +		return -EINVAL; + +	task_pt_regs(target)->user_regs = newregs; +	return 0; +} + +/* + * TODO: update fp accessors for lazy context switching (sync/flush hwstate) + */ +static int fpr_get(struct task_struct *target, const struct user_regset *regset, +		   unsigned int pos, unsigned int count, +		   void *kbuf, void __user *ubuf) +{ +	struct user_fpsimd_state *uregs; +	uregs = &target->thread.fpsimd_state.user_fpsimd; +	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1); +} + +static int fpr_set(struct task_struct *target, const struct user_regset *regset, +		   unsigned int pos, unsigned int count, +		   const void *kbuf, const void __user *ubuf) +{ +	int ret; +	struct user_fpsimd_state newstate; + +	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1); +	if (ret) +		return ret; + +	target->thread.fpsimd_state.user_fpsimd = newstate; +	fpsimd_flush_task_state(target); +	return ret; +} + +static int tls_get(struct task_struct *target, const struct user_regset *regset, +		   unsigned int pos, unsigned int count, +		   void *kbuf, void __user *ubuf) +{ +	unsigned long *tls = &target->thread.tp_value; +	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, tls, 0, -1); +} + +static int tls_set(struct task_struct *target, const struct user_regset *regset, +		   unsigned int pos, unsigned int count, +		   const void *kbuf, const void __user *ubuf) +{ +	int ret; +	unsigned long tls; + +	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); +	if (ret) +		return ret; + +	target->thread.tp_value = tls; +	return ret; +} + +enum aarch64_regset { +	REGSET_GPR, +	REGSET_FPR, +	REGSET_TLS, +#ifdef CONFIG_HAVE_HW_BREAKPOINT +	REGSET_HW_BREAK, +	REGSET_HW_WATCH, +#endif +}; + +static const struct user_regset aarch64_regsets[] = { +	[REGSET_GPR] = { +		.core_note_type = NT_PRSTATUS, +		.n = sizeof(struct user_pt_regs) / sizeof(u64), +		.size = sizeof(u64), +		.align = sizeof(u64), +		.get = gpr_get, +		.set = gpr_set +	}, +	
[REGSET_FPR] = { +		.core_note_type = NT_PRFPREG, +		.n = sizeof(struct user_fpsimd_state) / sizeof(u32), +		/* +		 * We pretend we have 32-bit registers because the fpsr and +		 * fpcr are 32-bits wide. +		 */ +		.size = sizeof(u32), +		.align = sizeof(u32), +		.get = fpr_get, +		.set = fpr_set +	}, +	[REGSET_TLS] = { +		.core_note_type = NT_ARM_TLS, +		.n = 1, +		.size = sizeof(void *), +		.align = sizeof(void *), +		.get = tls_get, +		.set = tls_set, +	}, +#ifdef CONFIG_HAVE_HW_BREAKPOINT +	[REGSET_HW_BREAK] = { +		.core_note_type = NT_ARM_HW_BREAK, +		.n = sizeof(struct user_hwdebug_state) / sizeof(u32), +		.size = sizeof(u32), +		.align = sizeof(u32), +		.get = hw_break_get, +		.set = hw_break_set, +	}, +	[REGSET_HW_WATCH] = { +		.core_note_type = NT_ARM_HW_WATCH, +		.n = sizeof(struct user_hwdebug_state) / sizeof(u32), +		.size = sizeof(u32), +		.align = sizeof(u32), +		.get = hw_break_get, +		.set = hw_break_set, +	}, +#endif +}; + +static const struct user_regset_view user_aarch64_view = { +	.name = "aarch64", .e_machine = EM_AARCH64, +	.regsets = aarch64_regsets, .n = ARRAY_SIZE(aarch64_regsets) +}; + +#ifdef CONFIG_COMPAT +#include <linux/compat.h> + +enum compat_regset { +	REGSET_COMPAT_GPR, +	REGSET_COMPAT_VFP, +}; + +static int compat_gpr_get(struct task_struct *target, +			  const struct user_regset *regset, +			  unsigned int pos, unsigned int count, +			  void *kbuf, void __user *ubuf) +{ +	int ret = 0; +	unsigned int i, start, num_regs; + +	/* Calculate the number of AArch32 registers contained in count */ +	num_regs = count / regset->size; + +	/* Convert pos into an register number */ +	start = pos / regset->size; + +	if (start + num_regs > regset->n) +		return -EIO; + +	for (i = 0; i < num_regs; ++i) { +		unsigned int idx = start + i; +		compat_ulong_t reg; + +		switch (idx) { +		case 15: +			reg = task_pt_regs(target)->pc; +			break; +		case 16: +			reg = task_pt_regs(target)->pstate; +			break; +		case 17: +			reg = task_pt_regs(target)->orig_x0; +			break; +		default: +			reg = task_pt_regs(target)->regs[idx]; +		} + +		if (kbuf) { +			memcpy(kbuf, ®, sizeof(reg)); +			kbuf += sizeof(reg); +		} else { +			ret = copy_to_user(ubuf, ®, sizeof(reg)); +			if (ret) +				break; + +			ubuf += sizeof(reg); +		} +	} + +	return ret; +} + +static int compat_gpr_set(struct task_struct *target, +			  const struct user_regset *regset, +			  unsigned int pos, unsigned int count, +			  const void *kbuf, const void __user *ubuf) +{ +	struct pt_regs newregs; +	int ret = 0; +	unsigned int i, start, num_regs; + +	/* Calculate the number of AArch32 registers contained in count */ +	num_regs = count / regset->size; + +	/* Convert pos into an register number */ +	start = pos / regset->size; + +	if (start + num_regs > regset->n) +		return -EIO; + +	newregs = *task_pt_regs(target); + +	for (i = 0; i < num_regs; ++i) { +		unsigned int idx = start + i; +		compat_ulong_t reg; + +		if (kbuf) { +			memcpy(®, kbuf, sizeof(reg)); +			kbuf += sizeof(reg); +		} else { +			ret = copy_from_user(®, ubuf, sizeof(reg)); +			if (ret) +				return ret; + +			ubuf += sizeof(reg); +		} + +		switch (idx) { +		case 15: +			newregs.pc = reg; +			break; +		case 16: +			newregs.pstate = reg; +			break; +		case 17: +			newregs.orig_x0 = reg; +			break; +		default: +			newregs.regs[idx] = reg; +		} + +	} + +	if (valid_user_regs(&newregs.user_regs)) +		*task_pt_regs(target) = newregs; +	else +		ret = -EINVAL; + +	return ret; +} + +static int compat_vfp_get(struct task_struct *target, +			  const struct user_regset 
*regset, +			  unsigned int pos, unsigned int count, +			  void *kbuf, void __user *ubuf) +{ +	struct user_fpsimd_state *uregs; +	compat_ulong_t fpscr; +	int ret; + +	uregs = &target->thread.fpsimd_state.user_fpsimd; + +	/* +	 * The VFP registers are packed into the fpsimd_state, so they all sit +	 * nicely together for us. We just need to create the fpscr separately. +	 */ +	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, +				  VFP_STATE_SIZE - sizeof(compat_ulong_t)); + +	if (count && !ret) { +		fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) | +			(uregs->fpcr & VFP_FPSCR_CTRL_MASK); +		ret = put_user(fpscr, (compat_ulong_t *)ubuf); +	} + +	return ret; +} + +static int compat_vfp_set(struct task_struct *target, +			  const struct user_regset *regset, +			  unsigned int pos, unsigned int count, +			  const void *kbuf, const void __user *ubuf) +{ +	struct user_fpsimd_state *uregs; +	compat_ulong_t fpscr; +	int ret; + +	if (pos + count > VFP_STATE_SIZE) +		return -EIO; + +	uregs = &target->thread.fpsimd_state.user_fpsimd; + +	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, uregs, 0, +				 VFP_STATE_SIZE - sizeof(compat_ulong_t)); + +	if (count && !ret) { +		ret = get_user(fpscr, (compat_ulong_t *)ubuf); +		uregs->fpsr = fpscr & VFP_FPSCR_STAT_MASK; +		uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK; +	} + +	fpsimd_flush_task_state(target); +	return ret; +} + +static const struct user_regset aarch32_regsets[] = { +	[REGSET_COMPAT_GPR] = { +		.core_note_type = NT_PRSTATUS, +		.n = COMPAT_ELF_NGREG, +		.size = sizeof(compat_elf_greg_t), +		.align = sizeof(compat_elf_greg_t), +		.get = compat_gpr_get, +		.set = compat_gpr_set +	}, +	[REGSET_COMPAT_VFP] = { +		.core_note_type = NT_ARM_VFP, +		.n = VFP_STATE_SIZE / sizeof(compat_ulong_t), +		.size = sizeof(compat_ulong_t), +		.align = sizeof(compat_ulong_t), +		.get = compat_vfp_get, +		.set = compat_vfp_set +	}, +}; + +static const struct user_regset_view user_aarch32_view = { +	.name = "aarch32", .e_machine = EM_ARM, +	.regsets = aarch32_regsets, .n = ARRAY_SIZE(aarch32_regsets) +}; + +static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off, +				   compat_ulong_t __user *ret) +{ +	compat_ulong_t tmp; + +	if (off & 3) +		return -EIO; + +	if (off == COMPAT_PT_TEXT_ADDR) +		tmp = tsk->mm->start_code; +	else if (off == COMPAT_PT_DATA_ADDR) +		tmp = tsk->mm->start_data; +	else if (off == COMPAT_PT_TEXT_END_ADDR) +		tmp = tsk->mm->end_code; +	else if (off < sizeof(compat_elf_gregset_t)) +		return copy_regset_to_user(tsk, &user_aarch32_view, +					   REGSET_COMPAT_GPR, off, +					   sizeof(compat_ulong_t), ret); +	else if (off >= COMPAT_USER_SZ) +		return -EIO; +	else +		tmp = 0; + +	return put_user(tmp, ret); +} + +static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, +				    compat_ulong_t val) +{ +	int ret; +	mm_segment_t old_fs = get_fs(); + +	if (off & 3 || off >= COMPAT_USER_SZ) +		return -EIO; + +	if (off >= sizeof(compat_elf_gregset_t)) +		return 0; + +	set_fs(KERNEL_DS); +	ret = copy_regset_from_user(tsk, &user_aarch32_view, +				    REGSET_COMPAT_GPR, off, +				    sizeof(compat_ulong_t), +				    &val); +	set_fs(old_fs); + +	return ret; +} + +#ifdef CONFIG_HAVE_HW_BREAKPOINT + +/* + * Convert a virtual register number into an index for a thread_info + * breakpoint array. Breakpoints are identified using positive numbers + * whilst watchpoints are negative. The registers are laid out as pairs + * of (address, control), each pair mapping to a unique hw_breakpoint struct. 
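+ * For example, with idx = (abs(num) - 1) >> 1 and odd numbers selecting
+ * the address half of a pair:
+ *
+ *	num =  1, 2	breakpoint 0 address, control
+ *	num =  3, 4	breakpoint 1 address, control
+ *	num = -1, -2	watchpoint 0 address, control
+ *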
+ * Register 0 is reserved for describing resource information. + */ +static int compat_ptrace_hbp_num_to_idx(compat_long_t num) +{ +	return (abs(num) - 1) >> 1; +} + +static int compat_ptrace_hbp_get_resource_info(u32 *kdata) +{ +	u8 num_brps, num_wrps, debug_arch, wp_len; +	u32 reg = 0; + +	num_brps	= hw_breakpoint_slots(TYPE_INST); +	num_wrps	= hw_breakpoint_slots(TYPE_DATA); + +	debug_arch	= debug_monitors_arch(); +	wp_len		= 8; +	reg		|= debug_arch; +	reg		<<= 8; +	reg		|= wp_len; +	reg		<<= 8; +	reg		|= num_wrps; +	reg		<<= 8; +	reg		|= num_brps; + +	*kdata = reg; +	return 0; +} + +static int compat_ptrace_hbp_get(unsigned int note_type, +				 struct task_struct *tsk, +				 compat_long_t num, +				 u32 *kdata) +{ +	u64 addr = 0; +	u32 ctrl = 0; + +	int err, idx = compat_ptrace_hbp_num_to_idx(num);; + +	if (num & 1) { +		err = ptrace_hbp_get_addr(note_type, tsk, idx, &addr); +		*kdata = (u32)addr; +	} else { +		err = ptrace_hbp_get_ctrl(note_type, tsk, idx, &ctrl); +		*kdata = ctrl; +	} + +	return err; +} + +static int compat_ptrace_hbp_set(unsigned int note_type, +				 struct task_struct *tsk, +				 compat_long_t num, +				 u32 *kdata) +{ +	u64 addr; +	u32 ctrl; + +	int err, idx = compat_ptrace_hbp_num_to_idx(num); + +	if (num & 1) { +		addr = *kdata; +		err = ptrace_hbp_set_addr(note_type, tsk, idx, addr); +	} else { +		ctrl = *kdata; +		err = ptrace_hbp_set_ctrl(note_type, tsk, idx, ctrl); +	} + +	return err; +} + +static int compat_ptrace_gethbpregs(struct task_struct *tsk, compat_long_t num, +				    compat_ulong_t __user *data) +{ +	int ret; +	u32 kdata; +	mm_segment_t old_fs = get_fs(); + +	set_fs(KERNEL_DS); +	/* Watchpoint */ +	if (num < 0) { +		ret = compat_ptrace_hbp_get(NT_ARM_HW_WATCH, tsk, num, &kdata); +	/* Resource info */ +	} else if (num == 0) { +		ret = compat_ptrace_hbp_get_resource_info(&kdata); +	/* Breakpoint */ +	} else { +		ret = compat_ptrace_hbp_get(NT_ARM_HW_BREAK, tsk, num, &kdata); +	} +	set_fs(old_fs); + +	if (!ret) +		ret = put_user(kdata, data); + +	return ret; +} + +static int compat_ptrace_sethbpregs(struct task_struct *tsk, compat_long_t num, +				    compat_ulong_t __user *data) +{ +	int ret; +	u32 kdata = 0; +	mm_segment_t old_fs = get_fs(); + +	if (num == 0) +		return 0; + +	ret = get_user(kdata, data); +	if (ret) +		return ret; + +	set_fs(KERNEL_DS); +	if (num < 0) +		ret = compat_ptrace_hbp_set(NT_ARM_HW_WATCH, tsk, num, &kdata); +	else +		ret = compat_ptrace_hbp_set(NT_ARM_HW_BREAK, tsk, num, &kdata); +	set_fs(old_fs); + +	return ret; +} +#endif	/* CONFIG_HAVE_HW_BREAKPOINT */ + +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, +			compat_ulong_t caddr, compat_ulong_t cdata) +{ +	unsigned long addr = caddr; +	unsigned long data = cdata; +	void __user *datap = compat_ptr(data); +	int ret; + +	switch (request) { +		case PTRACE_PEEKUSR: +			ret = compat_ptrace_read_user(child, addr, datap); +			break; + +		case PTRACE_POKEUSR: +			ret = compat_ptrace_write_user(child, addr, data); +			break; + +		case COMPAT_PTRACE_GETREGS: +			ret = copy_regset_to_user(child, +						  &user_aarch32_view, +						  REGSET_COMPAT_GPR, +						  0, sizeof(compat_elf_gregset_t), +						  datap); +			break; + +		case COMPAT_PTRACE_SETREGS: +			ret = copy_regset_from_user(child, +						    &user_aarch32_view, +						    REGSET_COMPAT_GPR, +						    0, sizeof(compat_elf_gregset_t), +						    datap); +			break; + +		case COMPAT_PTRACE_GET_THREAD_AREA: +			ret = put_user((compat_ulong_t)child->thread.tp_value, +				       (compat_ulong_t __user 
*)datap); +			break; + +		case COMPAT_PTRACE_SET_SYSCALL: +			task_pt_regs(child)->syscallno = data; +			ret = 0; +			break; + +		case COMPAT_PTRACE_GETVFPREGS: +			ret = copy_regset_to_user(child, +						  &user_aarch32_view, +						  REGSET_COMPAT_VFP, +						  0, VFP_STATE_SIZE, +						  datap); +			break; + +		case COMPAT_PTRACE_SETVFPREGS: +			ret = copy_regset_from_user(child, +						    &user_aarch32_view, +						    REGSET_COMPAT_VFP, +						    0, VFP_STATE_SIZE, +						    datap); +			break; + +#ifdef CONFIG_HAVE_HW_BREAKPOINT +		case COMPAT_PTRACE_GETHBPREGS: +			ret = compat_ptrace_gethbpregs(child, addr, datap); +			break; + +		case COMPAT_PTRACE_SETHBPREGS: +			ret = compat_ptrace_sethbpregs(child, addr, datap); +			break; +#endif + +		default: +			ret = compat_ptrace_request(child, request, addr, +						    data); +			break; +	} + +	return ret; +} +#endif /* CONFIG_COMPAT */ + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ +#ifdef CONFIG_COMPAT +	if (is_compat_thread(task_thread_info(task))) +		return &user_aarch32_view; +#endif +	return &user_aarch64_view; +} + +long arch_ptrace(struct task_struct *child, long request, +		 unsigned long addr, unsigned long data) +{ +	return ptrace_request(child, request, addr, data); +} + +enum ptrace_syscall_dir { +	PTRACE_SYSCALL_ENTER = 0, +	PTRACE_SYSCALL_EXIT, +}; + +static void tracehook_report_syscall(struct pt_regs *regs, +				     enum ptrace_syscall_dir dir) +{ +	int regno; +	unsigned long saved_reg; + +	/* +	 * A scratch register (ip(r12) on AArch32, x7 on AArch64) is +	 * used to denote syscall entry/exit: +	 */ +	regno = (is_compat_task() ? 12 : 7); +	saved_reg = regs->regs[regno]; +	regs->regs[regno] = dir; + +	if (dir == PTRACE_SYSCALL_EXIT) +		tracehook_report_syscall_exit(regs, 0); +	else if (tracehook_report_syscall_entry(regs)) +		regs->syscallno = ~0UL; + +	regs->regs[regno] = saved_reg; +} + +asmlinkage int syscall_trace_enter(struct pt_regs *regs) +{ +	if (test_thread_flag(TIF_SYSCALL_TRACE)) +		tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + +	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) +		trace_sys_enter(regs, regs->syscallno); + +	return regs->syscallno; +} + +asmlinkage void syscall_trace_exit(struct pt_regs *regs) +{ +	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) +		trace_sys_exit(regs, regs_return_value(regs)); + +	if (test_thread_flag(TIF_SYSCALL_TRACE)) +		tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); +} diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c new file mode 100644 index 00000000000..89102a6ffad --- /dev/null +++ b/arch/arm64/kernel/return_address.c @@ -0,0 +1,55 @@ +/* + * arch/arm64/kernel/return_address.c + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/export.h> +#include <linux/ftrace.h> + +#include <asm/stacktrace.h> + +struct return_address_data { +	unsigned int level; +	void *addr; +}; + +static int save_return_addr(struct stackframe *frame, void *d) +{ +	struct return_address_data *data = d; + +	if (!data->level) { +		data->addr = (void *)frame->pc; +		return 1; +	} else { +		--data->level; +		return 0; +	} +} + +void *return_address(unsigned int level) +{ +	struct return_address_data data; +	struct stackframe frame; +	register unsigned long current_sp asm ("sp"); + +	data.level = level + 2; +	data.addr = NULL; + +	frame.fp = (unsigned long)__builtin_frame_address(0); +	frame.sp = current_sp; +	frame.pc = (unsigned long)return_address; /* dummy */ + +	walk_stackframe(&frame, save_return_addr, &data); + +	if (!data.level) +		return data.addr; +	else +		return NULL; +} +EXPORT_SYMBOL_GPL(return_address); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c new file mode 100644 index 00000000000..46d1125571f --- /dev/null +++ b/arch/arm64/kernel/setup.c @@ -0,0 +1,506 @@ +/* + * Based on arch/arm/kernel/setup.c + * + * Copyright (C) 1995-2001 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/stddef.h> +#include <linux/ioport.h> +#include <linux/delay.h> +#include <linux/utsname.h> +#include <linux/initrd.h> +#include <linux/console.h> +#include <linux/cache.h> +#include <linux/bootmem.h> +#include <linux/seq_file.h> +#include <linux/screen_info.h> +#include <linux/init.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> +#include <linux/root_dev.h> +#include <linux/clk-provider.h> +#include <linux/cpu.h> +#include <linux/interrupt.h> +#include <linux/smp.h> +#include <linux/fs.h> +#include <linux/proc_fs.h> +#include <linux/memblock.h> +#include <linux/of_fdt.h> +#include <linux/of_platform.h> +#include <linux/efi.h> + +#include <asm/fixmap.h> +#include <asm/cputype.h> +#include <asm/elf.h> +#include <asm/cputable.h> +#include <asm/cpu_ops.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/smp_plat.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include <asm/traps.h> +#include <asm/memblock.h> +#include <asm/psci.h> +#include <asm/efi.h> + +unsigned int processor_id; +EXPORT_SYMBOL(processor_id); + +unsigned long elf_hwcap __read_mostly; +EXPORT_SYMBOL_GPL(elf_hwcap); + +#ifdef CONFIG_COMPAT +#define COMPAT_ELF_HWCAP_DEFAULT	\ +				(COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ +				 COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ +				 COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ +				 COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ +				 COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) +unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; +unsigned int compat_elf_hwcap2 __read_mostly; +#endif + +static const char *cpu_name; +static const char *machine_name; +phys_addr_t __fdt_pointer __initdata; + +/* + * Standard memory resources + */ +static struct resource mem_res[] = { +	{ +		.name = "Kernel code", +		.start = 0, +		.end = 0, +		.flags = IORESOURCE_MEM +	}, +	{ +		.name = "Kernel data", +		.start = 0, +		.end = 0, +		.flags = IORESOURCE_MEM +	} +}; + +#define kernel_code mem_res[0] +#define kernel_data mem_res[1] + +void __init early_print(const char *str, ...) +{ +	char buf[256]; +	va_list ap; + +	va_start(ap, str); +	vsnprintf(buf, sizeof(buf), str, ap); +	va_end(ap); + +	printk("%s", buf); +} + +void __init smp_setup_processor_id(void) +{ +	/* +	 * clear __my_cpu_offset on boot CPU to avoid hang caused by +	 * using percpu variable early, for example, lockdep will +	 * access percpu variable inside lock_release +	 */ +	set_my_cpu_offset(0); +} + +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ +	return phys_id == cpu_logical_map(cpu); +} + +struct mpidr_hash mpidr_hash; +#ifdef CONFIG_SMP +/** + * smp_build_mpidr_hash - Pre-compute shifts required at each affinity + *			  level in order to build a linear index from an + *			  MPIDR value. Resulting algorithm is a collision + *			  free hash carried out through shifting and ORing + */ +static void __init smp_build_mpidr_hash(void) +{ +	u32 i, affinity, fs[4], bits[4], ls; +	u64 mask = 0; +	/* +	 * Pre-scan the list of MPIDRS and filter out bits that do +	 * not contribute to affinity levels, ie they never toggle. +	 */ +	for_each_possible_cpu(i) +		mask |= (cpu_logical_map(i) ^ cpu_logical_map(0)); +	pr_debug("mask of set bits %#llx\n", mask); +	/* +	 * Find and stash the last and first bit set at all affinity levels to +	 * check how many bits are required to represent them. 
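+	 * As a concrete example: if the MPIDR_EL1 values only differ in
+	 * Aff0 = 0..3, then mask = 0x3, fs[0] = 0 and bits[0] = 2, no other
+	 * level contributes any bits, and the hash collapses to
+	 * index = MPIDR_EL1 & 0x3 with mpidr_hash_size() = 4.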
+	 */ +	for (i = 0; i < 4; i++) { +		affinity = MPIDR_AFFINITY_LEVEL(mask, i); +		/* +		 * Find the MSB bit and LSB bits position +		 * to determine how many bits are required +		 * to express the affinity level. +		 */ +		ls = fls(affinity); +		fs[i] = affinity ? ffs(affinity) - 1 : 0; +		bits[i] = ls - fs[i]; +	} +	/* +	 * An index can be created from the MPIDR_EL1 by isolating the +	 * significant bits at each affinity level and by shifting +	 * them in order to compress the 32 bits values space to a +	 * compressed set of values. This is equivalent to hashing +	 * the MPIDR_EL1 through shifting and ORing. It is a collision free +	 * hash though not minimal since some levels might contain a number +	 * of CPUs that is not an exact power of 2 and their bit +	 * representation might contain holes, eg MPIDR_EL1[7:0] = {0x2, 0x80}. +	 */ +	mpidr_hash.shift_aff[0] = MPIDR_LEVEL_SHIFT(0) + fs[0]; +	mpidr_hash.shift_aff[1] = MPIDR_LEVEL_SHIFT(1) + fs[1] - bits[0]; +	mpidr_hash.shift_aff[2] = MPIDR_LEVEL_SHIFT(2) + fs[2] - +						(bits[1] + bits[0]); +	mpidr_hash.shift_aff[3] = MPIDR_LEVEL_SHIFT(3) + +				  fs[3] - (bits[2] + bits[1] + bits[0]); +	mpidr_hash.mask = mask; +	mpidr_hash.bits = bits[3] + bits[2] + bits[1] + bits[0]; +	pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] aff3[%u] mask[%#llx] bits[%u]\n", +		mpidr_hash.shift_aff[0], +		mpidr_hash.shift_aff[1], +		mpidr_hash.shift_aff[2], +		mpidr_hash.shift_aff[3], +		mpidr_hash.mask, +		mpidr_hash.bits); +	/* +	 * 4x is an arbitrary value used to warn on a hash table much bigger +	 * than expected on most systems. +	 */ +	if (mpidr_hash_size() > 4 * num_possible_cpus()) +		pr_warn("Large number of MPIDR hash buckets detected\n"); +	__flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); +} +#endif + +static void __init setup_processor(void) +{ +	struct cpu_info *cpu_info; +	u64 features, block; +	u32 cwg; +	int cls; + +	cpu_info = lookup_processor_type(read_cpuid_id()); +	if (!cpu_info) { +		printk("CPU configuration botched (ID %08x), unable to continue.\n", +		       read_cpuid_id()); +		while (1); +	} + +	cpu_name = cpu_info->cpu_name; + +	printk("CPU: %s [%08x] revision %d\n", +	       cpu_name, read_cpuid_id(), read_cpuid_id() & 15); + +	sprintf(init_utsname()->machine, ELF_PLATFORM); +	elf_hwcap = 0; + +	/* +	 * Check for sane CTR_EL0.CWG value. +	 */ +	cwg = cache_type_cwg(); +	cls = cache_line_size(); +	if (!cwg) +		pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n", +			cls); +	if (L1_CACHE_BYTES < cls) +		pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", +			L1_CACHE_BYTES, cls); + +	/* +	 * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks. +	 * The blocks we test below represent incremental functionality +	 * for non-negative values. Negative values are reserved. +	 */ +	features = read_cpuid(ID_AA64ISAR0_EL1); +	block = (features >> 4) & 0xf; +	if (!(block & 0x8)) { +		switch (block) { +		default: +		case 2: +			elf_hwcap |= HWCAP_PMULL; +		case 1: +			elf_hwcap |= HWCAP_AES; +		case 0: +			break; +		} +	} + +	block = (features >> 8) & 0xf; +	if (block && !(block & 0x8)) +		elf_hwcap |= HWCAP_SHA1; + +	block = (features >> 12) & 0xf; +	if (block && !(block & 0x8)) +		elf_hwcap |= HWCAP_SHA2; + +	block = (features >> 16) & 0xf; +	if (block && !(block & 0x8)) +		elf_hwcap |= HWCAP_CRC32; + +#ifdef CONFIG_COMPAT +	/* +	 * ID_ISAR5_EL1 carries similar information as above, but pertaining to +	 * the Aarch32 32-bit execution state. 
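+	 * The decoding below mirrors the ID_AA64ISAR0_EL1 handling above:
+	 * the AES field advertises COMPAT_HWCAP2_AES when it reads 1 and
+	 * additionally COMPAT_HWCAP2_PMULL when it reads 2 or higher, while
+	 * the SHA1, SHA2 and CRC32 fields only need to be non-zero and not
+	 * reserved (i.e. not negative).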
+	 */ +	features = read_cpuid(ID_ISAR5_EL1); +	block = (features >> 4) & 0xf; +	if (!(block & 0x8)) { +		switch (block) { +		default: +		case 2: +			compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL; +		case 1: +			compat_elf_hwcap2 |= COMPAT_HWCAP2_AES; +		case 0: +			break; +		} +	} + +	block = (features >> 8) & 0xf; +	if (block && !(block & 0x8)) +		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1; + +	block = (features >> 12) & 0xf; +	if (block && !(block & 0x8)) +		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2; + +	block = (features >> 16) & 0xf; +	if (block && !(block & 0x8)) +		compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32; +#endif +} + +static void __init setup_machine_fdt(phys_addr_t dt_phys) +{ +	if (!dt_phys || !early_init_dt_scan(phys_to_virt(dt_phys))) { +		early_print("\n" +			"Error: invalid device tree blob at physical address 0x%p (virtual address 0x%p)\n" +			"The dtb must be 8-byte aligned and passed in the first 512MB of memory\n" +			"\nPlease check your bootloader.\n", +			dt_phys, phys_to_virt(dt_phys)); + +		while (true) +			cpu_relax(); +	} + +	machine_name = of_flat_dt_get_machine_name(); +} + +/* + * Limit the memory size that was specified via FDT. + */ +static int __init early_mem(char *p) +{ +	phys_addr_t limit; + +	if (!p) +		return 1; + +	limit = memparse(p, &p) & PAGE_MASK; +	pr_notice("Memory limited to %lldMB\n", limit >> 20); + +	memblock_enforce_memory_limit(limit); + +	return 0; +} +early_param("mem", early_mem); + +static void __init request_standard_resources(void) +{ +	struct memblock_region *region; +	struct resource *res; + +	kernel_code.start   = virt_to_phys(_text); +	kernel_code.end     = virt_to_phys(_etext - 1); +	kernel_data.start   = virt_to_phys(_sdata); +	kernel_data.end     = virt_to_phys(_end - 1); + +	for_each_memblock(memory, region) { +		res = alloc_bootmem_low(sizeof(*res)); +		res->name  = "System RAM"; +		res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); +		res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; +		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + +		request_resource(&iomem_resource, res); + +		if (kernel_code.start >= res->start && +		    kernel_code.end <= res->end) +			request_resource(res, &kernel_code); +		if (kernel_data.start >= res->start && +		    kernel_data.end <= res->end) +			request_resource(res, &kernel_data); +	} +} + +u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; + +void __init setup_arch(char **cmdline_p) +{ +	/* +	 * Unmask asynchronous aborts early to catch possible system errors. 
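+	 * Doing this before the rest of the setup means that e.g. an SError
+	 * left pending by firmware or the bootloader is taken and reported
+	 * here, rather than surfacing at some arbitrary later point.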
+	 */ +	local_async_enable(); + +	setup_processor(); + +	setup_machine_fdt(__fdt_pointer); + +	init_mm.start_code = (unsigned long) _text; +	init_mm.end_code   = (unsigned long) _etext; +	init_mm.end_data   = (unsigned long) _edata; +	init_mm.brk	   = (unsigned long) _end; + +	*cmdline_p = boot_command_line; + +	early_ioremap_init(); + +	parse_early_param(); + +	efi_init(); +	arm64_memblock_init(); + +	paging_init(); +	request_standard_resources(); + +	efi_idmap_init(); + +	unflatten_device_tree(); + +	psci_init(); + +	cpu_logical_map(0) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; +	cpu_read_bootcpu_ops(); +#ifdef CONFIG_SMP +	smp_init_cpus(); +	smp_build_mpidr_hash(); +#endif + +#ifdef CONFIG_VT +#if defined(CONFIG_VGA_CONSOLE) +	conswitchp = &vga_con; +#elif defined(CONFIG_DUMMY_CONSOLE) +	conswitchp = &dummy_con; +#endif +#endif +} + +static int __init arm64_device_init(void) +{ +	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); +	return 0; +} +arch_initcall_sync(arm64_device_init); + +static DEFINE_PER_CPU(struct cpu, cpu_data); + +static int __init topology_init(void) +{ +	int i; + +	for_each_possible_cpu(i) { +		struct cpu *cpu = &per_cpu(cpu_data, i); +		cpu->hotpluggable = 1; +		register_cpu(cpu, i); +	} + +	return 0; +} +subsys_initcall(topology_init); + +static const char *hwcap_str[] = { +	"fp", +	"asimd", +	"evtstrm", +	"aes", +	"pmull", +	"sha1", +	"sha2", +	"crc32", +	NULL +}; + +static int c_show(struct seq_file *m, void *v) +{ +	int i; + +	seq_printf(m, "Processor\t: %s rev %d (%s)\n", +		   cpu_name, read_cpuid_id() & 15, ELF_PLATFORM); + +	for_each_online_cpu(i) { +		/* +		 * glibc reads /proc/cpuinfo to determine the number of +		 * online processors, looking for lines beginning with +		 * "processor".  Give glibc what it expects. +		 */ +#ifdef CONFIG_SMP +		seq_printf(m, "processor\t: %d\n", i); +#endif +	} + +	/* dump out the processor features */ +	seq_puts(m, "Features\t: "); + +	for (i = 0; hwcap_str[i]; i++) +		if (elf_hwcap & (1 << i)) +			seq_printf(m, "%s ", hwcap_str[i]); + +	seq_printf(m, "\nCPU implementer\t: 0x%02x\n", read_cpuid_id() >> 24); +	seq_printf(m, "CPU architecture: AArch64\n"); +	seq_printf(m, "CPU variant\t: 0x%x\n", (read_cpuid_id() >> 20) & 15); +	seq_printf(m, "CPU part\t: 0x%03x\n", (read_cpuid_id() >> 4) & 0xfff); +	seq_printf(m, "CPU revision\t: %d\n", read_cpuid_id() & 15); + +	seq_puts(m, "\n"); + +	seq_printf(m, "Hardware\t: %s\n", machine_name); + +	return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ +	return *pos < 1 ? (void *)1 : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ +	++*pos; +	return NULL; +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { +	.start	= c_start, +	.next	= c_next, +	.stop	= c_stop, +	.show	= c_show +}; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c new file mode 100644 index 00000000000..6357b9c6c90 --- /dev/null +++ b/arch/arm64/kernel/signal.c @@ -0,0 +1,437 @@ +/* + * Based on arch/arm/kernel/signal.c + * + * Copyright (C) 1995-2009 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compat.h> +#include <linux/errno.h> +#include <linux/signal.h> +#include <linux/personality.h> +#include <linux/freezer.h> +#include <linux/uaccess.h> +#include <linux/tracehook.h> +#include <linux/ratelimit.h> + +#include <asm/debug-monitors.h> +#include <asm/elf.h> +#include <asm/cacheflush.h> +#include <asm/ucontext.h> +#include <asm/unistd.h> +#include <asm/fpsimd.h> +#include <asm/signal32.h> +#include <asm/vdso.h> + +/* + * Do a signal return; undo the signal stack. These are aligned to 128-bit. + */ +struct rt_sigframe { +	struct siginfo info; +	struct ucontext uc; +	u64 fp; +	u64 lr; +}; + +static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) +{ +	struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state; +	int err; + +	/* dump the hardware registers to the fpsimd_state structure */ +	fpsimd_preserve_current_state(); + +	/* copy the FP and status/control registers */ +	err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs)); +	__put_user_error(fpsimd->fpsr, &ctx->fpsr, err); +	__put_user_error(fpsimd->fpcr, &ctx->fpcr, err); + +	/* copy the magic/size information */ +	__put_user_error(FPSIMD_MAGIC, &ctx->head.magic, err); +	__put_user_error(sizeof(struct fpsimd_context), &ctx->head.size, err); + +	return err ? -EFAULT : 0; +} + +static int restore_fpsimd_context(struct fpsimd_context __user *ctx) +{ +	struct fpsimd_state fpsimd; +	__u32 magic, size; +	int err = 0; + +	/* check the magic/size information */ +	__get_user_error(magic, &ctx->head.magic, err); +	__get_user_error(size, &ctx->head.size, err); +	if (err) +		return -EFAULT; +	if (magic != FPSIMD_MAGIC || size != sizeof(struct fpsimd_context)) +		return -EINVAL; + +	/* copy the FP and status/control registers */ +	err = __copy_from_user(fpsimd.vregs, ctx->vregs, +			       sizeof(fpsimd.vregs)); +	__get_user_error(fpsimd.fpsr, &ctx->fpsr, err); +	__get_user_error(fpsimd.fpcr, &ctx->fpcr, err); + +	/* load the hardware registers from the fpsimd_state structure */ +	if (!err) +		fpsimd_update_current_state(&fpsimd); + +	return err ? -EFAULT : 0; +} + +static int restore_sigframe(struct pt_regs *regs, +			    struct rt_sigframe __user *sf) +{ +	sigset_t set; +	int i, err; +	void *aux = sf->uc.uc_mcontext.__reserved; + +	err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set)); +	if (err == 0) +		set_current_blocked(&set); + +	for (i = 0; i < 31; i++) +		__get_user_error(regs->regs[i], &sf->uc.uc_mcontext.regs[i], +				 err); +	__get_user_error(regs->sp, &sf->uc.uc_mcontext.sp, err); +	__get_user_error(regs->pc, &sf->uc.uc_mcontext.pc, err); +	__get_user_error(regs->pstate, &sf->uc.uc_mcontext.pstate, err); + +	/* +	 * Avoid sys_rt_sigreturn() restarting. +	 */ +	regs->syscallno = ~0UL; + +	err |= !valid_user_regs(®s->user_regs); + +	if (err == 0) { +		struct fpsimd_context *fpsimd_ctx = +			container_of(aux, struct fpsimd_context, head); +		err |= restore_fpsimd_context(fpsimd_ctx); +	} + +	return err; +} + +asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) +{ +	struct rt_sigframe __user *frame; + +	/* Always make any pending restarted system calls return -EINTR */ +	current_thread_info()->restart_block.fn = do_no_restart_syscall; + +	/* +	 * Since we stacked the signal on a 128-bit boundary, then 'sp' should +	 * be word aligned here. 
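+	 * (128 bits is 16 bytes, hence the 'sp & 15' check: get_sigframe()
+	 * below always rounds the frame down to a 16-byte boundary.)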
+	 */ +	if (regs->sp & 15) +		goto badframe; + +	frame = (struct rt_sigframe __user *)regs->sp; + +	if (!access_ok(VERIFY_READ, frame, sizeof (*frame))) +		goto badframe; + +	if (restore_sigframe(regs, frame)) +		goto badframe; + +	if (restore_altstack(&frame->uc.uc_stack)) +		goto badframe; + +	return regs->regs[0]; + +badframe: +	if (show_unhandled_signals) +		pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n", +				    current->comm, task_pid_nr(current), __func__, +				    regs->pc, regs->sp); +	force_sig(SIGSEGV, current); +	return 0; +} + +static int setup_sigframe(struct rt_sigframe __user *sf, +			  struct pt_regs *regs, sigset_t *set) +{ +	int i, err = 0; +	void *aux = sf->uc.uc_mcontext.__reserved; +	struct _aarch64_ctx *end; + +	/* set up the stack frame for unwinding */ +	__put_user_error(regs->regs[29], &sf->fp, err); +	__put_user_error(regs->regs[30], &sf->lr, err); + +	for (i = 0; i < 31; i++) +		__put_user_error(regs->regs[i], &sf->uc.uc_mcontext.regs[i], +				 err); +	__put_user_error(regs->sp, &sf->uc.uc_mcontext.sp, err); +	__put_user_error(regs->pc, &sf->uc.uc_mcontext.pc, err); +	__put_user_error(regs->pstate, &sf->uc.uc_mcontext.pstate, err); + +	__put_user_error(current->thread.fault_address, &sf->uc.uc_mcontext.fault_address, err); + +	err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set)); + +	if (err == 0) { +		struct fpsimd_context *fpsimd_ctx = +			container_of(aux, struct fpsimd_context, head); +		err |= preserve_fpsimd_context(fpsimd_ctx); +		aux += sizeof(*fpsimd_ctx); +	} + +	/* fault information, if valid */ +	if (current->thread.fault_code) { +		struct esr_context *esr_ctx = +			container_of(aux, struct esr_context, head); +		__put_user_error(ESR_MAGIC, &esr_ctx->head.magic, err); +		__put_user_error(sizeof(*esr_ctx), &esr_ctx->head.size, err); +		__put_user_error(current->thread.fault_code, &esr_ctx->esr, err); +		aux += sizeof(*esr_ctx); +	} + +	/* set the "end" magic */ +	end = aux; +	__put_user_error(0, &end->magic, err); +	__put_user_error(0, &end->size, err); + +	return err; +} + +static struct rt_sigframe __user *get_sigframe(struct k_sigaction *ka, +					       struct pt_regs *regs) +{ +	unsigned long sp, sp_top; +	struct rt_sigframe __user *frame; + +	sp = sp_top = regs->sp; + +	/* +	 * This is the X/Open sanctioned signal stack switching. +	 */ +	if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(sp)) +		sp = sp_top = current->sas_ss_sp + current->sas_ss_size; + +	sp = (sp - sizeof(struct rt_sigframe)) & ~15; +	frame = (struct rt_sigframe __user *)sp; + +	/* +	 * Check that we can actually write to the signal frame. 
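+	 * The range checked (sp_top - sp) runs from the rounded-down frame
+	 * pointer up to the original stack pointer, i.e. it covers the
+	 * frame itself plus any alignment padding.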
+	 */ +	if (!access_ok(VERIFY_WRITE, frame, sp_top - sp)) +		frame = NULL; + +	return frame; +} + +static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, +			 void __user *frame, int usig) +{ +	__sigrestore_t sigtramp; + +	regs->regs[0] = usig; +	regs->sp = (unsigned long)frame; +	regs->regs[29] = regs->sp + offsetof(struct rt_sigframe, fp); +	regs->pc = (unsigned long)ka->sa.sa_handler; + +	if (ka->sa.sa_flags & SA_RESTORER) +		sigtramp = ka->sa.sa_restorer; +	else +		sigtramp = VDSO_SYMBOL(current->mm->context.vdso, sigtramp); + +	regs->regs[30] = (unsigned long)sigtramp; +} + +static int setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, +			  sigset_t *set, struct pt_regs *regs) +{ +	struct rt_sigframe __user *frame; +	int err = 0; + +	frame = get_sigframe(ka, regs); +	if (!frame) +		return 1; + +	__put_user_error(0, &frame->uc.uc_flags, err); +	__put_user_error(NULL, &frame->uc.uc_link, err); + +	err |= __save_altstack(&frame->uc.uc_stack, regs->sp); +	err |= setup_sigframe(frame, regs, set); +	if (err == 0) { +		setup_return(regs, ka, frame, usig); +		if (ka->sa.sa_flags & SA_SIGINFO) { +			err |= copy_siginfo_to_user(&frame->info, info); +			regs->regs[1] = (unsigned long)&frame->info; +			regs->regs[2] = (unsigned long)&frame->uc; +		} +	} + +	return err; +} + +static void setup_restart_syscall(struct pt_regs *regs) +{ +	if (is_compat_task()) +		compat_setup_restart_syscall(regs); +	else +		regs->regs[8] = __NR_restart_syscall; +} + +/* + * OK, we're invoking a handler + */ +static void handle_signal(unsigned long sig, struct k_sigaction *ka, +			  siginfo_t *info, struct pt_regs *regs) +{ +	struct thread_info *thread = current_thread_info(); +	struct task_struct *tsk = current; +	sigset_t *oldset = sigmask_to_save(); +	int usig = sig; +	int ret; + +	/* +	 * translate the signal +	 */ +	if (usig < 32 && thread->exec_domain && thread->exec_domain->signal_invmap) +		usig = thread->exec_domain->signal_invmap[usig]; + +	/* +	 * Set up the stack frame +	 */ +	if (is_compat_task()) { +		if (ka->sa.sa_flags & SA_SIGINFO) +			ret = compat_setup_rt_frame(usig, ka, info, oldset, +						    regs); +		else +			ret = compat_setup_frame(usig, ka, oldset, regs); +	} else { +		ret = setup_rt_frame(usig, ka, info, oldset, regs); +	} + +	/* +	 * Check that the resulting registers are actually sane. +	 */ +	ret |= !valid_user_regs(®s->user_regs); + +	if (ret != 0) { +		force_sigsegv(sig, tsk); +		return; +	} + +	/* +	 * Fast forward the stepping logic so we step into the signal +	 * handler. +	 */ +	user_fastforward_single_step(tsk); + +	signal_delivered(sig, info, ka, regs, 0); +} + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + * + * Note that we go through the signals twice: once to check the signals that + * the kernel can handle, and then we build all the user-level signal handling + * stack-frames in one go after that. + */ +static void do_signal(struct pt_regs *regs) +{ +	unsigned long continue_addr = 0, restart_addr = 0; +	struct k_sigaction ka; +	siginfo_t info; +	int signr, retval = 0; +	int syscall = (int)regs->syscallno; + +	/* +	 * If we were from a system call, check for system call restarting... +	 */ +	if (syscall >= 0) { +		continue_addr = regs->pc; +		restart_addr = continue_addr - (compat_thumb_mode(regs) ? 2 : 4); +		retval = regs->regs[0]; + +		/* +		 * Avoid additional syscall restarting via ret_to_user. 
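+		 * The original return value and restart address were captured
+		 * above (retval, restart_addr), so nothing is lost by
+		 * clearing syscallno here.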
+		 */ +		regs->syscallno = ~0UL; + +		/* +		 * Prepare for system call restart. We do this here so that a +		 * debugger will see the already changed PC. +		 */ +		switch (retval) { +		case -ERESTARTNOHAND: +		case -ERESTARTSYS: +		case -ERESTARTNOINTR: +		case -ERESTART_RESTARTBLOCK: +			regs->regs[0] = regs->orig_x0; +			regs->pc = restart_addr; +			break; +		} +	} + +	/* +	 * Get the signal to deliver. When running under ptrace, at this point +	 * the debugger may change all of our registers. +	 */ +	signr = get_signal_to_deliver(&info, &ka, regs, NULL); +	if (signr > 0) { +		/* +		 * Depending on the signal settings, we may need to revert the +		 * decision to restart the system call, but skip this if a +		 * debugger has chosen to restart at a different PC. +		 */ +		if (regs->pc == restart_addr && +		    (retval == -ERESTARTNOHAND || +		     retval == -ERESTART_RESTARTBLOCK || +		     (retval == -ERESTARTSYS && +		      !(ka.sa.sa_flags & SA_RESTART)))) { +			regs->regs[0] = -EINTR; +			regs->pc = continue_addr; +		} + +		handle_signal(signr, &ka, &info, regs); +		return; +	} + +	/* +	 * Handle restarting a different system call. As above, if a debugger +	 * has chosen to restart at a different PC, ignore the restart. +	 */ +	if (syscall >= 0 && regs->pc == restart_addr) { +		if (retval == -ERESTART_RESTARTBLOCK) +			setup_restart_syscall(regs); +		user_rewind_single_step(current); +	} + +	restore_saved_sigmask(); +} + +asmlinkage void do_notify_resume(struct pt_regs *regs, +				 unsigned int thread_flags) +{ +	if (thread_flags & _TIF_SIGPENDING) +		do_signal(regs); + +	if (thread_flags & _TIF_NOTIFY_RESUME) { +		clear_thread_flag(TIF_NOTIFY_RESUME); +		tracehook_notify_resume(regs); +	} + +	if (thread_flags & _TIF_FOREIGN_FPSTATE) +		fpsimd_restore_current_state(); + +} diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c new file mode 100644 index 00000000000..3491c638f17 --- /dev/null +++ b/arch/arm64/kernel/signal32.c @@ -0,0 +1,575 @@ +/* + * Based on arch/arm/kernel/signal.c + * + * Copyright (C) 1995-2009 Russell King + * Copyright (C) 2012 ARM Ltd. + * Modified by Will Deacon <will.deacon@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/compat.h> +#include <linux/signal.h> +#include <linux/syscalls.h> +#include <linux/ratelimit.h> + +#include <asm/esr.h> +#include <asm/fpsimd.h> +#include <asm/signal32.h> +#include <asm/uaccess.h> +#include <asm/unistd32.h> + +struct compat_sigcontext { +	/* We always set these two fields to 0 */ +	compat_ulong_t			trap_no; +	compat_ulong_t			error_code; + +	compat_ulong_t			oldmask; +	compat_ulong_t			arm_r0; +	compat_ulong_t			arm_r1; +	compat_ulong_t			arm_r2; +	compat_ulong_t			arm_r3; +	compat_ulong_t			arm_r4; +	compat_ulong_t			arm_r5; +	compat_ulong_t			arm_r6; +	compat_ulong_t			arm_r7; +	compat_ulong_t			arm_r8; +	compat_ulong_t			arm_r9; +	compat_ulong_t			arm_r10; +	compat_ulong_t			arm_fp; +	compat_ulong_t			arm_ip; +	compat_ulong_t			arm_sp; +	compat_ulong_t			arm_lr; +	compat_ulong_t			arm_pc; +	compat_ulong_t			arm_cpsr; +	compat_ulong_t			fault_address; +}; + +struct compat_ucontext { +	compat_ulong_t			uc_flags; +	compat_uptr_t			uc_link; +	compat_stack_t			uc_stack; +	struct compat_sigcontext	uc_mcontext; +	compat_sigset_t			uc_sigmask; +	int		__unused[32 - (sizeof (compat_sigset_t) / sizeof (int))]; +	compat_ulong_t	uc_regspace[128] __attribute__((__aligned__(8))); +}; + +struct compat_vfp_sigframe { +	compat_ulong_t	magic; +	compat_ulong_t	size; +	struct compat_user_vfp { +		compat_u64	fpregs[32]; +		compat_ulong_t	fpscr; +	} ufp; +	struct compat_user_vfp_exc { +		compat_ulong_t	fpexc; +		compat_ulong_t	fpinst; +		compat_ulong_t	fpinst2; +	} ufp_exc; +} __attribute__((__aligned__(8))); + +#define VFP_MAGIC		0x56465001 +#define VFP_STORAGE_SIZE	sizeof(struct compat_vfp_sigframe) + +#define FSR_WRITE_SHIFT		(11) + +struct compat_aux_sigframe { +	struct compat_vfp_sigframe	vfp; + +	/* Something that isn't a valid magic number for any coprocessor.  */ +	unsigned long			end_magic; +} __attribute__((__aligned__(8))); + +struct compat_sigframe { +	struct compat_ucontext	uc; +	compat_ulong_t		retcode[2]; +}; + +struct compat_rt_sigframe { +	struct compat_siginfo info; +	struct compat_sigframe sig; +}; + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) +{ +	compat_sigset_t	cset; + +	cset.sig[0] = set->sig[0] & 0xffffffffull; +	cset.sig[1] = set->sig[0] >> 32; + +	return copy_to_user(uset, &cset, sizeof(*uset)); +} + +static inline int get_sigset_t(sigset_t *set, +			       const compat_sigset_t __user *uset) +{ +	compat_sigset_t s32; + +	if (copy_from_user(&s32, uset, sizeof(*uset))) +		return -EFAULT; + +	set->sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); +	return 0; +} + +int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) +{ +	int err; + +	if (!access_ok(VERIFY_WRITE, to, sizeof(*to))) +		return -EFAULT; + +	/* If you change siginfo_t structure, please be sure +	 * this code is fixed accordingly. +	 * It should never copy any pad contained in the structure +	 * to avoid security leaks, but must copy the generic +	 * 3 ints plus the relevant union member. +	 * This routine must convert siginfo from 64bit to 32bit as well +	 * at the same time. 
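+	 * Concretely: si_signo, si_errno and si_code are copied one by one
+	 * below, and the switch on __SI_MASK then copies only the union
+	 * members that are meaningful for that si_code (negative si_code
+	 * values fall back to copying the raw pad).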
+	 */ +	err = __put_user(from->si_signo, &to->si_signo); +	err |= __put_user(from->si_errno, &to->si_errno); +	err |= __put_user((short)from->si_code, &to->si_code); +	if (from->si_code < 0) +		err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, +				      SI_PAD_SIZE); +	else switch (from->si_code & __SI_MASK) { +	case __SI_KILL: +		err |= __put_user(from->si_pid, &to->si_pid); +		err |= __put_user(from->si_uid, &to->si_uid); +		break; +	case __SI_TIMER: +		 err |= __put_user(from->si_tid, &to->si_tid); +		 err |= __put_user(from->si_overrun, &to->si_overrun); +		 err |= __put_user((compat_uptr_t)(unsigned long)from->si_ptr, +				   &to->si_ptr); +		break; +	case __SI_POLL: +		err |= __put_user(from->si_band, &to->si_band); +		err |= __put_user(from->si_fd, &to->si_fd); +		break; +	case __SI_FAULT: +		err |= __put_user((compat_uptr_t)(unsigned long)from->si_addr, +				  &to->si_addr); +#ifdef BUS_MCEERR_AO +		/* +		 * Other callers might not initialize the si_lsb field, +		 * so check explicitely for the right codes here. +		 */ +		if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) +			err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); +#endif +		break; +	case __SI_CHLD: +		err |= __put_user(from->si_pid, &to->si_pid); +		err |= __put_user(from->si_uid, &to->si_uid); +		err |= __put_user(from->si_status, &to->si_status); +		err |= __put_user(from->si_utime, &to->si_utime); +		err |= __put_user(from->si_stime, &to->si_stime); +		break; +	case __SI_RT: /* This is not generated by the kernel as of now. */ +	case __SI_MESGQ: /* But this is */ +		err |= __put_user(from->si_pid, &to->si_pid); +		err |= __put_user(from->si_uid, &to->si_uid); +		err |= __put_user((compat_uptr_t)(unsigned long)from->si_ptr, &to->si_ptr); +		break; +	default: /* this is just in case for now ... */ +		err |= __put_user(from->si_pid, &to->si_pid); +		err |= __put_user(from->si_uid, &to->si_uid); +		break; +	} +	return err; +} + +int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) +{ +	memset(to, 0, sizeof *to); + +	if (copy_from_user(to, from, __ARCH_SI_PREAMBLE_SIZE) || +	    copy_from_user(to->_sifields._pad, +			   from->_sifields._pad, SI_PAD_SIZE)) +		return -EFAULT; + +	return 0; +} + +/* + * VFP save/restore code. + */ +static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) +{ +	struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state; +	compat_ulong_t magic = VFP_MAGIC; +	compat_ulong_t size = VFP_STORAGE_SIZE; +	compat_ulong_t fpscr, fpexc; +	int err = 0; + +	/* +	 * Save the hardware registers to the fpsimd_state structure. +	 * Note that this also saves V16-31, which aren't visible +	 * in AArch32. +	 */ +	fpsimd_preserve_current_state(); + +	/* Place structure header on the stack */ +	__put_user_error(magic, &frame->magic, err); +	__put_user_error(size, &frame->size, err); + +	/* +	 * Now copy the FP registers. Since the registers are packed, +	 * we can copy the prefix we want (V0-V15) as it is. +	 * FIXME: Won't work if big endian. +	 */ +	err |= __copy_to_user(&frame->ufp.fpregs, fpsimd->vregs, +			      sizeof(frame->ufp.fpregs)); + +	/* Create an AArch32 fpscr from the fpsr and the fpcr. */ +	fpscr = (fpsimd->fpsr & VFP_FPSCR_STAT_MASK) | +		(fpsimd->fpcr & VFP_FPSCR_CTRL_MASK); +	__put_user_error(fpscr, &frame->ufp.fpscr, err); + +	/* +	 * The exception register aren't available so we fake up a +	 * basic FPEXC and zero everything else. 
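+	 * Bit 30 is FPEXC.EN, so the saved context simply reads back as
+	 * "VFP enabled" with no pending exception state.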
+	 */
+	fpexc = (1 << 30);
+	__put_user_error(fpexc, &frame->ufp_exc.fpexc, err);
+	__put_user_error(0, &frame->ufp_exc.fpinst, err);
+	__put_user_error(0, &frame->ufp_exc.fpinst2, err);
+
+	return err ? -EFAULT : 0;
+}
+
+static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
+{
+	struct fpsimd_state fpsimd;
+	compat_ulong_t magic = VFP_MAGIC;
+	compat_ulong_t size = VFP_STORAGE_SIZE;
+	compat_ulong_t fpscr;
+	int err = 0;
+
+	__get_user_error(magic, &frame->magic, err);
+	__get_user_error(size, &frame->size, err);
+
+	if (err)
+		return -EFAULT;
+	if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE)
+		return -EINVAL;
+
+	/*
+	 * Copy the FP registers into the start of the fpsimd_state.
+	 * FIXME: Won't work if big endian.
+	 */
+	err |= __copy_from_user(fpsimd.vregs, frame->ufp.fpregs,
+				sizeof(frame->ufp.fpregs));
+
+	/* Extract the fpsr and the fpcr from the fpscr */
+	__get_user_error(fpscr, &frame->ufp.fpscr, err);
+	fpsimd.fpsr = fpscr & VFP_FPSCR_STAT_MASK;
+	fpsimd.fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
+
+	/*
+	 * We don't need to touch the exception register, so
+	 * reload the hardware state.
+	 */
+	if (!err)
+		fpsimd_update_current_state(&fpsimd);
+
+	return err ? -EFAULT : 0;
+}
+
+static int compat_restore_sigframe(struct pt_regs *regs,
+				   struct compat_sigframe __user *sf)
+{
+	int err;
+	sigset_t set;
+	struct compat_aux_sigframe __user *aux;
+
+	err = get_sigset_t(&set, &sf->uc.uc_sigmask);
+	if (err == 0) {
+		sigdelsetmask(&set, ~_BLOCKABLE);
+		set_current_blocked(&set);
+	}
+
+	__get_user_error(regs->regs[0], &sf->uc.uc_mcontext.arm_r0, err);
+	__get_user_error(regs->regs[1], &sf->uc.uc_mcontext.arm_r1, err);
+	__get_user_error(regs->regs[2], &sf->uc.uc_mcontext.arm_r2, err);
+	__get_user_error(regs->regs[3], &sf->uc.uc_mcontext.arm_r3, err);
+	__get_user_error(regs->regs[4], &sf->uc.uc_mcontext.arm_r4, err);
+	__get_user_error(regs->regs[5], &sf->uc.uc_mcontext.arm_r5, err);
+	__get_user_error(regs->regs[6], &sf->uc.uc_mcontext.arm_r6, err);
+	__get_user_error(regs->regs[7], &sf->uc.uc_mcontext.arm_r7, err);
+	__get_user_error(regs->regs[8], &sf->uc.uc_mcontext.arm_r8, err);
+	__get_user_error(regs->regs[9], &sf->uc.uc_mcontext.arm_r9, err);
+	__get_user_error(regs->regs[10], &sf->uc.uc_mcontext.arm_r10, err);
+	__get_user_error(regs->regs[11], &sf->uc.uc_mcontext.arm_fp, err);
+	__get_user_error(regs->regs[12], &sf->uc.uc_mcontext.arm_ip, err);
+	__get_user_error(regs->compat_sp, &sf->uc.uc_mcontext.arm_sp, err);
+	__get_user_error(regs->compat_lr, &sf->uc.uc_mcontext.arm_lr, err);
+	__get_user_error(regs->pc, &sf->uc.uc_mcontext.arm_pc, err);
+	__get_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err);
+
+	/*
+	 * Avoid compat_sys_sigreturn() restarting.
+	 */
+	regs->syscallno = ~0UL;
+
+	err |= !valid_user_regs(&regs->user_regs);
+
+	aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace;
+	if (err == 0)
+		err |= compat_restore_vfp_context(&aux->vfp);
+
+	return err;
+}
+
+asmlinkage int compat_sys_sigreturn(struct pt_regs *regs)
+{
+	struct compat_sigframe __user *frame;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+	/*
+	 * Since we stacked the signal on a 64-bit boundary,
+	 * then 'sp' should be word aligned here.  If it's
+	 * not, then the user is trying to mess with us.
+	 */ +	if (regs->compat_sp & 7) +		goto badframe; + +	frame = (struct compat_sigframe __user *)regs->compat_sp; + +	if (!access_ok(VERIFY_READ, frame, sizeof (*frame))) +		goto badframe; + +	if (compat_restore_sigframe(regs, frame)) +		goto badframe; + +	return regs->regs[0]; + +badframe: +	if (show_unhandled_signals) +		pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n", +				    current->comm, task_pid_nr(current), __func__, +				    regs->pc, regs->sp); +	force_sig(SIGSEGV, current); +	return 0; +} + +asmlinkage int compat_sys_rt_sigreturn(struct pt_regs *regs) +{ +	struct compat_rt_sigframe __user *frame; + +	/* Always make any pending restarted system calls return -EINTR */ +	current_thread_info()->restart_block.fn = do_no_restart_syscall; + +	/* +	 * Since we stacked the signal on a 64-bit boundary, +	 * then 'sp' should be word aligned here.  If it's +	 * not, then the user is trying to mess with us. +	 */ +	if (regs->compat_sp & 7) +		goto badframe; + +	frame = (struct compat_rt_sigframe __user *)regs->compat_sp; + +	if (!access_ok(VERIFY_READ, frame, sizeof (*frame))) +		goto badframe; + +	if (compat_restore_sigframe(regs, &frame->sig)) +		goto badframe; + +	if (compat_restore_altstack(&frame->sig.uc.uc_stack)) +		goto badframe; + +	return regs->regs[0]; + +badframe: +	if (show_unhandled_signals) +		pr_info_ratelimited("%s[%d]: bad frame in %s: pc=%08llx sp=%08llx\n", +				    current->comm, task_pid_nr(current), __func__, +				    regs->pc, regs->sp); +	force_sig(SIGSEGV, current); +	return 0; +} + +static void __user *compat_get_sigframe(struct k_sigaction *ka, +					struct pt_regs *regs, +					int framesize) +{ +	compat_ulong_t sp = regs->compat_sp; +	void __user *frame; + +	/* +	 * This is the X/Open sanctioned signal stack switching. +	 */ +	if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(sp)) +		sp = current->sas_ss_sp + current->sas_ss_size; + +	/* +	 * ATPCS B01 mandates 8-byte alignment +	 */ +	frame = compat_ptr((compat_uptr_t)((sp - framesize) & ~7)); + +	/* +	 * Check that we can actually write to the signal frame. 
+	 */ +	if (!access_ok(VERIFY_WRITE, frame, framesize)) +		frame = NULL; + +	return frame; +} + +static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, +				compat_ulong_t __user *rc, void __user *frame, +				int usig) +{ +	compat_ulong_t handler = ptr_to_compat(ka->sa.sa_handler); +	compat_ulong_t retcode; +	compat_ulong_t spsr = regs->pstate & ~PSR_f; +	int thumb; + +	/* Check if the handler is written for ARM or Thumb */ +	thumb = handler & 1; + +	if (thumb) +		spsr |= COMPAT_PSR_T_BIT; +	else +		spsr &= ~COMPAT_PSR_T_BIT; + +	/* The IT state must be cleared for both ARM and Thumb-2 */ +	spsr &= ~COMPAT_PSR_IT_MASK; + +	if (ka->sa.sa_flags & SA_RESTORER) { +		retcode = ptr_to_compat(ka->sa.sa_restorer); +	} else { +		/* Set up sigreturn pointer */ +		unsigned int idx = thumb << 1; + +		if (ka->sa.sa_flags & SA_SIGINFO) +			idx += 3; + +		retcode = AARCH32_VECTORS_BASE + +			  AARCH32_KERN_SIGRET_CODE_OFFSET + +			  (idx << 2) + thumb; +	} + +	regs->regs[0]	= usig; +	regs->compat_sp	= ptr_to_compat(frame); +	regs->compat_lr	= retcode; +	regs->pc	= handler; +	regs->pstate	= spsr; +} + +static int compat_setup_sigframe(struct compat_sigframe __user *sf, +				 struct pt_regs *regs, sigset_t *set) +{ +	struct compat_aux_sigframe __user *aux; +	int err = 0; + +	__put_user_error(regs->regs[0], &sf->uc.uc_mcontext.arm_r0, err); +	__put_user_error(regs->regs[1], &sf->uc.uc_mcontext.arm_r1, err); +	__put_user_error(regs->regs[2], &sf->uc.uc_mcontext.arm_r2, err); +	__put_user_error(regs->regs[3], &sf->uc.uc_mcontext.arm_r3, err); +	__put_user_error(regs->regs[4], &sf->uc.uc_mcontext.arm_r4, err); +	__put_user_error(regs->regs[5], &sf->uc.uc_mcontext.arm_r5, err); +	__put_user_error(regs->regs[6], &sf->uc.uc_mcontext.arm_r6, err); +	__put_user_error(regs->regs[7], &sf->uc.uc_mcontext.arm_r7, err); +	__put_user_error(regs->regs[8], &sf->uc.uc_mcontext.arm_r8, err); +	__put_user_error(regs->regs[9], &sf->uc.uc_mcontext.arm_r9, err); +	__put_user_error(regs->regs[10], &sf->uc.uc_mcontext.arm_r10, err); +	__put_user_error(regs->regs[11], &sf->uc.uc_mcontext.arm_fp, err); +	__put_user_error(regs->regs[12], &sf->uc.uc_mcontext.arm_ip, err); +	__put_user_error(regs->compat_sp, &sf->uc.uc_mcontext.arm_sp, err); +	__put_user_error(regs->compat_lr, &sf->uc.uc_mcontext.arm_lr, err); +	__put_user_error(regs->pc, &sf->uc.uc_mcontext.arm_pc, err); +	__put_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err); + +	__put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.trap_no, err); +	/* set the compat FSR WnR */ +	__put_user_error(!!(current->thread.fault_code & ESR_EL1_WRITE) << +			 FSR_WRITE_SHIFT, &sf->uc.uc_mcontext.error_code, err); +	__put_user_error(current->thread.fault_address, &sf->uc.uc_mcontext.fault_address, err); +	__put_user_error(set->sig[0], &sf->uc.uc_mcontext.oldmask, err); + +	err |= put_sigset_t(&sf->uc.uc_sigmask, set); + +	aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace; + +	if (err == 0) +		err |= compat_preserve_vfp_context(&aux->vfp); +	__put_user_error(0, &aux->end_magic, err); + +	return err; +} + +/* + * 32-bit signal handling routines called from signal.c + */ +int compat_setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, +			  sigset_t *set, struct pt_regs *regs) +{ +	struct compat_rt_sigframe __user *frame; +	int err = 0; + +	frame = compat_get_sigframe(ka, regs, sizeof(*frame)); + +	if (!frame) +		return 1; + +	err |= copy_siginfo_to_user32(&frame->info, info); + +	__put_user_error(0, &frame->sig.uc.uc_flags, err); +	
__put_user_error(0, &frame->sig.uc.uc_link, err); + +	err |= __compat_save_altstack(&frame->sig.uc.uc_stack, regs->compat_sp); + +	err |= compat_setup_sigframe(&frame->sig, regs, set); + +	if (err == 0) { +		compat_setup_return(regs, ka, frame->sig.retcode, frame, usig); +		regs->regs[1] = (compat_ulong_t)(unsigned long)&frame->info; +		regs->regs[2] = (compat_ulong_t)(unsigned long)&frame->sig.uc; +	} + +	return err; +} + +int compat_setup_frame(int usig, struct k_sigaction *ka, sigset_t *set, +		       struct pt_regs *regs) +{ +	struct compat_sigframe __user *frame; +	int err = 0; + +	frame = compat_get_sigframe(ka, regs, sizeof(*frame)); + +	if (!frame) +		return 1; + +	__put_user_error(0x5ac3c35a, &frame->uc.uc_flags, err); + +	err |= compat_setup_sigframe(frame, regs, set); +	if (err == 0) +		compat_setup_return(regs, ka, frame->retcode, frame, usig); + +	return err; +} + +void compat_setup_restart_syscall(struct pt_regs *regs) +{ +       regs->regs[7] = __NR_compat_restart_syscall; +} diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S new file mode 100644 index 00000000000..b1925729c69 --- /dev/null +++ b/arch/arm64/kernel/sleep.S @@ -0,0 +1,184 @@ +#include <linux/errno.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/assembler.h> + +	.text +/* + * Implementation of MPIDR_EL1 hash algorithm through shifting + * and OR'ing. + * + * @dst: register containing hash result + * @rs0: register containing affinity level 0 bit shift + * @rs1: register containing affinity level 1 bit shift + * @rs2: register containing affinity level 2 bit shift + * @rs3: register containing affinity level 3 bit shift + * @mpidr: register containing MPIDR_EL1 value + * @mask: register containing MPIDR mask + * + * Pseudo C-code: + * + *u32 dst; + * + *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 rs3, u64 mpidr, u64 mask) { + *	u32 aff0, aff1, aff2, aff3; + *	u64 mpidr_masked = mpidr & mask; + *	aff0 = mpidr_masked & 0xff; + *	aff1 = mpidr_masked & 0xff00; + *	aff2 = mpidr_masked & 0xff0000; + *	aff2 = mpidr_masked & 0xff00000000; + *	dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3); + *} + * Input registers: rs0, rs1, rs2, rs3, mpidr, mask + * Output register: dst + * Note: input and output registers must be disjoint register sets +         (eg: a macro instance with mpidr = x1 and dst = x1 is invalid) + */ +	.macro compute_mpidr_hash dst, rs0, rs1, rs2, rs3, mpidr, mask +	and	\mpidr, \mpidr, \mask		// mask out MPIDR bits +	and	\dst, \mpidr, #0xff		// mask=aff0 +	lsr	\dst ,\dst, \rs0		// dst=aff0>>rs0 +	and	\mask, \mpidr, #0xff00		// mask = aff1 +	lsr	\mask ,\mask, \rs1 +	orr	\dst, \dst, \mask		// dst|=(aff1>>rs1) +	and	\mask, \mpidr, #0xff0000	// mask = aff2 +	lsr	\mask ,\mask, \rs2 +	orr	\dst, \dst, \mask		// dst|=(aff2>>rs2) +	and	\mask, \mpidr, #0xff00000000	// mask = aff3 +	lsr	\mask ,\mask, \rs3 +	orr	\dst, \dst, \mask		// dst|=(aff3>>rs3) +	.endm +/* + * Save CPU state for a suspend.  This saves callee registers, and allocates + * space on the kernel stack to save the CPU specific registers + some + * other data for resume. + * + *  x0 = suspend finisher argument + */ +ENTRY(__cpu_suspend) +	stp	x29, lr, [sp, #-96]! 
+	stp	x19, x20, [sp,#16] +	stp	x21, x22, [sp,#32] +	stp	x23, x24, [sp,#48] +	stp	x25, x26, [sp,#64] +	stp	x27, x28, [sp,#80] +	mov	x2, sp +	sub	sp, sp, #CPU_SUSPEND_SZ	// allocate cpu_suspend_ctx +	mov	x1, sp +	/* +	 * x1 now points to struct cpu_suspend_ctx allocated on the stack +	 */ +	str	x2, [x1, #CPU_CTX_SP] +	ldr	x2, =sleep_save_sp +	ldr	x2, [x2, #SLEEP_SAVE_SP_VIRT] +#ifdef CONFIG_SMP +	mrs	x7, mpidr_el1 +	ldr	x9, =mpidr_hash +	ldr	x10, [x9, #MPIDR_HASH_MASK] +	/* +	 * Following code relies on the struct mpidr_hash +	 * members size. +	 */ +	ldp	w3, w4, [x9, #MPIDR_HASH_SHIFTS] +	ldp	w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)] +	compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 +	add	x2, x2, x8, lsl #3 +#endif +	bl	__cpu_suspend_finisher +        /* +	 * Never gets here, unless suspend fails. +	 * Successful cpu_suspend should return from cpu_resume, returning +	 * through this code path is considered an error +	 * If the return value is set to 0 force x0 = -EOPNOTSUPP +	 * to make sure a proper error condition is propagated +	 */ +	cmp	x0, #0 +	mov	x3, #-EOPNOTSUPP +	csel	x0, x3, x0, eq +	add	sp, sp, #CPU_SUSPEND_SZ	// rewind stack pointer +	ldp	x19, x20, [sp, #16] +	ldp	x21, x22, [sp, #32] +	ldp	x23, x24, [sp, #48] +	ldp	x25, x26, [sp, #64] +	ldp	x27, x28, [sp, #80] +	ldp	x29, lr, [sp], #96 +	ret +ENDPROC(__cpu_suspend) +	.ltorg + +/* + * x0 must contain the sctlr value retrieved from restored context + */ +ENTRY(cpu_resume_mmu) +	ldr	x3, =cpu_resume_after_mmu +	msr	sctlr_el1, x0		// restore sctlr_el1 +	isb +	br	x3			// global jump to virtual address +ENDPROC(cpu_resume_mmu) +cpu_resume_after_mmu: +	mov	x0, #0			// return zero on success +	ldp	x19, x20, [sp, #16] +	ldp	x21, x22, [sp, #32] +	ldp	x23, x24, [sp, #48] +	ldp	x25, x26, [sp, #64] +	ldp	x27, x28, [sp, #80] +	ldp	x29, lr, [sp], #96 +	ret +ENDPROC(cpu_resume_after_mmu) + +	.data +ENTRY(cpu_resume) +	bl	el2_setup		// if in EL2 drop to EL1 cleanly +#ifdef CONFIG_SMP +	mrs	x1, mpidr_el1 +	adr	x4, mpidr_hash_ptr +	ldr	x5, [x4] +	add	x8, x4, x5		// x8 = struct mpidr_hash phys address +        /* retrieve mpidr_hash members to compute the hash */ +	ldr	x2, [x8, #MPIDR_HASH_MASK] +	ldp	w3, w4, [x8, #MPIDR_HASH_SHIFTS] +	ldp	w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] +	compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 +        /* x7 contains hash index, let's use it to grab context pointer */ +#else +	mov	x7, xzr +#endif +	adr	x0, sleep_save_sp +	ldr	x0, [x0, #SLEEP_SAVE_SP_PHYS] +	ldr	x0, [x0, x7, lsl #3] +	/* load sp from context */ +	ldr	x2, [x0, #CPU_CTX_SP] +	adr	x1, sleep_idmap_phys +	/* load physical address of identity map page table in x1 */ +	ldr	x1, [x1] +	mov	sp, x2 +	/* +	 * cpu_do_resume expects x0 to contain context physical address +	 * pointer and x1 to contain physical address of 1:1 page tables +	 */ +	bl	cpu_do_resume		// PC relative jump, MMU off +	b	cpu_resume_mmu		// Resume MMU, never returns +ENDPROC(cpu_resume) + +	.align 3 +mpidr_hash_ptr: +	/* +	 * offset of mpidr_hash symbol from current location +	 * used to obtain run-time mpidr_hash address with MMU off +         */ +	.quad	mpidr_hash - . 
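For orientation, a minimal pseudo-C sketch (in the style of the "Pseudo C-code" comment above, and not part of the patch itself) of how the MPIDR-hash-indexed stash ties the suspend and resume paths together; "hash" stands for the compute_mpidr_hash result, and the field names follow the sleep_save_sp structure defined just below:

	/*
	 * Suspend: __cpu_suspend_finisher() stores the physical address of the
	 * on-stack cpu_suspend_ctx in the slot selected by this CPU's MPIDR
	 * hash (the "add x2, x2, x8, lsl #3" above).
	 */
	sleep_save_sp.save_ptr_stash[hash] = virt_to_phys(ctx);

	/*
	 * Resume: cpu_resume runs with the MMU off, so it indexes the stash
	 * through its physical alias ("ldr x0, [x0, x7, lsl #3]") and reloads
	 * sp from the saved context before cpu_resume_mmu() re-enables the MMU.
	 */
	ctx_phys = ((phys_addr_t *)sleep_save_sp.save_ptr_stash_phys)[hash];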
+/* + * physical address of identity mapped page tables + */ +	.type	sleep_idmap_phys, #object +ENTRY(sleep_idmap_phys) +	.quad	0 +/* + * struct sleep_save_sp { + *	phys_addr_t *save_ptr_stash; + *	phys_addr_t save_ptr_stash_phys; + * }; + */ +	.type	sleep_save_sp, #object +ENTRY(sleep_save_sp) +	.space	SLEEP_SAVE_SP_SZ	// struct sleep_save_sp diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c new file mode 100644 index 00000000000..40f38f46c8e --- /dev/null +++ b/arch/arm64/kernel/smp.c @@ -0,0 +1,645 @@ +/* + * SMP initialisation and IPI support + * Based on arch/arm/kernel/smp.c + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/cache.h> +#include <linux/profile.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/err.h> +#include <linux/cpu.h> +#include <linux/smp.h> +#include <linux/seq_file.h> +#include <linux/irq.h> +#include <linux/percpu.h> +#include <linux/clockchips.h> +#include <linux/completion.h> +#include <linux/of.h> +#include <linux/irq_work.h> + +#include <asm/atomic.h> +#include <asm/cacheflush.h> +#include <asm/cputype.h> +#include <asm/cpu_ops.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/processor.h> +#include <asm/smp_plat.h> +#include <asm/sections.h> +#include <asm/tlbflush.h> +#include <asm/ptrace.h> + +/* + * as from 2.5, kernels no longer have an init_tasks structure + * so we need some other way of telling a new secondary core + * where to place its SVC stack + */ +struct secondary_data secondary_data; + +enum ipi_msg_type { +	IPI_RESCHEDULE, +	IPI_CALL_FUNC, +	IPI_CALL_FUNC_SINGLE, +	IPI_CPU_STOP, +	IPI_TIMER, +	IPI_IRQ_WORK, +}; + +/* + * Boot a secondary CPU, and assign it the specified idle task. + * This also gives us the initial stack to use for this CPU. + */ +static int boot_secondary(unsigned int cpu, struct task_struct *idle) +{ +	if (cpu_ops[cpu]->cpu_boot) +		return cpu_ops[cpu]->cpu_boot(cpu); + +	return -EOPNOTSUPP; +} + +static DECLARE_COMPLETION(cpu_running); + +int __cpu_up(unsigned int cpu, struct task_struct *idle) +{ +	int ret; + +	/* +	 * We need to tell the secondary core where to find its stack and the +	 * page tables. +	 */ +	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; +	__flush_dcache_area(&secondary_data, sizeof(secondary_data)); + +	/* +	 * Now bring the CPU into our world. +	 */ +	ret = boot_secondary(cpu, idle); +	if (ret == 0) { +		/* +		 * CPU was successfully started, wait for it to come online or +		 * time out. 
+		 */ +		wait_for_completion_timeout(&cpu_running, +					    msecs_to_jiffies(1000)); + +		if (!cpu_online(cpu)) { +			pr_crit("CPU%u: failed to come online\n", cpu); +			ret = -EIO; +		} +	} else { +		pr_err("CPU%u: failed to boot: %d\n", cpu, ret); +	} + +	secondary_data.stack = NULL; + +	return ret; +} + +static void smp_store_cpu_info(unsigned int cpuid) +{ +	store_cpu_topology(cpuid); +} + +/* + * This is the secondary CPU boot entry.  We're using this CPUs + * idle thread stack, but a set of temporary page tables. + */ +asmlinkage void secondary_start_kernel(void) +{ +	struct mm_struct *mm = &init_mm; +	unsigned int cpu = smp_processor_id(); + +	/* +	 * All kernel threads share the same mm context; grab a +	 * reference and switch to it. +	 */ +	atomic_inc(&mm->mm_count); +	current->active_mm = mm; +	cpumask_set_cpu(cpu, mm_cpumask(mm)); + +	set_my_cpu_offset(per_cpu_offset(smp_processor_id())); +	printk("CPU%u: Booted secondary processor\n", cpu); + +	/* +	 * TTBR0 is only used for the identity mapping at this stage. Make it +	 * point to zero page to avoid speculatively fetching new entries. +	 */ +	cpu_set_reserved_ttbr0(); +	flush_tlb_all(); + +	preempt_disable(); +	trace_hardirqs_off(); + +	if (cpu_ops[cpu]->cpu_postboot) +		cpu_ops[cpu]->cpu_postboot(); + +	/* +	 * Enable GIC and timers. +	 */ +	notify_cpu_starting(cpu); + +	smp_store_cpu_info(cpu); + +	/* +	 * OK, now it's safe to let the boot CPU continue.  Wait for +	 * the CPU migration code to notice that the CPU is online +	 * before we continue. +	 */ +	set_cpu_online(cpu, true); +	complete(&cpu_running); + +	local_dbg_enable(); +	local_irq_enable(); +	local_async_enable(); + +	/* +	 * OK, it's off to the idle thread for us +	 */ +	cpu_startup_entry(CPUHP_ONLINE); +} + +#ifdef CONFIG_HOTPLUG_CPU +static int op_cpu_disable(unsigned int cpu) +{ +	/* +	 * If we don't have a cpu_die method, abort before we reach the point +	 * of no return. CPU0 may not have an cpu_ops, so test for it. +	 */ +	if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die) +		return -EOPNOTSUPP; + +	/* +	 * We may need to abort a hot unplug for some other mechanism-specific +	 * reason. +	 */ +	if (cpu_ops[cpu]->cpu_disable) +		return cpu_ops[cpu]->cpu_disable(cpu); + +	return 0; +} + +/* + * __cpu_disable runs on the processor to be shutdown. + */ +int __cpu_disable(void) +{ +	unsigned int cpu = smp_processor_id(); +	int ret; + +	ret = op_cpu_disable(cpu); +	if (ret) +		return ret; + +	/* +	 * Take this CPU offline.  Once we clear this, we can't return, +	 * and we must not schedule until we're ready to give up the cpu. +	 */ +	set_cpu_online(cpu, false); + +	/* +	 * OK - migrate IRQs away from this CPU +	 */ +	migrate_irqs(); + +	/* +	 * Remove this CPU from the vm mask set of all processes. +	 */ +	clear_tasks_mm_cpumask(cpu); + +	return 0; +} + +static int op_cpu_kill(unsigned int cpu) +{ +	/* +	 * If we have no means of synchronising with the dying CPU, then assume +	 * that it is really dead. We can only wait for an arbitrary length of +	 * time and hope that it's dead, so let's skip the wait and just hope. +	 */ +	if (!cpu_ops[cpu]->cpu_kill) +		return 1; + +	return cpu_ops[cpu]->cpu_kill(cpu); +} + +static DECLARE_COMPLETION(cpu_died); + +/* + * called on the thread which is asking for a CPU to be shutdown - + * waits until shutdown has completed, or it is timed out. 
+ */ +void __cpu_die(unsigned int cpu) +{ +	if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) { +		pr_crit("CPU%u: cpu didn't die\n", cpu); +		return; +	} +	pr_notice("CPU%u: shutdown\n", cpu); + +	/* +	 * Now that the dying CPU is beyond the point of no return w.r.t. +	 * in-kernel synchronisation, try to get the firwmare to help us to +	 * verify that it has really left the kernel before we consider +	 * clobbering anything it might still be using. +	 */ +	if (!op_cpu_kill(cpu)) +		pr_warn("CPU%d may not have shut down cleanly\n", cpu); +} + +/* + * Called from the idle thread for the CPU which has been shutdown. + * + * Note that we disable IRQs here, but do not re-enable them + * before returning to the caller. This is also the behaviour + * of the other hotplug-cpu capable cores, so presumably coming + * out of idle fixes this. + */ +void cpu_die(void) +{ +	unsigned int cpu = smp_processor_id(); + +	idle_task_exit(); + +	local_irq_disable(); + +	/* Tell __cpu_die() that this CPU is now safe to dispose of */ +	complete(&cpu_died); + +	/* +	 * Actually shutdown the CPU. This must never fail. The specific hotplug +	 * mechanism must perform all required cache maintenance to ensure that +	 * no dirty lines are lost in the process of shutting down the CPU. +	 */ +	cpu_ops[cpu]->cpu_die(cpu); + +	BUG(); +} +#endif + +void __init smp_cpus_done(unsigned int max_cpus) +{ +	pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); +} + +void __init smp_prepare_boot_cpu(void) +{ +	set_my_cpu_offset(per_cpu_offset(smp_processor_id())); +} + +static void (*smp_cross_call)(const struct cpumask *, unsigned int); + +/* + * Enumerate the possible CPU set from the device tree and build the + * cpu logical map array containing MPIDR values related to logical + * cpus. Assumes that cpu_logical_map(0) has already been initialized. + */ +void __init smp_init_cpus(void) +{ +	struct device_node *dn = NULL; +	unsigned int i, cpu = 1; +	bool bootcpu_valid = false; + +	while ((dn = of_find_node_by_type(dn, "cpu"))) { +		const u32 *cell; +		u64 hwid; + +		/* +		 * A cpu node with missing "reg" property is +		 * considered invalid to build a cpu_logical_map +		 * entry. +		 */ +		cell = of_get_property(dn, "reg", NULL); +		if (!cell) { +			pr_err("%s: missing reg property\n", dn->full_name); +			goto next; +		} +		hwid = of_read_number(cell, of_n_addr_cells(dn)); + +		/* +		 * Non affinity bits must be set to 0 in the DT +		 */ +		if (hwid & ~MPIDR_HWID_BITMASK) { +			pr_err("%s: invalid reg property\n", dn->full_name); +			goto next; +		} + +		/* +		 * Duplicate MPIDRs are a recipe for disaster. Scan +		 * all initialized entries and check for +		 * duplicates. If any is found just ignore the cpu. +		 * cpu_logical_map was initialized to INVALID_HWID to +		 * avoid matching valid MPIDR values. +		 */ +		for (i = 1; (i < cpu) && (i < NR_CPUS); i++) { +			if (cpu_logical_map(i) == hwid) { +				pr_err("%s: duplicate cpu reg properties in the DT\n", +					dn->full_name); +				goto next; +			} +		} + +		/* +		 * The numbering scheme requires that the boot CPU +		 * must be assigned logical id 0. Record it so that +		 * the logical map built from DT is validated and can +		 * be used. 
+		 */ +		if (hwid == cpu_logical_map(0)) { +			if (bootcpu_valid) { +				pr_err("%s: duplicate boot cpu reg property in DT\n", +					dn->full_name); +				goto next; +			} + +			bootcpu_valid = true; + +			/* +			 * cpu_logical_map has already been +			 * initialized and the boot cpu doesn't need +			 * the enable-method so continue without +			 * incrementing cpu. +			 */ +			continue; +		} + +		if (cpu >= NR_CPUS) +			goto next; + +		if (cpu_read_ops(dn, cpu) != 0) +			goto next; + +		if (cpu_ops[cpu]->cpu_init(dn, cpu)) +			goto next; + +		pr_debug("cpu logical map 0x%llx\n", hwid); +		cpu_logical_map(cpu) = hwid; +next: +		cpu++; +	} + +	/* sanity check */ +	if (cpu > NR_CPUS) +		pr_warning("no. of cores (%d) greater than configured maximum of %d - clipping\n", +			   cpu, NR_CPUS); + +	if (!bootcpu_valid) { +		pr_err("DT missing boot CPU MPIDR, not enabling secondaries\n"); +		return; +	} + +	/* +	 * All the cpus that made it to the cpu_logical_map have been +	 * validated so set them as possible cpus. +	 */ +	for (i = 0; i < NR_CPUS; i++) +		if (cpu_logical_map(i) != INVALID_HWID) +			set_cpu_possible(i, true); +} + +void __init smp_prepare_cpus(unsigned int max_cpus) +{ +	int err; +	unsigned int cpu, ncores = num_possible_cpus(); + +	init_cpu_topology(); + +	smp_store_cpu_info(smp_processor_id()); + +	/* +	 * are we trying to boot more cores than exist? +	 */ +	if (max_cpus > ncores) +		max_cpus = ncores; + +	/* Don't bother if we're effectively UP */ +	if (max_cpus <= 1) +		return; + +	/* +	 * Initialise the present map (which describes the set of CPUs +	 * actually populated at the present time) and release the +	 * secondaries from the bootloader. +	 * +	 * Make sure we online at most (max_cpus - 1) additional CPUs. +	 */ +	max_cpus--; +	for_each_possible_cpu(cpu) { +		if (max_cpus == 0) +			break; + +		if (cpu == smp_processor_id()) +			continue; + +		if (!cpu_ops[cpu]) +			continue; + +		err = cpu_ops[cpu]->cpu_prepare(cpu); +		if (err) +			continue; + +		set_cpu_present(cpu, true); +		max_cpus--; +	} +} + + +void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int)) +{ +	smp_cross_call = fn; +} + +void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ +	smp_cross_call(mask, IPI_CALL_FUNC); +} + +void arch_send_call_function_single_ipi(int cpu) +{ +	smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); +} + +#ifdef CONFIG_IRQ_WORK +void arch_irq_work_raise(void) +{ +	if (smp_cross_call) +		smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK); +} +#endif + +static const char *ipi_types[NR_IPI] = { +#define S(x,s)	[x - IPI_RESCHEDULE] = s +	S(IPI_RESCHEDULE, "Rescheduling interrupts"), +	S(IPI_CALL_FUNC, "Function call interrupts"), +	S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), +	S(IPI_CPU_STOP, "CPU stop interrupts"), +	S(IPI_TIMER, "Timer broadcast interrupts"), +	S(IPI_IRQ_WORK, "IRQ work interrupts"), +}; + +void show_ipi_list(struct seq_file *p, int prec) +{ +	unsigned int cpu, i; + +	for (i = 0; i < NR_IPI; i++) { +		seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i + IPI_RESCHEDULE, +			   prec >= 4 ? 
" " : ""); +		for_each_online_cpu(cpu) +			seq_printf(p, "%10u ", +				   __get_irq_stat(cpu, ipi_irqs[i])); +		seq_printf(p, "      %s\n", ipi_types[i]); +	} +} + +u64 smp_irq_stat_cpu(unsigned int cpu) +{ +	u64 sum = 0; +	int i; + +	for (i = 0; i < NR_IPI; i++) +		sum += __get_irq_stat(cpu, ipi_irqs[i]); + +	return sum; +} + +static DEFINE_RAW_SPINLOCK(stop_lock); + +/* + * ipi_cpu_stop - handle IPI from smp_send_stop() + */ +static void ipi_cpu_stop(unsigned int cpu) +{ +	if (system_state == SYSTEM_BOOTING || +	    system_state == SYSTEM_RUNNING) { +		raw_spin_lock(&stop_lock); +		pr_crit("CPU%u: stopping\n", cpu); +		dump_stack(); +		raw_spin_unlock(&stop_lock); +	} + +	set_cpu_online(cpu, false); + +	local_irq_disable(); + +	while (1) +		cpu_relax(); +} + +/* + * Main handler for inter-processor interrupts + */ +void handle_IPI(int ipinr, struct pt_regs *regs) +{ +	unsigned int cpu = smp_processor_id(); +	struct pt_regs *old_regs = set_irq_regs(regs); + +	if (ipinr >= IPI_RESCHEDULE && ipinr < IPI_RESCHEDULE + NR_IPI) +		__inc_irq_stat(cpu, ipi_irqs[ipinr - IPI_RESCHEDULE]); + +	switch (ipinr) { +	case IPI_RESCHEDULE: +		scheduler_ipi(); +		break; + +	case IPI_CALL_FUNC: +		irq_enter(); +		generic_smp_call_function_interrupt(); +		irq_exit(); +		break; + +	case IPI_CALL_FUNC_SINGLE: +		irq_enter(); +		generic_smp_call_function_single_interrupt(); +		irq_exit(); +		break; + +	case IPI_CPU_STOP: +		irq_enter(); +		ipi_cpu_stop(cpu); +		irq_exit(); +		break; + +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +	case IPI_TIMER: +		irq_enter(); +		tick_receive_broadcast(); +		irq_exit(); +		break; +#endif + +#ifdef CONFIG_IRQ_WORK +	case IPI_IRQ_WORK: +		irq_enter(); +		irq_work_run(); +		irq_exit(); +		break; +#endif + +	default: +		pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); +		break; +	} +	set_irq_regs(old_regs); +} + +void smp_send_reschedule(int cpu) +{ +	smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); +} + +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +void tick_broadcast(const struct cpumask *mask) +{ +	smp_cross_call(mask, IPI_TIMER); +} +#endif + +void smp_send_stop(void) +{ +	unsigned long timeout; + +	if (num_online_cpus() > 1) { +		cpumask_t mask; + +		cpumask_copy(&mask, cpu_online_mask); +		cpu_clear(smp_processor_id(), mask); + +		smp_cross_call(&mask, IPI_CPU_STOP); +	} + +	/* Wait up to one second for other CPUs to stop */ +	timeout = USEC_PER_SEC; +	while (num_online_cpus() > 1 && timeout--) +		udelay(1); + +	if (num_online_cpus() > 1) +		pr_warning("SMP: failed to stop secondary CPUs\n"); +} + +/* + * not supported here + */ +int setup_profiling_timer(unsigned int multiplier) +{ +	return -EINVAL; +} diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c new file mode 100644 index 00000000000..0347d38eea2 --- /dev/null +++ b/arch/arm64/kernel/smp_spin_table.c @@ -0,0 +1,114 @@ +/* + * Spin Table SMP initialisation + * + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/of.h> +#include <linux/smp.h> + +#include <asm/cacheflush.h> +#include <asm/cpu_ops.h> +#include <asm/cputype.h> +#include <asm/smp_plat.h> + +extern void secondary_holding_pen(void); +volatile unsigned long secondary_holding_pen_release = INVALID_HWID; + +static phys_addr_t cpu_release_addr[NR_CPUS]; + +/* + * Write secondary_holding_pen_release in a way that is guaranteed to be + * visible to all observers, irrespective of whether they're taking part + * in coherency or not.  This is necessary for the hotplug code to work + * reliably. + */ +static void write_pen_release(u64 val) +{ +	void *start = (void *)&secondary_holding_pen_release; +	unsigned long size = sizeof(secondary_holding_pen_release); + +	secondary_holding_pen_release = val; +	__flush_dcache_area(start, size); +} + + +static int smp_spin_table_cpu_init(struct device_node *dn, unsigned int cpu) +{ +	/* +	 * Determine the address from which the CPU is polling. +	 */ +	if (of_property_read_u64(dn, "cpu-release-addr", +				 &cpu_release_addr[cpu])) { +		pr_err("CPU %d: missing or invalid cpu-release-addr property\n", +		       cpu); + +		return -1; +	} + +	return 0; +} + +static int smp_spin_table_cpu_prepare(unsigned int cpu) +{ +	void **release_addr; + +	if (!cpu_release_addr[cpu]) +		return -ENODEV; + +	release_addr = __va(cpu_release_addr[cpu]); + +	/* +	 * We write the release address as LE regardless of the native +	 * endianess of the kernel. Therefore, any boot-loaders that +	 * read this address need to convert this address to the +	 * boot-loader's endianess before jumping. This is mandated by +	 * the boot protocol. +	 */ +	release_addr[0] = (void *) cpu_to_le64(__pa(secondary_holding_pen)); + +	__flush_dcache_area(release_addr, sizeof(release_addr[0])); + +	/* +	 * Send an event to wake up the secondary CPU. +	 */ +	sev(); + +	return 0; +} + +static int smp_spin_table_cpu_boot(unsigned int cpu) +{ +	/* +	 * Update the pen release flag. +	 */ +	write_pen_release(cpu_logical_map(cpu)); + +	/* +	 * Send an event, causing the secondaries to read pen_release. +	 */ +	sev(); + +	return 0; +} + +const struct cpu_operations smp_spin_table_ops = { +	.name		= "spin-table", +	.cpu_init	= smp_spin_table_cpu_init, +	.cpu_prepare	= smp_spin_table_cpu_prepare, +	.cpu_boot	= smp_spin_table_cpu_boot, +}; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c new file mode 100644 index 00000000000..55437ba1f5a --- /dev/null +++ b/arch/arm64/kernel/stacktrace.c @@ -0,0 +1,131 @@ +/* + * Stack tracing support + * + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/stacktrace.h> + +#include <asm/stacktrace.h> + +/* + * AArch64 PCS assigns the frame pointer to x29. 
+ * + * A simple function prologue looks like this: + * 	sub	sp, sp, #0x10 + *   	stp	x29, x30, [sp] + *	mov	x29, sp + * + * A simple function epilogue looks like this: + *	mov	sp, x29 + *	ldp	x29, x30, [sp] + *	add	sp, sp, #0x10 + */ +int notrace unwind_frame(struct stackframe *frame) +{ +	unsigned long high, low; +	unsigned long fp = frame->fp; + +	low  = frame->sp; +	high = ALIGN(low, THREAD_SIZE); + +	if (fp < low || fp > high - 0x18 || fp & 0xf) +		return -EINVAL; + +	frame->sp = fp + 0x10; +	frame->fp = *(unsigned long *)(fp); +	/* +	 * -4 here because we care about the PC at time of bl, +	 * not where the return will go. +	 */ +	frame->pc = *(unsigned long *)(fp + 8) - 4; + +	return 0; +} + +void notrace walk_stackframe(struct stackframe *frame, +		     int (*fn)(struct stackframe *, void *), void *data) +{ +	while (1) { +		int ret; + +		if (fn(frame, data)) +			break; +		ret = unwind_frame(frame); +		if (ret < 0) +			break; +	} +} +EXPORT_SYMBOL(walk_stackframe); + +#ifdef CONFIG_STACKTRACE +struct stack_trace_data { +	struct stack_trace *trace; +	unsigned int no_sched_functions; +	unsigned int skip; +}; + +static int save_trace(struct stackframe *frame, void *d) +{ +	struct stack_trace_data *data = d; +	struct stack_trace *trace = data->trace; +	unsigned long addr = frame->pc; + +	if (data->no_sched_functions && in_sched_functions(addr)) +		return 0; +	if (data->skip) { +		data->skip--; +		return 0; +	} + +	trace->entries[trace->nr_entries++] = addr; + +	return trace->nr_entries >= trace->max_entries; +} + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ +	struct stack_trace_data data; +	struct stackframe frame; + +	data.trace = trace; +	data.skip = trace->skip; + +	if (tsk != current) { +		data.no_sched_functions = 1; +		frame.fp = thread_saved_fp(tsk); +		frame.sp = thread_saved_sp(tsk); +		frame.pc = thread_saved_pc(tsk); +	} else { +		register unsigned long current_sp asm("sp"); +		data.no_sched_functions = 0; +		frame.fp = (unsigned long)__builtin_frame_address(0); +		frame.sp = current_sp; +		frame.pc = (unsigned long)save_stack_trace_tsk; +	} + +	walk_stackframe(&frame, save_trace, &data); +	if (trace->nr_entries < trace->max_entries) +		trace->entries[trace->nr_entries++] = ULONG_MAX; +} + +void save_stack_trace(struct stack_trace *trace) +{ +	save_stack_trace_tsk(current, trace); +} +EXPORT_SYMBOL_GPL(save_stack_trace); +#endif diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c new file mode 100644 index 00000000000..1fa9ce4afd8 --- /dev/null +++ b/arch/arm64/kernel/suspend.c @@ -0,0 +1,140 @@ +#include <linux/percpu.h> +#include <linux/slab.h> +#include <asm/cacheflush.h> +#include <asm/cpu_ops.h> +#include <asm/debug-monitors.h> +#include <asm/pgtable.h> +#include <asm/memory.h> +#include <asm/smp_plat.h> +#include <asm/suspend.h> +#include <asm/tlbflush.h> + +extern int __cpu_suspend(unsigned long); +/* + * This is called by __cpu_suspend() to save the state, and do whatever + * flushing is required to ensure that when the CPU goes to sleep we have + * the necessary data available when the caches are not searched. 
+ * + * @arg: Argument to pass to suspend operations + * @ptr: CPU context virtual address + * @save_ptr: address of the location where the context physical address + *            must be saved + */ +int __cpu_suspend_finisher(unsigned long arg, struct cpu_suspend_ctx *ptr, +			   phys_addr_t *save_ptr) +{ +	int cpu = smp_processor_id(); + +	*save_ptr = virt_to_phys(ptr); + +	cpu_do_suspend(ptr); +	/* +	 * Only flush the context that must be retrieved with the MMU +	 * off. VA primitives ensure the flush is applied to all +	 * cache levels so context is pushed to DRAM. +	 */ +	__flush_dcache_area(ptr, sizeof(*ptr)); +	__flush_dcache_area(save_ptr, sizeof(*save_ptr)); + +	return cpu_ops[cpu]->cpu_suspend(arg); +} + +/* + * This hook is provided so that cpu_suspend code can restore HW + * breakpoints as early as possible in the resume path, before reenabling + * debug exceptions. Code cannot be run from a CPU PM notifier since by the + * time the notifier runs debug exceptions might have been enabled already, + * with HW breakpoints registers content still in an unknown state. + */ +void (*hw_breakpoint_restore)(void *); +void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) +{ +	/* Prevent multiple restore hook initializations */ +	if (WARN_ON(hw_breakpoint_restore)) +		return; +	hw_breakpoint_restore = hw_bp_restore; +} + +/** + * cpu_suspend + * + * @arg: argument to pass to the finisher function + */ +int cpu_suspend(unsigned long arg) +{ +	struct mm_struct *mm = current->active_mm; +	int ret, cpu = smp_processor_id(); +	unsigned long flags; + +	/* +	 * If cpu_ops have not been registered or suspend +	 * has not been initialized, cpu_suspend call fails early. +	 */ +	if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend) +		return -EOPNOTSUPP; + +	/* +	 * From this point debug exceptions are disabled to prevent +	 * updates to mdscr register (saved and restored along with +	 * general purpose registers) from kernel debuggers. +	 */ +	local_dbg_save(flags); + +	/* +	 * mm context saved on the stack, it will be restored when +	 * the cpu comes out of reset through the identity mapped +	 * page tables, so that the thread address space is properly +	 * set-up on function return. +	 */ +	ret = __cpu_suspend(arg); +	if (ret == 0) { +		cpu_switch_mm(mm->pgd, mm); +		flush_tlb_all(); + +		/* +		 * Restore per-cpu offset before any kernel +		 * subsystem relying on it has a chance to run. +		 */ +		set_my_cpu_offset(per_cpu_offset(cpu)); + +		/* +		 * Restore HW breakpoint registers to sane values +		 * before debug exceptions are possibly reenabled +		 * through local_dbg_restore. +		 */ +		if (hw_breakpoint_restore) +			hw_breakpoint_restore(NULL); +	} + +	/* +	 * Restore pstate flags. OS lock and mdscr have been already +	 * restored, so from this point onwards, debugging is fully +	 * renabled if it was enabled when core started shutdown. 
+	 */ +	local_dbg_restore(flags); + +	return ret; +} + +extern struct sleep_save_sp sleep_save_sp; +extern phys_addr_t sleep_idmap_phys; + +static int cpu_suspend_init(void) +{ +	void *ctx_ptr; + +	/* ctx_ptr is an array of physical addresses */ +	ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL); + +	if (WARN_ON(!ctx_ptr)) +		return -ENOMEM; + +	sleep_save_sp.save_ptr_stash = ctx_ptr; +	sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr); +	sleep_idmap_phys = virt_to_phys(idmap_pg_dir); +	__flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp)); +	__flush_dcache_area(&sleep_idmap_phys, sizeof(sleep_idmap_phys)); + +	return 0; +} +early_initcall(cpu_suspend_init); diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c new file mode 100644 index 00000000000..3fa98ff14f0 --- /dev/null +++ b/arch/arm64/kernel/sys.c @@ -0,0 +1,56 @@ +/* + * AArch64-specific system calls implementation + * + * Copyright (C) 2012 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/compiler.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/syscalls.h> + +asmlinkage long sys_mmap(unsigned long addr, unsigned long len, +			 unsigned long prot, unsigned long flags, +			 unsigned long fd, off_t off) +{ +	if (offset_in_page(off) != 0) +		return -EINVAL; + +	return sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); +} + +/* + * Wrappers to pass the pt_regs argument. + */ +#define sys_rt_sigreturn	sys_rt_sigreturn_wrapper + +#include <asm/syscalls.h> + +#undef __SYSCALL +#define __SYSCALL(nr, sym)	[nr] = sym, + +/* + * The sys_call_table array must be 4K aligned to be accessible from + * kernel/entry.S. + */ +void *sys_call_table[__NR_syscalls] __aligned(4096) = { +	[0 ... __NR_syscalls - 1] = sys_ni_syscall, +#include <asm/unistd.h> +}; diff --git a/arch/arm64/kernel/sys32.S b/arch/arm64/kernel/sys32.S new file mode 100644 index 00000000000..423a5b3fc2b --- /dev/null +++ b/arch/arm64/kernel/sys32.S @@ -0,0 +1,115 @@ +/* + * Compat system call wrappers + * + * Copyright (C) 2012 ARM Ltd. + * Authors: Will Deacon <will.deacon@arm.com> + *	    Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/linkage.h> + +#include <asm/assembler.h> +#include <asm/asm-offsets.h> + +/* + * System call wrappers for the AArch32 compatibility layer. + */ + +compat_sys_sigreturn_wrapper: +	mov	x0, sp +	mov	x27, #0		// prevent syscall restart handling (why) +	b	compat_sys_sigreturn +ENDPROC(compat_sys_sigreturn_wrapper) + +compat_sys_rt_sigreturn_wrapper: +	mov	x0, sp +	mov	x27, #0		// prevent syscall restart handling (why) +	b	compat_sys_rt_sigreturn +ENDPROC(compat_sys_rt_sigreturn_wrapper) + +compat_sys_statfs64_wrapper: +	mov	w3, #84 +	cmp	w1, #88 +	csel	w1, w3, w1, eq +	b	compat_sys_statfs64 +ENDPROC(compat_sys_statfs64_wrapper) + +compat_sys_fstatfs64_wrapper: +	mov	w3, #84 +	cmp	w1, #88 +	csel	w1, w3, w1, eq +	b	compat_sys_fstatfs64 +ENDPROC(compat_sys_fstatfs64_wrapper) + +/* + * Wrappers for AArch32 syscalls that either take 64-bit parameters + * in registers or that take 32-bit parameters which require sign + * extension. + */ +compat_sys_pread64_wrapper: +	regs_to_64	x3, x4, x5 +	b	sys_pread64 +ENDPROC(compat_sys_pread64_wrapper) + +compat_sys_pwrite64_wrapper: +	regs_to_64	x3, x4, x5 +	b	sys_pwrite64 +ENDPROC(compat_sys_pwrite64_wrapper) + +compat_sys_truncate64_wrapper: +	regs_to_64	x1, x2, x3 +	b	sys_truncate +ENDPROC(compat_sys_truncate64_wrapper) + +compat_sys_ftruncate64_wrapper: +	regs_to_64	x1, x2, x3 +	b	sys_ftruncate +ENDPROC(compat_sys_ftruncate64_wrapper) + +compat_sys_readahead_wrapper: +	regs_to_64	x1, x2, x3 +	mov	w2, w4 +	b	sys_readahead +ENDPROC(compat_sys_readahead_wrapper) + +compat_sys_fadvise64_64_wrapper: +	mov	w6, w1 +	regs_to_64	x1, x2, x3 +	regs_to_64	x2, x4, x5 +	mov	w3, w6 +	b	sys_fadvise64_64 +ENDPROC(compat_sys_fadvise64_64_wrapper) + +compat_sys_sync_file_range2_wrapper: +	regs_to_64	x2, x2, x3 +	regs_to_64	x3, x4, x5 +	b	sys_sync_file_range2 +ENDPROC(compat_sys_sync_file_range2_wrapper) + +compat_sys_fallocate_wrapper: +	regs_to_64	x2, x2, x3 +	regs_to_64	x3, x4, x5 +	b	sys_fallocate +ENDPROC(compat_sys_fallocate_wrapper) + +#undef __SYSCALL +#define __SYSCALL(x, y)		.quad	y	// x + +/* + * The system calls table must be 4KB aligned. + */ +	.align	12 +ENTRY(compat_sys_call_table) +#include <asm/unistd32.h> diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c new file mode 100644 index 00000000000..26e9c4eeaba --- /dev/null +++ b/arch/arm64/kernel/sys_compat.c @@ -0,0 +1,88 @@ +/* + * Based on arch/arm/kernel/sys_arm.c + * + * Copyright (C) People who wrote linux/arch/i386/kernel/sys_i386.c + * Copyright (C) 1995, 1996 Russell King. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/compat.h> +#include <linux/personality.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> + +#include <asm/cacheflush.h> +#include <asm/unistd32.h> + +static inline void +do_compat_cache_op(unsigned long start, unsigned long end, int flags) +{ +	struct mm_struct *mm = current->active_mm; +	struct vm_area_struct *vma; + +	if (end < start || flags) +		return; + +	down_read(&mm->mmap_sem); +	vma = find_vma(mm, start); +	if (vma && vma->vm_start < end) { +		if (start < vma->vm_start) +			start = vma->vm_start; +		if (end > vma->vm_end) +			end = vma->vm_end; +		up_read(&mm->mmap_sem); +		__flush_cache_user_range(start & PAGE_MASK, PAGE_ALIGN(end)); +		return; +	} +	up_read(&mm->mmap_sem); +} + +/* + * Handle all unrecognised system calls. + */ +long compat_arm_syscall(struct pt_regs *regs) +{ +	unsigned int no = regs->regs[7]; + +	switch (no) { +	/* +	 * Flush a region from virtual address 'r0' to virtual address 'r1' +	 * _exclusive_.  There is no alignment requirement on either address; +	 * user space does not need to know the hardware cache layout. +	 * +	 * r2 contains flags.  It should ALWAYS be passed as ZERO until it +	 * is defined to be something else.  For now we ignore it, but may +	 * the fires of hell burn in your belly if you break this rule. ;) +	 * +	 * (at a later date, we may want to allow this call to not flush +	 * various aspects of the cache.  Passing '0' will guarantee that +	 * everything necessary gets flushed to maintain consistency in +	 * the specified region). +	 */ +	case __ARM_NR_compat_cacheflush: +		do_compat_cache_op(regs->regs[0], regs->regs[1], regs->regs[2]); +		return 0; + +	case __ARM_NR_compat_set_tls: +		current->thread.tp_value = regs->regs[0]; +		asm ("msr tpidrro_el0, %0" : : "r" (regs->regs[0])); +		return 0; + +	default: +		return -ENOSYS; +	} +} diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c new file mode 100644 index 00000000000..1a7125c3099 --- /dev/null +++ b/arch/arm64/kernel/time.c @@ -0,0 +1,81 @@ +/* + * Based on arch/arm/kernel/time.c + * + * Copyright (C) 1991, 1992, 1995  Linus Torvalds + * Modifications for ARM (C) 1994-2001 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/clockchips.h> +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/time.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/timex.h> +#include <linux/errno.h> +#include <linux/profile.h> +#include <linux/syscore_ops.h> +#include <linux/timer.h> +#include <linux/irq.h> +#include <linux/delay.h> +#include <linux/clocksource.h> +#include <linux/clk-provider.h> + +#include <clocksource/arm_arch_timer.h> + +#include <asm/thread_info.h> +#include <asm/stacktrace.h> + +#ifdef CONFIG_SMP +unsigned long profile_pc(struct pt_regs *regs) +{ +	struct stackframe frame; + +	if (!in_lock_functions(regs->pc)) +		return regs->pc; + +	frame.fp = regs->regs[29]; +	frame.sp = regs->sp; +	frame.pc = regs->pc; +	do { +		int ret = unwind_frame(&frame); +		if (ret < 0) +			return 0; +	} while (in_lock_functions(frame.pc)); + +	return frame.pc; +} +EXPORT_SYMBOL(profile_pc); +#endif + +void __init time_init(void) +{ +	u32 arch_timer_rate; + +	of_clk_init(NULL); +	clocksource_of_init(); + +	tick_setup_hrtimer_broadcast(); + +	arch_timer_rate = arch_timer_get_rate(); +	if (!arch_timer_rate) +		panic("Unable to initialise architected timer.\n"); + +	/* Calibrate the delay loop directly */ +	lpj_fine = arch_timer_rate / HZ; +} diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c new file mode 100644 index 00000000000..43514f90591 --- /dev/null +++ b/arch/arm64/kernel/topology.c @@ -0,0 +1,283 @@ +/* + * arch/arm64/kernel/topology.c + * + * Copyright (C) 2011,2013,2014 Linaro Limited. + * + * Based on the arm32 version written by Vincent Guittot in turn based on + * arch/sh/kernel/topology.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License.  See the file "COPYING" in the main directory of this archive + * for more details. 
+ */ + +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/node.h> +#include <linux/nodemask.h> +#include <linux/of.h> +#include <linux/sched.h> + +#include <asm/topology.h> + +static int __init get_cpu_for_node(struct device_node *node) +{ +	struct device_node *cpu_node; +	int cpu; + +	cpu_node = of_parse_phandle(node, "cpu", 0); +	if (!cpu_node) +		return -1; + +	for_each_possible_cpu(cpu) { +		if (of_get_cpu_node(cpu, NULL) == cpu_node) { +			of_node_put(cpu_node); +			return cpu; +		} +	} + +	pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name); + +	of_node_put(cpu_node); +	return -1; +} + +static int __init parse_core(struct device_node *core, int cluster_id, +			     int core_id) +{ +	char name[10]; +	bool leaf = true; +	int i = 0; +	int cpu; +	struct device_node *t; + +	do { +		snprintf(name, sizeof(name), "thread%d", i); +		t = of_get_child_by_name(core, name); +		if (t) { +			leaf = false; +			cpu = get_cpu_for_node(t); +			if (cpu >= 0) { +				cpu_topology[cpu].cluster_id = cluster_id; +				cpu_topology[cpu].core_id = core_id; +				cpu_topology[cpu].thread_id = i; +			} else { +				pr_err("%s: Can't get CPU for thread\n", +				       t->full_name); +				of_node_put(t); +				return -EINVAL; +			} +			of_node_put(t); +		} +		i++; +	} while (t); + +	cpu = get_cpu_for_node(core); +	if (cpu >= 0) { +		if (!leaf) { +			pr_err("%s: Core has both threads and CPU\n", +			       core->full_name); +			return -EINVAL; +		} + +		cpu_topology[cpu].cluster_id = cluster_id; +		cpu_topology[cpu].core_id = core_id; +	} else if (leaf) { +		pr_err("%s: Can't get CPU for leaf core\n", core->full_name); +		return -EINVAL; +	} + +	return 0; +} + +static int __init parse_cluster(struct device_node *cluster, int depth) +{ +	char name[10]; +	bool leaf = true; +	bool has_cores = false; +	struct device_node *c; +	static int cluster_id __initdata; +	int core_id = 0; +	int i, ret; + +	/* +	 * First check for child clusters; we currently ignore any +	 * information about the nesting of clusters and present the +	 * scheduler with a flat list of them. +	 */ +	i = 0; +	do { +		snprintf(name, sizeof(name), "cluster%d", i); +		c = of_get_child_by_name(cluster, name); +		if (c) { +			leaf = false; +			ret = parse_cluster(c, depth + 1); +			of_node_put(c); +			if (ret != 0) +				return ret; +		} +		i++; +	} while (c); + +	/* Now check for cores */ +	i = 0; +	do { +		snprintf(name, sizeof(name), "core%d", i); +		c = of_get_child_by_name(cluster, name); +		if (c) { +			has_cores = true; + +			if (depth == 0) { +				pr_err("%s: cpu-map children should be clusters\n", +				       c->full_name); +				of_node_put(c); +				return -EINVAL; +			} + +			if (leaf) { +				ret = parse_core(c, cluster_id, core_id++); +			} else { +				pr_err("%s: Non-leaf cluster with core %s\n", +				       cluster->full_name, name); +				ret = -EINVAL; +			} + +			of_node_put(c); +			if (ret != 0) +				return ret; +		} +		i++; +	} while (c); + +	if (leaf && !has_cores) +		pr_warn("%s: empty cluster\n", cluster->full_name); + +	if (leaf) +		cluster_id++; + +	return 0; +} + +static int __init parse_dt_topology(void) +{ +	struct device_node *cn, *map; +	int ret = 0; +	int cpu; + +	cn = of_find_node_by_path("/cpus"); +	if (!cn) { +		pr_err("No CPU information found in DT\n"); +		return 0; +	} + +	/* +	 * When topology is provided cpu-map is essentially a root +	 * cluster with restricted subnodes. 
+	 */ +	map = of_get_child_by_name(cn, "cpu-map"); +	if (!map) +		goto out; + +	ret = parse_cluster(map, 0); +	if (ret != 0) +		goto out_map; + +	/* +	 * Check that all cores are in the topology; the SMP code will +	 * only mark cores described in the DT as possible. +	 */ +	for_each_possible_cpu(cpu) { +		if (cpu_topology[cpu].cluster_id == -1) { +			pr_err("CPU%d: No topology information specified\n", +			       cpu); +			ret = -EINVAL; +		} +	} + +out_map: +	of_node_put(map); +out: +	of_node_put(cn); +	return ret; +} + +/* + * cpu topology table + */ +struct cpu_topology cpu_topology[NR_CPUS]; +EXPORT_SYMBOL_GPL(cpu_topology); + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ +	return &cpu_topology[cpu].core_sibling; +} + +static void update_siblings_masks(unsigned int cpuid) +{ +	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; +	int cpu; + +	if (cpuid_topo->cluster_id == -1) { +		/* +		 * DT does not contain topology information for this cpu. +		 */ +		pr_debug("CPU%u: No topology information configured\n", cpuid); +		return; +	} + +	/* update core and thread sibling masks */ +	for_each_possible_cpu(cpu) { +		cpu_topo = &cpu_topology[cpu]; + +		if (cpuid_topo->cluster_id != cpu_topo->cluster_id) +			continue; + +		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); +		if (cpu != cpuid) +			cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + +		if (cpuid_topo->core_id != cpu_topo->core_id) +			continue; + +		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); +		if (cpu != cpuid) +			cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); +	} +} + +void store_cpu_topology(unsigned int cpuid) +{ +	update_siblings_masks(cpuid); +} + +static void __init reset_cpu_topology(void) +{ +	unsigned int cpu; + +	for_each_possible_cpu(cpu) { +		struct cpu_topology *cpu_topo = &cpu_topology[cpu]; + +		cpu_topo->thread_id = -1; +		cpu_topo->core_id = 0; +		cpu_topo->cluster_id = -1; + +		cpumask_clear(&cpu_topo->core_sibling); +		cpumask_set_cpu(cpu, &cpu_topo->core_sibling); +		cpumask_clear(&cpu_topo->thread_sibling); +		cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); +	} +} + +void __init init_cpu_topology(void) +{ +	reset_cpu_topology(); + +	/* +	 * Discard anything that was parsed if we hit an error so we +	 * don't use partial information. +	 */ +	if (parse_dt_topology()) +		reset_cpu_topology(); +} diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c new file mode 100644 index 00000000000..c43cfa9b830 --- /dev/null +++ b/arch/arm64/kernel/traps.c @@ -0,0 +1,350 @@ +/* + * Based on arch/arm/kernel/traps.c + * + * Copyright (C) 1995-2009 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/signal.h> +#include <linux/personality.h> +#include <linux/kallsyms.h> +#include <linux/spinlock.h> +#include <linux/uaccess.h> +#include <linux/hardirq.h> +#include <linux/kdebug.h> +#include <linux/module.h> +#include <linux/kexec.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/syscalls.h> + +#include <asm/atomic.h> +#include <asm/debug-monitors.h> +#include <asm/traps.h> +#include <asm/stacktrace.h> +#include <asm/exception.h> +#include <asm/system_misc.h> + +static const char *handler[]= { +	"Synchronous Abort", +	"IRQ", +	"FIQ", +	"Error" +}; + +int show_unhandled_signals = 1; + +/* + * Dump out the contents of some memory nicely... + */ +static void dump_mem(const char *lvl, const char *str, unsigned long bottom, +		     unsigned long top) +{ +	unsigned long first; +	mm_segment_t fs; +	int i; + +	/* +	 * We need to switch to kernel mode so that we can use __get_user +	 * to safely read from kernel space.  Note that we now dump the +	 * code first, just in case the backtrace kills us. +	 */ +	fs = get_fs(); +	set_fs(KERNEL_DS); + +	printk("%s%s(0x%016lx to 0x%016lx)\n", lvl, str, bottom, top); + +	for (first = bottom & ~31; first < top; first += 32) { +		unsigned long p; +		char str[sizeof(" 12345678") * 8 + 1]; + +		memset(str, ' ', sizeof(str)); +		str[sizeof(str) - 1] = '\0'; + +		for (p = first, i = 0; i < 8 && p < top; i++, p += 4) { +			if (p >= bottom && p < top) { +				unsigned int val; +				if (__get_user(val, (unsigned int *)p) == 0) +					sprintf(str + i * 9, " %08x", val); +				else +					sprintf(str + i * 9, " ????????"); +			} +		} +		printk("%s%04lx:%s\n", lvl, first & 0xffff, str); +	} + +	set_fs(fs); +} + +static void dump_backtrace_entry(unsigned long where, unsigned long stack) +{ +	print_ip_sym(where); +	if (in_exception_text(where)) +		dump_mem("", "Exception stack", stack, +			 stack + sizeof(struct pt_regs)); +} + +static void dump_instr(const char *lvl, struct pt_regs *regs) +{ +	unsigned long addr = instruction_pointer(regs); +	mm_segment_t fs; +	char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; +	int i; + +	/* +	 * We need to switch to kernel mode so that we can use __get_user +	 * to safely read from kernel space.  Note that we now dump the +	 * code first, just in case the backtrace kills us. +	 */ +	fs = get_fs(); +	set_fs(KERNEL_DS); + +	for (i = -4; i < 1; i++) { +		unsigned int val, bad; + +		bad = __get_user(val, &((u32 *)addr)[i]); + +		if (!bad) +			p += sprintf(p, i == 0 ? 
"(%08x) " : "%08x ", val); +		else { +			p += sprintf(p, "bad PC value"); +			break; +		} +	} +	printk("%sCode: %s\n", lvl, str); + +	set_fs(fs); +} + +static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) +{ +	struct stackframe frame; +	const register unsigned long current_sp asm ("sp"); + +	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); + +	if (!tsk) +		tsk = current; + +	if (regs) { +		frame.fp = regs->regs[29]; +		frame.sp = regs->sp; +		frame.pc = regs->pc; +	} else if (tsk == current) { +		frame.fp = (unsigned long)__builtin_frame_address(0); +		frame.sp = current_sp; +		frame.pc = (unsigned long)dump_backtrace; +	} else { +		/* +		 * task blocked in __switch_to +		 */ +		frame.fp = thread_saved_fp(tsk); +		frame.sp = thread_saved_sp(tsk); +		frame.pc = thread_saved_pc(tsk); +	} + +	printk("Call trace:\n"); +	while (1) { +		unsigned long where = frame.pc; +		int ret; + +		ret = unwind_frame(&frame); +		if (ret < 0) +			break; +		dump_backtrace_entry(where, frame.sp); +	} +} + +void show_stack(struct task_struct *tsk, unsigned long *sp) +{ +	dump_backtrace(NULL, tsk); +	barrier(); +} + +#ifdef CONFIG_PREEMPT +#define S_PREEMPT " PREEMPT" +#else +#define S_PREEMPT "" +#endif +#ifdef CONFIG_SMP +#define S_SMP " SMP" +#else +#define S_SMP "" +#endif + +static int __die(const char *str, int err, struct thread_info *thread, +		 struct pt_regs *regs) +{ +	struct task_struct *tsk = thread->task; +	static int die_counter; +	int ret; + +	pr_emerg("Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n", +		 str, err, ++die_counter); + +	/* trap and error numbers are mostly meaningless on ARM */ +	ret = notify_die(DIE_OOPS, str, regs, err, 0, SIGSEGV); +	if (ret == NOTIFY_STOP) +		return ret; + +	print_modules(); +	__show_regs(regs); +	pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n", +		 TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1); + +	if (!user_mode(regs) || in_interrupt()) { +		dump_mem(KERN_EMERG, "Stack: ", regs->sp, +			 THREAD_SIZE + (unsigned long)task_stack_page(tsk)); +		dump_backtrace(regs, tsk); +		dump_instr(KERN_EMERG, regs); +	} + +	return ret; +} + +static DEFINE_RAW_SPINLOCK(die_lock); + +/* + * This function is protected against re-entrancy. 
+ */ +void die(const char *str, struct pt_regs *regs, int err) +{ +	struct thread_info *thread = current_thread_info(); +	int ret; + +	oops_enter(); + +	raw_spin_lock_irq(&die_lock); +	console_verbose(); +	bust_spinlocks(1); +	ret = __die(str, err, thread, regs); + +	if (regs && kexec_should_crash(thread->task)) +		crash_kexec(regs); + +	bust_spinlocks(0); +	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); +	raw_spin_unlock_irq(&die_lock); +	oops_exit(); + +	if (in_interrupt()) +		panic("Fatal exception in interrupt"); +	if (panic_on_oops) +		panic("Fatal exception"); +	if (ret != NOTIFY_STOP) +		do_exit(SIGSEGV); +} + +void arm64_notify_die(const char *str, struct pt_regs *regs, +		      struct siginfo *info, int err) +{ +	if (user_mode(regs)) { +		current->thread.fault_address = 0; +		current->thread.fault_code = err; +		force_sig_info(info->si_signo, info, current); +	} else { +		die(str, regs, err); +	} +} + +asmlinkage void __exception do_undefinstr(struct pt_regs *regs) +{ +	siginfo_t info; +	void __user *pc = (void __user *)instruction_pointer(regs); + +	/* check for AArch32 breakpoint instructions */ +	if (!aarch32_break_handler(regs)) +		return; + +	if (show_unhandled_signals && unhandled_signal(current, SIGILL) && +	    printk_ratelimit()) { +		pr_info("%s[%d]: undefined instruction: pc=%p\n", +			current->comm, task_pid_nr(current), pc); +		dump_instr(KERN_INFO, regs); +	} + +	info.si_signo = SIGILL; +	info.si_errno = 0; +	info.si_code  = ILL_ILLOPC; +	info.si_addr  = pc; + +	arm64_notify_die("Oops - undefined instruction", regs, &info, 0); +} + +long compat_arm_syscall(struct pt_regs *regs); + +asmlinkage long do_ni_syscall(struct pt_regs *regs) +{ +#ifdef CONFIG_COMPAT +	long ret; +	if (is_compat_task()) { +		ret = compat_arm_syscall(regs); +		if (ret != -ENOSYS) +			return ret; +	} +#endif + +	if (show_unhandled_signals && printk_ratelimit()) { +		pr_info("%s[%d]: syscall %d\n", current->comm, +			task_pid_nr(current), (int)regs->syscallno); +		dump_instr("", regs); +		if (user_mode(regs)) +			__show_regs(regs); +	} + +	return sys_ni_syscall(); +} + +/* + * bad_mode handles the impossible case in the exception vector. + */ +asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) +{ +	siginfo_t info; +	void __user *pc = (void __user *)instruction_pointer(regs); +	console_verbose(); + +	pr_crit("Bad mode in %s handler detected, code 0x%08x\n", +		handler[reason], esr); +	__show_regs(regs); + +	info.si_signo = SIGILL; +	info.si_errno = 0; +	info.si_code  = ILL_ILLOPC; +	info.si_addr  = pc; + +	arm64_notify_die("Oops - bad mode", regs, &info, 0); +} + +void __pte_error(const char *file, int line, unsigned long val) +{ +	printk("%s:%d: bad pte %016lx.\n", file, line, val); +} + +void __pmd_error(const char *file, int line, unsigned long val) +{ +	printk("%s:%d: bad pmd %016lx.\n", file, line, val); +} + +void __pgd_error(const char *file, int line, unsigned long val) +{ +	printk("%s:%d: bad pgd %016lx.\n", file, line, val); +} + +void __init trap_init(void) +{ +	return; +} diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c new file mode 100644 index 00000000000..50384fec56c --- /dev/null +++ b/arch/arm64/kernel/vdso.c @@ -0,0 +1,242 @@ +/* + * VDSO implementation for AArch64 and vector page setup for AArch32. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/kernel.h> +#include <linux/clocksource.h> +#include <linux/elf.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/gfp.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/slab.h> +#include <linux/timekeeper_internal.h> +#include <linux/vmalloc.h> + +#include <asm/cacheflush.h> +#include <asm/signal32.h> +#include <asm/vdso.h> +#include <asm/vdso_datapage.h> + +extern char vdso_start, vdso_end; +static unsigned long vdso_pages; +static struct page **vdso_pagelist; + +/* + * The vDSO data page. + */ +static union { +	struct vdso_data	data; +	u8			page[PAGE_SIZE]; +} vdso_data_store __page_aligned_data; +struct vdso_data *vdso_data = &vdso_data_store.data; + +#ifdef CONFIG_COMPAT +/* + * Create and map the vectors page for AArch32 tasks. + */ +static struct page *vectors_page[1]; + +static int alloc_vectors_page(void) +{ +	extern char __kuser_helper_start[], __kuser_helper_end[]; +	extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; + +	int kuser_sz = __kuser_helper_end - __kuser_helper_start; +	int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; +	unsigned long vpage; + +	vpage = get_zeroed_page(GFP_ATOMIC); + +	if (!vpage) +		return -ENOMEM; + +	/* kuser helpers */ +	memcpy((void *)vpage + 0x1000 - kuser_sz, __kuser_helper_start, +		kuser_sz); + +	/* sigreturn code */ +	memcpy((void *)vpage + AARCH32_KERN_SIGRET_CODE_OFFSET, +               __aarch32_sigret_code_start, sigret_sz); + +	flush_icache_range(vpage, vpage + PAGE_SIZE); +	vectors_page[0] = virt_to_page(vpage); + +	return 0; +} +arch_initcall(alloc_vectors_page); + +int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) +{ +	struct mm_struct *mm = current->mm; +	unsigned long addr = AARCH32_VECTORS_BASE; +	int ret; + +	down_write(&mm->mmap_sem); +	current->mm->context.vdso = (void *)addr; + +	/* Map vectors page at the high address. */ +	ret = install_special_mapping(mm, addr, PAGE_SIZE, +				      VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC, +				      vectors_page); + +	up_write(&mm->mmap_sem); + +	return ret; +} +#endif /* CONFIG_COMPAT */ + +static int __init vdso_init(void) +{ +	int i; + +	if (memcmp(&vdso_start, "\177ELF", 4)) { +		pr_err("vDSO is not a valid ELF object!\n"); +		return -EINVAL; +	} + +	vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; +	pr_info("vdso: %ld pages (%ld code, %ld data) at base %p\n", +		vdso_pages + 1, vdso_pages, 1L, &vdso_start); + +	/* Allocate the vDSO pagelist, plus a page for the data. */ +	vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), +				GFP_KERNEL); +	if (vdso_pagelist == NULL) +		return -ENOMEM; + +	/* Grab the vDSO code pages. */ +	for (i = 0; i < vdso_pages; i++) +		vdso_pagelist[i] = virt_to_page(&vdso_start + i * PAGE_SIZE); + +	/* Grab the vDSO data page. 
*/ +	vdso_pagelist[i] = virt_to_page(vdso_data); + +	return 0; +} +arch_initcall(vdso_init); + +int arch_setup_additional_pages(struct linux_binprm *bprm, +				int uses_interp) +{ +	struct mm_struct *mm = current->mm; +	unsigned long vdso_base, vdso_mapping_len; +	int ret; + +	/* Be sure to map the data page */ +	vdso_mapping_len = (vdso_pages + 1) << PAGE_SHIFT; + +	down_write(&mm->mmap_sem); +	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); +	if (IS_ERR_VALUE(vdso_base)) { +		ret = vdso_base; +		goto up_fail; +	} +	mm->context.vdso = (void *)vdso_base; + +	ret = install_special_mapping(mm, vdso_base, vdso_mapping_len, +				      VM_READ|VM_EXEC| +				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, +				      vdso_pagelist); +	if (ret) { +		mm->context.vdso = NULL; +		goto up_fail; +	} + +up_fail: +	up_write(&mm->mmap_sem); + +	return ret; +} + +const char *arch_vma_name(struct vm_area_struct *vma) +{ +	/* +	 * We can re-use the vdso pointer in mm_context_t for identifying +	 * the vectors page for compat applications. The vDSO will always +	 * sit above TASK_UNMAPPED_BASE and so we don't need to worry about +	 * it conflicting with the vectors base. +	 */ +	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) { +#ifdef CONFIG_COMPAT +		if (vma->vm_start == AARCH32_VECTORS_BASE) +			return "[vectors]"; +#endif +		return "[vdso]"; +	} + +	return NULL; +} + +/* + * We define AT_SYSINFO_EHDR, so we need these function stubs to keep + * Linux happy. + */ +int in_gate_area_no_mm(unsigned long addr) +{ +	return 0; +} + +int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ +	return 0; +} + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ +	return NULL; +} + +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. + */ +void update_vsyscall(struct timekeeper *tk) +{ +	struct timespec xtime_coarse; +	u32 use_syscall = strcmp(tk->clock->name, "arch_sys_counter"); + +	++vdso_data->tb_seq_count; +	smp_wmb(); + +	xtime_coarse = __current_kernel_time(); +	vdso_data->use_syscall			= use_syscall; +	vdso_data->xtime_coarse_sec		= xtime_coarse.tv_sec; +	vdso_data->xtime_coarse_nsec		= xtime_coarse.tv_nsec; +	vdso_data->wtm_clock_sec		= tk->wall_to_monotonic.tv_sec; +	vdso_data->wtm_clock_nsec		= tk->wall_to_monotonic.tv_nsec; + +	if (!use_syscall) { +		vdso_data->cs_cycle_last	= tk->clock->cycle_last; +		vdso_data->xtime_clock_sec	= tk->xtime_sec; +		vdso_data->xtime_clock_nsec	= tk->xtime_nsec; +		vdso_data->cs_mult		= tk->mult; +		vdso_data->cs_shift		= tk->shift; +	} + +	smp_wmb(); +	++vdso_data->tb_seq_count; +} + +void update_vsyscall_tz(void) +{ +	vdso_data->tz_minuteswest	= sys_tz.tz_minuteswest; +	vdso_data->tz_dsttime		= sys_tz.tz_dsttime; +} diff --git a/arch/arm64/kernel/vdso/.gitignore b/arch/arm64/kernel/vdso/.gitignore new file mode 100644 index 00000000000..b8cc94e9698 --- /dev/null +++ b/arch/arm64/kernel/vdso/.gitignore @@ -0,0 +1,2 @@ +vdso.lds +vdso-offsets.h diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile new file mode 100644 index 00000000000..6d20b7d162d --- /dev/null +++ b/arch/arm64/kernel/vdso/Makefile @@ -0,0 +1,63 @@ +# +# Building a vDSO image for AArch64. +# +# Author: Will Deacon <will.deacon@arm.com> +# Heavily based on the vDSO Makefiles for other archs. 
+# + +obj-vdso := gettimeofday.o note.o sigreturn.o + +# Build rules +targets := $(obj-vdso) vdso.so vdso.so.dbg +obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) + +ccflags-y := -shared -fno-common -fno-builtin +ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \ +		$(call cc-ldoption, -Wl$(comma)--hash-style=sysv) + +obj-y += vdso.o +extra-y += vdso.lds vdso-offsets.h +CPPFLAGS_vdso.lds += -P -C -U$(ARCH) + +# Force dependency (incbin is bad) +$(obj)/vdso.o : $(obj)/vdso.so + +# Link rule for the .so file, .lds has to be first +$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) +	$(call if_changed,vdsold) + +# Strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE +	$(call if_changed,objcopy) + +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ +define cmd_vdsosym +	$(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ && \ +	cp $@ include/generated/ +endef + +$(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE +	$(call if_changed,vdsosym) + +# Assembly rules for the .S files +$(obj-vdso): %.o: %.S +	$(call if_changed_dep,vdsoas) + +# Actual build commands +quiet_cmd_vdsold = VDSOL $@ +      cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@ +quiet_cmd_vdsoas = VDSOA $@ +      cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< + +# Install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ +      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso.so: $(obj)/vdso.so.dbg +	@mkdir -p $(MODLIB)/vdso +	$(call cmd,vdso_install) + +vdso_install: vdso.so diff --git a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh new file mode 100755 index 00000000000..01924ff071a --- /dev/null +++ b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Author: Will Deacon <will.deacon@arm.com +# + +LC_ALL=C +sed -n -e 's/^00*/0/' -e \ +'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2\t0x\1/p' diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S new file mode 100644 index 00000000000..fe652ffd34c --- /dev/null +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -0,0 +1,249 @@ +/* + * Userspace implementations of gettimeofday() and friends. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + +#define NSEC_PER_SEC_LO16	0xca00 +#define NSEC_PER_SEC_HI16	0x3b9a + +vdso_data	.req	x6 +use_syscall	.req	w7 +seqcnt		.req	w8 + +	.macro	seqcnt_acquire +9999:	ldr	seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT] +	tbnz	seqcnt, #0, 9999b +	dmb	ishld +	ldr	use_syscall, [vdso_data, #VDSO_USE_SYSCALL] +	.endm + +	.macro	seqcnt_read, cnt +	dmb	ishld +	ldr	\cnt, [vdso_data, #VDSO_TB_SEQ_COUNT] +	.endm + +	.macro	seqcnt_check, cnt, fail +	cmp	\cnt, seqcnt +	b.ne	\fail +	.endm + +	.text + +/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */ +ENTRY(__kernel_gettimeofday) +	.cfi_startproc +	mov	x2, x30 +	.cfi_register x30, x2 + +	/* Acquire the sequence counter and get the timespec. */ +	adr	vdso_data, _vdso_data +1:	seqcnt_acquire +	cbnz	use_syscall, 4f + +	/* If tv is NULL, skip to the timezone code. */ +	cbz	x0, 2f +	bl	__do_get_tspec +	seqcnt_check w9, 1b + +	/* Convert ns to us. */ +	mov	x13, #1000 +	lsl	x13, x13, x12 +	udiv	x11, x11, x13 +	stp	x10, x11, [x0, #TVAL_TV_SEC] +2: +	/* If tz is NULL, return 0. */ +	cbz	x1, 3f +	ldp	w4, w5, [vdso_data, #VDSO_TZ_MINWEST] +	stp	w4, w5, [x1, #TZ_MINWEST] +3: +	mov	x0, xzr +	ret	x2 +4: +	/* Syscall fallback. */ +	mov	x8, #__NR_gettimeofday +	svc	#0 +	ret	x2 +	.cfi_endproc +ENDPROC(__kernel_gettimeofday) + +/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */ +ENTRY(__kernel_clock_gettime) +	.cfi_startproc +	cmp	w0, #CLOCK_REALTIME +	ccmp	w0, #CLOCK_MONOTONIC, #0x4, ne +	b.ne	2f + +	mov	x2, x30 +	.cfi_register x30, x2 + +	/* Get kernel timespec. */ +	adr	vdso_data, _vdso_data +1:	seqcnt_acquire +	cbnz	use_syscall, 7f + +	bl	__do_get_tspec +	seqcnt_check w9, 1b + +	mov	x30, x2 + +	cmp	w0, #CLOCK_MONOTONIC +	b.ne	6f + +	/* Get wtm timespec. */ +	ldp	x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] + +	/* Check the sequence counter. */ +	seqcnt_read w9 +	seqcnt_check w9, 1b +	b	4f +2: +	cmp	w0, #CLOCK_REALTIME_COARSE +	ccmp	w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne +	b.ne	8f + +	/* xtime_coarse_nsec is already right-shifted */ +	mov	x12, #0 + +	/* Get coarse timespec. */ +	adr	vdso_data, _vdso_data +3:	seqcnt_acquire +	ldp	x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] + +	/* Get wtm timespec. */ +	ldp	x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] + +	/* Check the sequence counter. */ +	seqcnt_read w9 +	seqcnt_check w9, 3b + +	cmp	w0, #CLOCK_MONOTONIC_COARSE +	b.ne	6f +4: +	/* Add on wtm timespec. */ +	add	x10, x10, x13 +	lsl	x14, x14, x12 +	add	x11, x11, x14 + +	/* Normalise the new timespec. */ +	mov	x15, #NSEC_PER_SEC_LO16 +	movk	x15, #NSEC_PER_SEC_HI16, lsl #16 +	lsl	x15, x15, x12 +	cmp	x11, x15 +	b.lt	5f +	sub	x11, x11, x15 +	add	x10, x10, #1 +5: +	cmp	x11, #0 +	b.ge	6f +	add	x11, x11, x15 +	sub	x10, x10, #1 + +6:	/* Store to the user timespec. */ +	lsr	x11, x11, x12 +	stp	x10, x11, [x1, #TSPEC_TV_SEC] +	mov	x0, xzr +	ret +7: +	mov	x30, x2 +8:	/* Syscall fallback. */ +	mov	x8, #__NR_clock_gettime +	svc	#0 +	ret +	.cfi_endproc +ENDPROC(__kernel_clock_gettime) + +/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */ +ENTRY(__kernel_clock_getres) +	.cfi_startproc +	cbz	w1, 3f + +	cmp	w0, #CLOCK_REALTIME +	ccmp	w0, #CLOCK_MONOTONIC, #0x4, ne +	b.ne	1f + +	ldr	x2, 5f +	b	2f +1: +	cmp	w0, #CLOCK_REALTIME_COARSE +	ccmp	w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne +	b.ne	4f +	ldr	x2, 6f +2: +	stp	xzr, x2, [x1] + +3:	/* res == NULL. */ +	mov	w0, wzr +	ret + +4:	/* Syscall fallback. 
*/ +	mov	x8, #__NR_clock_getres +	svc	#0 +	ret +5: +	.quad	CLOCK_REALTIME_RES +6: +	.quad	CLOCK_COARSE_RES +	.cfi_endproc +ENDPROC(__kernel_clock_getres) + +/* + * Read the current time from the architected counter. + * Expects vdso_data to be initialised. + * Clobbers the temporary registers (x9 - x15). + * Returns: + *  - w9		= vDSO sequence counter + *  - (x10, x11)	= (ts->tv_sec, shifted ts->tv_nsec) + *  - w12		= cs_shift + */ +ENTRY(__do_get_tspec) +	.cfi_startproc + +	/* Read from the vDSO data page. */ +	ldr	x10, [vdso_data, #VDSO_CS_CYCLE_LAST] +	ldp	x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] +	ldp	w11, w12, [vdso_data, #VDSO_CS_MULT] +	seqcnt_read w9 + +	/* Read the virtual counter. */ +	isb +	mrs	x15, cntvct_el0 + +	/* Calculate cycle delta and convert to ns. */ +	sub	x10, x15, x10 +	/* We can only guarantee 56 bits of precision. */ +	movn	x15, #0xff00, lsl #48 +	and	x10, x15, x10 +	mul	x10, x10, x11 + +	/* Use the kernel time to calculate the new timespec. */ +	mov	x11, #NSEC_PER_SEC_LO16 +	movk	x11, #NSEC_PER_SEC_HI16, lsl #16 +	lsl	x11, x11, x12 +	add	x15, x10, x14 +	udiv	x14, x15, x11 +	add	x10, x13, x14 +	mul	x13, x14, x11 +	sub	x11, x15, x13 + +	ret +	.cfi_endproc +ENDPROC(__do_get_tspec) diff --git a/arch/arm64/kernel/vdso/note.S b/arch/arm64/kernel/vdso/note.S new file mode 100644 index 00000000000..b82c85e5d97 --- /dev/null +++ b/arch/arm64/kernel/vdso/note.S @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + * + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> + +ELFNOTE_START(Linux, 0, "a") +	.long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/arm64/kernel/vdso/sigreturn.S b/arch/arm64/kernel/vdso/sigreturn.S new file mode 100644 index 00000000000..20d98effa7d --- /dev/null +++ b/arch/arm64/kernel/vdso/sigreturn.S @@ -0,0 +1,37 @@ +/* + * Sigreturn trampoline for returning from a signal when the SA_RESTORER + * flag is not set. + * + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/linkage.h> +#include <asm/unistd.h> + +	.text + +	nop +ENTRY(__kernel_rt_sigreturn) +	.cfi_startproc +	.cfi_signal_frame +	.cfi_def_cfa	x29, 0 +	.cfi_offset	x29, 0 * 8 +	.cfi_offset	x30, 1 * 8 +	mov	x8, #__NR_rt_sigreturn +	svc	#0 +	.cfi_endproc +ENDPROC(__kernel_rt_sigreturn) diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S new file mode 100644 index 00000000000..60c1db54b41 --- /dev/null +++ b/arch/arm64/kernel/vdso/vdso.S @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <linux/const.h> +#include <asm/page.h> + +	__PAGE_ALIGNED_DATA + +	.globl vdso_start, vdso_end +	.balign PAGE_SIZE +vdso_start: +	.incbin "arch/arm64/kernel/vdso/vdso.so" +	.balign PAGE_SIZE +vdso_end: + +	.previous diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S new file mode 100644 index 00000000000..8154b8d1c82 --- /dev/null +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -0,0 +1,100 @@ +/* + * GNU linker script for the VDSO library. +* + * Copyright (C) 2012 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + * + * Author: Will Deacon <will.deacon@arm.com> + * Heavily based on the vDSO linker scripts for other archs. + */ + +#include <linux/const.h> +#include <asm/page.h> +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64") +OUTPUT_ARCH(aarch64) + +SECTIONS +{ +	. = VDSO_LBASE + SIZEOF_HEADERS; + +	.hash		: { *(.hash) }			:text +	.gnu.hash	: { *(.gnu.hash) } +	.dynsym		: { *(.dynsym) } +	.dynstr		: { *(.dynstr) } +	.gnu.version	: { *(.gnu.version) } +	.gnu.version_d	: { *(.gnu.version_d) } +	.gnu.version_r	: { *(.gnu.version_r) } + +	.note		: { *(.note.*) }		:text	:note + +	. = ALIGN(16); + +	.text		: { *(.text*) }			:text	=0xd503201f +	PROVIDE (__etext = .); +	PROVIDE (_etext = .); +	PROVIDE (etext = .); + +	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr +	.eh_frame	: { KEEP (*(.eh_frame)) }	:text + +	.dynamic	: { *(.dynamic) }		:text	:dynamic + +	.rodata		: { *(.rodata*) }		:text + +	_end = .; +	PROVIDE(end = .); + +	. 
= ALIGN(PAGE_SIZE); +	PROVIDE(_vdso_data = .); + +	/DISCARD/	: { +		*(.note.GNU-stack) +		*(.data .data.* .gnu.linkonce.d.* .sdata*) +		*(.bss .sbss .dynbss .dynsbss) +	} +} + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ +	text		PT_LOAD		FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ +	dynamic		PT_DYNAMIC	FLAGS(4);		/* PF_R */ +	note		PT_NOTE		FLAGS(4);		/* PF_R */ +	eh_frame_hdr	PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ +	LINUX_2.6.39 { +	global: +		__kernel_rt_sigreturn; +		__kernel_gettimeofday; +		__kernel_clock_gettime; +		__kernel_clock_getres; +	local: *; +	}; +} + +/* + * Make the sigreturn code visible to the kernel. + */ +VDSO_sigtramp		= __kernel_rt_sigreturn; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S new file mode 100644 index 00000000000..f1e6d5c032e --- /dev/null +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -0,0 +1,116 @@ +/* + * ld script to make ARM Linux kernel + * taken from the i386 version by Russell King + * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz> + */ + +#include <asm-generic/vmlinux.lds.h> +#include <asm/thread_info.h> +#include <asm/memory.h> +#include <asm/page.h> + +#define ARM_EXIT_KEEP(x) +#define ARM_EXIT_DISCARD(x)	x + +OUTPUT_ARCH(aarch64) +ENTRY(_text) + +jiffies = jiffies_64; + +#define HYPERVISOR_TEXT					\ +	/*						\ +	 * Force the alignment to be compatible with	\ +	 * the vectors requirements			\ +	 */						\ +	. = ALIGN(2048);				\ +	VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;	\ +	*(.hyp.idmap.text)				\ +	VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;	\ +	VMLINUX_SYMBOL(__hyp_text_start) = .;		\ +	*(.hyp.text)					\ +	VMLINUX_SYMBOL(__hyp_text_end) = .; + +SECTIONS +{ +	/* +	 * XXX: The linker does not define how output sections are +	 * assigned to input sections when there are multiple statements +	 * matching the same input section name.  There is no documented +	 * order of matching. +	 */ +	/DISCARD/ : { +		ARM_EXIT_DISCARD(EXIT_TEXT) +		ARM_EXIT_DISCARD(EXIT_DATA) +		EXIT_CALL +		*(.discard) +		*(.discard.*) +	} + +	. = PAGE_OFFSET + TEXT_OFFSET; + +	.head.text : { +		_text = .; +		HEAD_TEXT +	} +	.text : {			/* Real text segment		*/ +		_stext = .;		/* Text and read-only data	*/ +			__exception_text_start = .; +			*(.exception.text) +			__exception_text_end = .; +			IRQENTRY_TEXT +			TEXT_TEXT +			SCHED_TEXT +			LOCK_TEXT +			HYPERVISOR_TEXT +			*(.fixup) +			*(.gnu.warning) +		. = ALIGN(16); +		*(.got)			/* Global offset table		*/ +	} + +	RO_DATA(PAGE_SIZE) +	EXCEPTION_TABLE(8) +	NOTES +	_etext = .;			/* End of text and rodata section */ + +	. = ALIGN(PAGE_SIZE); +	__init_begin = .; + +	INIT_TEXT_SECTION(8) +	.exit.text : { +		ARM_EXIT_KEEP(EXIT_TEXT) +	} +	. = ALIGN(16); +	.init.data : { +		INIT_DATA +		INIT_SETUP(16) +		INIT_CALLS +		CON_INITCALL +		SECURITY_INITCALL +		INIT_RAM_FS +	} +	.exit.data : { +		ARM_EXIT_KEEP(EXIT_DATA) +	} + +	PERCPU_SECTION(64) + +	__init_end = .; + +	. = ALIGN(PAGE_SIZE); +	_data = .; +	_sdata = .; +	RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) +	_edata = .; + +	BSS_SECTION(0, 0, 0) +	_end = .; + +	STABS_DEBUG +} + +/* + * The HYP init code can't be more than a page long. + */ +ASSERT(((__hyp_idmap_text_start + PAGE_SIZE) > __hyp_idmap_text_end), +       "HYP init code too big")  | 
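A note on the vDSO timekeeping above: update_vsyscall() in vdso.c uses a sequence-counter handshake, making vdso_data->tb_seq_count odd while it rewrites the data page and even again once it is done, and the seqcnt_acquire/seqcnt_read macros in gettimeofday.S make userspace spin while the count is odd and retry if it changes across the reads. The C sketch below mirrors the CLOCK_REALTIME fast path of __kernel_clock_gettime/__do_get_tspec under those rules; the struct layout, helper names and use of GCC __atomic builtins are illustrative assumptions for this sketch only, not the kernel's vdso_data ABI.

/*
 * Sketch of the vDSO CLOCK_REALTIME fast path in C (illustrative only:
 * field names follow update_vsyscall(), but the layout is not the ABI).
 */
#include <stdint.h>
#include <time.h>

#define NSEC_PER_SEC	1000000000ULL
#define CNT_MASK	((1ULL << 56) - 1)	/* 56 bits of counter precision */

struct vdso_data_sketch {
	uint32_t tb_seq_count;		/* odd while the kernel is updating */
	uint32_t use_syscall;
	uint64_t cs_cycle_last;
	uint64_t xtime_clock_sec;
	uint64_t xtime_clock_nsec;	/* already shifted left by cs_shift */
	uint32_t cs_mult;
	uint32_t cs_shift;
};

static uint64_t read_cntvct(void)
{
	uint64_t cval;

	/* isb before the counter read, as __do_get_tspec does */
	__asm__ volatile("isb" ::: "memory");
	__asm__ volatile("mrs %0, cntvct_el0" : "=r" (cval));
	return cval;
}

static int sketch_clock_gettime(struct vdso_data_sketch *vd, struct timespec *ts)
{
	uint32_t seq, mult, shift;
	uint64_t sec, shifted_ns, ns_per_sec_shifted;

	do {
		/* seqcnt_acquire: spin while an update is in flight */
		do {
			seq = __atomic_load_n(&vd->tb_seq_count, __ATOMIC_RELAXED);
		} while (seq & 1);
		__atomic_thread_fence(__ATOMIC_ACQUIRE);	/* dmb ishld */

		if (vd->use_syscall)
			return -1;	/* caller must fall back to the real syscall */

		sec        = vd->xtime_clock_sec;
		shifted_ns = vd->xtime_clock_nsec;
		mult       = vd->cs_mult;
		shift      = vd->cs_shift;

		/* cycles since cycle_last, converted to shifted nanoseconds */
		shifted_ns += ((read_cntvct() - vd->cs_cycle_last) & CNT_MASK) * mult;

		/* seqcnt_read/seqcnt_check: retry if the data changed under us */
		__atomic_thread_fence(__ATOMIC_ACQUIRE);	/* dmb ishld */
	} while (__atomic_load_n(&vd->tb_seq_count, __ATOMIC_RELAXED) != seq);

	ns_per_sec_shifted = NSEC_PER_SEC << shift;
	ts->tv_sec  = sec + shifted_ns / ns_per_sec_shifted;
	ts->tv_nsec = (shifted_ns % ns_per_sec_shifted) >> shift;
	return 0;
}

Real userspace never open-codes this, of course; it simply calls clock_gettime() and the C library resolves it to __kernel_clock_gettime, which itself falls back to the svc path whenever use_syscall is set.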

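As a companion note on the build glue: gen_vdso_offsets.sh turns the nm output for vdso.so.dbg into vdso-offsets.h, one #define per VDSO_* symbol exported by vdso.lds.S. With only VDSO_sigtramp defined there, the generated header would look roughly like the sketch below; the nm line and the 0x05b0 offset are made-up placeholders, the real value depends on the link. The kernel's signal code can then add vdso_offset_sigtramp to mm->context.vdso to locate the sigreturn trampoline when SA_RESTORER is not set.

/*
 * Hypothetical nm(1) line for vdso.so.dbg:
 *     00000000000005b0 T VDSO_sigtramp
 * and the vdso-offsets.h line the sed pipeline derives from it
 * (leading zeros collapsed to one by the first sed expression;
 * the offset is a placeholder, not taken from a real build):
 */
#define vdso_offset_sigtramp	0x05b0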