Diffstat (limited to 'arch/arm64/kernel'): 46 files changed, 4607 insertions, 1054 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 7b4b564961d..cdaedad3afe 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -4,20 +4,31 @@  CPPFLAGS_vmlinux.lds	:= -DTEXT_OFFSET=$(TEXT_OFFSET)  AFLAGS_head.o		:= -DTEXT_OFFSET=$(TEXT_OFFSET) +CFLAGS_efi-stub.o 	:= -DTEXT_OFFSET=$(TEXT_OFFSET) \ +			   -I$(src)/../../../scripts/dtc/libfdt + +CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_insn.o = -pg +CFLAGS_REMOVE_return_address.o = -pg  # Object file lists.  arm64-obj-y		:= cputable.o debug-monitors.o entry.o irq.o fpsimd.o	\  			   entry-fpsimd.o process.o ptrace.o setup.o signal.o	\  			   sys.o stacktrace.o time.o traps.o io.o vdso.o	\ -			   hyp-stub.o psci.o +			   hyp-stub.o psci.o cpu_ops.o insn.o return_address.o  arm64-obj-$(CONFIG_COMPAT)		+= sys32.o kuser32.o signal32.o 	\  					   sys_compat.o +arm64-obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace.o entry-ftrace.o  arm64-obj-$(CONFIG_MODULES)		+= arm64ksyms.o module.o -arm64-obj-$(CONFIG_SMP)			+= smp.o smp_spin_table.o smp_psci.o +arm64-obj-$(CONFIG_SMP)			+= smp.o smp_spin_table.o topology.o +arm64-obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o  arm64-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o -arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o -arm64-obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o +arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o +arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND)	+= sleep.o suspend.o +arm64-obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o +arm64-obj-$(CONFIG_KGDB)		+= kgdb.o +arm64-obj-$(CONFIG_EFI)			+= efi.o efi-stub.o efi-entry.o  obj-y					+= $(arm64-obj-y) vdso/  obj-m					+= $(arm64-obj-m) diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 41b4f626d55..a85843ddbde 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -29,16 +29,14 @@  #include <asm/checksum.h> -	/* user mem (segment) */ -EXPORT_SYMBOL(__strnlen_user); -EXPORT_SYMBOL(__strncpy_from_user); -  EXPORT_SYMBOL(copy_page);  EXPORT_SYMBOL(clear_page); +	/* user mem (segment) */  EXPORT_SYMBOL(__copy_from_user);  EXPORT_SYMBOL(__copy_to_user);  EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(__copy_in_user);  	/* physical memory */  EXPORT_SYMBOL(memstart_addr); @@ -46,10 +44,15 @@ EXPORT_SYMBOL(memstart_addr);  	/* string / mem functions */  EXPORT_SYMBOL(strchr);  EXPORT_SYMBOL(strrchr); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strnlen);  EXPORT_SYMBOL(memset);  EXPORT_SYMBOL(memcpy);  EXPORT_SYMBOL(memmove);  EXPORT_SYMBOL(memchr); +EXPORT_SYMBOL(memcmp);  	/* atomic bitops */  EXPORT_SYMBOL(set_bit); @@ -58,3 +61,7 @@ EXPORT_SYMBOL(clear_bit);  EXPORT_SYMBOL(test_and_clear_bit);  EXPORT_SYMBOL(change_bit);  EXPORT_SYMBOL(test_and_change_bit); + +#ifdef CONFIG_FUNCTION_TRACER +EXPORT_SYMBOL(_mcount); +#endif diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 666e231d410..646f888387c 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -25,6 +25,8 @@  #include <asm/thread_info.h>  #include <asm/memory.h>  #include <asm/cputable.h> +#include <asm/smp_plat.h> +#include <asm/suspend.h>  #include <asm/vdso_datapage.h>  #include <linux/kbuild.h> @@ -138,5 +140,14 @@ int main(void)    DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));    DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));  #endif +#ifdef CONFIG_ARM64_CPU_SUSPEND +  DEFINE(CPU_SUSPEND_SZ,	sizeof(struct cpu_suspend_ctx)); +  DEFINE(CPU_CTX_SP,		
offsetof(struct cpu_suspend_ctx, sp)); +  DEFINE(MPIDR_HASH_MASK,	offsetof(struct mpidr_hash, mask)); +  DEFINE(MPIDR_HASH_SHIFTS,	offsetof(struct mpidr_hash, shift_aff)); +  DEFINE(SLEEP_SAVE_SP_SZ,	sizeof(struct sleep_save_sp)); +  DEFINE(SLEEP_SAVE_SP_PHYS,	offsetof(struct sleep_save_sp, save_ptr_stash_phys)); +  DEFINE(SLEEP_SAVE_SP_VIRT,	offsetof(struct sleep_save_sp, save_ptr_stash)); +#endif    return 0;  } diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c new file mode 100644 index 00000000000..d62d12fb36c --- /dev/null +++ b/arch/arm64/kernel/cpu_ops.c @@ -0,0 +1,87 @@ +/* + * CPU kernel entry/exit control + * + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ + +#include <asm/cpu_ops.h> +#include <asm/smp_plat.h> +#include <linux/errno.h> +#include <linux/of.h> +#include <linux/string.h> + +extern const struct cpu_operations smp_spin_table_ops; +extern const struct cpu_operations cpu_psci_ops; + +const struct cpu_operations *cpu_ops[NR_CPUS]; + +static const struct cpu_operations *supported_cpu_ops[] __initconst = { +#ifdef CONFIG_SMP +	&smp_spin_table_ops, +	&cpu_psci_ops, +#endif +	NULL, +}; + +static const struct cpu_operations * __init cpu_get_ops(const char *name) +{ +	const struct cpu_operations **ops = supported_cpu_ops; + +	while (*ops) { +		if (!strcmp(name, (*ops)->name)) +			return *ops; + +		ops++; +	} + +	return NULL; +} + +/* + * Read a cpu's enable method from the device tree and record it in cpu_ops. + */ +int __init cpu_read_ops(struct device_node *dn, int cpu) +{ +	const char *enable_method = of_get_property(dn, "enable-method", NULL); +	if (!enable_method) { +		/* +		 * The boot CPU may not have an enable method (e.g. when +		 * spin-table is used for secondaries). Don't warn spuriously. 
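
Aside: a usage sketch for the new cpu_ops table (not part of the hunk above). Once cpu_read_ops() has recorded a CPU's enable method, boot code is expected to drive that CPU purely through cpu_ops[cpu]. The cpu_init/cpu_prepare/cpu_boot callback names below follow the struct cpu_operations introduced elsewhere in this series and are assumptions here rather than something shown in this diff.

	#include <linux/of.h>
	#include <asm/cpu_ops.h>

	/* Hedged sketch: bringing up one secondary CPU via its recorded cpu_ops. */
	static int __init bring_up_cpu_sketch(struct device_node *dn, unsigned int cpu)
	{
		if (cpu_read_ops(dn, cpu))			/* parse "enable-method" */
			return -ENODEV;

		if (!cpu_ops[cpu]->cpu_init || cpu_ops[cpu]->cpu_init(dn, cpu))
			return -ENODEV;			/* e.g. read cpu-release-addr */

		if (cpu_ops[cpu]->cpu_prepare && cpu_ops[cpu]->cpu_prepare(cpu))
			return -ENODEV;

		return cpu_ops[cpu]->cpu_boot(cpu);		/* release/boot the CPU */
	}

The point of the indirection is that the SMP code no longer needs to know whether spin-table or PSCI is in use; it simply calls through the table.
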
+		 */ +		if (cpu != 0) +			pr_err("%s: missing enable-method property\n", +				dn->full_name); +		return -ENOENT; +	} + +	cpu_ops[cpu] = cpu_get_ops(enable_method); +	if (!cpu_ops[cpu]) { +		pr_warn("%s: unsupported enable-method property: %s\n", +			dn->full_name, enable_method); +		return -EOPNOTSUPP; +	} + +	return 0; +} + +void __init cpu_read_bootcpu_ops(void) +{ +	struct device_node *dn = of_get_cpu_node(0, NULL); +	if (!dn) { +		pr_err("Failed to find device node for boot cpu\n"); +		return; +	} +	cpu_read_ops(dn, 0); +} diff --git a/arch/arm64/kernel/cputable.c b/arch/arm64/kernel/cputable.c index 63cfc4a43f4..fd3993cb060 100644 --- a/arch/arm64/kernel/cputable.c +++ b/arch/arm64/kernel/cputable.c @@ -22,7 +22,7 @@  extern unsigned long __cpu_setup(void); -struct cpu_info __initdata cpu_table[] = { +struct cpu_info cpu_table[] = {  	{  		.cpu_id_val	= 0x000f0000,  		.cpu_id_mask	= 0x000f0000, diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index cbfacf7fb43..a7fb874b595 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -27,7 +27,6 @@  #include <linux/uaccess.h>  #include <asm/debug-monitors.h> -#include <asm/local.h>  #include <asm/cputype.h>  #include <asm/system_misc.h> @@ -89,8 +88,8 @@ early_param("nodebugmon", early_debug_disable);   * Keep track of debug users on each core.   * The ref counts are per-cpu so we use a local_t type.   */ -static DEFINE_PER_CPU(local_t, mde_ref_count); -static DEFINE_PER_CPU(local_t, kde_ref_count); +static DEFINE_PER_CPU(int, mde_ref_count); +static DEFINE_PER_CPU(int, kde_ref_count);  void enable_debug_monitors(enum debug_el el)  { @@ -98,11 +97,11 @@ void enable_debug_monitors(enum debug_el el)  	WARN_ON(preemptible()); -	if (local_inc_return(&__get_cpu_var(mde_ref_count)) == 1) +	if (this_cpu_inc_return(mde_ref_count) == 1)  		enable = DBG_MDSCR_MDE;  	if (el == DBG_ACTIVE_EL1 && -	    local_inc_return(&__get_cpu_var(kde_ref_count)) == 1) +	    this_cpu_inc_return(kde_ref_count) == 1)  		enable |= DBG_MDSCR_KDE;  	if (enable && debug_enabled) { @@ -118,11 +117,11 @@ void disable_debug_monitors(enum debug_el el)  	WARN_ON(preemptible()); -	if (local_dec_and_test(&__get_cpu_var(mde_ref_count))) +	if (this_cpu_dec_return(mde_ref_count) == 0)  		disable = ~DBG_MDSCR_MDE;  	if (el == DBG_ACTIVE_EL1 && -	    local_dec_and_test(&__get_cpu_var(kde_ref_count))) +	    this_cpu_dec_return(kde_ref_count) == 0)  		disable &= ~DBG_MDSCR_KDE;  	if (disable) { @@ -138,7 +137,6 @@ void disable_debug_monitors(enum debug_el el)  static void clear_os_lock(void *unused)  {  	asm volatile("msr oslar_el1, %0" : : "r" (0)); -	isb();  }  static int os_lock_notify(struct notifier_block *self, @@ -156,12 +154,17 @@ static struct notifier_block os_lock_nb = {  static int debug_monitors_init(void)  { +	cpu_notifier_register_begin(); +  	/* Clear the OS lock. */ -	smp_call_function(clear_os_lock, NULL, 1); -	clear_os_lock(NULL); +	on_each_cpu(clear_os_lock, NULL, 1); +	isb(); +	local_dbg_enable();  	/* Register hotplug handler. 
*/ -	register_cpu_notifier(&os_lock_nb); +	__register_cpu_notifier(&os_lock_nb); + +	cpu_notifier_register_done();  	return 0;  }  postcore_initcall(debug_monitors_init); @@ -188,6 +191,48 @@ static void clear_regs_spsr_ss(struct pt_regs *regs)  	regs->pstate = spsr;  } +/* EL1 Single Step Handler hooks */ +static LIST_HEAD(step_hook); +static DEFINE_RWLOCK(step_hook_lock); + +void register_step_hook(struct step_hook *hook) +{ +	write_lock(&step_hook_lock); +	list_add(&hook->node, &step_hook); +	write_unlock(&step_hook_lock); +} + +void unregister_step_hook(struct step_hook *hook) +{ +	write_lock(&step_hook_lock); +	list_del(&hook->node); +	write_unlock(&step_hook_lock); +} + +/* + * Call registered single step handers + * There is no Syndrome info to check for determining the handler. + * So we call all the registered handlers, until the right handler is + * found which returns zero. + */ +static int call_step_hook(struct pt_regs *regs, unsigned int esr) +{ +	struct step_hook *hook; +	int retval = DBG_HOOK_ERROR; + +	read_lock(&step_hook_lock); + +	list_for_each_entry(hook, &step_hook, node)	{ +		retval = hook->fn(regs, esr); +		if (retval == DBG_HOOK_HANDLED) +			break; +	} + +	read_unlock(&step_hook_lock); + +	return retval; +} +  static int single_step_handler(unsigned long addr, unsigned int esr,  			       struct pt_regs *regs)  { @@ -215,7 +260,9 @@ static int single_step_handler(unsigned long addr, unsigned int esr,  		 */  		user_rewind_single_step(current);  	} else { -		/* TODO: route to KGDB */ +		if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED) +			return 0; +  		pr_warning("Unexpected kernel single-step exception at EL1\n");  		/*  		 * Re-enable stepping since we know that we will be @@ -227,11 +274,50 @@ static int single_step_handler(unsigned long addr, unsigned int esr,  	return 0;  } +/* + * Breakpoint handler is re-entrant as another breakpoint can + * hit within breakpoint handler, especically in kprobes. + * Use reader/writer locks instead of plain spinlock. + */ +static LIST_HEAD(break_hook); +static DEFINE_RWLOCK(break_hook_lock); + +void register_break_hook(struct break_hook *hook) +{ +	write_lock(&break_hook_lock); +	list_add(&hook->node, &break_hook); +	write_unlock(&break_hook_lock); +} + +void unregister_break_hook(struct break_hook *hook) +{ +	write_lock(&break_hook_lock); +	list_del(&hook->node); +	write_unlock(&break_hook_lock); +} + +static int call_break_hook(struct pt_regs *regs, unsigned int esr) +{ +	struct break_hook *hook; +	int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL; + +	read_lock(&break_hook_lock); +	list_for_each_entry(hook, &break_hook, node) +		if ((esr & hook->esr_mask) == hook->esr_val) +			fn = hook->fn; +	read_unlock(&break_hook_lock); + +	return fn ? 
fn(regs, esr) : DBG_HOOK_ERROR; +} +  static int brk_handler(unsigned long addr, unsigned int esr,  		       struct pt_regs *regs)  {  	siginfo_t info; +	if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED) +		return 0; +  	if (!user_mode(regs))  		return -EFAULT; @@ -249,7 +335,8 @@ static int brk_handler(unsigned long addr, unsigned int esr,  int aarch32_break_handler(struct pt_regs *regs)  {  	siginfo_t info; -	unsigned int instr; +	u32 arm_instr; +	u16 thumb_instr;  	bool bp = false;  	void __user *pc = (void __user *)instruction_pointer(regs); @@ -258,18 +345,21 @@ int aarch32_break_handler(struct pt_regs *regs)  	if (compat_thumb_mode(regs)) {  		/* get 16-bit Thumb instruction */ -		get_user(instr, (u16 __user *)pc); -		if (instr == AARCH32_BREAK_THUMB2_LO) { +		get_user(thumb_instr, (u16 __user *)pc); +		thumb_instr = le16_to_cpu(thumb_instr); +		if (thumb_instr == AARCH32_BREAK_THUMB2_LO) {  			/* get second half of 32-bit Thumb-2 instruction */ -			get_user(instr, (u16 __user *)(pc + 2)); -			bp = instr == AARCH32_BREAK_THUMB2_HI; +			get_user(thumb_instr, (u16 __user *)(pc + 2)); +			thumb_instr = le16_to_cpu(thumb_instr); +			bp = thumb_instr == AARCH32_BREAK_THUMB2_HI;  		} else { -			bp = instr == AARCH32_BREAK_THUMB; +			bp = thumb_instr == AARCH32_BREAK_THUMB;  		}  	} else {  		/* 32-bit ARM instruction */ -		get_user(instr, (u32 __user *)pc); -		bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM; +		get_user(arm_instr, (u32 __user *)pc); +		arm_instr = le32_to_cpu(arm_instr); +		bp = (arm_instr & ~0xf0000000) == AARCH32_BREAK_ARM;  	}  	if (!bp) diff --git a/arch/arm64/kernel/early_printk.c b/arch/arm64/kernel/early_printk.c deleted file mode 100644 index fbb6e184365..00000000000 --- a/arch/arm64/kernel/early_printk.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Earlyprintk support. - * - * Copyright (C) 2012 ARM Ltd. - * Author: Catalin Marinas <catalin.marinas@arm.com> - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program.  If not, see <http://www.gnu.org/licenses/>. - */ -#include <linux/kernel.h> -#include <linux/console.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/io.h> - -#include <linux/amba/serial.h> -#include <linux/serial_reg.h> - -static void __iomem *early_base; -static void (*printch)(char ch); - -/* - * PL011 single character TX. - */ -static void pl011_printch(char ch) -{ -	while (readl_relaxed(early_base + UART01x_FR) & UART01x_FR_TXFF) -		; -	writeb_relaxed(ch, early_base + UART01x_DR); -	while (readl_relaxed(early_base + UART01x_FR) & UART01x_FR_BUSY) -		; -} - -/* - * Semihosting-based debug console - */ -static void smh_printch(char ch) -{ -	asm volatile("mov  x1, %0\n" -		     "mov  x0, #3\n" -		     "hlt  0xf000\n" -		     : : "r" (&ch) : "x0", "x1", "memory"); -} - -/* - * 8250/16550 (8-bit aligned registers) single character TX. 
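
Aside: a registration sketch for the break_hook interface added to debug-monitors.c above. The ESR value, mask and handler below are hypothetical, purely to show the shape of a client such as kgdb or kprobes; they are not taken from this diff.

	#include <linux/kernel.h>
	#include <asm/ptrace.h>
	#include <asm/debug-monitors.h>

	static int example_brk_handler(struct pt_regs *regs, unsigned int esr)
	{
		pr_info("example BRK hit at %lx\n", instruction_pointer(regs));
		return DBG_HOOK_HANDLED;	/* tell brk_handler() the BRK was consumed */
	}

	static struct break_hook example_brk_hook = {
		.esr_val  = 0xf2000123,		/* hypothetical: one specific BRK immediate */
		.esr_mask = 0xffffffff,
		.fn	  = example_brk_handler,
	};

	/* register_break_hook(&example_brk_hook) from an initcall,
	 * unregister_break_hook() on teardown. */

If no registered hook claims the ESR, call_break_hook() returns DBG_HOOK_ERROR and brk_handler() falls back to delivering SIGTRAP to user space (or failing in kernel mode), as before.
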
- */ -static void uart8250_8bit_printch(char ch) -{ -	while (!(readb_relaxed(early_base + UART_LSR) & UART_LSR_THRE)) -		; -	writeb_relaxed(ch, early_base + UART_TX); -} - -/* - * 8250/16550 (32-bit aligned registers) single character TX. - */ -static void uart8250_32bit_printch(char ch) -{ -	while (!(readl_relaxed(early_base + (UART_LSR << 2)) & UART_LSR_THRE)) -		; -	writel_relaxed(ch, early_base + (UART_TX << 2)); -} - -struct earlycon_match { -	const char *name; -	void (*printch)(char ch); -}; - -static const struct earlycon_match earlycon_match[] __initconst = { -	{ .name = "pl011", .printch = pl011_printch, }, -	{ .name = "smh", .printch = smh_printch, }, -	{ .name = "uart8250-8bit", .printch = uart8250_8bit_printch, }, -	{ .name = "uart8250-32bit", .printch = uart8250_32bit_printch, }, -	{} -}; - -static void early_write(struct console *con, const char *s, unsigned n) -{ -	while (n-- > 0) { -		if (*s == '\n') -			printch('\r'); -		printch(*s); -		s++; -	} -} - -static struct console early_console_dev = { -	.name =		"earlycon", -	.write =	early_write, -	.flags =	CON_PRINTBUFFER | CON_BOOT, -	.index =	-1, -}; - -/* - * Parse earlyprintk=... parameter in the format: - * - *   <name>[,<addr>][,<options>] - * - * and register the early console. It is assumed that the UART has been - * initialised by the bootloader already. - */ -static int __init setup_early_printk(char *buf) -{ -	const struct earlycon_match *match = earlycon_match; -	phys_addr_t paddr = 0; - -	if (!buf) { -		pr_warning("No earlyprintk arguments passed.\n"); -		return 0; -	} - -	while (match->name) { -		size_t len = strlen(match->name); -		if (!strncmp(buf, match->name, len)) { -			buf += len; -			break; -		} -		match++; -	} -	if (!match->name) { -		pr_warning("Unknown earlyprintk arguments: %s\n", buf); -		return 0; -	} - -	/* I/O address */ -	if (!strncmp(buf, ",0x", 3)) { -		char *e; -		paddr = simple_strtoul(buf + 1, &e, 16); -		buf = e; -	} -	/* no options parsing yet */ - -	if (paddr) -		early_base = early_io_map(paddr, EARLYCON_IOBASE); - -	printch = match->printch; -	early_console = &early_console_dev; -	register_console(&early_console_dev); - -	return 0; -} - -early_param("earlyprintk", setup_early_printk); diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S new file mode 100644 index 00000000000..619b1dd7bcd --- /dev/null +++ b/arch/arm64/kernel/efi-entry.S @@ -0,0 +1,108 @@ +/* + * EFI entry point. + * + * Copyright (C) 2013, 2014 Red Hat, Inc. + * Author: Mark Salter <msalter@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/linkage.h> +#include <linux/init.h> + +#include <asm/assembler.h> + +#define EFI_LOAD_ERROR 0x8000000000000001 + +	__INIT + +	/* +	 * We arrive here from the EFI boot manager with: +	 * +	 *    * CPU in little-endian mode +	 *    * MMU on with identity-mapped RAM +	 *    * Icache and Dcache on +	 * +	 * We will most likely be running from some place other than where +	 * we want to be. The kernel image wants to be placed at TEXT_OFFSET +	 * from start of RAM. +	 */ +ENTRY(efi_stub_entry) +	/* +	 * Create a stack frame to save FP/LR with extra space +	 * for image_addr variable passed to efi_entry(). +	 */ +	stp	x29, x30, [sp, #-32]! + +	/* +	 * Call efi_entry to do the real work. +	 * x0 and x1 are already set up by firmware. 
Current runtime +	 * address of image is calculated and passed via *image_addr. +	 * +	 * unsigned long efi_entry(void *handle, +	 *                         efi_system_table_t *sys_table, +	 *                         unsigned long *image_addr) ; +	 */ +	adrp	x8, _text +	add	x8, x8, #:lo12:_text +	add	x2, sp, 16 +	str	x8, [x2] +	bl	efi_entry +	cmn	x0, #1 +	b.eq	efi_load_fail + +	/* +	 * efi_entry() will have relocated the kernel image if necessary +	 * and we return here with device tree address in x0 and the kernel +	 * entry point stored at *image_addr. Save those values in registers +	 * which are callee preserved. +	 */ +	mov	x20, x0		// DTB address +	ldr	x0, [sp, #16]	// relocated _text address +	mov	x21, x0 + +	/* +	 * Flush dcache covering current runtime addresses +	 * of kernel text/data. Then flush all of icache. +	 */ +	adrp	x1, _text +	add	x1, x1, #:lo12:_text +	adrp	x2, _edata +	add	x2, x2, #:lo12:_edata +	sub	x1, x2, x1 + +	bl	__flush_dcache_area +	ic	ialluis + +	/* Turn off Dcache and MMU */ +	mrs	x0, CurrentEL +	cmp	x0, #CurrentEL_EL2 +	b.ne	1f +	mrs	x0, sctlr_el2 +	bic	x0, x0, #1 << 0	// clear SCTLR.M +	bic	x0, x0, #1 << 2	// clear SCTLR.C +	msr	sctlr_el2, x0 +	isb +	b	2f +1: +	mrs	x0, sctlr_el1 +	bic	x0, x0, #1 << 0	// clear SCTLR.M +	bic	x0, x0, #1 << 2	// clear SCTLR.C +	msr	sctlr_el1, x0 +	isb +2: +	/* Jump to kernel entry point */ +	mov	x0, x20 +	mov	x1, xzr +	mov	x2, xzr +	mov	x3, xzr +	br	x21 + +efi_load_fail: +	mov	x0, #EFI_LOAD_ERROR +	ldp	x29, x30, [sp], #32 +	ret + +ENDPROC(efi_stub_entry) diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c new file mode 100644 index 00000000000..e786e6cdc40 --- /dev/null +++ b/arch/arm64/kernel/efi-stub.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2013, 2014 Linaro Ltd;  <roy.franz@linaro.org> + * + * This file implements the EFI boot stub for the arm64 kernel. + * Adapted from ARM version by Mark Salter <msalter@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/efi.h> +#include <linux/libfdt.h> +#include <asm/sections.h> + +/* + * AArch64 requires the DTB to be 8-byte aligned in the first 512MiB from + * start of kernel and may not cross a 2MiB boundary. We set alignment to + * 2MiB so we know it won't cross a 2MiB boundary. + */ +#define EFI_FDT_ALIGN	SZ_2M   /* used by allocate_new_fdt_and_exit_boot() */ +#define MAX_FDT_OFFSET	SZ_512M + +#define efi_call_early(f, ...) 
sys_table_arg->boottime->f(__VA_ARGS__) + +static void efi_char16_printk(efi_system_table_t *sys_table_arg, +			      efi_char16_t *str); + +static efi_status_t efi_open_volume(efi_system_table_t *sys_table, +				    void *__image, void **__fh); +static efi_status_t efi_file_close(void *handle); + +static efi_status_t +efi_file_read(void *handle, unsigned long *size, void *addr); + +static efi_status_t +efi_file_size(efi_system_table_t *sys_table, void *__fh, +	      efi_char16_t *filename_16, void **handle, u64 *file_sz); + +/* Include shared EFI stub code */ +#include "../../../drivers/firmware/efi/efi-stub-helper.c" +#include "../../../drivers/firmware/efi/fdt.c" +#include "../../../drivers/firmware/efi/arm-stub.c" + + +static efi_status_t handle_kernel_image(efi_system_table_t *sys_table, +					unsigned long *image_addr, +					unsigned long *image_size, +					unsigned long *reserve_addr, +					unsigned long *reserve_size, +					unsigned long dram_base, +					efi_loaded_image_t *image) +{ +	efi_status_t status; +	unsigned long kernel_size, kernel_memsize = 0; + +	/* Relocate the image, if required. */ +	kernel_size = _edata - _text; +	if (*image_addr != (dram_base + TEXT_OFFSET)) { +		kernel_memsize = kernel_size + (_end - _edata); +		status = efi_relocate_kernel(sys_table, image_addr, +					     kernel_size, kernel_memsize, +					     dram_base + TEXT_OFFSET, +					     PAGE_SIZE); +		if (status != EFI_SUCCESS) { +			pr_efi_err(sys_table, "Failed to relocate kernel\n"); +			return status; +		} +		if (*image_addr != (dram_base + TEXT_OFFSET)) { +			pr_efi_err(sys_table, "Failed to alloc kernel memory\n"); +			efi_free(sys_table, kernel_memsize, *image_addr); +			return EFI_ERROR; +		} +		*image_size = kernel_memsize; +	} + + +	return EFI_SUCCESS; +} diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c new file mode 100644 index 00000000000..14db1f6e8d7 --- /dev/null +++ b/arch/arm64/kernel/efi.c @@ -0,0 +1,469 @@ +/* + * Extensible Firmware Interface + * + * Based on Extensible Firmware Interface Specification version 2.4 + * + * Copyright (C) 2013, 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include <linux/efi.h> +#include <linux/export.h> +#include <linux/memblock.h> +#include <linux/bootmem.h> +#include <linux/of.h> +#include <linux/of_fdt.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <asm/cacheflush.h> +#include <asm/efi.h> +#include <asm/tlbflush.h> +#include <asm/mmu_context.h> + +struct efi_memory_map memmap; + +static efi_runtime_services_t *runtime; + +static u64 efi_system_table; + +static int uefi_debug __initdata; +static int __init uefi_debug_setup(char *str) +{ +	uefi_debug = 1; + +	return 0; +} +early_param("uefi_debug", uefi_debug_setup); + +static int __init is_normal_ram(efi_memory_desc_t *md) +{ +	if (md->attribute & EFI_MEMORY_WB) +		return 1; +	return 0; +} + +static void __init efi_setup_idmap(void) +{ +	struct memblock_region *r; +	efi_memory_desc_t *md; +	u64 paddr, npages, size; + +	for_each_memblock(memory, r) +		create_id_mapping(r->base, r->size, 0); + +	/* map runtime io spaces */ +	for_each_efi_memory_desc(&memmap, md) { +		if (!(md->attribute & EFI_MEMORY_RUNTIME) || is_normal_ram(md)) +			continue; +		paddr = md->phys_addr; +		npages = md->num_pages; +		memrange_efi_to_native(&paddr, &npages); +		size = npages << PAGE_SHIFT; +		create_id_mapping(paddr, size, 1); +	} +} + +static int __init uefi_init(void) +{ +	efi_char16_t *c16; +	char vendor[100] = "unknown"; +	int i, retval; + +	efi.systab = early_memremap(efi_system_table, +				    sizeof(efi_system_table_t)); +	if (efi.systab == NULL) { +		pr_warn("Unable to map EFI system table.\n"); +		return -ENOMEM; +	} + +	set_bit(EFI_BOOT, &efi.flags); +	set_bit(EFI_64BIT, &efi.flags); + +	/* +	 * Verify the EFI Table +	 */ +	if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) { +		pr_err("System table signature incorrect\n"); +		return -EINVAL; +	} +	if ((efi.systab->hdr.revision >> 16) < 2) +		pr_warn("Warning: EFI system table version %d.%02d, expected 2.00 or greater\n", +			efi.systab->hdr.revision >> 16, +			efi.systab->hdr.revision & 0xffff); + +	/* Show what we know for posterity */ +	c16 = early_memremap(efi.systab->fw_vendor, +			     sizeof(vendor)); +	if (c16) { +		for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) +			vendor[i] = c16[i]; +		vendor[i] = '\0'; +	} + +	pr_info("EFI v%u.%.02u by %s\n", +		efi.systab->hdr.revision >> 16, +		efi.systab->hdr.revision & 0xffff, vendor); + +	retval = efi_config_init(NULL); +	if (retval == 0) +		set_bit(EFI_CONFIG_TABLES, &efi.flags); + +	early_memunmap(c16, sizeof(vendor)); +	early_memunmap(efi.systab,  sizeof(efi_system_table_t)); + +	return retval; +} + +static __initdata char memory_type_name[][32] = { +	{"Reserved"}, +	{"Loader Code"}, +	{"Loader Data"}, +	{"Boot Code"}, +	{"Boot Data"}, +	{"Runtime Code"}, +	{"Runtime Data"}, +	{"Conventional Memory"}, +	{"Unusable Memory"}, +	{"ACPI Reclaim Memory"}, +	{"ACPI Memory NVS"}, +	{"Memory Mapped I/O"}, +	{"MMIO Port Space"}, +	{"PAL Code"}, +}; + +/* + * Return true for RAM regions we want to permanently reserve. 
+ */ +static __init int is_reserve_region(efi_memory_desc_t *md) +{ +	if (!is_normal_ram(md)) +		return 0; + +	if (md->attribute & EFI_MEMORY_RUNTIME) +		return 1; + +	if (md->type == EFI_ACPI_RECLAIM_MEMORY || +	    md->type == EFI_RESERVED_TYPE) +		return 1; + +	return 0; +} + +static __init void reserve_regions(void) +{ +	efi_memory_desc_t *md; +	u64 paddr, npages, size; + +	if (uefi_debug) +		pr_info("Processing EFI memory map:\n"); + +	for_each_efi_memory_desc(&memmap, md) { +		paddr = md->phys_addr; +		npages = md->num_pages; + +		if (uefi_debug) +			pr_info("  0x%012llx-0x%012llx [%s]", +				paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1, +				memory_type_name[md->type]); + +		memrange_efi_to_native(&paddr, &npages); +		size = npages << PAGE_SHIFT; + +		if (is_normal_ram(md)) +			early_init_dt_add_memory_arch(paddr, size); + +		if (is_reserve_region(md) || +		    md->type == EFI_BOOT_SERVICES_CODE || +		    md->type == EFI_BOOT_SERVICES_DATA) { +			memblock_reserve(paddr, size); +			if (uefi_debug) +				pr_cont("*"); +		} + +		if (uefi_debug) +			pr_cont("\n"); +	} +} + + +static u64 __init free_one_region(u64 start, u64 end) +{ +	u64 size = end - start; + +	if (uefi_debug) +		pr_info("  EFI freeing: 0x%012llx-0x%012llx\n",	start, end - 1); + +	free_bootmem_late(start, size); +	return size; +} + +static u64 __init free_region(u64 start, u64 end) +{ +	u64 map_start, map_end, total = 0; + +	if (end <= start) +		return total; + +	map_start = (u64)memmap.phys_map; +	map_end = PAGE_ALIGN(map_start + (memmap.map_end - memmap.map)); +	map_start &= PAGE_MASK; + +	if (start < map_end && end > map_start) { +		/* region overlaps UEFI memmap */ +		if (start < map_start) +			total += free_one_region(start, map_start); + +		if (map_end < end) +			total += free_one_region(map_end, end); +	} else +		total += free_one_region(start, end); + +	return total; +} + +static void __init free_boot_services(void) +{ +	u64 total_freed = 0; +	u64 keep_end, free_start, free_end; +	efi_memory_desc_t *md; + +	/* +	 * If kernel uses larger pages than UEFI, we have to be careful +	 * not to inadvertantly free memory we want to keep if there is +	 * overlap at the kernel page size alignment. We do not want to +	 * free is_reserve_region() memory nor the UEFI memmap itself. +	 * +	 * The memory map is sorted, so we keep track of the end of +	 * any previous region we want to keep, remember any region +	 * we want to free and defer freeing it until we encounter +	 * the next region we want to keep. This way, before freeing +	 * it, we can clip it as needed to avoid freeing memory we +	 * want to keep for UEFI. +	 */ + +	keep_end = 0; +	free_start = 0; + +	for_each_efi_memory_desc(&memmap, md) { +		u64 paddr, npages, size; + +		if (is_reserve_region(md)) { +			/* +			 * We don't want to free any memory from this region. +			 */ +			if (free_start) { +				/* adjust free_end then free region */ +				if (free_end > md->phys_addr) +					free_end -= PAGE_SIZE; +				total_freed += free_region(free_start, free_end); +				free_start = 0; +			} +			keep_end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); +			continue; +		} + +		if (md->type != EFI_BOOT_SERVICES_CODE && +		    md->type != EFI_BOOT_SERVICES_DATA) { +			/* no need to free this region */ +			continue; +		} + +		/* +		 * We want to free memory from this region. 
+		 */ +		paddr = md->phys_addr; +		npages = md->num_pages; +		memrange_efi_to_native(&paddr, &npages); +		size = npages << PAGE_SHIFT; + +		if (free_start) { +			if (paddr <= free_end) +				free_end = paddr + size; +			else { +				total_freed += free_region(free_start, free_end); +				free_start = paddr; +				free_end = paddr + size; +			} +		} else { +			free_start = paddr; +			free_end = paddr + size; +		} +		if (free_start < keep_end) { +			free_start += PAGE_SIZE; +			if (free_start >= free_end) +				free_start = 0; +		} +	} +	if (free_start) +		total_freed += free_region(free_start, free_end); + +	if (total_freed) +		pr_info("Freed 0x%llx bytes of EFI boot services memory", +			total_freed); +} + +void __init efi_init(void) +{ +	struct efi_fdt_params params; + +	/* Grab UEFI information placed in FDT by stub */ +	if (!efi_get_fdt_params(¶ms, uefi_debug)) +		return; + +	efi_system_table = params.system_table; + +	memblock_reserve(params.mmap & PAGE_MASK, +			 PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK))); +	memmap.phys_map = (void *)params.mmap; +	memmap.map = early_memremap(params.mmap, params.mmap_size); +	memmap.map_end = memmap.map + params.mmap_size; +	memmap.desc_size = params.desc_size; +	memmap.desc_version = params.desc_ver; + +	if (uefi_init() < 0) +		return; + +	reserve_regions(); +} + +void __init efi_idmap_init(void) +{ +	if (!efi_enabled(EFI_BOOT)) +		return; + +	/* boot time idmap_pg_dir is incomplete, so fill in missing parts */ +	efi_setup_idmap(); +} + +static int __init remap_region(efi_memory_desc_t *md, void **new) +{ +	u64 paddr, vaddr, npages, size; + +	paddr = md->phys_addr; +	npages = md->num_pages; +	memrange_efi_to_native(&paddr, &npages); +	size = npages << PAGE_SHIFT; + +	if (is_normal_ram(md)) +		vaddr = (__force u64)ioremap_cache(paddr, size); +	else +		vaddr = (__force u64)ioremap(paddr, size); + +	if (!vaddr) { +		pr_err("Unable to remap 0x%llx pages @ %p\n", +		       npages, (void *)paddr); +		return 0; +	} + +	/* adjust for any rounding when EFI and system pagesize differs */ +	md->virt_addr = vaddr + (md->phys_addr - paddr); + +	if (uefi_debug) +		pr_info("  EFI remap 0x%012llx => %p\n", +			md->phys_addr, (void *)md->virt_addr); + +	memcpy(*new, md, memmap.desc_size); +	*new += memmap.desc_size; + +	return 1; +} + +/* + * Switch UEFI from an identity map to a kernel virtual map + */ +static int __init arm64_enter_virtual_mode(void) +{ +	efi_memory_desc_t *md; +	phys_addr_t virtmap_phys; +	void *virtmap, *virt_md; +	efi_status_t status; +	u64 mapsize; +	int count = 0; +	unsigned long flags; + +	if (!efi_enabled(EFI_BOOT)) { +		pr_info("EFI services will not be available.\n"); +		return -1; +	} + +	pr_info("Remapping and enabling EFI services.\n"); + +	/* replace early memmap mapping with permanent mapping */ +	mapsize = memmap.map_end - memmap.map; +	early_memunmap(memmap.map, mapsize); +	memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map, +						   mapsize); +	memmap.map_end = memmap.map + mapsize; + +	efi.memmap = &memmap; + +	/* Map the runtime regions */ +	virtmap = kmalloc(mapsize, GFP_KERNEL); +	if (!virtmap) { +		pr_err("Failed to allocate EFI virtual memmap\n"); +		return -1; +	} +	virtmap_phys = virt_to_phys(virtmap); +	virt_md = virtmap; + +	for_each_efi_memory_desc(&memmap, md) { +		if (!(md->attribute & EFI_MEMORY_RUNTIME)) +			continue; +		if (remap_region(md, &virt_md)) +			++count; +	} + +	efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table); +	if (efi.systab) +		
set_bit(EFI_SYSTEM_TABLES, &efi.flags); + +	local_irq_save(flags); +	cpu_switch_mm(idmap_pg_dir, &init_mm); + +	/* Call SetVirtualAddressMap with the physical address of the map */ +	runtime = efi.systab->runtime; +	efi.set_virtual_address_map = runtime->set_virtual_address_map; + +	status = efi.set_virtual_address_map(count * memmap.desc_size, +					     memmap.desc_size, +					     memmap.desc_version, +					     (efi_memory_desc_t *)virtmap_phys); +	cpu_set_reserved_ttbr0(); +	flush_tlb_all(); +	local_irq_restore(flags); + +	kfree(virtmap); + +	free_boot_services(); + +	if (status != EFI_SUCCESS) { +		pr_err("Failed to set EFI virtual address map! [%lx]\n", +			status); +		return -1; +	} + +	/* Set up runtime services function pointers */ +	runtime = efi.systab->runtime; +	efi.get_time = runtime->get_time; +	efi.set_time = runtime->set_time; +	efi.get_wakeup_time = runtime->get_wakeup_time; +	efi.set_wakeup_time = runtime->set_wakeup_time; +	efi.get_variable = runtime->get_variable; +	efi.get_next_variable = runtime->get_next_variable; +	efi.set_variable = runtime->set_variable; +	efi.query_variable_info = runtime->query_variable_info; +	efi.update_capsule = runtime->update_capsule; +	efi.query_capsule_caps = runtime->query_capsule_caps; +	efi.get_next_high_mono_count = runtime->get_next_high_mono_count; +	efi.reset_system = runtime->reset_system; + +	set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + +	return 0; +} +early_initcall(arm64_enter_virtual_mode); diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 6a27cd6dbfa..d358ccacfc0 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state)  	fpsimd_restore x0, 8  	ret  ENDPROC(fpsimd_load_state) + +#ifdef CONFIG_KERNEL_MODE_NEON + +/* + * Save the bottom n FP registers. + * + * x0 - pointer to struct fpsimd_partial_state + */ +ENTRY(fpsimd_save_partial_state) +	fpsimd_save_partial x0, 1, 8, 9 +	ret +ENDPROC(fpsimd_load_partial_state) + +/* + * Load the bottom n FP registers. + * + * x0 - pointer to struct fpsimd_partial_state + */ +ENTRY(fpsimd_load_partial_state) +	fpsimd_restore_partial x0, 8, 9 +	ret +ENDPROC(fpsimd_load_partial_state) + +#endif diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S new file mode 100644 index 00000000000..aa5f9fcbf9e --- /dev/null +++ b/arch/arm64/kernel/entry-ftrace.S @@ -0,0 +1,218 @@ +/* + * arch/arm64/kernel/entry-ftrace.S + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/ftrace.h> +#include <asm/insn.h> + +/* + * Gcc with -pg will put the following code in the beginning of each function: + *      mov x0, x30 + *      bl _mcount + *	[function's body ...] + * "bl _mcount" may be replaced to "bl ftrace_caller" or NOP if dynamic + * ftrace is enabled. + * + * Please note that x0 as an argument will not be used here because we can + * get lr(x30) of instrumented function at any time by winding up call stack + * as long as the kernel is compiled without -fomit-frame-pointer. 
+ * (or CONFIG_FRAME_POINTER, this is forced on arm64) + * + * stack layout after mcount_enter in _mcount(): + * + * current sp/fp =>  0:+-----+ + * in _mcount()        | x29 | -> instrumented function's fp + *                     +-----+ + *                     | x30 | -> _mcount()'s lr (= instrumented function's pc) + * old sp       => +16:+-----+ + * when instrumented   |     | + * function calls      | ... | + * _mcount()           |     | + *                     |     | + * instrumented => +xx:+-----+ + * function's fp       | x29 | -> parent's fp + *                     +-----+ + *                     | x30 | -> instrumented function's lr (= parent's pc) + *                     +-----+ + *                     | ... | + */ + +	.macro mcount_enter +	stp	x29, x30, [sp, #-16]! +	mov	x29, sp +	.endm + +	.macro mcount_exit +	ldp	x29, x30, [sp], #16 +	ret +	.endm + +	.macro mcount_adjust_addr rd, rn +	sub	\rd, \rn, #AARCH64_INSN_SIZE +	.endm + +	/* for instrumented function's parent */ +	.macro mcount_get_parent_fp reg +	ldr	\reg, [x29] +	ldr	\reg, [\reg] +	.endm + +	/* for instrumented function */ +	.macro mcount_get_pc0 reg +	mcount_adjust_addr	\reg, x30 +	.endm + +	.macro mcount_get_pc reg +	ldr	\reg, [x29, #8] +	mcount_adjust_addr	\reg, \reg +	.endm + +	.macro mcount_get_lr reg +	ldr	\reg, [x29] +	ldr	\reg, [\reg, #8] +	mcount_adjust_addr	\reg, \reg +	.endm + +	.macro mcount_get_lr_addr reg +	ldr	\reg, [x29] +	add	\reg, \reg, #8 +	.endm + +#ifndef CONFIG_DYNAMIC_FTRACE +/* + * void _mcount(unsigned long return_address) + * @return_address: return address to instrumented function + * + * This function makes calls, if enabled, to: + *     - tracer function to probe instrumented function's entry, + *     - ftrace_graph_caller to set up an exit hook + */ +ENTRY(_mcount) +#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST +	ldr	x0, =ftrace_trace_stop +	ldr	x0, [x0]		// if ftrace_trace_stop +	ret				//   return; +#endif +	mcount_enter + +	ldr	x0, =ftrace_trace_function +	ldr	x2, [x0] +	adr	x0, ftrace_stub +	cmp	x0, x2			// if (ftrace_trace_function +	b.eq	skip_ftrace_call	//     != ftrace_stub) { + +	mcount_get_pc	x0		//       function's pc +	mcount_get_lr	x1		//       function's lr (= parent's pc) +	blr	x2			//   (*ftrace_trace_function)(pc, lr); + +#ifndef CONFIG_FUNCTION_GRAPH_TRACER +skip_ftrace_call:			//   return; +	mcount_exit			// } +#else +	mcount_exit			//   return; +					// } +skip_ftrace_call: +	ldr	x1, =ftrace_graph_return +	ldr	x2, [x1]		//   if ((ftrace_graph_return +	cmp	x0, x2			//        != ftrace_stub) +	b.ne	ftrace_graph_caller + +	ldr	x1, =ftrace_graph_entry	//     || (ftrace_graph_entry +	ldr	x2, [x1]		//        != ftrace_graph_entry_stub)) +	ldr	x0, =ftrace_graph_entry_stub +	cmp	x0, x2 +	b.ne	ftrace_graph_caller	//     ftrace_graph_caller(); + +	mcount_exit +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +ENDPROC(_mcount) + +#else /* CONFIG_DYNAMIC_FTRACE */ +/* + * _mcount() is used to build the kernel with -pg option, but all the branch + * instructions to _mcount() are replaced to NOP initially at kernel start up, + * and later on, NOP to branch to ftrace_caller() when enabled or branch to + * NOP when disabled per-function base. 
+ */ +ENTRY(_mcount) +	ret +ENDPROC(_mcount) + +/* + * void ftrace_caller(unsigned long return_address) + * @return_address: return address to instrumented function + * + * This function is a counterpart of _mcount() in 'static' ftrace, and + * makes calls to: + *     - tracer function to probe instrumented function's entry, + *     - ftrace_graph_caller to set up an exit hook + */ +ENTRY(ftrace_caller) +	mcount_enter + +	mcount_get_pc0	x0		//     function's pc +	mcount_get_lr	x1		//     function's lr + +	.global ftrace_call +ftrace_call:				// tracer(pc, lr); +	nop				// This will be replaced with "bl xxx" +					// where xxx can be any kind of tracer. + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +	.global ftrace_graph_call +ftrace_graph_call:			// ftrace_graph_caller(); +	nop				// If enabled, this will be replaced +					// "b ftrace_graph_caller" +#endif + +	mcount_exit +ENDPROC(ftrace_caller) +#endif /* CONFIG_DYNAMIC_FTRACE */ + +ENTRY(ftrace_stub) +	ret +ENDPROC(ftrace_stub) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * void ftrace_graph_caller(void) + * + * Called from _mcount() or ftrace_caller() when function_graph tracer is + * selected. + * This function w/ prepare_ftrace_return() fakes link register's value on + * the call stack in order to intercept instrumented function's return path + * and run return_to_handler() later on its exit. + */ +ENTRY(ftrace_graph_caller) +	mcount_get_lr_addr	  x0	//     pointer to function's saved lr +	mcount_get_pc		  x1	//     function's pc +	mcount_get_parent_fp	  x2	//     parent's fp +	bl	prepare_ftrace_return	// prepare_ftrace_return(&lr, pc, fp) + +	mcount_exit +ENDPROC(ftrace_graph_caller) + +/* + * void return_to_handler(void) + * + * Run ftrace_return_to_handler() before going back to parent. + * @fp is checked against the value passed by ftrace_graph_caller() + * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled. + */ +ENTRY(return_to_handler) +	str	x0, [sp, #-16]! +	mov	x0, x29			//     parent's fp +	bl	ftrace_return_to_handler// addr = ftrace_return_to_hander(fp); +	mov	x30, x0			// restore the original return address +	ldr	x0, [sp], #16 +	ret +END(return_to_handler) +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 3881fd115eb..9ce04ba6bcb 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -60,6 +60,9 @@  	push	x0, x1  	.if	\el == 0  	mrs	x21, sp_el0 +	get_thread_info tsk			// Ensure MDSCR_EL1.SS is clear, +	ldr	x19, [tsk, #TI_FLAGS]		// since we can unmask debug +	disable_step_tsk x19, x20		// exceptions when scheduling.  	
.else  	add	x21, sp, #S_FRAME_SIZE  	.endif @@ -259,7 +262,7 @@ el1_da:  	 * Data abort handling  	 */  	mrs	x0, far_el1 -	enable_dbg_if_not_stepping x2 +	enable_dbg  	// re-enable interrupts if they were enabled in the aborted context  	tbnz	x23, #7, 1f			// PSR_I_BIT  	enable_irq @@ -275,27 +278,31 @@ el1_sp_pc:  	 * Stack or PC alignment exception handling  	 */  	mrs	x0, far_el1 -	mov	x1, x25 +	enable_dbg  	mov	x2, sp  	b	do_sp_pc_abort  el1_undef:  	/*  	 * Undefined instruction  	 */ +	enable_dbg  	mov	x0, sp  	b	do_undefinstr  el1_dbg:  	/*  	 * Debug exception handling  	 */ +	cmp	x24, #ESR_EL1_EC_BRK64		// if BRK64 +	cinc	x24, x24, eq			// set bit '0'  	tbz	x24, #0, el1_inv		// EL1 only  	mrs	x0, far_el1  	mov	x2, sp				// struct pt_regs  	bl	do_debug_exception - +	enable_dbg  	kernel_exit 1  el1_inv:  	// TODO: add support for undefined instructions in kernel mode +	enable_dbg  	mov	x0, sp  	mov	x1, #BAD_SYNC  	mrs	x2, esr_el1 @@ -305,20 +312,17 @@ ENDPROC(el1_sync)  	.align	6  el1_irq:  	kernel_entry 1 -	enable_dbg_if_not_stepping x0 +	enable_dbg  #ifdef CONFIG_TRACE_IRQFLAGS  	bl	trace_hardirqs_off  #endif -#ifdef CONFIG_PREEMPT -	get_thread_info tsk -	ldr	x24, [tsk, #TI_PREEMPT]		// get preempt count -	add	x0, x24, #1			// increment it -	str	x0, [tsk, #TI_PREEMPT] -#endif +  	irq_handler +  #ifdef CONFIG_PREEMPT -	str	x24, [tsk, #TI_PREEMPT]		// restore preempt count -	cbnz	x24, 1f				// preempt count != 0 +	get_thread_info tsk +	ldr	w24, [tsk, #TI_PREEMPT]		// get preempt count +	cbnz	w24, 1f				// preempt count != 0  	ldr	x0, [tsk, #TI_FLAGS]		// get flags  	tbz	x0, #TIF_NEED_RESCHED, 1f	// needs rescheduling?  	bl	el1_preempt @@ -333,8 +337,7 @@ ENDPROC(el1_irq)  #ifdef CONFIG_PREEMPT  el1_preempt:  	mov	x24, lr -1:	enable_dbg -	bl	preempt_schedule_irq		// irq en/disable is done inside +1:	bl	preempt_schedule_irq		// irq en/disable is done inside  	ldr	x0, [tsk, #TI_FLAGS]		// get new tasks TI_FLAGS  	tbnz	x0, #TIF_NEED_RESCHED, 1b	// needs rescheduling?  	
ret	x24 @@ -350,7 +353,7 @@ el0_sync:  	lsr	x24, x25, #ESR_EL1_EC_SHIFT	// exception class  	cmp	x24, #ESR_EL1_EC_SVC64		// SVC in 64-bit state  	b.eq	el0_svc -	adr	lr, ret_from_exception +	adr	lr, ret_to_user  	cmp	x24, #ESR_EL1_EC_DABT_EL0	// data abort in EL0  	b.eq	el0_da  	cmp	x24, #ESR_EL1_EC_IABT_EL0	// instruction abort in EL0 @@ -379,7 +382,7 @@ el0_sync_compat:  	lsr	x24, x25, #ESR_EL1_EC_SHIFT	// exception class  	cmp	x24, #ESR_EL1_EC_SVC32		// SVC in 32-bit state  	b.eq	el0_svc_compat -	adr	lr, ret_from_exception +	adr	lr, ret_to_user  	cmp	x24, #ESR_EL1_EC_DABT_EL0	// data abort in EL0  	b.eq	el0_da  	cmp	x24, #ESR_EL1_EC_IABT_EL0	// instruction abort in EL0 @@ -424,11 +427,8 @@ el0_da:  	 */  	mrs	x0, far_el1  	bic	x0, x0, #(0xff << 56) -	disable_step x1 -	isb -	enable_dbg  	// enable interrupts before calling the main handler -	enable_irq +	enable_dbg_and_irq  	mov	x1, x25  	mov	x2, sp  	b	do_mem_abort @@ -437,11 +437,8 @@ el0_ia:  	 * Instruction abort handling  	 */  	mrs	x0, far_el1 -	disable_step x1 -	isb -	enable_dbg  	// enable interrupts before calling the main handler -	enable_irq +	enable_dbg_and_irq  	orr	x1, x25, #1 << 24		// use reserved ISS bit for instruction aborts  	mov	x2, sp  	b	do_mem_abort @@ -449,6 +446,7 @@ el0_fpsimd_acc:  	/*  	 * Floating Point or Advanced SIMD access  	 */ +	enable_dbg  	mov	x0, x25  	mov	x1, sp  	b	do_fpsimd_acc @@ -456,6 +454,7 @@ el0_fpsimd_exc:  	/*  	 * Floating Point or Advanced SIMD exception  	 */ +	enable_dbg  	mov	x0, x25  	mov	x1, sp  	b	do_fpsimd_exc @@ -464,11 +463,8 @@ el0_sp_pc:  	 * Stack or PC alignment exception handling  	 */  	mrs	x0, far_el1 -	disable_step x1 -	isb -	enable_dbg  	// enable interrupts before calling the main handler -	enable_irq +	enable_dbg_and_irq  	mov	x1, x25  	mov	x2, sp  	b	do_sp_pc_abort @@ -476,9 +472,9 @@ el0_undef:  	/*  	 * Undefined instruction  	 */ -	mov	x0, sp  	// enable interrupts before calling the main handler -	enable_irq +	enable_dbg_and_irq +	mov	x0, sp  	b	do_undefinstr  el0_dbg:  	/* @@ -486,11 +482,13 @@ el0_dbg:  	 */  	tbnz	x24, #0, el0_inv		// EL0 only  	mrs	x0, far_el1 -	disable_step x1  	mov	x1, x25  	mov	x2, sp -	b	do_debug_exception +	bl	do_debug_exception +	enable_dbg +	b	ret_to_user  el0_inv: +	enable_dbg  	mov	x0, sp  	mov	x1, #BAD_SYNC  	mrs	x2, esr_el1 @@ -501,28 +499,13 @@ ENDPROC(el0_sync)  el0_irq:  	kernel_entry 0  el0_irq_naked: -	disable_step x1 -	isb  	enable_dbg  #ifdef CONFIG_TRACE_IRQFLAGS  	bl	trace_hardirqs_off  #endif -	get_thread_info tsk -#ifdef CONFIG_PREEMPT -	ldr	x24, [tsk, #TI_PREEMPT]		// get preempt count -	add	x23, x24, #1			// increment it -	str	x23, [tsk, #TI_PREEMPT] -#endif +  	irq_handler -#ifdef CONFIG_PREEMPT -	ldr	x0, [tsk, #TI_PREEMPT] -	str	x24, [tsk, #TI_PREEMPT] -	cmp	x0, x23 -	b.eq	1f -	mov	x1, #0 -	str	x1, [x1]			// BUG -1: -#endif +  #ifdef CONFIG_TRACE_IRQFLAGS  	bl	trace_hardirqs_on  #endif @@ -530,14 +513,6 @@ el0_irq_naked:  ENDPROC(el0_irq)  /* - * This is the return code to user mode for abort handlers - */ -ret_from_exception: -	get_thread_info tsk -	b	ret_to_user -ENDPROC(ret_from_exception) - -/*   * Register switch for AArch64. The callee-saved registers need to be saved   * and restored. 
On entry:   *   x0 = previous task_struct (must be preserved across the switch) @@ -576,10 +551,7 @@ ret_fast_syscall:  	ldr	x1, [tsk, #TI_FLAGS]  	and	x2, x1, #_TIF_WORK_MASK  	cbnz	x2, fast_work_pending -	tbz	x1, #TIF_SINGLESTEP, fast_exit -	disable_dbg -	enable_step x2 -fast_exit: +	enable_step_tsk x1, x2  	kernel_exit 0, ret = 1  /* @@ -589,7 +561,7 @@ fast_work_pending:  	str	x0, [sp, #S_X0]			// returned x0  work_pending:  	tbnz	x1, #TIF_NEED_RESCHED, work_resched -	/* TIF_SIGPENDING or TIF_NOTIFY_RESUME case */ +	/* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */  	ldr	x2, [sp, #S_PSTATE]  	mov	x0, sp				// 'regs'  	tst	x2, #PSR_MODE_MASK		// user mode regs? @@ -598,7 +570,6 @@ work_pending:  	bl	do_notify_resume  	b	ret_to_user  work_resched: -	enable_dbg  	bl	schedule  /* @@ -609,9 +580,7 @@ ret_to_user:  	ldr	x1, [tsk, #TI_FLAGS]  	and	x2, x1, #_TIF_WORK_MASK  	cbnz	x2, work_pending -	tbz	x1, #TIF_SINGLESTEP, no_work_pending -	disable_dbg -	enable_step x2 +	enable_step_tsk x1, x2  no_work_pending:  	kernel_exit 0, ret = 0  ENDPROC(ret_to_user) @@ -638,14 +607,11 @@ el0_svc:  	mov	sc_nr, #__NR_syscalls  el0_svc_naked:					// compat entry point  	stp	x0, scno, [sp, #S_ORIG_X0]	// save the original x0 and syscall number -	disable_step x16 -	isb -	enable_dbg -	enable_irq +	enable_dbg_and_irq -	get_thread_info tsk -	ldr	x16, [tsk, #TI_FLAGS]		// check for syscall tracing -	tbnz	x16, #TIF_SYSCALL_TRACE, __sys_trace // are we tracing syscalls? +	ldr	x16, [tsk, #TI_FLAGS]		// check for syscall hooks +	tst	x16, #_TIF_SYSCALL_WORK +	b.ne	__sys_trace  	adr	lr, ret_fast_syscall		// return address  	cmp     scno, sc_nr                     // check upper syscall limit  	b.hs	ni_sys @@ -661,9 +627,8 @@ ENDPROC(el0_svc)  	 * switches, and waiting for our parent to respond.  	 */  __sys_trace: -	mov	x1, sp -	mov	w0, #0				// trace entry -	bl	syscall_trace +	mov	x0, sp +	bl	syscall_trace_enter  	adr	lr, __sys_trace_return		// return address  	uxtw	scno, w0			// syscall number (possibly new)  	mov	x1, sp				// pointer to regs @@ -678,9 +643,8 @@ __sys_trace:  __sys_trace_return:  	str	x0, [sp]			// save returned x0 -	mov	x1, sp -	mov	w0, #1				// trace exit -	bl	syscall_trace +	mov	x0, sp +	bl	syscall_trace_exit  	b	ret_to_user  /* diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 1f2e4d5a5c0..ad8aebb1cde 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -17,6 +17,7 @@   * along with this program.  If not, see <http://www.gnu.org/licenses/>.   */ +#include <linux/cpu_pm.h>  #include <linux/kernel.h>  #include <linux/init.h>  #include <linux/sched.h> @@ -34,6 +35,60 @@  #define FPEXC_IDF	(1 << 7)  /* + * In order to reduce the number of times the FPSIMD state is needlessly saved + * and restored, we need to keep track of two things: + * (a) for each task, we need to remember which CPU was the last one to have + *     the task's FPSIMD state loaded into its FPSIMD registers; + * (b) for each CPU, we need to remember which task's userland FPSIMD state has + *     been loaded into its FPSIMD registers most recently, or whether it has + *     been used to perform kernel mode NEON in the meantime. + * + * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to + * the id of the current CPU everytime the state is loaded onto a CPU. 
For (b), + * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the + * address of the userland FPSIMD state of the task that was loaded onto the CPU + * the most recently, or NULL if kernel mode NEON has been performed after that. + * + * With this in place, we no longer have to restore the next FPSIMD state right + * when switching between tasks. Instead, we can defer this check to userland + * resume, at which time we verify whether the CPU's fpsimd_last_state and the + * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we + * can omit the FPSIMD restore. + * + * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to + * indicate whether or not the userland FPSIMD state of the current task is + * present in the registers. The flag is set unless the FPSIMD registers of this + * CPU currently contain the most recent userland FPSIMD state of the current + * task. + * + * For a certain task, the sequence may look something like this: + * - the task gets scheduled in; if both the task's fpsimd_state.cpu field + *   contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu + *   variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is + *   cleared, otherwise it is set; + * + * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's + *   userland FPSIMD state is copied from memory to the registers, the task's + *   fpsimd_state.cpu field is set to the id of the current CPU, the current + *   CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the + *   TIF_FOREIGN_FPSTATE flag is cleared; + * + * - the task executes an ordinary syscall; upon return to userland, the + *   TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is + *   restored; + * + * - the task executes a syscall which executes some NEON instructions; this is + *   preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD + *   register contents to memory, clears the fpsimd_last_state per-cpu variable + *   and sets the TIF_FOREIGN_FPSTATE flag; + * + * - the task gets preempted after kernel_neon_end() is called; as we have not + *   returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so + *   whatever is in the FPSIMD registers is not saved to memory, but discarded. + */ +static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state); + +/*   * Trapped FP/ASIMD access.   */  void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) @@ -71,46 +126,175 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)  void fpsimd_thread_switch(struct task_struct *next)  { -	/* check if not kernel threads */ -	if (current->mm) +	/* +	 * Save the current FPSIMD state to memory, but only if whatever is in +	 * the registers is in fact the most recent userland FPSIMD state of +	 * 'current'. +	 */ +	if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE))  		fpsimd_save_state(¤t->thread.fpsimd_state); -	if (next->mm) -		fpsimd_load_state(&next->thread.fpsimd_state); + +	if (next->mm) { +		/* +		 * If we are switching to a task whose most recent userland +		 * FPSIMD state is already in the registers of *this* cpu, +		 * we can skip loading the state from memory. Otherwise, set +		 * the TIF_FOREIGN_FPSTATE flag so the state will be loaded +		 * upon the next return to userland. 
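
Aside: with the deferred-restore scheme described above, any code that touches current's FPSIMD state in memory has to go through the new helpers so the register and memory copies stay coherent. A hedged sketch of the expected pattern follows; the *_sketch function names are illustrative, while the real users in this series are the signal and ptrace paths.

	#include <linux/sched.h>
	#include <asm/fpsimd.h>

	/* Reading current's FPSIMD state, e.g. to build a signal frame. */
	static void copy_fpsimd_out_sketch(struct fpsimd_state *dst)
	{
		fpsimd_preserve_current_state();	/* flush live registers to memory */
		*dst = current->thread.fpsimd_state;	/* memory copy is now up to date */
	}

	/* Installing a new state, e.g. on sigreturn. */
	static void copy_fpsimd_in_sketch(const struct fpsimd_state *src)
	{
		current->thread.fpsimd_state = *src;
		fpsimd_update_current_state(&current->thread.fpsimd_state);
	}

Task switch itself no longer loads anything; it only flips TIF_FOREIGN_FPSTATE, and the restore cost is paid once on the next return to user space.
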
+		 */ +		struct fpsimd_state *st = &next->thread.fpsimd_state; + +		if (__this_cpu_read(fpsimd_last_state) == st +		    && st->cpu == smp_processor_id()) +			clear_ti_thread_flag(task_thread_info(next), +					     TIF_FOREIGN_FPSTATE); +		else +			set_ti_thread_flag(task_thread_info(next), +					   TIF_FOREIGN_FPSTATE); +	}  }  void fpsimd_flush_thread(void)  {  	memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); -	fpsimd_load_state(¤t->thread.fpsimd_state); +	set_thread_flag(TIF_FOREIGN_FPSTATE);  } -#ifdef CONFIG_KERNEL_MODE_NEON +/* + * Save the userland FPSIMD state of 'current' to memory, but only if the state + * currently held in the registers does in fact belong to 'current' + */ +void fpsimd_preserve_current_state(void) +{ +	preempt_disable(); +	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) +		fpsimd_save_state(¤t->thread.fpsimd_state); +	preempt_enable(); +}  /* - * Kernel-side NEON support functions + * Load the userland FPSIMD state of 'current' from memory, but only if the + * FPSIMD state already held in the registers is /not/ the most recent FPSIMD + * state of 'current'   */ -void kernel_neon_begin(void) +void fpsimd_restore_current_state(void)  { -	/* Avoid using the NEON in interrupt context */ -	BUG_ON(in_interrupt());  	preempt_disable(); +	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { +		struct fpsimd_state *st = ¤t->thread.fpsimd_state; -	if (current->mm) -		fpsimd_save_state(¤t->thread.fpsimd_state); +		fpsimd_load_state(st); +		this_cpu_write(fpsimd_last_state, st); +		st->cpu = smp_processor_id(); +	} +	preempt_enable();  } -EXPORT_SYMBOL(kernel_neon_begin); -void kernel_neon_end(void) +/* + * Load an updated userland FPSIMD state for 'current' from memory and set the + * flag that indicates that the FPSIMD register contents are the most recent + * FPSIMD state of 'current' + */ +void fpsimd_update_current_state(struct fpsimd_state *state)  { -	if (current->mm) -		fpsimd_load_state(¤t->thread.fpsimd_state); +	preempt_disable(); +	fpsimd_load_state(state); +	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { +		struct fpsimd_state *st = ¤t->thread.fpsimd_state; +		this_cpu_write(fpsimd_last_state, st); +		st->cpu = smp_processor_id(); +	}  	preempt_enable();  } + +/* + * Invalidate live CPU copies of task t's FPSIMD state + */ +void fpsimd_flush_task_state(struct task_struct *t) +{ +	t->thread.fpsimd_state.cpu = NR_CPUS; +} + +#ifdef CONFIG_KERNEL_MODE_NEON + +static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate); +static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate); + +/* + * Kernel-side NEON support functions + */ +void kernel_neon_begin_partial(u32 num_regs) +{ +	if (in_interrupt()) { +		struct fpsimd_partial_state *s = this_cpu_ptr( +			in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); + +		BUG_ON(num_regs > 32); +		fpsimd_save_partial_state(s, roundup(num_regs, 2)); +	} else { +		/* +		 * Save the userland FPSIMD state if we have one and if we +		 * haven't done so already. Clear fpsimd_last_state to indicate +		 * that there is no longer userland FPSIMD state in the +		 * registers. +		 */ +		preempt_disable(); +		if (current->mm && +		    !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) +			fpsimd_save_state(¤t->thread.fpsimd_state); +		this_cpu_write(fpsimd_last_state, NULL); +	} +} +EXPORT_SYMBOL(kernel_neon_begin_partial); + +void kernel_neon_end(void) +{ +	if (in_interrupt()) { +		struct fpsimd_partial_state *s = this_cpu_ptr( +			in_irq() ? 
&hardirq_fpsimdstate : &softirq_fpsimdstate); +		fpsimd_load_partial_state(s); +	} else { +		preempt_enable(); +	} +}  EXPORT_SYMBOL(kernel_neon_end);  #endif /* CONFIG_KERNEL_MODE_NEON */ +#ifdef CONFIG_CPU_PM +static int fpsimd_cpu_pm_notifier(struct notifier_block *self, +				  unsigned long cmd, void *v) +{ +	switch (cmd) { +	case CPU_PM_ENTER: +		if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) +			fpsimd_save_state(¤t->thread.fpsimd_state); +		break; +	case CPU_PM_EXIT: +		if (current->mm) +			set_thread_flag(TIF_FOREIGN_FPSTATE); +		break; +	case CPU_PM_ENTER_FAILED: +	default: +		return NOTIFY_DONE; +	} +	return NOTIFY_OK; +} + +static struct notifier_block fpsimd_cpu_pm_notifier_block = { +	.notifier_call = fpsimd_cpu_pm_notifier, +}; + +static void fpsimd_pm_init(void) +{ +	cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block); +} + +#else +static inline void fpsimd_pm_init(void) { } +#endif /* CONFIG_CPU_PM */ +  /*   * FP/SIMD support code initialisation.   */ @@ -129,6 +313,8 @@ static int __init fpsimd_init(void)  	else  		elf_hwcap |= HWCAP_ASIMD; +	fpsimd_pm_init(); +  	return 0;  }  late_initcall(fpsimd_init); diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c new file mode 100644 index 00000000000..7924d73b647 --- /dev/null +++ b/arch/arm64/kernel/ftrace.c @@ -0,0 +1,176 @@ +/* + * arch/arm64/kernel/ftrace.c + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ftrace.h> +#include <linux/swab.h> +#include <linux/uaccess.h> + +#include <asm/cacheflush.h> +#include <asm/ftrace.h> +#include <asm/insn.h> + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Replace a single instruction, which may be a branch or NOP. + * If @validate == true, a replaced instruction is checked against 'old'. + */ +static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, +			      bool validate) +{ +	u32 replaced; + +	/* +	 * Note: +	 * Due to modules and __init, code can disappear and change, +	 * we need to protect against faulting as well as code changing. +	 * We do this by aarch64_insn_*() which use the probe_kernel_*(). +	 * +	 * No lock is held here because all the modifications are run +	 * through stop_machine(). 
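+	 *
+	 * The callers below follow a read-validate-write pattern; for
+	 * example (illustrative), enabling tracing of a call site expects
+	 * to find the NOP planted there earlier:
+	 *
+	 *	old = aarch64_insn_gen_nop();
+	 *	new = aarch64_insn_gen_branch_imm(pc, addr, true);
+	 *	ret = ftrace_modify_code(pc, old, new, true);
+	 *
+	 * If the site no longer contains 'old', we return -EINVAL instead
+	 * of silently overwriting whatever is there now.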
+	 */ +	if (validate) { +		if (aarch64_insn_read((void *)pc, &replaced)) +			return -EFAULT; + +		if (replaced != old) +			return -EINVAL; +	} +	if (aarch64_insn_patch_text_nosync((void *)pc, new)) +		return -EPERM; + +	return 0; +} + +/* + * Replace tracer function in ftrace_caller() + */ +int ftrace_update_ftrace_func(ftrace_func_t func) +{ +	unsigned long pc; +	u32 new; + +	pc = (unsigned long)&ftrace_call; +	new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func, true); + +	return ftrace_modify_code(pc, 0, new, false); +} + +/* + * Turn on the call to ftrace_caller() in instrumented function + */ +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ +	unsigned long pc = rec->ip; +	u32 old, new; + +	old = aarch64_insn_gen_nop(); +	new = aarch64_insn_gen_branch_imm(pc, addr, true); + +	return ftrace_modify_code(pc, old, new, true); +} + +/* + * Turn off the call to ftrace_caller() in instrumented function + */ +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, +		    unsigned long addr) +{ +	unsigned long pc = rec->ip; +	u32 old, new; + +	old = aarch64_insn_gen_branch_imm(pc, addr, true); +	new = aarch64_insn_gen_nop(); + +	return ftrace_modify_code(pc, old, new, true); +} + +int __init ftrace_dyn_arch_init(void) +{ +	return 0; +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * function_graph tracer expects ftrace_return_to_handler() to be called + * on the way back to parent. For this purpose, this function is called + * in _mcount() or ftrace_caller() to replace return address (*parent) on + * the call stack to return_to_handler. + * + * Note that @frame_pointer is used only for sanity check later. + */ +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, +			   unsigned long frame_pointer) +{ +	unsigned long return_hooker = (unsigned long)&return_to_handler; +	unsigned long old; +	struct ftrace_graph_ent trace; +	int err; + +	if (unlikely(atomic_read(¤t->tracing_graph_pause))) +		return; + +	/* +	 * Note: +	 * No protection against faulting at *parent, which may be seen +	 * on other archs. It's unlikely on AArch64. +	 */ +	old = *parent; +	*parent = return_hooker; + +	trace.func = self_addr; +	trace.depth = current->curr_ret_stack + 1; + +	/* Only trace if the calling function expects to */ +	if (!ftrace_graph_entry(&trace)) { +		*parent = old; +		return; +	} + +	err = ftrace_push_return_trace(old, self_addr, &trace.depth, +				       frame_pointer); +	if (err == -EBUSY) { +		*parent = old; +		return; +	} +} + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Turn on/off the call to ftrace_graph_caller() in ftrace_caller() + * depending on @enable. 
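+ *
+ * This is the same NOP <-> branch toggle as above, applied to the
+ * ftrace_graph_call site inside ftrace_caller(): enabling patches in
+ * roughly "b ftrace_graph_caller", disabling restores the NOP, and
+ * both directions validate the instruction they expect to replace.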
+ */ +static int ftrace_modify_graph_caller(bool enable) +{ +	unsigned long pc = (unsigned long)&ftrace_graph_call; +	u32 branch, nop; + +	branch = aarch64_insn_gen_branch_imm(pc, +			(unsigned long)ftrace_graph_caller, false); +	nop = aarch64_insn_gen_nop(); + +	if (enable) +		return ftrace_modify_code(pc, nop, branch, true); +	else +		return ftrace_modify_code(pc, branch, nop, true); +} + +int ftrace_enable_ftrace_graph_caller(void) +{ +	return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ +	return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 7090c126797..a2c1195abb7 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -26,6 +26,7 @@  #include <asm/assembler.h>  #include <asm/ptrace.h>  #include <asm/asm-offsets.h> +#include <asm/cache.h>  #include <asm/cputype.h>  #include <asm/memory.h>  #include <asm/thread_info.h> @@ -107,8 +108,18 @@  	/*  	 * DO NOT MODIFY. Image header expected by Linux boot-loaders.  	 */ +#ifdef CONFIG_EFI +efi_head: +	/* +	 * This add instruction has no meaningful effect except that +	 * its opcode forms the magic "MZ" signature required by UEFI. +	 */ +	add	x13, x18, #0x16 +	b	stext +#else  	b	stext				// branch to kernel start, magic  	.long	0				// reserved +#endif  	.quad	TEXT_OFFSET			// Image load offset from start of RAM  	.quad	0				// reserved  	.quad	0				// reserved @@ -119,12 +130,115 @@  	.byte	0x52  	.byte	0x4d  	.byte	0x64 +#ifdef CONFIG_EFI +	.long	pe_header - efi_head		// Offset to the PE header. +#else  	.word	0				// reserved +#endif + +#ifdef CONFIG_EFI +	.align 3 +pe_header: +	.ascii	"PE" +	.short 	0 +coff_header: +	.short	0xaa64				// AArch64 +	.short	2				// nr_sections +	.long	0 				// TimeDateStamp +	.long	0				// PointerToSymbolTable +	.long	1				// NumberOfSymbols +	.short	section_table - optional_header	// SizeOfOptionalHeader +	.short	0x206				// Characteristics. 
+						// IMAGE_FILE_DEBUG_STRIPPED | +						// IMAGE_FILE_EXECUTABLE_IMAGE | +						// IMAGE_FILE_LINE_NUMS_STRIPPED +optional_header: +	.short	0x20b				// PE32+ format +	.byte	0x02				// MajorLinkerVersion +	.byte	0x14				// MinorLinkerVersion +	.long	_edata - stext			// SizeOfCode +	.long	0				// SizeOfInitializedData +	.long	0				// SizeOfUninitializedData +	.long	efi_stub_entry - efi_head	// AddressOfEntryPoint +	.long	stext - efi_head		// BaseOfCode + +extra_header_fields: +	.quad	0				// ImageBase +	.long	0x20				// SectionAlignment +	.long	0x8				// FileAlignment +	.short	0				// MajorOperatingSystemVersion +	.short	0				// MinorOperatingSystemVersion +	.short	0				// MajorImageVersion +	.short	0				// MinorImageVersion +	.short	0				// MajorSubsystemVersion +	.short	0				// MinorSubsystemVersion +	.long	0				// Win32VersionValue + +	.long	_edata - efi_head		// SizeOfImage + +	// Everything before the kernel image is considered part of the header +	.long	stext - efi_head		// SizeOfHeaders +	.long	0				// CheckSum +	.short	0xa				// Subsystem (EFI application) +	.short	0				// DllCharacteristics +	.quad	0				// SizeOfStackReserve +	.quad	0				// SizeOfStackCommit +	.quad	0				// SizeOfHeapReserve +	.quad	0				// SizeOfHeapCommit +	.long	0				// LoaderFlags +	.long	0x6				// NumberOfRvaAndSizes + +	.quad	0				// ExportTable +	.quad	0				// ImportTable +	.quad	0				// ResourceTable +	.quad	0				// ExceptionTable +	.quad	0				// CertificationTable +	.quad	0				// BaseRelocationTable + +	// Section table +section_table: + +	/* +	 * The EFI application loader requires a relocation section +	 * because EFI applications must be relocatable.  This is a +	 * dummy section as far as we are concerned. +	 */ +	.ascii	".reloc" +	.byte	0 +	.byte	0			// end of 0 padding of section name +	.long	0 +	.long	0 +	.long	0			// SizeOfRawData +	.long	0			// PointerToRawData +	.long	0			// PointerToRelocations +	.long	0			// PointerToLineNumbers +	.short	0			// NumberOfRelocations +	.short	0			// NumberOfLineNumbers +	.long	0x42100040		// Characteristics (section flags) + + +	.ascii	".text" +	.byte	0 +	.byte	0 +	.byte	0        		// end of 0 padding of section name +	.long	_edata - stext		// VirtualSize +	.long	stext - efi_head	// VirtualAddress +	.long	_edata - stext		// SizeOfRawData +	.long	stext - efi_head	// PointerToRawData + +	.long	0		// PointerToRelocations (0 for executables) +	.long	0		// PointerToLineNumbers (0 for executables) +	.short	0		// NumberOfRelocations  (0 for executables) +	.short	0		// NumberOfLineNumbers  (0 for executables) +	.long	0xe0500020	// Characteristics (section flags) +	.align 5 +#endif  ENTRY(stext)  	mov	x21, x0				// x21=FDT +	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode  	bl	__calc_phys_offset		// x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET -	bl	el2_setup			// Drop to EL1 +	bl	set_cpu_boot_mode_flag  	mrs	x22, midr_el1			// x22=cpuid  	mov	x0, x22  	bl	lookup_processor_type @@ -150,21 +264,29 @@ ENDPROC(stext)  /*   * If we're fortunate enough to boot at EL2, ensure that the world is   * sane before dropping to EL1. + * + * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if + * booted in EL1 or EL2 respectively.   */  ENTRY(el2_setup)  	mrs	x0, CurrentEL -	cmp	x0, #PSR_MODE_EL2t -	ccmp	x0, #PSR_MODE_EL2h, #0x4, ne -	ldr	x0, =__boot_cpu_mode		// Compute __boot_cpu_mode -	add	x0, x0, x28 -	b.eq	1f -	str	wzr, [x0]			// Remember we don't have EL2... 
+	cmp	x0, #CurrentEL_EL2 +	b.ne	1f +	mrs	x0, sctlr_el2 +CPU_BE(	orr	x0, x0, #(1 << 25)	)	// Set the EE bit for EL2 +CPU_LE(	bic	x0, x0, #(1 << 25)	)	// Clear the EE bit for EL2 +	msr	sctlr_el2, x0 +	b	2f +1:	mrs	x0, sctlr_el1 +CPU_BE(	orr	x0, x0, #(3 << 24)	)	// Set the EE and E0E bits for EL1 +CPU_LE(	bic	x0, x0, #(3 << 24)	)	// Clear the EE and E0E bits for EL1 +	msr	sctlr_el1, x0 +	mov	w20, #BOOT_CPU_MODE_EL1		// This cpu booted in EL1 +	isb  	ret  	/* Hyp configuration. */ -1:	ldr	w1, =BOOT_CPU_MODE_EL2 -	str	w1, [x0, #4]			// This CPU has EL2 -	mov	x0, #(1 << 31)			// 64-bit EL1 +2:	mov	x0, #(1 << 31)			// 64-bit EL1  	msr	hcr_el2, x0  	/* Generic timers. */ @@ -181,7 +303,8 @@ ENTRY(el2_setup)  	/* sctlr_el1 */  	mov	x0, #0x0800			// Set/clear RES{1,0} bits -	movk	x0, #0x30d0, lsl #16 +CPU_BE(	movk	x0, #0x33d0, lsl #16	)	// Set EE and E0E on BE systems +CPU_LE(	movk	x0, #0x30d0, lsl #16	)	// Clear EE and E0E on LE systems  	msr	sctlr_el1, x0  	/* Coprocessor traps. */ @@ -204,18 +327,36 @@ ENTRY(el2_setup)  		      PSR_MODE_EL1h)  	msr	spsr_el2, x0  	msr	elr_el2, lr +	mov	w20, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2  	eret  ENDPROC(el2_setup)  /* + * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed + * in x20. See arch/arm64/include/asm/virt.h for more info. + */ +ENTRY(set_cpu_boot_mode_flag) +	ldr	x1, =__boot_cpu_mode		// Compute __boot_cpu_mode +	add	x1, x1, x28 +	cmp	w20, #BOOT_CPU_MODE_EL2 +	b.ne	1f +	add	x1, x1, #4 +1:	str	w20, [x1]			// This CPU has booted in EL1 +	dmb	sy +	dc	ivac, x1			// Invalidate potentially stale cache line +	ret +ENDPROC(set_cpu_boot_mode_flag) + +/*   * We need to find out the CPU boot mode long after boot, so we need to   * store it in a writable variable.   *   * This is not in .bss, because we set it sufficiently early that the boot-time   * zeroing of .bss would clobber it.   */ -	.pushsection	.data +	.pushsection	.data..cacheline_aligned  ENTRY(__boot_cpu_mode) +	.align	L1_CACHE_SHIFT  	.long	BOOT_CPU_MODE_EL2  	.long	0  	.popsection @@ -225,7 +366,6 @@ ENTRY(__boot_cpu_mode)  	.quad	PAGE_OFFSET  #ifdef CONFIG_SMP -	.pushsection    .smp.pen.text, "ax"  	.align	3  1:	.quad	.  	.quad	secondary_holding_pen_release @@ -235,8 +375,9 @@ ENTRY(__boot_cpu_mode)  	 * cores are held until we're ready for them to initialise.  	 */  ENTRY(secondary_holding_pen) -	bl	__calc_phys_offset		// x24=phys offset -	bl	el2_setup			// Drop to EL1 +	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode +	bl	__calc_phys_offset		// x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET +	bl	set_cpu_boot_mode_flag  	mrs	x0, mpidr_el1  	ldr     x1, =MPIDR_HWID_BITMASK  	and	x0, x0, x1 @@ -250,7 +391,17 @@ pen:	ldr	x4, [x3]  	wfe  	b	pen  ENDPROC(secondary_holding_pen) -	.popsection + +	/* +	 * Secondary entry point that jumps straight into the kernel. Only to +	 * be used where CPUs are brought online dynamically by the kernel. 
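+	 *
+	 * For example (not a requirement of this code), a cpu_ops enable
+	 * method such as PSCI would release a CPU into this path with
+	 * something like:
+	 *
+	 *	psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry));
+	 *
+	 * so execution starts here with the MMU off, in EL1 or EL2.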
+	 */ +ENTRY(secondary_entry) +	bl	el2_setup			// Drop to EL1 +	bl	__calc_phys_offset		// x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET +	bl	set_cpu_boot_mode_flag +	b	secondary_startup +ENDPROC(secondary_entry)  ENTRY(secondary_startup)  	/* @@ -348,26 +499,18 @@ ENDPROC(__calc_phys_offset)   * Preserves:	tbl, flags   * Corrupts:	phys, start, end, pstate   */ -	.macro	create_block_map, tbl, flags, phys, start, end, idmap=0 +	.macro	create_block_map, tbl, flags, phys, start, end  	lsr	\phys, \phys, #BLOCK_SHIFT -	.if	\idmap -	and	\start, \phys, #PTRS_PER_PTE - 1	// table index -	.else  	lsr	\start, \start, #BLOCK_SHIFT  	and	\start, \start, #PTRS_PER_PTE - 1	// table index -	.endif  	orr	\phys, \flags, \phys, lsl #BLOCK_SHIFT	// table entry -	.ifnc	\start,\end  	lsr	\end, \end, #BLOCK_SHIFT  	and	\end, \end, #PTRS_PER_PTE - 1		// table end index -	.endif  9999:	str	\phys, [\tbl, \start, lsl #3]		// store the entry -	.ifnc	\start,\end  	add	\start, \start, #1			// next entry  	add	\phys, \phys, #BLOCK_SIZE		// next block  	cmp	\start, \end  	b.ls	9999b -	.endif  	.endm  /* @@ -376,10 +519,19 @@ ENDPROC(__calc_phys_offset)   *   - identity mapping to enable the MMU (low address, TTBR0)   *   - first few MB of the kernel linear mapping to jump to once the MMU has   *     been enabled, including the FDT blob (TTBR1) - *   - UART mapping if CONFIG_EARLY_PRINTK is enabled (TTBR1) + *   - pgd entry for fixed mappings (TTBR1)   */  __create_page_tables:  	pgtbl	x25, x26, x24			// idmap_pg_dir and swapper_pg_dir addresses +	mov	x27, lr + +	/* +	 * Invalidate the idmap and swapper page tables to avoid potential +	 * dirty cache lines being evicted. +	 */ +	mov	x0, x25 +	add	x1, x26, #SWAPPER_DIR_SIZE +	bl	__inval_cache_range  	/*  	 * Clear the idmap and swapper page tables. @@ -399,9 +551,13 @@ __create_page_tables:  	 * Create the identity mapping.  	 */  	add	x0, x25, #PAGE_SIZE		// section table address -	adr	x3, __turn_mmu_on		// virtual/physical address +	ldr	x3, =KERNEL_START +	add	x3, x3, x28			// __pa(KERNEL_START)  	create_pgd_entry x25, x0, x3, x5, x6 -	create_block_map x0, x7, x3, x5, x5, idmap=1 +	ldr	x6, =KERNEL_END +	mov	x5, x3				// __pa(KERNEL_START) +	add	x6, x6, x28			// __pa(KERNEL_END) +	create_block_map x0, x7, x3, x5, x6  	/*  	 * Map the kernel image (starting with PHYS_OFFSET). @@ -409,7 +565,7 @@ __create_page_tables:  	add	x0, x26, #PAGE_SIZE		// section table address  	mov	x5, #PAGE_OFFSET  	create_pgd_entry x26, x0, x5, x3, x6 -	ldr	x6, =KERNEL_END - 1 +	ldr	x6, =KERNEL_END  	mov	x3, x24				// phys offset  	create_block_map x0, x7, x3, x5, x6 @@ -429,15 +585,23 @@ __create_page_tables:  	sub	x6, x6, #1			// inclusive range  	create_block_map x0, x7, x3, x5, x6  1: -#ifdef CONFIG_EARLY_PRINTK  	/* -	 * Create the pgd entry for the UART mapping. The full mapping is done -	 * later based earlyprintk kernel parameter. +	 * Create the pgd entry for the fixed mappings.  	 */ -	ldr	x5, =EARLYCON_IOBASE		// UART virtual address +	ldr	x5, =FIXADDR_TOP		// Fixed mapping virtual address  	add	x0, x26, #2 * PAGE_SIZE		// section table address  	create_pgd_entry x26, x0, x5, x6, x7 -#endif + +	/* +	 * Since the page tables have been populated with non-cacheable +	 * accesses (MMU disabled), invalidate the idmap and swapper page +	 * tables again to remove any speculatively loaded cache lines. 
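+	 *
+	 * The range is the same one invalidated before the tables were
+	 * written, roughly [idmap_pg_dir, swapper_pg_dir + SWAPPER_DIR_SIZE):
+	 * any line allocated by speculation while the tables were written
+	 * with the MMU off could otherwise shadow the new entries once
+	 * caching is enabled.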
+	 */ +	mov	x0, x25 +	add	x1, x26, #SWAPPER_DIR_SIZE +	bl	__inval_cache_range + +	mov	lr, x27  	ret  ENDPROC(__create_page_tables)  	.ltorg @@ -446,8 +610,6 @@ ENDPROC(__create_page_tables)  	.type	__switch_data, %object  __switch_data:  	.quad	__mmap_switched -	.quad	__data_loc			// x4 -	.quad	_data				// x5  	.quad	__bss_start			// x6  	.quad	_end				// x7  	.quad	processor_id			// x4 @@ -462,15 +624,7 @@ __switch_data:  __mmap_switched:  	adr	x3, __switch_data + 8 -	ldp	x4, x5, [x3], #16  	ldp	x6, x7, [x3], #16 -	cmp	x4, x5				// Copy data segment if needed -1:	ccmp	x5, x6, #4, ne -	b.eq	2f -	ldr	x16, [x4], #8 -	str	x16, [x5], #8 -	b	1b -2:  1:	cmp	x6, x7  	b.hs	2f  	str	xzr, [x6], #8			// Clear BSS diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 329218ca9ff..df1cf15377b 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -20,13 +20,14 @@  #define pr_fmt(fmt) "hw-breakpoint: " fmt +#include <linux/compat.h> +#include <linux/cpu_pm.h>  #include <linux/errno.h>  #include <linux/hw_breakpoint.h>  #include <linux/perf_event.h>  #include <linux/ptrace.h>  #include <linux/smp.h> -#include <asm/compat.h>  #include <asm/current.h>  #include <asm/debug-monitors.h>  #include <asm/hw_breakpoint.h> @@ -169,94 +170,134 @@ static enum debug_el debug_exception_level(int privilege)  	}  } -/* - * Install a perf counter breakpoint. +enum hw_breakpoint_ops { +	HW_BREAKPOINT_INSTALL, +	HW_BREAKPOINT_UNINSTALL, +	HW_BREAKPOINT_RESTORE +}; + +/** + * hw_breakpoint_slot_setup - Find and setup a perf slot according to + *			      operations + * + * @slots: pointer to array of slots + * @max_slots: max number of slots + * @bp: perf_event to setup + * @ops: operation to be carried out on the slot + * + * Return: + *	slot index on success + *	-ENOSPC if no slot is available/matches + *	-EINVAL on wrong operations parameter   */ -int arch_install_hw_breakpoint(struct perf_event *bp) +static int hw_breakpoint_slot_setup(struct perf_event **slots, int max_slots, +				    struct perf_event *bp, +				    enum hw_breakpoint_ops ops) +{ +	int i; +	struct perf_event **slot; + +	for (i = 0; i < max_slots; ++i) { +		slot = &slots[i]; +		switch (ops) { +		case HW_BREAKPOINT_INSTALL: +			if (!*slot) { +				*slot = bp; +				return i; +			} +			break; +		case HW_BREAKPOINT_UNINSTALL: +			if (*slot == bp) { +				*slot = NULL; +				return i; +			} +			break; +		case HW_BREAKPOINT_RESTORE: +			if (*slot == bp) +				return i; +			break; +		default: +			pr_warn_once("Unhandled hw breakpoint ops %d\n", ops); +			return -EINVAL; +		} +	} +	return -ENOSPC; +} + +static int hw_breakpoint_control(struct perf_event *bp, +				 enum hw_breakpoint_ops ops)  {  	struct arch_hw_breakpoint *info = counter_arch_bp(bp); -	struct perf_event **slot, **slots; +	struct perf_event **slots;  	struct debug_info *debug_info = ¤t->thread.debug;  	int i, max_slots, ctrl_reg, val_reg, reg_enable; +	enum debug_el dbg_el = debug_exception_level(info->ctrl.privilege);  	u32 ctrl;  	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {  		/* Breakpoint */  		ctrl_reg = AARCH64_DBG_REG_BCR;  		val_reg = AARCH64_DBG_REG_BVR; -		slots = __get_cpu_var(bp_on_reg); +		slots = this_cpu_ptr(bp_on_reg);  		max_slots = core_num_brps;  		reg_enable = !debug_info->bps_disabled;  	} else {  		/* Watchpoint */  		ctrl_reg = AARCH64_DBG_REG_WCR;  		val_reg = AARCH64_DBG_REG_WVR; -		slots = __get_cpu_var(wp_on_reg); +		slots = this_cpu_ptr(wp_on_reg);  		max_slots = core_num_wrps;  		reg_enable = 
!debug_info->wps_disabled;  	} -	for (i = 0; i < max_slots; ++i) { -		slot = &slots[i]; - -		if (!*slot) { -			*slot = bp; -			break; -		} -	} - -	if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) -		return -ENOSPC; +	i = hw_breakpoint_slot_setup(slots, max_slots, bp, ops); -	/* Ensure debug monitors are enabled at the correct exception level.  */ -	enable_debug_monitors(debug_exception_level(info->ctrl.privilege)); +	if (WARN_ONCE(i < 0, "Can't find any breakpoint slot")) +		return i; -	/* Setup the address register. */ -	write_wb_reg(val_reg, i, info->address); +	switch (ops) { +	case HW_BREAKPOINT_INSTALL: +		/* +		 * Ensure debug monitors are enabled at the correct exception +		 * level. +		 */ +		enable_debug_monitors(dbg_el); +		/* Fall through */ +	case HW_BREAKPOINT_RESTORE: +		/* Setup the address register. */ +		write_wb_reg(val_reg, i, info->address); + +		/* Setup the control register. */ +		ctrl = encode_ctrl_reg(info->ctrl); +		write_wb_reg(ctrl_reg, i, +			     reg_enable ? ctrl | 0x1 : ctrl & ~0x1); +		break; +	case HW_BREAKPOINT_UNINSTALL: +		/* Reset the control register. */ +		write_wb_reg(ctrl_reg, i, 0); -	/* Setup the control register. */ -	ctrl = encode_ctrl_reg(info->ctrl); -	write_wb_reg(ctrl_reg, i, reg_enable ? ctrl | 0x1 : ctrl & ~0x1); +		/* +		 * Release the debug monitors for the correct exception +		 * level. +		 */ +		disable_debug_monitors(dbg_el); +		break; +	}  	return 0;  } -void arch_uninstall_hw_breakpoint(struct perf_event *bp) +/* + * Install a perf counter breakpoint. + */ +int arch_install_hw_breakpoint(struct perf_event *bp)  { -	struct arch_hw_breakpoint *info = counter_arch_bp(bp); -	struct perf_event **slot, **slots; -	int i, max_slots, base; - -	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { -		/* Breakpoint */ -		base = AARCH64_DBG_REG_BCR; -		slots = __get_cpu_var(bp_on_reg); -		max_slots = core_num_brps; -	} else { -		/* Watchpoint */ -		base = AARCH64_DBG_REG_WCR; -		slots = __get_cpu_var(wp_on_reg); -		max_slots = core_num_wrps; -	} - -	/* Remove the breakpoint. */ -	for (i = 0; i < max_slots; ++i) { -		slot = &slots[i]; - -		if (*slot == bp) { -			*slot = NULL; -			break; -		} -	} - -	if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) -		return; - -	/* Reset the control register. */ -	write_wb_reg(base, i, 0); +	return hw_breakpoint_control(bp, HW_BREAKPOINT_INSTALL); +} -	/* Release the debug monitors for the correct exception level.  
*/ -	disable_debug_monitors(debug_exception_level(info->ctrl.privilege)); +void arch_uninstall_hw_breakpoint(struct perf_event *bp) +{ +	hw_breakpoint_control(bp, HW_BREAKPOINT_UNINSTALL);  }  static int get_hbp_len(u8 hbp_len) @@ -505,11 +546,11 @@ static void toggle_bp_registers(int reg, enum debug_el el, int enable)  	switch (reg) {  	case AARCH64_DBG_REG_BCR: -		slots = __get_cpu_var(bp_on_reg); +		slots = this_cpu_ptr(bp_on_reg);  		max_slots = core_num_brps;  		break;  	case AARCH64_DBG_REG_WCR: -		slots = __get_cpu_var(wp_on_reg); +		slots = this_cpu_ptr(wp_on_reg);  		max_slots = core_num_wrps;  		break;  	default: @@ -546,7 +587,7 @@ static int breakpoint_handler(unsigned long unused, unsigned int esr,  	struct debug_info *debug_info;  	struct arch_hw_breakpoint_ctrl ctrl; -	slots = (struct perf_event **)__get_cpu_var(bp_on_reg); +	slots = this_cpu_ptr(bp_on_reg);  	addr = instruction_pointer(regs);  	debug_info = ¤t->thread.debug; @@ -596,7 +637,7 @@ unlock:  			user_enable_single_step(current);  	} else {  		toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL1, 0); -		kernel_step = &__get_cpu_var(stepping_kernel_bp); +		kernel_step = this_cpu_ptr(&stepping_kernel_bp);  		if (*kernel_step != ARM_KERNEL_STEP_NONE)  			return 0; @@ -623,7 +664,7 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,  	struct arch_hw_breakpoint *info;  	struct arch_hw_breakpoint_ctrl ctrl; -	slots = (struct perf_event **)__get_cpu_var(wp_on_reg); +	slots = this_cpu_ptr(wp_on_reg);  	debug_info = ¤t->thread.debug;  	for (i = 0; i < core_num_wrps; ++i) { @@ -698,7 +739,7 @@ unlock:  			user_enable_single_step(current);  	} else {  		toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL1, 0); -		kernel_step = &__get_cpu_var(stepping_kernel_bp); +		kernel_step = this_cpu_ptr(&stepping_kernel_bp);  		if (*kernel_step != ARM_KERNEL_STEP_NONE)  			return 0; @@ -722,7 +763,7 @@ int reinstall_suspended_bps(struct pt_regs *regs)  	struct debug_info *debug_info = ¤t->thread.debug;  	int handled_exception = 0, *kernel_step; -	kernel_step = &__get_cpu_var(stepping_kernel_bp); +	kernel_step = this_cpu_ptr(&stepping_kernel_bp);  	/*  	 * Called from single-step exception handler. @@ -806,18 +847,36 @@ void hw_breakpoint_thread_switch(struct task_struct *next)  /*   * CPU initialisation.   */ -static void reset_ctrl_regs(void *unused) +static void hw_breakpoint_reset(void *unused)  {  	int i; - -	for (i = 0; i < core_num_brps; ++i) { -		write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL); -		write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL); +	struct perf_event **slots; +	/* +	 * When a CPU goes through cold-boot, it does not have any installed +	 * slot, so it is safe to share the same function for restoring and +	 * resetting breakpoints; when a CPU is hotplugged in, it goes +	 * through the slots, which are all empty, hence it just resets control +	 * and value for debug registers. +	 * When this function is triggered on warm-boot through a CPU PM +	 * notifier some slots might be initialized; if so they are +	 * reprogrammed according to the debug slots content. 
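+	 *
+	 * Either way, the per-slot decision below boils down to:
+	 *
+	 *	if (slots[i])
+	 *		hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE);
+	 *	else
+	 *		write 0 to the slot's control and value registers;
+	 *
+	 * applied first to the breakpoint slots and then to the watchpoint
+	 * slots.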
+	 */ +	for (slots = this_cpu_ptr(bp_on_reg), i = 0; i < core_num_brps; ++i) { +		if (slots[i]) { +			hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE); +		} else { +			write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL); +			write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL); +		}  	} -	for (i = 0; i < core_num_wrps; ++i) { -		write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL); -		write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL); +	for (slots = this_cpu_ptr(wp_on_reg), i = 0; i < core_num_wrps; ++i) { +		if (slots[i]) { +			hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE); +		} else { +			write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL); +			write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL); +		}  	}  } @@ -827,7 +886,7 @@ static int hw_breakpoint_reset_notify(struct notifier_block *self,  {  	int cpu = (long)hcpu;  	if (action == CPU_ONLINE) -		smp_call_function_single(cpu, reset_ctrl_regs, NULL, 1); +		smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1);  	return NOTIFY_OK;  } @@ -835,6 +894,14 @@ static struct notifier_block hw_breakpoint_reset_nb = {  	.notifier_call = hw_breakpoint_reset_notify,  }; +#ifdef CONFIG_ARM64_CPU_SUSPEND +extern void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)); +#else +static inline void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) +{ +} +#endif +  /*   * One-time initialisation.   */ @@ -846,12 +913,14 @@ static int __init arch_hw_breakpoint_init(void)  	pr_info("found %d breakpoint and %d watchpoint registers.\n",  		core_num_brps, core_num_wrps); +	cpu_notifier_register_begin(); +  	/*  	 * Reset the breakpoint resources. We assume that a halting  	 * debugger will leave the world in a nice state for us.  	 */ -	smp_call_function(reset_ctrl_regs, NULL, 1); -	reset_ctrl_regs(NULL); +	smp_call_function(hw_breakpoint_reset, NULL, 1); +	hw_breakpoint_reset(NULL);  	/* Register debug fault handlers. */  	hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP, @@ -860,7 +929,12 @@ static int __init arch_hw_breakpoint_init(void)  			      TRAP_HWBKPT, "hw-watchpoint handler");  	/* Register hotplug notifier. */ -	register_cpu_notifier(&hw_breakpoint_reset_nb); +	__register_cpu_notifier(&hw_breakpoint_reset_nb); + +	cpu_notifier_register_done(); + +	/* Register cpu_suspend hw breakpoint restore hook */ +	cpu_suspend_set_dbg_restorer(hw_breakpoint_reset);  	return 0;  } diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c new file mode 100644 index 00000000000..92f36835486 --- /dev/null +++ b/arch/arm64/kernel/insn.c @@ -0,0 +1,304 @@ +/* + * Copyright (C) 2013 Huawei Ltd. + * Author: Jiang Liu <liuj97@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ */ +#include <linux/bitops.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/stop_machine.h> +#include <linux/uaccess.h> +#include <asm/cacheflush.h> +#include <asm/insn.h> + +static int aarch64_insn_encoding_class[] = { +	AARCH64_INSN_CLS_UNKNOWN, +	AARCH64_INSN_CLS_UNKNOWN, +	AARCH64_INSN_CLS_UNKNOWN, +	AARCH64_INSN_CLS_UNKNOWN, +	AARCH64_INSN_CLS_LDST, +	AARCH64_INSN_CLS_DP_REG, +	AARCH64_INSN_CLS_LDST, +	AARCH64_INSN_CLS_DP_FPSIMD, +	AARCH64_INSN_CLS_DP_IMM, +	AARCH64_INSN_CLS_DP_IMM, +	AARCH64_INSN_CLS_BR_SYS, +	AARCH64_INSN_CLS_BR_SYS, +	AARCH64_INSN_CLS_LDST, +	AARCH64_INSN_CLS_DP_REG, +	AARCH64_INSN_CLS_LDST, +	AARCH64_INSN_CLS_DP_FPSIMD, +}; + +enum aarch64_insn_encoding_class __kprobes aarch64_get_insn_class(u32 insn) +{ +	return aarch64_insn_encoding_class[(insn >> 25) & 0xf]; +} + +/* NOP is an alias of HINT */ +bool __kprobes aarch64_insn_is_nop(u32 insn) +{ +	if (!aarch64_insn_is_hint(insn)) +		return false; + +	switch (insn & 0xFE0) { +	case AARCH64_INSN_HINT_YIELD: +	case AARCH64_INSN_HINT_WFE: +	case AARCH64_INSN_HINT_WFI: +	case AARCH64_INSN_HINT_SEV: +	case AARCH64_INSN_HINT_SEVL: +		return false; +	default: +		return true; +	} +} + +/* + * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always + * little-endian. + */ +int __kprobes aarch64_insn_read(void *addr, u32 *insnp) +{ +	int ret; +	u32 val; + +	ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE); +	if (!ret) +		*insnp = le32_to_cpu(val); + +	return ret; +} + +int __kprobes aarch64_insn_write(void *addr, u32 insn) +{ +	insn = cpu_to_le32(insn); +	return probe_kernel_write(addr, &insn, AARCH64_INSN_SIZE); +} + +static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) +{ +	if (aarch64_get_insn_class(insn) != AARCH64_INSN_CLS_BR_SYS) +		return false; + +	return	aarch64_insn_is_b(insn) || +		aarch64_insn_is_bl(insn) || +		aarch64_insn_is_svc(insn) || +		aarch64_insn_is_hvc(insn) || +		aarch64_insn_is_smc(insn) || +		aarch64_insn_is_brk(insn) || +		aarch64_insn_is_nop(insn); +} + +/* + * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a + * Section B2.6.5 "Concurrent modification and execution of instructions": + * Concurrent modification and execution of instructions can lead to the + * resulting instruction performing any behavior that can be achieved by + * executing any sequence of instructions that can be executed from the + * same Exception level, except where the instruction before modification + * and the instruction after modification is a B, BL, NOP, BKPT, SVC, HVC, + * or SMC instruction. 
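+ *
+ * In practice this means a NOP can be replaced by a B/BL (and vice
+ * versa) while other CPUs keep executing; e.g. (illustrative, with
+ * 'branch_insn' standing for any B/BL encoding)
+ *
+ *	aarch64_insn_hotpatch_safe(aarch64_insn_gen_nop(), branch_insn)
+ *
+ * is true, so aarch64_insn_patch_text() below can take the cheaper
+ * aarch64_insn_patch_text_nosync() + kick_all_cpus_sync() path rather
+ * than the stop_machine()-based aarch64_insn_patch_text_sync().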
+ */ +bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn) +{ +	return __aarch64_insn_hotpatch_safe(old_insn) && +	       __aarch64_insn_hotpatch_safe(new_insn); +} + +int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) +{ +	u32 *tp = addr; +	int ret; + +	/* A64 instructions must be word aligned */ +	if ((uintptr_t)tp & 0x3) +		return -EINVAL; + +	ret = aarch64_insn_write(tp, insn); +	if (ret == 0) +		flush_icache_range((uintptr_t)tp, +				   (uintptr_t)tp + AARCH64_INSN_SIZE); + +	return ret; +} + +struct aarch64_insn_patch { +	void		**text_addrs; +	u32		*new_insns; +	int		insn_cnt; +	atomic_t	cpu_count; +}; + +static int __kprobes aarch64_insn_patch_text_cb(void *arg) +{ +	int i, ret = 0; +	struct aarch64_insn_patch *pp = arg; + +	/* The first CPU becomes master */ +	if (atomic_inc_return(&pp->cpu_count) == 1) { +		for (i = 0; ret == 0 && i < pp->insn_cnt; i++) +			ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], +							     pp->new_insns[i]); +		/* +		 * aarch64_insn_patch_text_nosync() calls flush_icache_range(), +		 * which ends with "dsb; isb" pair guaranteeing global +		 * visibility. +		 */ +		atomic_set(&pp->cpu_count, -1); +	} else { +		while (atomic_read(&pp->cpu_count) != -1) +			cpu_relax(); +		isb(); +	} + +	return ret; +} + +int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt) +{ +	struct aarch64_insn_patch patch = { +		.text_addrs = addrs, +		.new_insns = insns, +		.insn_cnt = cnt, +		.cpu_count = ATOMIC_INIT(0), +	}; + +	if (cnt <= 0) +		return -EINVAL; + +	return stop_machine(aarch64_insn_patch_text_cb, &patch, +			    cpu_online_mask); +} + +int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt) +{ +	int ret; +	u32 insn; + +	/* Unsafe to patch multiple instructions without synchronizaiton */ +	if (cnt == 1) { +		ret = aarch64_insn_read(addrs[0], &insn); +		if (ret) +			return ret; + +		if (aarch64_insn_hotpatch_safe(insn, insns[0])) { +			/* +			 * ARMv8 architecture doesn't guarantee all CPUs see +			 * the new instruction after returning from function +			 * aarch64_insn_patch_text_nosync(). So send IPIs to +			 * all other CPUs to achieve instruction +			 * synchronization. +			 */ +			ret = aarch64_insn_patch_text_nosync(addrs[0], insns[0]); +			kick_all_cpus_sync(); +			return ret; +		} +	} + +	return aarch64_insn_patch_text_sync(addrs, insns, cnt); +} + +u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, +				  u32 insn, u64 imm) +{ +	u32 immlo, immhi, lomask, himask, mask; +	int shift; + +	switch (type) { +	case AARCH64_INSN_IMM_ADR: +		lomask = 0x3; +		himask = 0x7ffff; +		immlo = imm & lomask; +		imm >>= 2; +		immhi = imm & himask; +		imm = (immlo << 24) | (immhi); +		mask = (lomask << 24) | (himask); +		shift = 5; +		break; +	case AARCH64_INSN_IMM_26: +		mask = BIT(26) - 1; +		shift = 0; +		break; +	case AARCH64_INSN_IMM_19: +		mask = BIT(19) - 1; +		shift = 5; +		break; +	case AARCH64_INSN_IMM_16: +		mask = BIT(16) - 1; +		shift = 5; +		break; +	case AARCH64_INSN_IMM_14: +		mask = BIT(14) - 1; +		shift = 5; +		break; +	case AARCH64_INSN_IMM_12: +		mask = BIT(12) - 1; +		shift = 10; +		break; +	case AARCH64_INSN_IMM_9: +		mask = BIT(9) - 1; +		shift = 12; +		break; +	default: +		pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n", +			type); +		return 0; +	} + +	/* Update the immediate field. 
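+	 * E.g. for AARCH64_INSN_IMM_26 the low 26 bits of 'imm' are placed
+	 * at bit 0 of a B/BL opcode, while AARCH64_INSN_IMM_16 places a
+	 * 16-bit field at bit 5 of a MOVZ/MOVN/MOVK, as selected by the
+	 * mask/shift pairs above.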
*/ +	insn &= ~(mask << shift); +	insn |= (imm & mask) << shift; + +	return insn; +} + +u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, +					  enum aarch64_insn_branch_type type) +{ +	u32 insn; +	long offset; + +	/* +	 * PC: A 64-bit Program Counter holding the address of the current +	 * instruction. A64 instructions must be word-aligned. +	 */ +	BUG_ON((pc & 0x3) || (addr & 0x3)); + +	/* +	 * B/BL support [-128M, 128M) offset +	 * ARM64 virtual address arrangement guarantees all kernel and module +	 * texts are within +/-128M. +	 */ +	offset = ((long)addr - (long)pc); +	BUG_ON(offset < -SZ_128M || offset >= SZ_128M); + +	if (type == AARCH64_INSN_BRANCH_LINK) +		insn = aarch64_insn_get_bl_value(); +	else +		insn = aarch64_insn_get_b_value(); + +	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn, +					     offset >> 2); +} + +u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_op op) +{ +	return aarch64_insn_get_hint_value() | op; +} + +u32 __kprobes aarch64_insn_gen_nop(void) +{ +	return aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP); +} diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index ecb3354292e..0f08dfd69eb 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -81,3 +81,68 @@ void __init init_IRQ(void)  	if (!handle_arch_irq)  		panic("No interrupt controller found.");  } + +#ifdef CONFIG_HOTPLUG_CPU +static bool migrate_one_irq(struct irq_desc *desc) +{ +	struct irq_data *d = irq_desc_get_irq_data(desc); +	const struct cpumask *affinity = d->affinity; +	struct irq_chip *c; +	bool ret = false; + +	/* +	 * If this is a per-CPU interrupt, or the affinity does not +	 * include this CPU, then we have nothing to do. +	 */ +	if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) +		return false; + +	if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) +		ret = true; + +	/* +	 * when using forced irq_set_affinity we must ensure that the cpu +	 * being offlined is not present in the affinity mask, it may be +	 * selected as the target CPU otherwise +	 */ +	affinity = cpu_online_mask; +	c = irq_data_get_irq_chip(d); +	if (!c->irq_set_affinity) +		pr_debug("IRQ%u: unable to set affinity\n", d->irq); +	else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret) +		cpumask_copy(d->affinity, affinity); + +	return ret; +} + +/* + * The current CPU has been marked offline.  Migrate IRQs off this CPU. + * If the affinity settings do not allow other CPUs, force them onto any + * available CPU. + * + * Note: we must iterate over all IRQs, whether they have an attached + * action structure or not, as we need to get chained interrupts too. + */ +void migrate_irqs(void) +{ +	unsigned int i; +	struct irq_desc *desc; +	unsigned long flags; + +	local_irq_save(flags); + +	for_each_irq_desc(i, desc) { +		bool affinity_broken; + +		raw_spin_lock(&desc->lock); +		affinity_broken = migrate_one_irq(desc); +		raw_spin_unlock(&desc->lock); + +		if (affinity_broken) +			pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n", +					    i, smp_processor_id()); +	} + +	local_irq_restore(flags); +} +#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c new file mode 100644 index 00000000000..263a166291f --- /dev/null +++ b/arch/arm64/kernel/jump_label.c @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2013 Huawei Ltd. 
+ * Author: Jiang Liu <liuj97@gmail.com> + * + * Based on arch/arm/kernel/jump_label.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/kernel.h> +#include <linux/jump_label.h> +#include <asm/insn.h> + +#ifdef HAVE_JUMP_LABEL + +static void __arch_jump_label_transform(struct jump_entry *entry, +					enum jump_label_type type, +					bool is_static) +{ +	void *addr = (void *)entry->code; +	u32 insn; + +	if (type == JUMP_LABEL_ENABLE) { +		insn = aarch64_insn_gen_branch_imm(entry->code, +						   entry->target, +						   AARCH64_INSN_BRANCH_NOLINK); +	} else { +		insn = aarch64_insn_gen_nop(); +	} + +	if (is_static) +		aarch64_insn_patch_text_nosync(addr, insn); +	else +		aarch64_insn_patch_text(&addr, &insn, 1); +} + +void arch_jump_label_transform(struct jump_entry *entry, +			       enum jump_label_type type) +{ +	__arch_jump_label_transform(entry, type, false); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, +				      enum jump_label_type type) +{ +	__arch_jump_label_transform(entry, type, true); +} + +#endif	/* HAVE_JUMP_LABEL */ diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c new file mode 100644 index 00000000000..75c9cf1aafe --- /dev/null +++ b/arch/arm64/kernel/kgdb.c @@ -0,0 +1,336 @@ +/* + * AArch64 KGDB support + * + * Based on arch/arm/kernel/kgdb.c + * + * Copyright (C) 2013 Cavium Inc. + * Author: Vijaya Kumar K <vijaya.kumar@caviumnetworks.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/irq.h> +#include <linux/kdebug.h> +#include <linux/kgdb.h> +#include <asm/traps.h> + +struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { +	{ "x0", 8, offsetof(struct pt_regs, regs[0])}, +	{ "x1", 8, offsetof(struct pt_regs, regs[1])}, +	{ "x2", 8, offsetof(struct pt_regs, regs[2])}, +	{ "x3", 8, offsetof(struct pt_regs, regs[3])}, +	{ "x4", 8, offsetof(struct pt_regs, regs[4])}, +	{ "x5", 8, offsetof(struct pt_regs, regs[5])}, +	{ "x6", 8, offsetof(struct pt_regs, regs[6])}, +	{ "x7", 8, offsetof(struct pt_regs, regs[7])}, +	{ "x8", 8, offsetof(struct pt_regs, regs[8])}, +	{ "x9", 8, offsetof(struct pt_regs, regs[9])}, +	{ "x10", 8, offsetof(struct pt_regs, regs[10])}, +	{ "x11", 8, offsetof(struct pt_regs, regs[11])}, +	{ "x12", 8, offsetof(struct pt_regs, regs[12])}, +	{ "x13", 8, offsetof(struct pt_regs, regs[13])}, +	{ "x14", 8, offsetof(struct pt_regs, regs[14])}, +	{ "x15", 8, offsetof(struct pt_regs, regs[15])}, +	{ "x16", 8, offsetof(struct pt_regs, regs[16])}, +	{ "x17", 8, offsetof(struct pt_regs, regs[17])}, +	{ "x18", 8, offsetof(struct pt_regs, regs[18])}, +	{ "x19", 8, offsetof(struct pt_regs, regs[19])}, +	{ "x20", 8, offsetof(struct pt_regs, regs[20])}, +	{ "x21", 8, offsetof(struct pt_regs, regs[21])}, +	{ "x22", 8, offsetof(struct pt_regs, regs[22])}, +	{ "x23", 8, offsetof(struct pt_regs, regs[23])}, +	{ "x24", 8, offsetof(struct pt_regs, regs[24])}, +	{ "x25", 8, offsetof(struct pt_regs, regs[25])}, +	{ "x26", 8, offsetof(struct pt_regs, regs[26])}, +	{ "x27", 8, offsetof(struct pt_regs, regs[27])}, +	{ "x28", 8, offsetof(struct pt_regs, regs[28])}, +	{ "x29", 8, offsetof(struct pt_regs, regs[29])}, +	{ "x30", 8, offsetof(struct pt_regs, regs[30])}, +	{ "sp", 8, offsetof(struct pt_regs, sp)}, +	{ "pc", 8, offsetof(struct pt_regs, pc)}, +	{ "pstate", 8, offsetof(struct pt_regs, pstate)}, +	{ "v0", 16, -1 }, +	{ "v1", 16, -1 }, +	{ "v2", 16, -1 }, +	{ "v3", 16, -1 }, +	{ "v4", 16, -1 }, +	{ "v5", 16, -1 }, +	{ "v6", 16, -1 }, +	{ "v7", 16, -1 }, +	{ "v8", 16, -1 }, +	{ "v9", 16, -1 }, +	{ "v10", 16, -1 }, +	{ "v11", 16, -1 }, +	{ "v12", 16, -1 }, +	{ "v13", 16, -1 }, +	{ "v14", 16, -1 }, +	{ "v15", 16, -1 }, +	{ "v16", 16, -1 }, +	{ "v17", 16, -1 }, +	{ "v18", 16, -1 }, +	{ "v19", 16, -1 }, +	{ "v20", 16, -1 }, +	{ "v21", 16, -1 }, +	{ "v22", 16, -1 }, +	{ "v23", 16, -1 }, +	{ "v24", 16, -1 }, +	{ "v25", 16, -1 }, +	{ "v26", 16, -1 }, +	{ "v27", 16, -1 }, +	{ "v28", 16, -1 }, +	{ "v29", 16, -1 }, +	{ "v30", 16, -1 }, +	{ "v31", 16, -1 }, +	{ "fpsr", 4, -1 }, +	{ "fpcr", 4, -1 }, +}; + +char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) +{ +	if (regno >= DBG_MAX_REG_NUM || regno < 0) +		return NULL; + +	if (dbg_reg_def[regno].offset != -1) +		memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, +		       dbg_reg_def[regno].size); +	else +		memset(mem, 0, dbg_reg_def[regno].size); +	return dbg_reg_def[regno].name; +} + +int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) +{ +	if (regno >= DBG_MAX_REG_NUM || regno < 0) +		return -EINVAL; + +	if (dbg_reg_def[regno].offset != -1) +		memcpy((void *)regs + dbg_reg_def[regno].offset, mem, +		       dbg_reg_def[regno].size); +	return 0; +} + +void +sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) +{ +	struct pt_regs *thread_regs; + +	/* Initialize to zero */ +	memset((char *)gdb_regs, 0, NUMREGBYTES); +	thread_regs = task_pt_regs(task); +	memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES); +} + +void kgdb_arch_set_pc(struct pt_regs *regs, 
unsigned long pc) +{ +	regs->pc = pc; +} + +static int compiled_break; + +static void kgdb_arch_update_addr(struct pt_regs *regs, +				char *remcom_in_buffer) +{ +	unsigned long addr; +	char *ptr; + +	ptr = &remcom_in_buffer[1]; +	if (kgdb_hex2long(&ptr, &addr)) +		kgdb_arch_set_pc(regs, addr); +	else if (compiled_break == 1) +		kgdb_arch_set_pc(regs, regs->pc + 4); + +	compiled_break = 0; +} + +int kgdb_arch_handle_exception(int exception_vector, int signo, +			       int err_code, char *remcom_in_buffer, +			       char *remcom_out_buffer, +			       struct pt_regs *linux_regs) +{ +	int err; + +	switch (remcom_in_buffer[0]) { +	case 'D': +	case 'k': +		/* +		 * Packet D (Detach), k (kill). No special handling +		 * is required here. Handle same as c packet. +		 */ +	case 'c': +		/* +		 * Packet c (Continue) to continue executing. +		 * Set pc to required address. +		 * Try to read optional parameter and set pc. +		 * If this was a compiled breakpoint, we need to move +		 * to the next instruction else we will just breakpoint +		 * over and over again. +		 */ +		kgdb_arch_update_addr(linux_regs, remcom_in_buffer); +		atomic_set(&kgdb_cpu_doing_single_step, -1); +		kgdb_single_step =  0; + +		/* +		 * Received continue command, disable single step +		 */ +		if (kernel_active_single_step()) +			kernel_disable_single_step(); + +		err = 0; +		break; +	case 's': +		/* +		 * Update step address value with address passed +		 * with step packet. +		 * On debug exception return PC is copied to ELR +		 * So just update PC. +		 * If no step address is passed, resume from the address +		 * pointed by PC. Do not update PC +		 */ +		kgdb_arch_update_addr(linux_regs, remcom_in_buffer); +		atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); +		kgdb_single_step =  1; + +		/* +		 * Enable single step handling +		 */ +		if (!kernel_active_single_step()) +			kernel_enable_single_step(linux_regs); +		err = 0; +		break; +	default: +		err = -1; +	} +	return err; +} + +static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) +{ +	kgdb_handle_exception(1, SIGTRAP, 0, regs); +	return 0; +} + +static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) +{ +	compiled_break = 1; +	kgdb_handle_exception(1, SIGTRAP, 0, regs); + +	return 0; +} + +static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) +{ +	kgdb_handle_exception(1, SIGTRAP, 0, regs); +	return 0; +} + +static struct break_hook kgdb_brkpt_hook = { +	.esr_mask	= 0xffffffff, +	.esr_val	= DBG_ESR_VAL_BRK(KGDB_DYN_DGB_BRK_IMM), +	.fn		= kgdb_brk_fn +}; + +static struct break_hook kgdb_compiled_brkpt_hook = { +	.esr_mask	= 0xffffffff, +	.esr_val	= DBG_ESR_VAL_BRK(KDBG_COMPILED_DBG_BRK_IMM), +	.fn		= kgdb_compiled_brk_fn +}; + +static struct step_hook kgdb_step_hook = { +	.fn		= kgdb_step_brk_fn +}; + +static void kgdb_call_nmi_hook(void *ignored) +{ +	kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); +} + +void kgdb_roundup_cpus(unsigned long flags) +{ +	local_irq_enable(); +	smp_call_function(kgdb_call_nmi_hook, NULL, 0); +	local_irq_disable(); +} + +static int __kgdb_notify(struct die_args *args, unsigned long cmd) +{ +	struct pt_regs *regs = args->regs; + +	if (kgdb_handle_exception(1, args->signr, cmd, regs)) +		return NOTIFY_DONE; +	return NOTIFY_STOP; +} + +static int +kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ +	unsigned long flags; +	int ret; + +	local_irq_save(flags); +	ret = __kgdb_notify(ptr, cmd); +	local_irq_restore(flags); + +	return ret; +} + +static struct 
notifier_block kgdb_notifier = { +	.notifier_call	= kgdb_notify, +	/* +	 * Want to be lowest priority +	 */ +	.priority	= -INT_MAX, +}; + +/* + * kgdb_arch_init - Perform any architecture specific initalization. + * This function will handle the initalization of any architecture + * specific callbacks. + */ +int kgdb_arch_init(void) +{ +	int ret = register_die_notifier(&kgdb_notifier); + +	if (ret != 0) +		return ret; + +	register_break_hook(&kgdb_brkpt_hook); +	register_break_hook(&kgdb_compiled_brkpt_hook); +	register_step_hook(&kgdb_step_hook); +	return 0; +} + +/* + * kgdb_arch_exit - Perform any architecture specific uninitalization. + * This function will handle the uninitalization of any architecture + * specific callbacks, for dynamic registration and unregistration. + */ +void kgdb_arch_exit(void) +{ +	unregister_break_hook(&kgdb_brkpt_hook); +	unregister_break_hook(&kgdb_compiled_brkpt_hook); +	unregister_step_hook(&kgdb_step_hook); +	unregister_die_notifier(&kgdb_notifier); +} + +/* + * ARM instructions are always in LE. + * Break instruction is encoded in LE format + */ +struct kgdb_arch arch_kgdb_ops = { +	.gdb_bpt_instr = { +		KGDB_DYN_BRK_INS_BYTE0, +		KGDB_DYN_BRK_INS_BYTE1, +		KGDB_DYN_BRK_INS_BYTE2, +		KGDB_DYN_BRK_INS_BYTE3, +	} +}; diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 8b69ecb1d8b..7787208e8cc 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -27,6 +27,9 @@   *   * See Documentation/arm/kernel_user_helpers.txt for formal definitions.   */ + +#include <asm/unistd32.h> +  	.align	5  	.globl	__kuser_helper_start  __kuser_helper_start: @@ -35,33 +38,32 @@ __kuser_cmpxchg64:			// 0xffff0f60  	.inst	0xe92d00f0		//	push		{r4, r5, r6, r7}  	.inst	0xe1c040d0		//	ldrd		r4, r5, [r0]  	.inst	0xe1c160d0		//	ldrd		r6, r7, [r1] -	.inst	0xf57ff05f		//	dmb		sy  	.inst	0xe1b20f9f		// 1:	ldrexd		r0, r1, [r2]  	.inst	0xe0303004		//	eors		r3, r0, r4  	.inst	0x00313005		//	eoreqs		r3, r1, r5 -	.inst	0x01a23f96		//	strexdeq	r3, r6, [r2] +	.inst	0x01a23e96		//	stlexdeq	r3, r6, [r2]  	.inst	0x03330001		//	teqeq		r3, #1  	.inst	0x0afffff9		//	beq		1b -	.inst	0xf57ff05f		//	dmb		sy +	.inst	0xf57ff05b		//	dmb		ish  	.inst	0xe2730000		//	rsbs		r0, r3, #0  	.inst	0xe8bd00f0		//	pop		{r4, r5, r6, r7}  	.inst	0xe12fff1e		//	bx		lr  	.align	5  __kuser_memory_barrier:			// 0xffff0fa0 -	.inst	0xf57ff05f		//	dmb		sy +	.inst	0xf57ff05b		//	dmb		ish  	.inst	0xe12fff1e		//	bx		lr  	.align	5  __kuser_cmpxchg:			// 0xffff0fc0 -	.inst	0xf57ff05f		//	dmb		sy  	.inst	0xe1923f9f		// 1:	ldrex		r3, [r2]  	.inst	0xe0533000		//	subs		r3, r3, r0 -	.inst	0x01823f91		//	strexeq	r3, r1, [r2] +	.inst	0x01823e91		//	stlexeq		r3, r1, [r2]  	.inst	0x03330001		//	teqeq		r3, #1  	.inst	0x0afffffa		//	beq		1b +	.inst	0xf57ff05b		//	dmb		ish  	.inst	0xe2730000		//	rsbs		r0, r3, #0 -	.inst	0xeaffffef		//	b		<__kuser_memory_barrier> +	.inst	0xe12fff1e		//	bx		lr  	.align	5  __kuser_get_tls:			// 0xffff0fe0 @@ -75,3 +77,42 @@ __kuser_helper_version:			// 0xffff0ffc  	.word	((__kuser_helper_end - __kuser_helper_start) >> 5)  	.globl	__kuser_helper_end  __kuser_helper_end: + +/* + * AArch32 sigreturn code + * + * For ARM syscalls, the syscall number has to be loaded into r7. + * We do not support an OABI userspace. + * + * For Thumb syscalls, we also pass the syscall number via r7. We therefore + * need two 16-bit instructions. 
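+ *
+ * The four snippets below (ARM and Thumb variants of sigreturn and
+ * rt_sigreturn) each load the relevant syscall number into r7 and then
+ * issue an svc. When a compat signal frame is set up without a
+ * user-supplied restorer, the handler's return address is pointed at
+ * the variant matching the task's current instruction set, so returning
+ * from the handler re-enters the kernel through the normal syscall path.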
+ */ +	.globl __aarch32_sigret_code_start +__aarch32_sigret_code_start: + +	/* +	 * ARM Code +	 */ +	.byte	__NR_compat_sigreturn, 0x70, 0xa0, 0xe3	// mov	r7, #__NR_compat_sigreturn +	.byte	__NR_compat_sigreturn, 0x00, 0x00, 0xef	// svc	#__NR_compat_sigreturn + +	/* +	 * Thumb code +	 */ +	.byte	__NR_compat_sigreturn, 0x27			// svc	#__NR_compat_sigreturn +	.byte	__NR_compat_sigreturn, 0xdf			// mov	r7, #__NR_compat_sigreturn + +	/* +	 * ARM code +	 */ +	.byte	__NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3	// mov	r7, #__NR_compat_rt_sigreturn +	.byte	__NR_compat_rt_sigreturn, 0x00, 0x00, 0xef	// svc	#__NR_compat_rt_sigreturn + +	/* +	 * Thumb code +	 */ +	.byte	__NR_compat_rt_sigreturn, 0x27			// svc	#__NR_compat_rt_sigreturn +	.byte	__NR_compat_rt_sigreturn, 0xdf			// mov	r7, #__NR_compat_rt_sigreturn + +        .globl __aarch32_sigret_code_end +__aarch32_sigret_code_end: diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index ca0e3d55da9..1eb1cc95513 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -25,11 +25,15 @@  #include <linux/mm.h>  #include <linux/moduleloader.h>  #include <linux/vmalloc.h> +#include <asm/insn.h> + +#define	AARCH64_INSN_IMM_MOVNZ		AARCH64_INSN_IMM_MAX +#define	AARCH64_INSN_IMM_MOVK		AARCH64_INSN_IMM_16  void *module_alloc(unsigned long size)  {  	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, -				    GFP_KERNEL, PAGE_KERNEL_EXEC, -1, +				    GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE,  				    __builtin_return_address(0));  } @@ -94,25 +98,18 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)  	return 0;  } -enum aarch64_imm_type { -	INSN_IMM_MOVNZ, -	INSN_IMM_MOVK, -	INSN_IMM_ADR, -	INSN_IMM_26, -	INSN_IMM_19, -	INSN_IMM_16, -	INSN_IMM_14, -	INSN_IMM_12, -	INSN_IMM_9, -}; - -static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm) +static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, +			   int lsb, enum aarch64_insn_imm_type imm_type)  { -	u32 immlo, immhi, lomask, himask, mask; -	int shift; +	u64 imm, limit = 0; +	s64 sval; +	u32 insn = le32_to_cpu(*(u32 *)place); + +	sval = do_reloc(op, place, val); +	sval >>= lsb; +	imm = sval & 0xffff; -	switch (type) { -	case INSN_IMM_MOVNZ: +	if (imm_type == AARCH64_INSN_IMM_MOVNZ) {  		/*  		 * For signed MOVW relocations, we have to manipulate the  		 * instruction encoding depending on whether or not the @@ -131,70 +128,12 @@ static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm)  			 */  			imm = ~imm;  		} -	case INSN_IMM_MOVK: -		mask = BIT(16) - 1; -		shift = 5; -		break; -	case INSN_IMM_ADR: -		lomask = 0x3; -		himask = 0x7ffff; -		immlo = imm & lomask; -		imm >>= 2; -		immhi = imm & himask; -		imm = (immlo << 24) | (immhi); -		mask = (lomask << 24) | (himask); -		shift = 5; -		break; -	case INSN_IMM_26: -		mask = BIT(26) - 1; -		shift = 0; -		break; -	case INSN_IMM_19: -		mask = BIT(19) - 1; -		shift = 5; -		break; -	case INSN_IMM_16: -		mask = BIT(16) - 1; -		shift = 5; -		break; -	case INSN_IMM_14: -		mask = BIT(14) - 1; -		shift = 5; -		break; -	case INSN_IMM_12: -		mask = BIT(12) - 1; -		shift = 10; -		break; -	case INSN_IMM_9: -		mask = BIT(9) - 1; -		shift = 12; -		break; -	default: -		pr_err("encode_insn_immediate: unknown immediate encoding %d\n", -			type); -		return 0; +		imm_type = AARCH64_INSN_IMM_MOVK;  	} -	/* Update the immediate field. 
*/ -	insn &= ~(mask << shift); -	insn |= (imm & mask) << shift; - -	return insn; -} - -static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, -			   int lsb, enum aarch64_imm_type imm_type) -{ -	u64 imm, limit = 0; -	s64 sval; -	u32 insn = *(u32 *)place; - -	sval = do_reloc(op, place, val); -	sval >>= lsb; -	imm = sval & 0xffff; -  	/* Update the instruction with the new encoding. */ -	*(u32 *)place = encode_insn_immediate(imm_type, insn, imm); +	insn = aarch64_insn_encode_immediate(imm_type, insn, imm); +	*(u32 *)place = cpu_to_le32(insn);  	/* Shift out the immediate field. */  	sval >>= 16; @@ -203,9 +142,9 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,  	 * For unsigned immediates, the overflow check is straightforward.  	 * For signed immediates, the sign bit is actually the bit past the  	 * most significant bit of the field. -	 * The INSN_IMM_16 immediate type is unsigned. +	 * The AARCH64_INSN_IMM_16 immediate type is unsigned.  	 */ -	if (imm_type != INSN_IMM_16) { +	if (imm_type != AARCH64_INSN_IMM_16) {  		sval++;  		limit++;  	} @@ -218,11 +157,11 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,  }  static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val, -			  int lsb, int len, enum aarch64_imm_type imm_type) +			  int lsb, int len, enum aarch64_insn_imm_type imm_type)  {  	u64 imm, imm_mask;  	s64 sval; -	u32 insn = *(u32 *)place; +	u32 insn = le32_to_cpu(*(u32 *)place);  	/* Calculate the relocation value. */  	sval = do_reloc(op, place, val); @@ -233,7 +172,8 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val,  	imm = sval & imm_mask;  	/* Update the instruction's immediate field. */ -	*(u32 *)place = encode_insn_immediate(imm_type, insn, imm); +	insn = aarch64_insn_encode_immediate(imm_type, insn, imm); +	*(u32 *)place = cpu_to_le32(insn);  	/*  	 * Extract the upper value bits (including the sign bit) and @@ -315,125 +255,125 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  			overflow_check = false;  		case R_AARCH64_MOVW_UABS_G0:  			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, -					      INSN_IMM_16); +					      AARCH64_INSN_IMM_16);  			break;  		case R_AARCH64_MOVW_UABS_G1_NC:  			overflow_check = false;  		case R_AARCH64_MOVW_UABS_G1:  			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, -					      INSN_IMM_16); +					      AARCH64_INSN_IMM_16);  			break;  		case R_AARCH64_MOVW_UABS_G2_NC:  			overflow_check = false;  		case R_AARCH64_MOVW_UABS_G2:  			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, -					      INSN_IMM_16); +					      AARCH64_INSN_IMM_16);  			break;  		case R_AARCH64_MOVW_UABS_G3:  			/* We're using the top bits so we can't overflow. 
*/  			overflow_check = false;  			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48, -					      INSN_IMM_16); +					      AARCH64_INSN_IMM_16);  			break;  		case R_AARCH64_MOVW_SABS_G0:  			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, -					      INSN_IMM_MOVNZ); +					      AARCH64_INSN_IMM_MOVNZ);  			break;  		case R_AARCH64_MOVW_SABS_G1:  			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, -					      INSN_IMM_MOVNZ); +					      AARCH64_INSN_IMM_MOVNZ);  			break;  		case R_AARCH64_MOVW_SABS_G2:  			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, -					      INSN_IMM_MOVNZ); +					      AARCH64_INSN_IMM_MOVNZ);  			break;  		case R_AARCH64_MOVW_PREL_G0_NC:  			overflow_check = false;  			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, -					      INSN_IMM_MOVK); +					      AARCH64_INSN_IMM_MOVK);  			break;  		case R_AARCH64_MOVW_PREL_G0:  			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, -					      INSN_IMM_MOVNZ); +					      AARCH64_INSN_IMM_MOVNZ);  			break;  		case R_AARCH64_MOVW_PREL_G1_NC:  			overflow_check = false;  			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, -					      INSN_IMM_MOVK); +					      AARCH64_INSN_IMM_MOVK);  			break;  		case R_AARCH64_MOVW_PREL_G1:  			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, -					      INSN_IMM_MOVNZ); +					      AARCH64_INSN_IMM_MOVNZ);  			break;  		case R_AARCH64_MOVW_PREL_G2_NC:  			overflow_check = false;  			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, -					      INSN_IMM_MOVK); +					      AARCH64_INSN_IMM_MOVK);  			break;  		case R_AARCH64_MOVW_PREL_G2:  			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, -					      INSN_IMM_MOVNZ); +					      AARCH64_INSN_IMM_MOVNZ);  			break;  		case R_AARCH64_MOVW_PREL_G3:  			/* We're using the top bits so we can't overflow. */  			overflow_check = false;  			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48, -					      INSN_IMM_MOVNZ); +					      AARCH64_INSN_IMM_MOVNZ);  			break;  		/* Immediate instruction relocations. 
*/  		case R_AARCH64_LD_PREL_LO19:  			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, -					     INSN_IMM_19); +					     AARCH64_INSN_IMM_19);  			break;  		case R_AARCH64_ADR_PREL_LO21:  			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, -					     INSN_IMM_ADR); +					     AARCH64_INSN_IMM_ADR);  			break;  		case R_AARCH64_ADR_PREL_PG_HI21_NC:  			overflow_check = false;  		case R_AARCH64_ADR_PREL_PG_HI21:  			ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, -					     INSN_IMM_ADR); +					     AARCH64_INSN_IMM_ADR);  			break;  		case R_AARCH64_ADD_ABS_LO12_NC:  		case R_AARCH64_LDST8_ABS_LO12_NC:  			overflow_check = false;  			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12, -					     INSN_IMM_12); +					     AARCH64_INSN_IMM_12);  			break;  		case R_AARCH64_LDST16_ABS_LO12_NC:  			overflow_check = false;  			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11, -					     INSN_IMM_12); +					     AARCH64_INSN_IMM_12);  			break;  		case R_AARCH64_LDST32_ABS_LO12_NC:  			overflow_check = false;  			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10, -					     INSN_IMM_12); +					     AARCH64_INSN_IMM_12);  			break;  		case R_AARCH64_LDST64_ABS_LO12_NC:  			overflow_check = false;  			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9, -					     INSN_IMM_12); +					     AARCH64_INSN_IMM_12);  			break;  		case R_AARCH64_LDST128_ABS_LO12_NC:  			overflow_check = false;  			ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8, -					     INSN_IMM_12); +					     AARCH64_INSN_IMM_12);  			break;  		case R_AARCH64_TSTBR14:  			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14, -					     INSN_IMM_14); +					     AARCH64_INSN_IMM_14);  			break;  		case R_AARCH64_CONDBR19:  			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, -					     INSN_IMM_19); +					     AARCH64_INSN_IMM_19);  			break;  		case R_AARCH64_JUMP26:  		case R_AARCH64_CALL26:  			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, -					     INSN_IMM_26); +					     AARCH64_INSN_IMM_26);  			break;  		default: diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index cea1594ff93..baf5afb7e6a 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -22,6 +22,7 @@  #include <linux/bitmap.h>  #include <linux/interrupt.h> +#include <linux/irq.h>  #include <linux/kernel.h>  #include <linux/export.h>  #include <linux/perf_event.h> @@ -363,26 +364,53 @@ validate_group(struct perf_event *event)  }  static void +armpmu_disable_percpu_irq(void *data) +{ +	unsigned int irq = *(unsigned int *)data; +	disable_percpu_irq(irq); +} + +static void  armpmu_release_hardware(struct arm_pmu *armpmu)  { -	int i, irq, irqs; +	int irq; +	unsigned int i, irqs;  	struct platform_device *pmu_device = armpmu->plat_device;  	irqs = min(pmu_device->num_resources, num_possible_cpus()); +	if (!irqs) +		return; -	for (i = 0; i < irqs; ++i) { -		if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs)) -			continue; -		irq = platform_get_irq(pmu_device, i); -		if (irq >= 0) -			free_irq(irq, armpmu); +	irq = platform_get_irq(pmu_device, 0); +	if (irq <= 0) +		return; + +	if (irq_is_percpu(irq)) { +		on_each_cpu(armpmu_disable_percpu_irq, &irq, 1); +		free_percpu_irq(irq, &cpu_hw_events); +	} else { +		for (i = 0; i < irqs; ++i) { +			if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs)) +				continue; +			irq = platform_get_irq(pmu_device, i); +			if (irq > 0) +				free_irq(irq, armpmu); +		}  	}  } +static void +armpmu_enable_percpu_irq(void *data) +{ +	unsigned int irq = 
*(unsigned int *)data; +	enable_percpu_irq(irq, IRQ_TYPE_NONE); +} +  static int  armpmu_reserve_hardware(struct arm_pmu *armpmu)  { -	int i, err, irq, irqs; +	int err, irq; +	unsigned int i, irqs;  	struct platform_device *pmu_device = armpmu->plat_device;  	if (!pmu_device) { @@ -391,39 +419,59 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu)  	}  	irqs = min(pmu_device->num_resources, num_possible_cpus()); -	if (irqs < 1) { +	if (!irqs) {  		pr_err("no irqs for PMUs defined\n");  		return -ENODEV;  	} -	for (i = 0; i < irqs; ++i) { -		err = 0; -		irq = platform_get_irq(pmu_device, i); -		if (irq < 0) -			continue; +	irq = platform_get_irq(pmu_device, 0); +	if (irq <= 0) { +		pr_err("failed to get valid irq for PMU device\n"); +		return -ENODEV; +	} -		/* -		 * If we have a single PMU interrupt that we can't shift, -		 * assume that we're running on a uniprocessor machine and -		 * continue. Otherwise, continue without this interrupt. -		 */ -		if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { -			pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", -				    irq, i); -			continue; -		} +	if (irq_is_percpu(irq)) { +		err = request_percpu_irq(irq, armpmu->handle_irq, +				"arm-pmu", &cpu_hw_events); -		err = request_irq(irq, armpmu->handle_irq, -				  IRQF_NOBALANCING, -				  "arm-pmu", armpmu);  		if (err) { -			pr_err("unable to request IRQ%d for ARM PMU counters\n", -				irq); +			pr_err("unable to request percpu IRQ%d for ARM PMU counters\n", +					irq);  			armpmu_release_hardware(armpmu);  			return err;  		} -		cpumask_set_cpu(i, &armpmu->active_irqs); +		on_each_cpu(armpmu_enable_percpu_irq, &irq, 1); +	} else { +		for (i = 0; i < irqs; ++i) { +			err = 0; +			irq = platform_get_irq(pmu_device, i); +			if (irq <= 0) +				continue; + +			/* +			 * If we have a single PMU interrupt that we can't shift, +			 * assume that we're running on a uniprocessor machine and +			 * continue. Otherwise, continue without this interrupt. 
+			 */ +			if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { +				pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", +						irq, i); +				continue; +			} + +			err = request_irq(irq, armpmu->handle_irq, +					IRQF_NOBALANCING, +					"arm-pmu", armpmu); +			if (err) { +				pr_err("unable to request IRQ%d for ARM PMU counters\n", +						irq); +				armpmu_release_hardware(armpmu); +				return err; +			} + +			cpumask_set_cpu(i, &armpmu->active_irqs); +		}  	}  	return 0; @@ -784,8 +832,8 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]  /*   * PMXEVTYPER: Event selection reg   */ -#define	ARMV8_EVTYPE_MASK	0xc80000ff	/* Mask for writable bits */ -#define	ARMV8_EVTYPE_EVENT	0xff		/* Mask for EVENT bits */ +#define	ARMV8_EVTYPE_MASK	0xc80003ff	/* Mask for writable bits */ +#define	ARMV8_EVTYPE_EVENT	0x3ff		/* Mask for EVENT bits */  /*   * Event filters for PMUv3 @@ -1044,7 +1092,7 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)  	 */  	regs = get_irq_regs(); -	cpuc = &__get_cpu_var(cpu_hw_events); +	cpuc = this_cpu_ptr(&cpu_hw_events);  	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {  		struct perf_event *event = cpuc->events[idx];  		struct hw_perf_event *hwc; @@ -1175,7 +1223,8 @@ static void armv8pmu_reset(void *info)  static int armv8_pmuv3_map_event(struct perf_event *event)  {  	return map_cpu_event(event, &armv8_pmuv3_perf_map, -				&armv8_pmuv3_perf_cache_map, 0xFF); +				&armv8_pmuv3_perf_cache_map, +				ARMV8_EVTYPE_EVENT);  }  static struct arm_pmu armv8pmu = { @@ -1257,7 +1306,7 @@ device_initcall(register_pmu_driver);  static struct pmu_hw_events *armpmu_get_cpu_events(void)  { -	return &__get_cpu_var(cpu_hw_events); +	return this_cpu_ptr(&cpu_hw_events);  }  static void __init cpu_pmu_init(struct arm_pmu *armpmu) @@ -1299,8 +1348,8 @@ early_initcall(init_hw_perf_events);   * Callchain handling code.   */  struct frame_tail { -	struct frame_tail   __user *fp; -	unsigned long	    lr; +	struct frame_tail	__user *fp; +	unsigned long		lr;  } __attribute__((packed));  /* @@ -1337,22 +1386,84 @@ user_backtrace(struct frame_tail __user *tail,  	return buftail.fp;  } +#ifdef CONFIG_COMPAT +/* + * The registers we're interested in are at the end of the variable + * length saved register structure. The fp points at the end of this + * structure so the address of this struct is: + * (struct compat_frame_tail *)(xxx->fp)-1 + * + * This code has been adapted from the ARM OProfile support. + */ +struct compat_frame_tail { +	compat_uptr_t	fp; /* a (struct compat_frame_tail *) in compat mode */ +	u32		sp; +	u32		lr; +} __attribute__((packed)); + +static struct compat_frame_tail __user * +compat_user_backtrace(struct compat_frame_tail __user *tail, +		      struct perf_callchain_entry *entry) +{ +	struct compat_frame_tail buftail; +	unsigned long err; + +	/* Also check accessibility of one struct frame_tail beyond */ +	if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) +		return NULL; + +	pagefault_disable(); +	err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); +	pagefault_enable(); + +	if (err) +		return NULL; + +	perf_callchain_store(entry, buftail.lr); + +	/* +	 * Frame pointers should strictly progress back up the stack +	 * (towards higher addresses). 
+	 */ +	if (tail + 1 >= (struct compat_frame_tail __user *) +			compat_ptr(buftail.fp)) +		return NULL; + +	return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1; +} +#endif /* CONFIG_COMPAT */ +  void perf_callchain_user(struct perf_callchain_entry *entry,  			 struct pt_regs *regs)  { -	struct frame_tail __user *tail; -  	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {  		/* We don't support guest os callchain now */  		return;  	}  	perf_callchain_store(entry, regs->pc); -	tail = (struct frame_tail __user *)regs->regs[29]; -	while (entry->nr < PERF_MAX_STACK_DEPTH && -	       tail && !((unsigned long)tail & 0xf)) -		tail = user_backtrace(tail, entry); +	if (!compat_user_mode(regs)) { +		/* AARCH64 mode */ +		struct frame_tail __user *tail; + +		tail = (struct frame_tail __user *)regs->regs[29]; + +		while (entry->nr < PERF_MAX_STACK_DEPTH && +		       tail && !((unsigned long)tail & 0xf)) +			tail = user_backtrace(tail, entry); +	} else { +#ifdef CONFIG_COMPAT +		/* AARCH32 compat mode */ +		struct compat_frame_tail __user *tail; + +		tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; + +		while ((entry->nr < PERF_MAX_STACK_DEPTH) && +			tail && !((unsigned long)tail & 0x3)) +			tail = compat_user_backtrace(tail, entry); +#endif +	}  }  /* @@ -1380,6 +1491,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,  	frame.fp = regs->regs[29];  	frame.sp = regs->sp;  	frame.pc = regs->pc; +  	walk_stackframe(&frame, callchain_trace, entry);  } diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c new file mode 100644 index 00000000000..422ebd63b61 --- /dev/null +++ b/arch/arm64/kernel/perf_regs.c @@ -0,0 +1,46 @@ +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/bug.h> + +#include <asm/compat.h> +#include <asm/perf_regs.h> +#include <asm/ptrace.h> + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ +	if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_MAX)) +		return 0; + +	/* +	 * Compat (i.e. 32 bit) mode: +	 * - PC has been set in the pt_regs struct in kernel_entry, +	 * - Handle SP and LR here. 
+	 */ +	if (compat_user_mode(regs)) { +		if ((u32)idx == PERF_REG_ARM64_SP) +			return regs->compat_sp; +		if ((u32)idx == PERF_REG_ARM64_LR) +			return regs->compat_lr; +	} + +	return regs->regs[idx]; +} + +#define REG_RESERVED (~((1ULL << PERF_REG_ARM64_MAX) - 1)) + +int perf_reg_validate(u64 mask) +{ +	if (!mask || mask & REG_RESERVED) +		return -EINVAL; + +	return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ +	if (is_compat_thread(task_thread_info(task))) +		return PERF_SAMPLE_REGS_ABI_32; +	else +		return PERF_SAMPLE_REGS_ABI_64; +} diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 7ae8a1f00c3..43b7c34f92c 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -20,6 +20,7 @@  #include <stdarg.h> +#include <linux/compat.h>  #include <linux/export.h>  #include <linux/sched.h>  #include <linux/kernel.h> @@ -71,8 +72,17 @@ static void setup_restart(void)  void soft_restart(unsigned long addr)  { +	typedef void (*phys_reset_t)(unsigned long); +	phys_reset_t phys_reset; +  	setup_restart(); -	cpu_reset(addr); + +	/* Switch to the identity mapping */ +	phys_reset = (phys_reset_t)virt_to_phys(cpu_reset); +	phys_reset(addr); + +	/* Should never get here */ +	BUG();  }  /* @@ -84,11 +94,6 @@ EXPORT_SYMBOL_GPL(pm_power_off);  void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);  EXPORT_SYMBOL_GPL(arm_pm_restart); -void arch_cpu_idle_prepare(void) -{ -	local_fiq_enable(); -} -  /*   * This is our default idle handler.   */ @@ -102,33 +107,69 @@ void arch_cpu_idle(void)  	local_irq_enable();  } -void machine_shutdown(void) +#ifdef CONFIG_HOTPLUG_CPU +void arch_cpu_idle_dead(void)  { -#ifdef CONFIG_SMP -	smp_send_stop(); +       cpu_die(); +}  #endif + +/* + * Called by kexec, immediately prior to machine_kexec(). + * + * This must completely disable all secondary CPUs; simply causing those CPUs + * to execute e.g. a RAM-based pin loop is not sufficient. This allows the + * kexec'd kernel to use any and all RAM as it sees fit, without having to + * avoid any code or data used by any SW CPU pin loop. The CPU hotplug + * functionality embodied in disable_nonboot_cpus() to achieve this. + */ +void machine_shutdown(void) +{ +	disable_nonboot_cpus();  } +/* + * Halting simply requires that the secondary CPUs stop performing any + * activity (executing tasks, handling interrupts). smp_send_stop() + * achieves this. + */  void machine_halt(void)  { -	machine_shutdown(); +	local_irq_disable(); +	smp_send_stop();  	while (1);  } +/* + * Power-off simply requires that the secondary CPUs stop performing any + * activity (executing tasks, handling interrupts). smp_send_stop() + * achieves this. When the system power is turned off, it will take all CPUs + * with it. + */  void machine_power_off(void)  { -	machine_shutdown(); +	local_irq_disable(); +	smp_send_stop();  	if (pm_power_off)  		pm_power_off();  } +/* + * Restart requires that the secondary CPUs stop performing any activity + * while the primary CPU resets the system. Systems with a single CPU can + * use soft_restart() as their machine descriptor's .restart hook, since that + * will cause the only available CPU to reset. Systems with multiple CPUs must + * provide a HW restart implementation, to ensure that all CPUs reset at once. + * This is required so that any code running after reset on the primary CPU + * doesn't have to co-ordinate with other CPUs to ensure they aren't still + * executing pre-reset code, and using RAM that the primary CPU's code wishes + * to use. 
Implementing such co-ordination would be essentially impossible. + */  void machine_restart(char *cmd)  { -	machine_shutdown(); -  	/* Disable interrupts first */  	local_irq_disable(); -	local_fiq_disable(); +	smp_send_stop();  	/* Now call the architecture specific reboot code. */  	if (arm_pm_restart) @@ -195,7 +236,7 @@ void release_thread(struct task_struct *dead_task)  int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)  { -	fpsimd_save_state(¤t->thread.fpsimd_state); +	fpsimd_preserve_current_state();  	*dst = *src;  	return 0;  } @@ -290,7 +331,7 @@ struct task_struct *__switch_to(struct task_struct *prev,  	 * Complete any pending TLB or cache maintenance on this CPU in case  	 * the thread migrates to a different CPU.  	 */ -	dsb(); +	dsb(ish);  	/* the actual thread switch */  	last = cpu_switch_to(prev, next); @@ -301,6 +342,7 @@ struct task_struct *__switch_to(struct task_struct *prev,  unsigned long get_wchan(struct task_struct *p)  {  	struct stackframe frame; +	unsigned long stack_page;  	int count = 0;  	if (!p || p == current || p->state == TASK_RUNNING)  		return 0; @@ -308,9 +350,11 @@ unsigned long get_wchan(struct task_struct *p)  	frame.fp = thread_saved_fp(p);  	frame.sp = thread_saved_sp(p);  	frame.pc = thread_saved_pc(p); +	stack_page = (unsigned long)task_stack_page(p);  	do { -		int ret = unwind_frame(&frame); -		if (ret < 0) +		if (frame.sp < stack_page || +		    frame.sp >= stack_page + THREAD_SIZE || +		    unwind_frame(&frame))  			return 0;  		if (!in_sched_functions(frame.pc))  			return frame.pc; diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 14f73c445ff..9e9798f9117 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -17,61 +17,80 @@  #include <linux/init.h>  #include <linux/of.h> +#include <linux/smp.h> +#include <linux/reboot.h> +#include <linux/pm.h> +#include <linux/delay.h> +#include <uapi/linux/psci.h>  #include <asm/compiler.h> +#include <asm/cpu_ops.h>  #include <asm/errno.h>  #include <asm/psci.h> +#include <asm/smp_plat.h> +#include <asm/system_misc.h> -struct psci_operations psci_ops; +#define PSCI_POWER_STATE_TYPE_STANDBY		0 +#define PSCI_POWER_STATE_TYPE_POWER_DOWN	1 + +struct psci_power_state { +	u16	id; +	u8	type; +	u8	affinity_level; +}; + +struct psci_operations { +	int (*cpu_suspend)(struct psci_power_state state, +			   unsigned long entry_point); +	int (*cpu_off)(struct psci_power_state state); +	int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); +	int (*migrate)(unsigned long cpuid); +	int (*affinity_info)(unsigned long target_affinity, +			unsigned long lowest_affinity_level); +	int (*migrate_info_type)(void); +}; + +static struct psci_operations psci_ops;  static int (*invoke_psci_fn)(u64, u64, u64, u64); +typedef int (*psci_initcall_t)(const struct device_node *);  enum psci_function {  	PSCI_FN_CPU_SUSPEND,  	PSCI_FN_CPU_ON,  	PSCI_FN_CPU_OFF,  	PSCI_FN_MIGRATE, +	PSCI_FN_AFFINITY_INFO, +	PSCI_FN_MIGRATE_INFO_TYPE,  	PSCI_FN_MAX,  };  static u32 psci_function_id[PSCI_FN_MAX]; -#define PSCI_RET_SUCCESS		0 -#define PSCI_RET_EOPNOTSUPP		-1 -#define PSCI_RET_EINVAL			-2 -#define PSCI_RET_EPERM			-3 -  static int psci_to_linux_errno(int errno)  {  	switch (errno) {  	case PSCI_RET_SUCCESS:  		return 0; -	case PSCI_RET_EOPNOTSUPP: +	case PSCI_RET_NOT_SUPPORTED:  		return -EOPNOTSUPP; -	case PSCI_RET_EINVAL: +	case PSCI_RET_INVALID_PARAMS:  		return -EINVAL; -	case PSCI_RET_EPERM: +	case PSCI_RET_DENIED:  		return -EPERM;  	};  	return -EINVAL;  } -#define 
PSCI_POWER_STATE_ID_MASK	0xffff -#define PSCI_POWER_STATE_ID_SHIFT	0 -#define PSCI_POWER_STATE_TYPE_MASK	0x1 -#define PSCI_POWER_STATE_TYPE_SHIFT	16 -#define PSCI_POWER_STATE_AFFL_MASK	0x3 -#define PSCI_POWER_STATE_AFFL_SHIFT	24 -  static u32 psci_power_state_pack(struct psci_power_state state)  { -	return	((state.id & PSCI_POWER_STATE_ID_MASK) -			<< PSCI_POWER_STATE_ID_SHIFT)	| -		((state.type & PSCI_POWER_STATE_TYPE_MASK) -			<< PSCI_POWER_STATE_TYPE_SHIFT)	| -		((state.affinity_level & PSCI_POWER_STATE_AFFL_MASK) -			<< PSCI_POWER_STATE_AFFL_SHIFT); +	return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT) +			& PSCI_0_2_POWER_STATE_ID_MASK) | +		((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) +		 & PSCI_0_2_POWER_STATE_TYPE_MASK) | +		((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) +		 & PSCI_0_2_POWER_STATE_AFFL_MASK);  }  /* @@ -108,6 +127,14 @@ static noinline int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1,  	return function_id;  } +static int psci_get_version(void) +{ +	int err; + +	err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); +	return err; +} +  static int psci_cpu_suspend(struct psci_power_state state,  			    unsigned long entry_point)  { @@ -151,28 +178,36 @@ static int psci_migrate(unsigned long cpuid)  	return psci_to_linux_errno(err);  } -static const struct of_device_id psci_of_match[] __initconst = { -	{ .compatible = "arm,psci",	}, -	{}, -}; +static int psci_affinity_info(unsigned long target_affinity, +		unsigned long lowest_affinity_level) +{ +	int err; +	u32 fn; -int __init psci_init(void) +	fn = psci_function_id[PSCI_FN_AFFINITY_INFO]; +	err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0); +	return err; +} + +static int psci_migrate_info_type(void)  { -	struct device_node *np; -	const char *method; -	u32 id; -	int err = 0; +	int err; +	u32 fn; -	np = of_find_matching_node(NULL, psci_of_match); -	if (!np) -		return -ENODEV; +	fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE]; +	err = invoke_psci_fn(fn, 0, 0, 0); +	return err; +} + +static int get_set_conduit_method(struct device_node *np) +{ +	const char *method; -	pr_info("probing function IDs from device-tree\n"); +	pr_info("probing for conduit method from DT.\n");  	if (of_property_read_string(np, "method", &method)) { -		pr_warning("missing \"method\" property\n"); -		err = -ENXIO; -		goto out_put_node; +		pr_warn("missing \"method\" property\n"); +		return -ENXIO;  	}  	if (!strcmp("hvc", method)) { @@ -180,11 +215,99 @@ int __init psci_init(void)  	} else if (!strcmp("smc", method)) {  		invoke_psci_fn = __invoke_psci_fn_smc;  	} else { -		pr_warning("invalid \"method\" property: %s\n", method); -		err = -EINVAL; +		pr_warn("invalid \"method\" property: %s\n", method); +		return -EINVAL; +	} +	return 0; +} + +static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) +{ +	invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); +} + +static void psci_sys_poweroff(void) +{ +	invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); +} + +/* + * PSCI Function IDs for v0.2+ are well defined so use + * standard values. + */ +static int psci_0_2_init(struct device_node *np) +{ +	int err, ver; + +	err = get_set_conduit_method(np); + +	if (err)  		goto out_put_node; + +	ver = psci_get_version(); + +	if (ver == PSCI_RET_NOT_SUPPORTED) { +		/* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. 
*/ +		pr_err("PSCI firmware does not comply with the v0.2 spec.\n"); +		err = -EOPNOTSUPP; +		goto out_put_node; +	} else { +		pr_info("PSCIv%d.%d detected in firmware.\n", +				PSCI_VERSION_MAJOR(ver), +				PSCI_VERSION_MINOR(ver)); + +		if (PSCI_VERSION_MAJOR(ver) == 0 && +				PSCI_VERSION_MINOR(ver) < 2) { +			err = -EINVAL; +			pr_err("Conflicting PSCI version detected.\n"); +			goto out_put_node; +		}  	} +	pr_info("Using standard PSCI v0.2 function IDs\n"); +	psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND; +	psci_ops.cpu_suspend = psci_cpu_suspend; + +	psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; +	psci_ops.cpu_off = psci_cpu_off; + +	psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN64_CPU_ON; +	psci_ops.cpu_on = psci_cpu_on; + +	psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN64_MIGRATE; +	psci_ops.migrate = psci_migrate; + +	psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN64_AFFINITY_INFO; +	psci_ops.affinity_info = psci_affinity_info; + +	psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] = +		PSCI_0_2_FN_MIGRATE_INFO_TYPE; +	psci_ops.migrate_info_type = psci_migrate_info_type; + +	arm_pm_restart = psci_sys_reset; + +	pm_power_off = psci_sys_poweroff; + +out_put_node: +	of_node_put(np); +	return err; +} + +/* + * PSCI < v0.2 get PSCI Function IDs via DT. + */ +static int psci_0_1_init(struct device_node *np) +{ +	u32 id; +	int err; + +	err = get_set_conduit_method(np); + +	if (err) +		goto out_put_node; + +	pr_info("Using PSCI v0.1 Function IDs from DT\n"); +  	if (!of_property_read_u32(np, "cpu_suspend", &id)) {  		psci_function_id[PSCI_FN_CPU_SUSPEND] = id;  		psci_ops.cpu_suspend = psci_cpu_suspend; @@ -209,3 +332,119 @@ out_put_node:  	of_node_put(np);  	return err;  } + +static const struct of_device_id psci_of_match[] __initconst = { +	{ .compatible = "arm,psci",	.data = psci_0_1_init}, +	{ .compatible = "arm,psci-0.2",	.data = psci_0_2_init}, +	{}, +}; + +int __init psci_init(void) +{ +	struct device_node *np; +	const struct of_device_id *matched_np; +	psci_initcall_t init_fn; + +	np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); + +	if (!np) +		return -ENODEV; + +	init_fn = (psci_initcall_t)matched_np->data; +	return init_fn(np); +} + +#ifdef CONFIG_SMP + +static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu) +{ +	return 0; +} + +static int __init cpu_psci_cpu_prepare(unsigned int cpu) +{ +	if (!psci_ops.cpu_on) { +		pr_err("no cpu_on method, not booting CPU%d\n", cpu); +		return -ENODEV; +	} + +	return 0; +} + +static int cpu_psci_cpu_boot(unsigned int cpu) +{ +	int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry)); +	if (err) +		pr_err("failed to boot CPU%d (%d)\n", cpu, err); + +	return err; +} + +#ifdef CONFIG_HOTPLUG_CPU +static int cpu_psci_cpu_disable(unsigned int cpu) +{ +	/* Fail early if we don't have CPU_OFF support */ +	if (!psci_ops.cpu_off) +		return -EOPNOTSUPP; +	return 0; +} + +static void cpu_psci_cpu_die(unsigned int cpu) +{ +	int ret; +	/* +	 * There are no known implementations of PSCI actually using the +	 * power state field, pass a sensible default for now. 
+	 */ +	struct psci_power_state state = { +		.type = PSCI_POWER_STATE_TYPE_POWER_DOWN, +	}; + +	ret = psci_ops.cpu_off(state); + +	pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); +} + +static int cpu_psci_cpu_kill(unsigned int cpu) +{ +	int err, i; + +	if (!psci_ops.affinity_info) +		return 1; +	/* +	 * cpu_kill could race with cpu_die and we can +	 * potentially end up declaring this cpu undead +	 * while it is dying. So, try again a few times. +	 */ + +	for (i = 0; i < 10; i++) { +		err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); +		if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { +			pr_info("CPU%d killed.\n", cpu); +			return 1; +		} + +		msleep(10); +		pr_info("Retrying again to check for CPU kill\n"); +	} + +	pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", +			cpu, err); +	/* Make op_cpu_kill() fail. */ +	return 0; +} +#endif + +const struct cpu_operations cpu_psci_ops = { +	.name		= "psci", +	.cpu_init	= cpu_psci_cpu_init, +	.cpu_prepare	= cpu_psci_cpu_prepare, +	.cpu_boot	= cpu_psci_cpu_boot, +#ifdef CONFIG_HOTPLUG_CPU +	.cpu_disable	= cpu_psci_cpu_disable, +	.cpu_die	= cpu_psci_cpu_die, +	.cpu_kill	= cpu_psci_cpu_kill, +#endif +}; + +#endif diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index fecdbf7de82..9fde010c945 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -19,6 +19,7 @@   * along with this program.  If not, see <http://www.gnu.org/licenses/>.   */ +#include <linux/compat.h>  #include <linux/kernel.h>  #include <linux/sched.h>  #include <linux/mm.h> @@ -41,6 +42,9 @@  #include <asm/traps.h>  #include <asm/system_misc.h> +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> +  /*   * TODO: does not yet catch signals sent when the child dies.   * in exit.c or in signal.c. 
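
A note on the PSCI v0.2 plumbing above: psci_power_state_pack() and psci_get_version() only marshal bit-fields for the firmware call. The short C sketch below restates that encoding in isolation; the EX_* constants and the major/minor split are local stand-ins mirroring what uapi/linux/psci.h is expected to define, not code taken from this series.

/* Illustration only: layout of a packed power_state and a PSCI version word.
 * The shift values below are assumptions restated for the example.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_PSCI_STATE_ID_SHIFT		0	/* state id:          bits [15:0]  */
#define EX_PSCI_STATE_TYPE_SHIFT	16	/* standby/power-down: bit 16      */
#define EX_PSCI_STATE_AFFL_SHIFT	24	/* affinity level:    bits [25:24] */

static uint32_t ex_power_state_pack(uint16_t id, uint8_t type, uint8_t affl)
{
	return ((uint32_t)id   << EX_PSCI_STATE_ID_SHIFT) |
	       ((uint32_t)type << EX_PSCI_STATE_TYPE_SHIFT) |
	       ((uint32_t)affl << EX_PSCI_STATE_AFFL_SHIFT);
}

int main(void)
{
	/* The power-down state used by cpu_psci_cpu_die() packs to 0x10000
	 * under the shifts assumed above. */
	printf("power_state = %#x\n", (unsigned int)ex_power_state_pack(0, 1, 0));

	/* The version word splits as major[31:16].minor[15:0], so a v0.2
	 * implementation is expected to report 0x00000002. */
	uint32_t ver = 0x00000002;
	printf("PSCIv%u.%u\n", (unsigned int)(ver >> 16),
	       (unsigned int)(ver & 0xffff));
	return 0;
}

The same packed word is what functions like psci_cpu_off() hand to the firmware; only the conduit (hvc or smc) differs, which is all get_set_conduit_method() selects.
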
@@ -214,31 +218,29 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type,  {  	int err, len, type, disabled = !ctrl.enabled; -	if (disabled) { -		len = 0; -		type = HW_BREAKPOINT_EMPTY; -	} else { -		err = arch_bp_generic_fields(ctrl, &len, &type); -		if (err) -			return err; - -		switch (note_type) { -		case NT_ARM_HW_BREAK: -			if ((type & HW_BREAKPOINT_X) != type) -				return -EINVAL; -			break; -		case NT_ARM_HW_WATCH: -			if ((type & HW_BREAKPOINT_RW) != type) -				return -EINVAL; -			break; -		default: +	attr->disabled = disabled; +	if (disabled) +		return 0; + +	err = arch_bp_generic_fields(ctrl, &len, &type); +	if (err) +		return err; + +	switch (note_type) { +	case NT_ARM_HW_BREAK: +		if ((type & HW_BREAKPOINT_X) != type)  			return -EINVAL; -		} +		break; +	case NT_ARM_HW_WATCH: +		if ((type & HW_BREAKPOINT_RW) != type) +			return -EINVAL; +		break; +	default: +		return -EINVAL;  	}  	attr->bp_len	= len;  	attr->bp_type	= type; -	attr->disabled	= disabled;  	return 0;  } @@ -519,6 +521,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,  		return ret;  	target->thread.fpsimd_state.user_fpsimd = newstate; +	fpsimd_flush_task_state(target);  	return ret;  } @@ -636,28 +639,32 @@ static int compat_gpr_get(struct task_struct *target,  	for (i = 0; i < num_regs; ++i) {  		unsigned int idx = start + i; -		void *reg; +		compat_ulong_t reg;  		switch (idx) {  		case 15: -			reg = (void *)&task_pt_regs(target)->pc; +			reg = task_pt_regs(target)->pc;  			break;  		case 16: -			reg = (void *)&task_pt_regs(target)->pstate; +			reg = task_pt_regs(target)->pstate;  			break;  		case 17: -			reg = (void *)&task_pt_regs(target)->orig_x0; +			reg = task_pt_regs(target)->orig_x0;  			break;  		default: -			reg = (void *)&task_pt_regs(target)->regs[idx]; +			reg = task_pt_regs(target)->regs[idx];  		} -		ret = copy_to_user(ubuf, reg, sizeof(compat_ulong_t)); +		if (kbuf) { +			memcpy(kbuf, ®, sizeof(reg)); +			kbuf += sizeof(reg); +		} else { +			ret = copy_to_user(ubuf, ®, sizeof(reg)); +			if (ret) +				break; -		if (ret) -			break; -		else -			ubuf += sizeof(compat_ulong_t); +			ubuf += sizeof(reg); +		}  	}  	return ret; @@ -685,28 +692,33 @@ static int compat_gpr_set(struct task_struct *target,  	for (i = 0; i < num_regs; ++i) {  		unsigned int idx = start + i; -		void *reg; +		compat_ulong_t reg; + +		if (kbuf) { +			memcpy(®, kbuf, sizeof(reg)); +			kbuf += sizeof(reg); +		} else { +			ret = copy_from_user(®, ubuf, sizeof(reg)); +			if (ret) +				return ret; + +			ubuf += sizeof(reg); +		}  		switch (idx) {  		case 15: -			reg = (void *)&newregs.pc; +			newregs.pc = reg;  			break;  		case 16: -			reg = (void *)&newregs.pstate; +			newregs.pstate = reg;  			break;  		case 17: -			reg = (void *)&newregs.orig_x0; +			newregs.orig_x0 = reg;  			break;  		default: -			reg = (void *)&newregs.regs[idx]; +			newregs.regs[idx] = reg;  		} -		ret = copy_from_user(reg, ubuf, sizeof(compat_ulong_t)); - -		if (ret) -			goto out; -		else -			ubuf += sizeof(compat_ulong_t);  	}  	if (valid_user_regs(&newregs.user_regs)) @@ -714,7 +726,6 @@ static int compat_gpr_set(struct task_struct *target,  	else  		ret = -EINVAL; -out:  	return ret;  } @@ -768,6 +779,7 @@ static int compat_vfp_set(struct task_struct *target,  		uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;  	} +	fpsimd_flush_task_state(target);  	return ret;  } @@ -825,6 +837,7 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off,  				    compat_ulong_t val)  {  	int ret; +	
mm_segment_t old_fs = get_fs();  	if (off & 3 || off >= COMPAT_USER_SZ)  		return -EIO; @@ -832,10 +845,13 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off,  	if (off >= sizeof(compat_elf_gregset_t))  		return 0; +	set_fs(KERNEL_DS);  	ret = copy_regset_from_user(tsk, &user_aarch32_view,  				    REGSET_COMPAT_GPR, off,  				    sizeof(compat_ulong_t),  				    &val); +	set_fs(old_fs); +  	return ret;  } @@ -1062,35 +1078,49 @@ long arch_ptrace(struct task_struct *child, long request,  	return ptrace_request(child, request, addr, data);  } -asmlinkage int syscall_trace(int dir, struct pt_regs *regs) +enum ptrace_syscall_dir { +	PTRACE_SYSCALL_ENTER = 0, +	PTRACE_SYSCALL_EXIT, +}; + +static void tracehook_report_syscall(struct pt_regs *regs, +				     enum ptrace_syscall_dir dir)  { +	int regno;  	unsigned long saved_reg; -	if (!test_thread_flag(TIF_SYSCALL_TRACE)) -		return regs->syscallno; - -	if (is_compat_task()) { -		/* AArch32 uses ip (r12) for scratch */ -		saved_reg = regs->regs[12]; -		regs->regs[12] = dir; -	} else { -		/* -		 * Save X7. X7 is used to denote syscall entry/exit: -		 *   X7 = 0 -> entry, = 1 -> exit -		 */ -		saved_reg = regs->regs[7]; -		regs->regs[7] = dir; -	} +	/* +	 * A scratch register (ip(r12) on AArch32, x7 on AArch64) is +	 * used to denote syscall entry/exit: +	 */ +	regno = (is_compat_task() ? 12 : 7); +	saved_reg = regs->regs[regno]; +	regs->regs[regno] = dir; -	if (dir) +	if (dir == PTRACE_SYSCALL_EXIT)  		tracehook_report_syscall_exit(regs, 0);  	else if (tracehook_report_syscall_entry(regs))  		regs->syscallno = ~0UL; -	if (is_compat_task()) -		regs->regs[12] = saved_reg; -	else -		regs->regs[7] = saved_reg; +	regs->regs[regno] = saved_reg; +} + +asmlinkage int syscall_trace_enter(struct pt_regs *regs) +{ +	if (test_thread_flag(TIF_SYSCALL_TRACE)) +		tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + +	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) +		trace_sys_enter(regs, regs->syscallno);  	return regs->syscallno;  } + +asmlinkage void syscall_trace_exit(struct pt_regs *regs) +{ +	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) +		trace_sys_exit(regs, regs_return_value(regs)); + +	if (test_thread_flag(TIF_SYSCALL_TRACE)) +		tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); +} diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c new file mode 100644 index 00000000000..89102a6ffad --- /dev/null +++ b/arch/arm64/kernel/return_address.c @@ -0,0 +1,55 @@ +/* + * arch/arm64/kernel/return_address.c + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/export.h> +#include <linux/ftrace.h> + +#include <asm/stacktrace.h> + +struct return_address_data { +	unsigned int level; +	void *addr; +}; + +static int save_return_addr(struct stackframe *frame, void *d) +{ +	struct return_address_data *data = d; + +	if (!data->level) { +		data->addr = (void *)frame->pc; +		return 1; +	} else { +		--data->level; +		return 0; +	} +} + +void *return_address(unsigned int level) +{ +	struct return_address_data data; +	struct stackframe frame; +	register unsigned long current_sp asm ("sp"); + +	data.level = level + 2; +	data.addr = NULL; + +	frame.fp = (unsigned long)__builtin_frame_address(0); +	frame.sp = current_sp; +	frame.pc = (unsigned long)return_address; /* dummy */ + +	walk_stackframe(&frame, save_return_addr, &data); + +	if (!data.level) +		return data.addr; +	else +		return NULL; +} +EXPORT_SYMBOL_GPL(return_address); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 055cfb80e05..46d1125571f 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -25,6 +25,7 @@  #include <linux/utsname.h>  #include <linux/initrd.h>  #include <linux/console.h> +#include <linux/cache.h>  #include <linux/bootmem.h>  #include <linux/seq_file.h>  #include <linux/screen_info.h> @@ -41,10 +42,13 @@  #include <linux/memblock.h>  #include <linux/of_fdt.h>  #include <linux/of_platform.h> +#include <linux/efi.h> +#include <asm/fixmap.h>  #include <asm/cputype.h>  #include <asm/elf.h>  #include <asm/cputable.h> +#include <asm/cpu_ops.h>  #include <asm/sections.h>  #include <asm/setup.h>  #include <asm/smp_plat.h> @@ -53,6 +57,7 @@  #include <asm/traps.h>  #include <asm/memblock.h>  #include <asm/psci.h> +#include <asm/efi.h>  unsigned int processor_id;  EXPORT_SYMBOL(processor_id); @@ -60,6 +65,17 @@ EXPORT_SYMBOL(processor_id);  unsigned long elf_hwcap __read_mostly;  EXPORT_SYMBOL_GPL(elf_hwcap); +#ifdef CONFIG_COMPAT +#define COMPAT_ELF_HWCAP_DEFAULT	\ +				(COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ +				 COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ +				 COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ +				 COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ +				 COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) +unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; +unsigned int compat_elf_hwcap2 __read_mostly; +#endif +  static const char *cpu_name;  static const char *machine_name;  phys_addr_t __fdt_pointer __initdata; @@ -97,15 +113,97 @@ void __init early_print(const char *str, ...)  	printk("%s", buf);  } -static void __init setup_processor(void) +void __init smp_setup_processor_id(void)  { -	struct cpu_info *cpu_info; +	/* +	 * clear __my_cpu_offset on boot CPU to avoid hang caused by +	 * using percpu variable early, for example, lockdep will +	 * access percpu variable inside lock_release +	 */ +	set_my_cpu_offset(0); +} + +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ +	return phys_id == cpu_logical_map(cpu); +} +struct mpidr_hash mpidr_hash; +#ifdef CONFIG_SMP +/** + * smp_build_mpidr_hash - Pre-compute shifts required at each affinity + *			  level in order to build a linear index from an + *			  MPIDR value. Resulting algorithm is a collision + *			  free hash carried out through shifting and ORing + */ +static void __init smp_build_mpidr_hash(void) +{ +	u32 i, affinity, fs[4], bits[4], ls; +	u64 mask = 0; +	/* +	 * Pre-scan the list of MPIDRS and filter out bits that do +	 * not contribute to affinity levels, ie they never toggle. 
+	 */ +	for_each_possible_cpu(i) +		mask |= (cpu_logical_map(i) ^ cpu_logical_map(0)); +	pr_debug("mask of set bits %#llx\n", mask);  	/* -	 * locate processor in the list of supported processor -	 * types.  The linker builds this table for us from the -	 * entries in arch/arm/mm/proc.S +	 * Find and stash the last and first bit set at all affinity levels to +	 * check how many bits are required to represent them.  	 */ +	for (i = 0; i < 4; i++) { +		affinity = MPIDR_AFFINITY_LEVEL(mask, i); +		/* +		 * Find the MSB bit and LSB bits position +		 * to determine how many bits are required +		 * to express the affinity level. +		 */ +		ls = fls(affinity); +		fs[i] = affinity ? ffs(affinity) - 1 : 0; +		bits[i] = ls - fs[i]; +	} +	/* +	 * An index can be created from the MPIDR_EL1 by isolating the +	 * significant bits at each affinity level and by shifting +	 * them in order to compress the 32 bits values space to a +	 * compressed set of values. This is equivalent to hashing +	 * the MPIDR_EL1 through shifting and ORing. It is a collision free +	 * hash though not minimal since some levels might contain a number +	 * of CPUs that is not an exact power of 2 and their bit +	 * representation might contain holes, eg MPIDR_EL1[7:0] = {0x2, 0x80}. +	 */ +	mpidr_hash.shift_aff[0] = MPIDR_LEVEL_SHIFT(0) + fs[0]; +	mpidr_hash.shift_aff[1] = MPIDR_LEVEL_SHIFT(1) + fs[1] - bits[0]; +	mpidr_hash.shift_aff[2] = MPIDR_LEVEL_SHIFT(2) + fs[2] - +						(bits[1] + bits[0]); +	mpidr_hash.shift_aff[3] = MPIDR_LEVEL_SHIFT(3) + +				  fs[3] - (bits[2] + bits[1] + bits[0]); +	mpidr_hash.mask = mask; +	mpidr_hash.bits = bits[3] + bits[2] + bits[1] + bits[0]; +	pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] aff3[%u] mask[%#llx] bits[%u]\n", +		mpidr_hash.shift_aff[0], +		mpidr_hash.shift_aff[1], +		mpidr_hash.shift_aff[2], +		mpidr_hash.shift_aff[3], +		mpidr_hash.mask, +		mpidr_hash.bits); +	/* +	 * 4x is an arbitrary value used to warn on a hash table much bigger +	 * than expected on most systems. +	 */ +	if (mpidr_hash_size() > 4 * num_possible_cpus()) +		pr_warn("Large number of MPIDR hash buckets detected\n"); +	__flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); +} +#endif + +static void __init setup_processor(void) +{ +	struct cpu_info *cpu_info; +	u64 features, block; +	u32 cwg; +	int cls; +  	cpu_info = lookup_processor_type(read_cpuid_id());  	if (!cpu_info) {  		printk("CPU configuration botched (ID %08x), unable to continue.\n", @@ -118,76 +216,99 @@ static void __init setup_processor(void)  	printk("CPU: %s [%08x] revision %d\n",  	       cpu_name, read_cpuid_id(), read_cpuid_id() & 15); -	sprintf(init_utsname()->machine, "aarch64"); +	sprintf(init_utsname()->machine, ELF_PLATFORM);  	elf_hwcap = 0; -} -static void __init setup_machine_fdt(phys_addr_t dt_phys) -{ -	struct boot_param_header *devtree; -	unsigned long dt_root; +	/* +	 * Check for sane CTR_EL0.CWG value. +	 */ +	cwg = cache_type_cwg(); +	cls = cache_line_size(); +	if (!cwg) +		pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n", +			cls); +	if (L1_CACHE_BYTES < cls) +		pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", +			L1_CACHE_BYTES, cls); -	/* Check we have a non-NULL DT pointer */ -	if (!dt_phys) { -		early_print("\n" -			"Error: NULL or invalid device tree blob\n" -			"The dtb must be 8-byte aligned and passed in the first 512MB of memory\n" -			"\nPlease check your bootloader.\n"); +	/* +	 * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks. 
+	 * The blocks we test below represent incremental functionality +	 * for non-negative values. Negative values are reserved. +	 */ +	features = read_cpuid(ID_AA64ISAR0_EL1); +	block = (features >> 4) & 0xf; +	if (!(block & 0x8)) { +		switch (block) { +		default: +		case 2: +			elf_hwcap |= HWCAP_PMULL; +		case 1: +			elf_hwcap |= HWCAP_AES; +		case 0: +			break; +		} +	} -		while (true) -			cpu_relax(); +	block = (features >> 8) & 0xf; +	if (block && !(block & 0x8)) +		elf_hwcap |= HWCAP_SHA1; + +	block = (features >> 12) & 0xf; +	if (block && !(block & 0x8)) +		elf_hwcap |= HWCAP_SHA2; + +	block = (features >> 16) & 0xf; +	if (block && !(block & 0x8)) +		elf_hwcap |= HWCAP_CRC32; +#ifdef CONFIG_COMPAT +	/* +	 * ID_ISAR5_EL1 carries similar information as above, but pertaining to +	 * the Aarch32 32-bit execution state. +	 */ +	features = read_cpuid(ID_ISAR5_EL1); +	block = (features >> 4) & 0xf; +	if (!(block & 0x8)) { +		switch (block) { +		default: +		case 2: +			compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL; +		case 1: +			compat_elf_hwcap2 |= COMPAT_HWCAP2_AES; +		case 0: +			break; +		}  	} -	devtree = phys_to_virt(dt_phys); +	block = (features >> 8) & 0xf; +	if (block && !(block & 0x8)) +		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1; -	/* Check device tree validity */ -	if (be32_to_cpu(devtree->magic) != OF_DT_HEADER) { +	block = (features >> 12) & 0xf; +	if (block && !(block & 0x8)) +		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2; + +	block = (features >> 16) & 0xf; +	if (block && !(block & 0x8)) +		compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32; +#endif +} + +static void __init setup_machine_fdt(phys_addr_t dt_phys) +{ +	if (!dt_phys || !early_init_dt_scan(phys_to_virt(dt_phys))) {  		early_print("\n"  			"Error: invalid device tree blob at physical address 0x%p (virtual address 0x%p)\n" -			"Expected 0x%x, found 0x%x\n" +			"The dtb must be 8-byte aligned and passed in the first 512MB of memory\n"  			"\nPlease check your bootloader.\n", -			dt_phys, devtree, OF_DT_HEADER, -			be32_to_cpu(devtree->magic)); +			dt_phys, phys_to_virt(dt_phys));  		while (true)  			cpu_relax();  	} -	initial_boot_params = devtree; -	dt_root = of_get_flat_dt_root(); - -	machine_name = of_get_flat_dt_prop(dt_root, "model", NULL); -	if (!machine_name) -		machine_name = of_get_flat_dt_prop(dt_root, "compatible", NULL); -	if (!machine_name) -		machine_name = "<unknown>"; -	pr_info("Machine: %s\n", machine_name); - -	/* Retrieve various information from the /chosen node */ -	of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line); -	/* Initialize {size,address}-cells info */ -	of_scan_flat_dt(early_init_dt_scan_root, NULL); -	/* Setup memory, calling early_init_dt_add_memory_arch */ -	of_scan_flat_dt(early_init_dt_scan_memory, NULL); -} - -void __init early_init_dt_add_memory_arch(u64 base, u64 size) -{ -	base &= PAGE_MASK; -	size &= PAGE_MASK; -	if (base + size < PHYS_OFFSET) { -		pr_warning("Ignoring memory block 0x%llx - 0x%llx\n", -			   base, base + size); -		return; -	} -	if (base < PHYS_OFFSET) { -		pr_warning("Ignoring memory range 0x%llx - 0x%llx\n", -			   base, PHYS_OFFSET); -		size -= PHYS_OFFSET - base; -		base = PHYS_OFFSET; -	} -	memblock_add(base, size); +	machine_name = of_flat_dt_get_machine_name();  }  /* @@ -241,6 +362,11 @@ u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };  void __init setup_arch(char **cmdline_p)  { +	/* +	 * Unmask asynchronous aborts early to catch possible system errors. 
+	 */ +	local_async_enable(); +  	setup_processor();  	setup_machine_fdt(__fdt_pointer); @@ -252,20 +378,27 @@ void __init setup_arch(char **cmdline_p)  	*cmdline_p = boot_command_line; +	early_ioremap_init(); +  	parse_early_param(); +	efi_init();  	arm64_memblock_init();  	paging_init();  	request_standard_resources(); +	efi_idmap_init(); +  	unflatten_device_tree();  	psci_init();  	cpu_logical_map(0) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; +	cpu_read_bootcpu_ops();  #ifdef CONFIG_SMP  	smp_init_cpus(); +	smp_build_mpidr_hash();  #endif  #ifdef CONFIG_VT @@ -279,11 +412,10 @@ void __init setup_arch(char **cmdline_p)  static int __init arm64_device_init(void)  { -	of_clk_init(NULL);  	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);  	return 0;  } -arch_initcall(arm64_device_init); +arch_initcall_sync(arm64_device_init);  static DEFINE_PER_CPU(struct cpu, cpu_data); @@ -304,6 +436,12 @@ subsys_initcall(topology_init);  static const char *hwcap_str[] = {  	"fp",  	"asimd", +	"evtstrm", +	"aes", +	"pmull", +	"sha1", +	"sha2", +	"crc32",  	NULL  }; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 890a591f75d..6357b9c6c90 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -17,6 +17,7 @@   * along with this program.  If not, see <http://www.gnu.org/licenses/>.   */ +#include <linux/compat.h>  #include <linux/errno.h>  #include <linux/signal.h>  #include <linux/personality.h> @@ -25,7 +26,6 @@  #include <linux/tracehook.h>  #include <linux/ratelimit.h> -#include <asm/compat.h>  #include <asm/debug-monitors.h>  #include <asm/elf.h>  #include <asm/cacheflush.h> @@ -51,7 +51,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)  	int err;  	/* dump the hardware registers to the fpsimd_state structure */ -	fpsimd_save_state(fpsimd); +	fpsimd_preserve_current_state();  	/* copy the FP and status/control registers */  	err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs)); @@ -86,11 +86,8 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)  	__get_user_error(fpsimd.fpcr, &ctx->fpcr, err);  	/* load the hardware registers from the fpsimd_state structure */ -	if (!err) { -		preempt_disable(); -		fpsimd_load_state(&fpsimd); -		preempt_enable(); -	} +	if (!err) +		fpsimd_update_current_state(&fpsimd);  	return err ? 
-EFAULT : 0;  } @@ -100,8 +97,7 @@ static int restore_sigframe(struct pt_regs *regs,  {  	sigset_t set;  	int i, err; -	struct aux_context __user *aux = -		(struct aux_context __user *)sf->uc.uc_mcontext.__reserved; +	void *aux = sf->uc.uc_mcontext.__reserved;  	err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));  	if (err == 0) @@ -121,8 +117,11 @@ static int restore_sigframe(struct pt_regs *regs,  	err |= !valid_user_regs(®s->user_regs); -	if (err == 0) -		err |= restore_fpsimd_context(&aux->fpsimd); +	if (err == 0) { +		struct fpsimd_context *fpsimd_ctx = +			container_of(aux, struct fpsimd_context, head); +		err |= restore_fpsimd_context(fpsimd_ctx); +	}  	return err;  } @@ -167,8 +166,8 @@ static int setup_sigframe(struct rt_sigframe __user *sf,  			  struct pt_regs *regs, sigset_t *set)  {  	int i, err = 0; -	struct aux_context __user *aux = -		(struct aux_context __user *)sf->uc.uc_mcontext.__reserved; +	void *aux = sf->uc.uc_mcontext.__reserved; +	struct _aarch64_ctx *end;  	/* set up the stack frame for unwinding */  	__put_user_error(regs->regs[29], &sf->fp, err); @@ -185,12 +184,27 @@ static int setup_sigframe(struct rt_sigframe __user *sf,  	err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set)); -	if (err == 0) -		err |= preserve_fpsimd_context(&aux->fpsimd); +	if (err == 0) { +		struct fpsimd_context *fpsimd_ctx = +			container_of(aux, struct fpsimd_context, head); +		err |= preserve_fpsimd_context(fpsimd_ctx); +		aux += sizeof(*fpsimd_ctx); +	} + +	/* fault information, if valid */ +	if (current->thread.fault_code) { +		struct esr_context *esr_ctx = +			container_of(aux, struct esr_context, head); +		__put_user_error(ESR_MAGIC, &esr_ctx->head.magic, err); +		__put_user_error(sizeof(*esr_ctx), &esr_ctx->head.size, err); +		__put_user_error(current->thread.fault_code, &esr_ctx->esr, err); +		aux += sizeof(*esr_ctx); +	}  	/* set the "end" magic */ -	__put_user_error(0, &aux->end.magic, err); -	__put_user_error(0, &aux->end.size, err); +	end = aux; +	__put_user_error(0, &end->magic, err); +	__put_user_error(0, &end->size, err);  	return err;  } @@ -416,4 +430,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,  		clear_thread_flag(TIF_NOTIFY_RESUME);  		tracehook_notify_resume(regs);  	} + +	if (thread_flags & _TIF_FOREIGN_FPSTATE) +		fpsimd_restore_current_state(); +  } diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index e393174fe85..3491c638f17 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -23,6 +23,7 @@  #include <linux/syscalls.h>  #include <linux/ratelimit.h> +#include <asm/esr.h>  #include <asm/fpsimd.h>  #include <asm/signal32.h>  #include <asm/uaccess.h> @@ -81,6 +82,8 @@ struct compat_vfp_sigframe {  #define VFP_MAGIC		0x56465001  #define VFP_STORAGE_SIZE	sizeof(struct compat_vfp_sigframe) +#define FSR_WRITE_SHIFT		(11) +  struct compat_aux_sigframe {  	struct compat_vfp_sigframe	vfp; @@ -100,34 +103,6 @@ struct compat_rt_sigframe {  #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -/* - * For ARM syscalls, the syscall number has to be loaded into r7. - * We do not support an OABI userspace. - */ -#define MOV_R7_NR_SIGRETURN	(0xe3a07000 | __NR_compat_sigreturn) -#define SVC_SYS_SIGRETURN	(0xef000000 | __NR_compat_sigreturn) -#define MOV_R7_NR_RT_SIGRETURN	(0xe3a07000 | __NR_compat_rt_sigreturn) -#define SVC_SYS_RT_SIGRETURN	(0xef000000 | __NR_compat_rt_sigreturn) - -/* - * For Thumb syscalls, we also pass the syscall number via r7. 
We therefore - * need two 16-bit instructions. - */ -#define SVC_THUMB_SIGRETURN	(((0xdf00 | __NR_compat_sigreturn) << 16) | \ -				   0x2700 | __NR_compat_sigreturn) -#define SVC_THUMB_RT_SIGRETURN	(((0xdf00 | __NR_compat_rt_sigreturn) << 16) | \ -				   0x2700 | __NR_compat_rt_sigreturn) - -const compat_ulong_t aarch32_sigret_code[6] = { -	/* -	 * AArch32 sigreturn code. -	 * We don't construct an OABI SWI - instead we just set the imm24 field -	 * to the EABI syscall number so that we create a sane disassembly. -	 */ -	MOV_R7_NR_SIGRETURN,    SVC_SYS_SIGRETURN,    SVC_THUMB_SIGRETURN, -	MOV_R7_NR_RT_SIGRETURN, SVC_SYS_RT_SIGRETURN, SVC_THUMB_RT_SIGRETURN, -}; -  static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)  {  	compat_sigset_t	cset; @@ -150,7 +125,7 @@ static inline int get_sigset_t(sigset_t *set,  	return 0;  } -int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) +int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)  {  	int err; @@ -247,7 +222,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)  	 * Note that this also saves V16-31, which aren't visible  	 * in AArch32.  	 */ -	fpsimd_save_state(fpsimd); +	fpsimd_preserve_current_state();  	/* Place structure header on the stack */  	__put_user_error(magic, &frame->magic, err); @@ -310,11 +285,8 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)  	 * We don't need to touch the exception register, so  	 * reload the hardware state.  	 */ -	if (!err) { -		preempt_disable(); -		fpsimd_load_state(&fpsimd); -		preempt_enable(); -	} +	if (!err) +		fpsimd_update_current_state(&fpsimd);  	return err ? -EFAULT : 0;  } @@ -474,12 +446,13 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka,  	/* Check if the handler is written for ARM or Thumb */  	thumb = handler & 1; -	if (thumb) { +	if (thumb)  		spsr |= COMPAT_PSR_T_BIT; -		spsr &= ~COMPAT_PSR_IT_MASK; -	} else { +	else  		spsr &= ~COMPAT_PSR_T_BIT; -	} + +	/* The IT state must be cleared for both ARM and Thumb-2 */ +	spsr &= ~COMPAT_PSR_IT_MASK;  	if (ka->sa.sa_flags & SA_RESTORER) {  		retcode = ptr_to_compat(ka->sa.sa_restorer); @@ -527,7 +500,9 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf,  	__put_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err);  	__put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.trap_no, err); -	__put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.error_code, err); +	/* set the compat FSR WnR */ +	__put_user_error(!!(current->thread.fault_code & ESR_EL1_WRITE) << +			 FSR_WRITE_SHIFT, &sf->uc.uc_mcontext.error_code, err);  	__put_user_error(current->thread.fault_address, &sf->uc.uc_mcontext.fault_address, err);  	__put_user_error(set->sig[0], &sf->uc.uc_mcontext.oldmask, err); diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S new file mode 100644 index 00000000000..b1925729c69 --- /dev/null +++ b/arch/arm64/kernel/sleep.S @@ -0,0 +1,184 @@ +#include <linux/errno.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/assembler.h> + +	.text +/* + * Implementation of MPIDR_EL1 hash algorithm through shifting + * and OR'ing. 
+ * + * @dst: register containing hash result + * @rs0: register containing affinity level 0 bit shift + * @rs1: register containing affinity level 1 bit shift + * @rs2: register containing affinity level 2 bit shift + * @rs3: register containing affinity level 3 bit shift + * @mpidr: register containing MPIDR_EL1 value + * @mask: register containing MPIDR mask + * + * Pseudo C-code: + * + *u32 dst; + * + *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 rs3, u64 mpidr, u64 mask) { + *	u32 aff0, aff1, aff2, aff3; + *	u64 mpidr_masked = mpidr & mask; + *	aff0 = mpidr_masked & 0xff; + *	aff1 = mpidr_masked & 0xff00; + *	aff2 = mpidr_masked & 0xff0000; + *	aff2 = mpidr_masked & 0xff00000000; + *	dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3); + *} + * Input registers: rs0, rs1, rs2, rs3, mpidr, mask + * Output register: dst + * Note: input and output registers must be disjoint register sets +         (eg: a macro instance with mpidr = x1 and dst = x1 is invalid) + */ +	.macro compute_mpidr_hash dst, rs0, rs1, rs2, rs3, mpidr, mask +	and	\mpidr, \mpidr, \mask		// mask out MPIDR bits +	and	\dst, \mpidr, #0xff		// mask=aff0 +	lsr	\dst ,\dst, \rs0		// dst=aff0>>rs0 +	and	\mask, \mpidr, #0xff00		// mask = aff1 +	lsr	\mask ,\mask, \rs1 +	orr	\dst, \dst, \mask		// dst|=(aff1>>rs1) +	and	\mask, \mpidr, #0xff0000	// mask = aff2 +	lsr	\mask ,\mask, \rs2 +	orr	\dst, \dst, \mask		// dst|=(aff2>>rs2) +	and	\mask, \mpidr, #0xff00000000	// mask = aff3 +	lsr	\mask ,\mask, \rs3 +	orr	\dst, \dst, \mask		// dst|=(aff3>>rs3) +	.endm +/* + * Save CPU state for a suspend.  This saves callee registers, and allocates + * space on the kernel stack to save the CPU specific registers + some + * other data for resume. + * + *  x0 = suspend finisher argument + */ +ENTRY(__cpu_suspend) +	stp	x29, lr, [sp, #-96]! +	stp	x19, x20, [sp,#16] +	stp	x21, x22, [sp,#32] +	stp	x23, x24, [sp,#48] +	stp	x25, x26, [sp,#64] +	stp	x27, x28, [sp,#80] +	mov	x2, sp +	sub	sp, sp, #CPU_SUSPEND_SZ	// allocate cpu_suspend_ctx +	mov	x1, sp +	/* +	 * x1 now points to struct cpu_suspend_ctx allocated on the stack +	 */ +	str	x2, [x1, #CPU_CTX_SP] +	ldr	x2, =sleep_save_sp +	ldr	x2, [x2, #SLEEP_SAVE_SP_VIRT] +#ifdef CONFIG_SMP +	mrs	x7, mpidr_el1 +	ldr	x9, =mpidr_hash +	ldr	x10, [x9, #MPIDR_HASH_MASK] +	/* +	 * Following code relies on the struct mpidr_hash +	 * members size. +	 */ +	ldp	w3, w4, [x9, #MPIDR_HASH_SHIFTS] +	ldp	w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)] +	compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 +	add	x2, x2, x8, lsl #3 +#endif +	bl	__cpu_suspend_finisher +        /* +	 * Never gets here, unless suspend fails. 
+	 * Successful cpu_suspend should return from cpu_resume, returning +	 * through this code path is considered an error +	 * If the return value is set to 0 force x0 = -EOPNOTSUPP +	 * to make sure a proper error condition is propagated +	 */ +	cmp	x0, #0 +	mov	x3, #-EOPNOTSUPP +	csel	x0, x3, x0, eq +	add	sp, sp, #CPU_SUSPEND_SZ	// rewind stack pointer +	ldp	x19, x20, [sp, #16] +	ldp	x21, x22, [sp, #32] +	ldp	x23, x24, [sp, #48] +	ldp	x25, x26, [sp, #64] +	ldp	x27, x28, [sp, #80] +	ldp	x29, lr, [sp], #96 +	ret +ENDPROC(__cpu_suspend) +	.ltorg + +/* + * x0 must contain the sctlr value retrieved from restored context + */ +ENTRY(cpu_resume_mmu) +	ldr	x3, =cpu_resume_after_mmu +	msr	sctlr_el1, x0		// restore sctlr_el1 +	isb +	br	x3			// global jump to virtual address +ENDPROC(cpu_resume_mmu) +cpu_resume_after_mmu: +	mov	x0, #0			// return zero on success +	ldp	x19, x20, [sp, #16] +	ldp	x21, x22, [sp, #32] +	ldp	x23, x24, [sp, #48] +	ldp	x25, x26, [sp, #64] +	ldp	x27, x28, [sp, #80] +	ldp	x29, lr, [sp], #96 +	ret +ENDPROC(cpu_resume_after_mmu) + +	.data +ENTRY(cpu_resume) +	bl	el2_setup		// if in EL2 drop to EL1 cleanly +#ifdef CONFIG_SMP +	mrs	x1, mpidr_el1 +	adr	x4, mpidr_hash_ptr +	ldr	x5, [x4] +	add	x8, x4, x5		// x8 = struct mpidr_hash phys address +        /* retrieve mpidr_hash members to compute the hash */ +	ldr	x2, [x8, #MPIDR_HASH_MASK] +	ldp	w3, w4, [x8, #MPIDR_HASH_SHIFTS] +	ldp	w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] +	compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 +        /* x7 contains hash index, let's use it to grab context pointer */ +#else +	mov	x7, xzr +#endif +	adr	x0, sleep_save_sp +	ldr	x0, [x0, #SLEEP_SAVE_SP_PHYS] +	ldr	x0, [x0, x7, lsl #3] +	/* load sp from context */ +	ldr	x2, [x0, #CPU_CTX_SP] +	adr	x1, sleep_idmap_phys +	/* load physical address of identity map page table in x1 */ +	ldr	x1, [x1] +	mov	sp, x2 +	/* +	 * cpu_do_resume expects x0 to contain context physical address +	 * pointer and x1 to contain physical address of 1:1 page tables +	 */ +	bl	cpu_do_resume		// PC relative jump, MMU off +	b	cpu_resume_mmu		// Resume MMU, never returns +ENDPROC(cpu_resume) + +	.align 3 +mpidr_hash_ptr: +	/* +	 * offset of mpidr_hash symbol from current location +	 * used to obtain run-time mpidr_hash address with MMU off +         */ +	.quad	mpidr_hash - . 
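For readers following the hash logic: the index produced by compute_mpidr_hash above can be written in plain C roughly as below. This is an illustration only (mpidr_hash_index is not a symbol introduced by this patch), and the second assignment to aff2 in the pseudo-code comment is presumably intended to read "aff3 = mpidr_masked & 0xff00000000".

#include <linux/types.h>

/*
 * Illustrative C equivalent of compute_mpidr_hash: collapse the masked
 * MPIDR_EL1 affinity fields into a small, dense index using the
 * per-affinity-level shifts (rs0..rs3) computed at boot.
 */
static inline u32 mpidr_hash_index(u64 mpidr, u64 mask,
				   u32 rs0, u32 rs1, u32 rs2, u32 rs3)
{
	u64 m = mpidr & mask;

	return (u32)(((m & 0xff) >> rs0) |
		     ((m & 0xff00) >> rs1) |
		     ((m & 0xff0000) >> rs2) |
		     ((m & 0xff00000000UL) >> rs3));
}

This index is what __cpu_suspend and cpu_resume above use to select a per-CPU slot (hence the "lsl #3" scaling by sizeof(phys_addr_t)) in the sleep_save_sp area defined just below.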
+/* + * physical address of identity mapped page tables + */ +	.type	sleep_idmap_phys, #object +ENTRY(sleep_idmap_phys) +	.quad	0 +/* + * struct sleep_save_sp { + *	phys_addr_t *save_ptr_stash; + *	phys_addr_t save_ptr_stash_phys; + * }; + */ +	.type	sleep_save_sp, #object +ENTRY(sleep_save_sp) +	.space	SLEEP_SAVE_SP_SZ	// struct sleep_save_sp diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 78db90dcc91..40f38f46c8e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -35,10 +35,12 @@  #include <linux/clockchips.h>  #include <linux/completion.h>  #include <linux/of.h> +#include <linux/irq_work.h>  #include <asm/atomic.h>  #include <asm/cacheflush.h>  #include <asm/cputype.h> +#include <asm/cpu_ops.h>  #include <asm/mmu_context.h>  #include <asm/pgtable.h>  #include <asm/pgalloc.h> @@ -54,70 +56,26 @@   * where to place its SVC stack   */  struct secondary_data secondary_data; -volatile unsigned long secondary_holding_pen_release = INVALID_HWID;  enum ipi_msg_type {  	IPI_RESCHEDULE,  	IPI_CALL_FUNC,  	IPI_CALL_FUNC_SINGLE,  	IPI_CPU_STOP, +	IPI_TIMER, +	IPI_IRQ_WORK,  }; -static DEFINE_RAW_SPINLOCK(boot_lock); - -/* - * Write secondary_holding_pen_release in a way that is guaranteed to be - * visible to all observers, irrespective of whether they're taking part - * in coherency or not.  This is necessary for the hotplug code to work - * reliably. - */ -static void write_pen_release(u64 val) -{ -	void *start = (void *)&secondary_holding_pen_release; -	unsigned long size = sizeof(secondary_holding_pen_release); - -	secondary_holding_pen_release = val; -	__flush_dcache_area(start, size); -} -  /*   * Boot a secondary CPU, and assign it the specified idle task.   * This also gives us the initial stack to use for this CPU.   */  static int boot_secondary(unsigned int cpu, struct task_struct *idle)  { -	unsigned long timeout; - -	/* -	 * Set synchronisation state between this boot processor -	 * and the secondary one -	 */ -	raw_spin_lock(&boot_lock); - -	/* -	 * Update the pen release flag. -	 */ -	write_pen_release(cpu_logical_map(cpu)); - -	/* -	 * Send an event, causing the secondaries to read pen_release. -	 */ -	sev(); - -	timeout = jiffies + (1 * HZ); -	while (time_before(jiffies, timeout)) { -		if (secondary_holding_pen_release == INVALID_HWID) -			break; -		udelay(10); -	} +	if (cpu_ops[cpu]->cpu_boot) +		return cpu_ops[cpu]->cpu_boot(cpu); -	/* -	 * Now the secondary core is starting up let it run its -	 * calibrations, then wait for it to finish -	 */ -	raw_spin_unlock(&boot_lock); - -	return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0; +	return -EOPNOTSUPP;  }  static DECLARE_COMPLETION(cpu_running); @@ -158,6 +116,11 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)  	return ret;  } +static void smp_store_cpu_info(unsigned int cpuid) +{ +	store_cpu_topology(cpuid); +} +  /*   * This is the secondary CPU boot entry.  We're using this CPUs   * idle thread stack, but a set of temporary page tables. @@ -167,8 +130,6 @@ asmlinkage void secondary_start_kernel(void)  	struct mm_struct *mm = &init_mm;  	unsigned int cpu = smp_processor_id(); -	printk("CPU%u: Booted secondary processor\n", cpu); -  	/*  	 * All kernel threads share the same mm context; grab a  	 * reference and switch to it. 
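The boot_secondary() rewrite above, and the hotplug and suspend paths later in this file, funnel everything through the per-CPU cpu_ops[] descriptors that cpu_read_ops() fills in during CPU enumeration. A rough sketch of the interface, inferred from the call sites visible in this diff (the authoritative definition lives in the new asm/cpu_ops.h, which is not among the hunks shown here):

#include <linux/of.h>
#include <linux/threads.h>

/* Sketch only: the methods as exercised by this diff, not a verbatim copy. */
struct cpu_operations {
	const char	*name;
	int		(*cpu_init)(struct device_node *, unsigned int);
	int		(*cpu_prepare)(unsigned int);
	int		(*cpu_boot)(unsigned int);
	void		(*cpu_postboot)(void);
#ifdef CONFIG_HOTPLUG_CPU
	int		(*cpu_disable)(unsigned int cpu);
	void		(*cpu_die)(unsigned int cpu);
	int		(*cpu_kill)(unsigned int cpu);
#endif
#ifdef CONFIG_ARM64_CPU_SUSPEND
	int		(*cpu_suspend)(unsigned long);
#endif
};

extern const struct cpu_operations *cpu_ops[NR_CPUS];

Backends such as the converted smp_spin_table.c (and the PSCI backend that supersedes the deleted smp_psci.c) only provide the hooks they support; the generic code treats a missing hook as unsupported, e.g. boot_secondary() above and op_cpu_disable() further down both fall back to -EOPNOTSUPP.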
@@ -177,6 +138,9 @@ asmlinkage void secondary_start_kernel(void)  	current->active_mm = mm;  	cpumask_set_cpu(cpu, mm_cpumask(mm)); +	set_my_cpu_offset(per_cpu_offset(smp_processor_id())); +	printk("CPU%u: Booted secondary processor\n", cpu); +  	/*  	 * TTBR0 is only used for the identity mapping at this stage. Make it  	 * point to zero page to avoid speculatively fetching new entries. @@ -187,17 +151,15 @@ asmlinkage void secondary_start_kernel(void)  	preempt_disable();  	trace_hardirqs_off(); -	/* -	 * Let the primary processor know we're out of the -	 * pen, then head off into the C entry point -	 */ -	write_pen_release(INVALID_HWID); +	if (cpu_ops[cpu]->cpu_postboot) +		cpu_ops[cpu]->cpu_postboot();  	/* -	 * Synchronise with the boot thread. +	 * Enable GIC and timers.  	 */ -	raw_spin_lock(&boot_lock); -	raw_spin_unlock(&boot_lock); +	notify_cpu_starting(cpu); + +	smp_store_cpu_info(cpu);  	/*  	 * OK, now it's safe to let the boot CPU continue.  Wait for @@ -207,13 +169,9 @@ asmlinkage void secondary_start_kernel(void)  	set_cpu_online(cpu, true);  	complete(&cpu_running); -	/* -	 * Enable GIC and timers. -	 */ -	notify_cpu_starting(cpu); - +	local_dbg_enable();  	local_irq_enable(); -	local_fiq_enable(); +	local_async_enable();  	/*  	 * OK, it's off to the idle thread for us @@ -221,39 +179,136 @@ asmlinkage void secondary_start_kernel(void)  	cpu_startup_entry(CPUHP_ONLINE);  } -void __init smp_cpus_done(unsigned int max_cpus) +#ifdef CONFIG_HOTPLUG_CPU +static int op_cpu_disable(unsigned int cpu)  { -	pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); +	/* +	 * If we don't have a cpu_die method, abort before we reach the point +	 * of no return. CPU0 may not have an cpu_ops, so test for it. +	 */ +	if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die) +		return -EOPNOTSUPP; + +	/* +	 * We may need to abort a hot unplug for some other mechanism-specific +	 * reason. +	 */ +	if (cpu_ops[cpu]->cpu_disable) +		return cpu_ops[cpu]->cpu_disable(cpu); + +	return 0;  } -void __init smp_prepare_boot_cpu(void) +/* + * __cpu_disable runs on the processor to be shutdown. + */ +int __cpu_disable(void)  { -} +	unsigned int cpu = smp_processor_id(); +	int ret; -static void (*smp_cross_call)(const struct cpumask *, unsigned int); +	ret = op_cpu_disable(cpu); +	if (ret) +		return ret; -static const struct smp_enable_ops *enable_ops[] __initconst = { -	&smp_spin_table_ops, -	&smp_psci_ops, -	NULL, -}; +	/* +	 * Take this CPU offline.  Once we clear this, we can't return, +	 * and we must not schedule until we're ready to give up the cpu. +	 */ +	set_cpu_online(cpu, false); -static const struct smp_enable_ops *smp_enable_ops[NR_CPUS]; +	/* +	 * OK - migrate IRQs away from this CPU +	 */ +	migrate_irqs(); -static const struct smp_enable_ops * __init smp_get_enable_ops(const char *name) +	/* +	 * Remove this CPU from the vm mask set of all processes. +	 */ +	clear_tasks_mm_cpumask(cpu); + +	return 0; +} + +static int op_cpu_kill(unsigned int cpu)  { -	const struct smp_enable_ops **ops = enable_ops; +	/* +	 * If we have no means of synchronising with the dying CPU, then assume +	 * that it is really dead. We can only wait for an arbitrary length of +	 * time and hope that it's dead, so let's skip the wait and just hope. 
+	 */ +	if (!cpu_ops[cpu]->cpu_kill) +		return 1; + +	return cpu_ops[cpu]->cpu_kill(cpu); +} -	while (*ops) { -		if (!strcmp(name, (*ops)->name)) -			return *ops; +static DECLARE_COMPLETION(cpu_died); -		ops++; +/* + * called on the thread which is asking for a CPU to be shutdown - + * waits until shutdown has completed, or it is timed out. + */ +void __cpu_die(unsigned int cpu) +{ +	if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) { +		pr_crit("CPU%u: cpu didn't die\n", cpu); +		return;  	} +	pr_notice("CPU%u: shutdown\n", cpu); + +	/* +	 * Now that the dying CPU is beyond the point of no return w.r.t. +	 * in-kernel synchronisation, try to get the firwmare to help us to +	 * verify that it has really left the kernel before we consider +	 * clobbering anything it might still be using. +	 */ +	if (!op_cpu_kill(cpu)) +		pr_warn("CPU%d may not have shut down cleanly\n", cpu); +} + +/* + * Called from the idle thread for the CPU which has been shutdown. + * + * Note that we disable IRQs here, but do not re-enable them + * before returning to the caller. This is also the behaviour + * of the other hotplug-cpu capable cores, so presumably coming + * out of idle fixes this. + */ +void cpu_die(void) +{ +	unsigned int cpu = smp_processor_id(); + +	idle_task_exit(); + +	local_irq_disable(); + +	/* Tell __cpu_die() that this CPU is now safe to dispose of */ +	complete(&cpu_died); + +	/* +	 * Actually shutdown the CPU. This must never fail. The specific hotplug +	 * mechanism must perform all required cache maintenance to ensure that +	 * no dirty lines are lost in the process of shutting down the CPU. +	 */ +	cpu_ops[cpu]->cpu_die(cpu); + +	BUG(); +} +#endif + +void __init smp_cpus_done(unsigned int max_cpus) +{ +	pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); +} -	return NULL; +void __init smp_prepare_boot_cpu(void) +{ +	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));  } +static void (*smp_cross_call)(const struct cpumask *, unsigned int); +  /*   * Enumerate the possible CPU set from the device tree and build the   * cpu logical map array containing MPIDR values related to logical @@ -261,9 +316,8 @@ static const struct smp_enable_ops * __init smp_get_enable_ops(const char *name)   */  void __init smp_init_cpus(void)  { -	const char *enable_method;  	struct device_node *dn = NULL; -	int i, cpu = 1; +	unsigned int i, cpu = 1;  	bool bootcpu_valid = false;  	while ((dn = of_find_node_by_type(dn, "cpu"))) { @@ -332,25 +386,10 @@ void __init smp_init_cpus(void)  		if (cpu >= NR_CPUS)  			goto next; -		/* -		 * We currently support only the "spin-table" enable-method. 
-		 */ -		enable_method = of_get_property(dn, "enable-method", NULL); -		if (!enable_method) { -			pr_err("%s: missing enable-method property\n", -				dn->full_name); -			goto next; -		} - -		smp_enable_ops[cpu] = smp_get_enable_ops(enable_method); - -		if (!smp_enable_ops[cpu]) { -			pr_err("%s: invalid enable-method property: %s\n", -			       dn->full_name, enable_method); +		if (cpu_read_ops(dn, cpu) != 0)  			goto next; -		} -		if (smp_enable_ops[cpu]->init_cpu(dn, cpu)) +		if (cpu_ops[cpu]->cpu_init(dn, cpu))  			goto next;  		pr_debug("cpu logical map 0x%llx\n", hwid); @@ -380,8 +419,12 @@ next:  void __init smp_prepare_cpus(unsigned int max_cpus)  { -	int cpu, err; -	unsigned int ncores = num_possible_cpus(); +	int err; +	unsigned int cpu, ncores = num_possible_cpus(); + +	init_cpu_topology(); + +	smp_store_cpu_info(smp_processor_id());  	/*  	 * are we trying to boot more cores than exist? @@ -408,10 +451,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus)  		if (cpu == smp_processor_id())  			continue; -		if (!smp_enable_ops[cpu]) +		if (!cpu_ops[cpu])  			continue; -		err = smp_enable_ops[cpu]->prepare_cpu(cpu); +		err = cpu_ops[cpu]->cpu_prepare(cpu);  		if (err)  			continue; @@ -436,12 +479,22 @@ void arch_send_call_function_single_ipi(int cpu)  	smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);  } +#ifdef CONFIG_IRQ_WORK +void arch_irq_work_raise(void) +{ +	if (smp_cross_call) +		smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK); +} +#endif +  static const char *ipi_types[NR_IPI] = {  #define S(x,s)	[x - IPI_RESCHEDULE] = s  	S(IPI_RESCHEDULE, "Rescheduling interrupts"),  	S(IPI_CALL_FUNC, "Function call interrupts"),  	S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),  	S(IPI_CPU_STOP, "CPU stop interrupts"), +	S(IPI_TIMER, "Timer broadcast interrupts"), +	S(IPI_IRQ_WORK, "IRQ work interrupts"),  };  void show_ipi_list(struct seq_file *p, int prec) @@ -451,7 +504,7 @@ void show_ipi_list(struct seq_file *p, int prec)  	for (i = 0; i < NR_IPI; i++) {  		seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i + IPI_RESCHEDULE,  			   prec >= 4 ? " " : ""); -		for_each_present_cpu(cpu) +		for_each_online_cpu(cpu)  			seq_printf(p, "%10u ",  				   __get_irq_stat(cpu, ipi_irqs[i]));  		seq_printf(p, "      %s\n", ipi_types[i]); @@ -486,7 +539,6 @@ static void ipi_cpu_stop(unsigned int cpu)  	set_cpu_online(cpu, false); -	local_fiq_disable();  	local_irq_disable();  	while (1) @@ -527,6 +579,22 @@ void handle_IPI(int ipinr, struct pt_regs *regs)  		irq_exit();  		break; +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +	case IPI_TIMER: +		irq_enter(); +		tick_receive_broadcast(); +		irq_exit(); +		break; +#endif + +#ifdef CONFIG_IRQ_WORK +	case IPI_IRQ_WORK: +		irq_enter(); +		irq_work_run(); +		irq_exit(); +		break; +#endif +  	default:  		pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);  		break; @@ -539,6 +607,13 @@ void smp_send_reschedule(int cpu)  	smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);  } +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +void tick_broadcast(const struct cpumask *mask) +{ +	smp_cross_call(mask, IPI_TIMER); +} +#endif +  void smp_send_stop(void)  {  	unsigned long timeout; diff --git a/arch/arm64/kernel/smp_psci.c b/arch/arm64/kernel/smp_psci.c deleted file mode 100644 index 0c533301be7..00000000000 --- a/arch/arm64/kernel/smp_psci.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * PSCI SMP initialisation - * - * Copyright (C) 2013 ARM Ltd. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program.  If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/init.h> -#include <linux/of.h> -#include <linux/smp.h> - -#include <asm/psci.h> -#include <asm/smp_plat.h> - -static int __init smp_psci_init_cpu(struct device_node *dn, int cpu) -{ -	return 0; -} - -static int __init smp_psci_prepare_cpu(int cpu) -{ -	int err; - -	if (!psci_ops.cpu_on) { -		pr_err("psci: no cpu_on method, not booting CPU%d\n", cpu); -		return -ENODEV; -	} - -	err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_holding_pen)); -	if (err) { -		pr_err("psci: failed to boot CPU%d (%d)\n", cpu, err); -		return err; -	} - -	return 0; -} - -const struct smp_enable_ops smp_psci_ops __initconst = { -	.name		= "psci", -	.init_cpu	= smp_psci_init_cpu, -	.prepare_cpu	= smp_psci_prepare_cpu, -}; diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 7c35fa682f7..0347d38eea2 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -16,15 +16,38 @@   * along with this program.  If not, see <http://www.gnu.org/licenses/>.   */ +#include <linux/delay.h>  #include <linux/init.h>  #include <linux/of.h>  #include <linux/smp.h>  #include <asm/cacheflush.h> +#include <asm/cpu_ops.h> +#include <asm/cputype.h> +#include <asm/smp_plat.h> + +extern void secondary_holding_pen(void); +volatile unsigned long secondary_holding_pen_release = INVALID_HWID;  static phys_addr_t cpu_release_addr[NR_CPUS]; -static int __init smp_spin_table_init_cpu(struct device_node *dn, int cpu) +/* + * Write secondary_holding_pen_release in a way that is guaranteed to be + * visible to all observers, irrespective of whether they're taking part + * in coherency or not.  This is necessary for the hotplug code to work + * reliably. + */ +static void write_pen_release(u64 val) +{ +	void *start = (void *)&secondary_holding_pen_release; +	unsigned long size = sizeof(secondary_holding_pen_release); + +	secondary_holding_pen_release = val; +	__flush_dcache_area(start, size); +} + + +static int smp_spin_table_cpu_init(struct device_node *dn, unsigned int cpu)  {  	/*  	 * Determine the address from which the CPU is polling. @@ -40,7 +63,7 @@ static int __init smp_spin_table_init_cpu(struct device_node *dn, int cpu)  	return 0;  } -static int __init smp_spin_table_prepare_cpu(int cpu) +static int smp_spin_table_cpu_prepare(unsigned int cpu)  {  	void **release_addr; @@ -48,7 +71,16 @@ static int __init smp_spin_table_prepare_cpu(int cpu)  		return -ENODEV;  	release_addr = __va(cpu_release_addr[cpu]); -	release_addr[0] = (void *)__pa(secondary_holding_pen); + +	/* +	 * We write the release address as LE regardless of the native +	 * endianess of the kernel. Therefore, any boot-loaders that +	 * read this address need to convert this address to the +	 * boot-loader's endianess before jumping. This is mandated by +	 * the boot protocol. 
+	 */ +	release_addr[0] = (void *) cpu_to_le64(__pa(secondary_holding_pen)); +  	__flush_dcache_area(release_addr, sizeof(release_addr[0]));  	/* @@ -59,8 +91,24 @@ static int __init smp_spin_table_prepare_cpu(int cpu)  	return 0;  } -const struct smp_enable_ops smp_spin_table_ops __initconst = { +static int smp_spin_table_cpu_boot(unsigned int cpu) +{ +	/* +	 * Update the pen release flag. +	 */ +	write_pen_release(cpu_logical_map(cpu)); + +	/* +	 * Send an event, causing the secondaries to read pen_release. +	 */ +	sev(); + +	return 0; +} + +const struct cpu_operations smp_spin_table_ops = {  	.name		= "spin-table", -	.init_cpu 	= smp_spin_table_init_cpu, -	.prepare_cpu	= smp_spin_table_prepare_cpu, +	.cpu_init	= smp_spin_table_cpu_init, +	.cpu_prepare	= smp_spin_table_cpu_prepare, +	.cpu_boot	= smp_spin_table_cpu_boot,  }; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index d25459ff57f..55437ba1f5a 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -35,7 +35,7 @@   *	ldp	x29, x30, [sp]   *	add	sp, sp, #0x10   */ -int unwind_frame(struct stackframe *frame) +int notrace unwind_frame(struct stackframe *frame)  {  	unsigned long high, low;  	unsigned long fp = frame->fp; @@ -43,12 +43,16 @@ int unwind_frame(struct stackframe *frame)  	low  = frame->sp;  	high = ALIGN(low, THREAD_SIZE); -	if (fp < low || fp > high || fp & 0xf) +	if (fp < low || fp > high - 0x18 || fp & 0xf)  		return -EINVAL;  	frame->sp = fp + 0x10;  	frame->fp = *(unsigned long *)(fp); -	frame->pc = *(unsigned long *)(fp + 8); +	/* +	 * -4 here because we care about the PC at time of bl, +	 * not where the return will go. +	 */ +	frame->pc = *(unsigned long *)(fp + 8) - 4;  	return 0;  } diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c new file mode 100644 index 00000000000..1fa9ce4afd8 --- /dev/null +++ b/arch/arm64/kernel/suspend.c @@ -0,0 +1,140 @@ +#include <linux/percpu.h> +#include <linux/slab.h> +#include <asm/cacheflush.h> +#include <asm/cpu_ops.h> +#include <asm/debug-monitors.h> +#include <asm/pgtable.h> +#include <asm/memory.h> +#include <asm/smp_plat.h> +#include <asm/suspend.h> +#include <asm/tlbflush.h> + +extern int __cpu_suspend(unsigned long); +/* + * This is called by __cpu_suspend() to save the state, and do whatever + * flushing is required to ensure that when the CPU goes to sleep we have + * the necessary data available when the caches are not searched. + * + * @arg: Argument to pass to suspend operations + * @ptr: CPU context virtual address + * @save_ptr: address of the location where the context physical address + *            must be saved + */ +int __cpu_suspend_finisher(unsigned long arg, struct cpu_suspend_ctx *ptr, +			   phys_addr_t *save_ptr) +{ +	int cpu = smp_processor_id(); + +	*save_ptr = virt_to_phys(ptr); + +	cpu_do_suspend(ptr); +	/* +	 * Only flush the context that must be retrieved with the MMU +	 * off. VA primitives ensure the flush is applied to all +	 * cache levels so context is pushed to DRAM. +	 */ +	__flush_dcache_area(ptr, sizeof(*ptr)); +	__flush_dcache_area(save_ptr, sizeof(*save_ptr)); + +	return cpu_ops[cpu]->cpu_suspend(arg); +} + +/* + * This hook is provided so that cpu_suspend code can restore HW + * breakpoints as early as possible in the resume path, before reenabling + * debug exceptions. 
Code cannot be run from a CPU PM notifier since by the + * time the notifier runs debug exceptions might have been enabled already, + * with HW breakpoints registers content still in an unknown state. + */ +void (*hw_breakpoint_restore)(void *); +void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) +{ +	/* Prevent multiple restore hook initializations */ +	if (WARN_ON(hw_breakpoint_restore)) +		return; +	hw_breakpoint_restore = hw_bp_restore; +} + +/** + * cpu_suspend + * + * @arg: argument to pass to the finisher function + */ +int cpu_suspend(unsigned long arg) +{ +	struct mm_struct *mm = current->active_mm; +	int ret, cpu = smp_processor_id(); +	unsigned long flags; + +	/* +	 * If cpu_ops have not been registered or suspend +	 * has not been initialized, cpu_suspend call fails early. +	 */ +	if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend) +		return -EOPNOTSUPP; + +	/* +	 * From this point debug exceptions are disabled to prevent +	 * updates to mdscr register (saved and restored along with +	 * general purpose registers) from kernel debuggers. +	 */ +	local_dbg_save(flags); + +	/* +	 * mm context saved on the stack, it will be restored when +	 * the cpu comes out of reset through the identity mapped +	 * page tables, so that the thread address space is properly +	 * set-up on function return. +	 */ +	ret = __cpu_suspend(arg); +	if (ret == 0) { +		cpu_switch_mm(mm->pgd, mm); +		flush_tlb_all(); + +		/* +		 * Restore per-cpu offset before any kernel +		 * subsystem relying on it has a chance to run. +		 */ +		set_my_cpu_offset(per_cpu_offset(cpu)); + +		/* +		 * Restore HW breakpoint registers to sane values +		 * before debug exceptions are possibly reenabled +		 * through local_dbg_restore. +		 */ +		if (hw_breakpoint_restore) +			hw_breakpoint_restore(NULL); +	} + +	/* +	 * Restore pstate flags. OS lock and mdscr have been already +	 * restored, so from this point onwards, debugging is fully +	 * renabled if it was enabled when core started shutdown. +	 */ +	local_dbg_restore(flags); + +	return ret; +} + +extern struct sleep_save_sp sleep_save_sp; +extern phys_addr_t sleep_idmap_phys; + +static int cpu_suspend_init(void) +{ +	void *ctx_ptr; + +	/* ctx_ptr is an array of physical addresses */ +	ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL); + +	if (WARN_ON(!ctx_ptr)) +		return -ENOMEM; + +	sleep_save_sp.save_ptr_stash = ctx_ptr; +	sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr); +	sleep_idmap_phys = virt_to_phys(idmap_pg_dir); +	__flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp)); +	__flush_dcache_area(&sleep_idmap_phys, sizeof(sleep_idmap_phys)); + +	return 0; +} +early_initcall(cpu_suspend_init); diff --git a/arch/arm64/kernel/sys32.S b/arch/arm64/kernel/sys32.S index a1b19ed7467..423a5b3fc2b 100644 --- a/arch/arm64/kernel/sys32.S +++ b/arch/arm64/kernel/sys32.S @@ -59,48 +59,48 @@ ENDPROC(compat_sys_fstatfs64_wrapper)   * extension.   
*/  compat_sys_pread64_wrapper: -	orr	x3, x4, x5, lsl #32 +	regs_to_64	x3, x4, x5  	b	sys_pread64  ENDPROC(compat_sys_pread64_wrapper)  compat_sys_pwrite64_wrapper: -	orr	x3, x4, x5, lsl #32 +	regs_to_64	x3, x4, x5  	b	sys_pwrite64  ENDPROC(compat_sys_pwrite64_wrapper)  compat_sys_truncate64_wrapper: -	orr	x1, x2, x3, lsl #32 +	regs_to_64	x1, x2, x3  	b	sys_truncate  ENDPROC(compat_sys_truncate64_wrapper)  compat_sys_ftruncate64_wrapper: -	orr	x1, x2, x3, lsl #32 +	regs_to_64	x1, x2, x3  	b	sys_ftruncate  ENDPROC(compat_sys_ftruncate64_wrapper)  compat_sys_readahead_wrapper: -	orr	x1, x2, x3, lsl #32 +	regs_to_64	x1, x2, x3  	mov	w2, w4  	b	sys_readahead  ENDPROC(compat_sys_readahead_wrapper)  compat_sys_fadvise64_64_wrapper:  	mov	w6, w1 -	orr	x1, x2, x3, lsl #32 -	orr	x2, x4, x5, lsl #32 +	regs_to_64	x1, x2, x3 +	regs_to_64	x2, x4, x5  	mov	w3, w6  	b	sys_fadvise64_64  ENDPROC(compat_sys_fadvise64_64_wrapper)  compat_sys_sync_file_range2_wrapper: -	orr	x2, x2, x3, lsl #32 -	orr	x3, x4, x5, lsl #32 +	regs_to_64	x2, x2, x3 +	regs_to_64	x3, x4, x5  	b	sys_sync_file_range2  ENDPROC(compat_sys_sync_file_range2_wrapper)  compat_sys_fallocate_wrapper: -	orr	x2, x2, x3, lsl #32 -	orr	x3, x4, x5, lsl #32 +	regs_to_64	x2, x2, x3 +	regs_to_64	x3, x4, x5  	b	sys_fallocate  ENDPROC(compat_sys_fallocate_wrapper) diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 03dc3718eb1..1a7125c3099 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -18,6 +18,7 @@   * along with this program.  If not, see <http://www.gnu.org/licenses/>.   */ +#include <linux/clockchips.h>  #include <linux/export.h>  #include <linux/kernel.h>  #include <linux/interrupt.h> @@ -33,6 +34,7 @@  #include <linux/irq.h>  #include <linux/delay.h>  #include <linux/clocksource.h> +#include <linux/clk-provider.h>  #include <clocksource/arm_arch_timer.h> @@ -61,26 +63,19 @@ unsigned long profile_pc(struct pt_regs *regs)  EXPORT_SYMBOL(profile_pc);  #endif -static u64 sched_clock_mult __read_mostly; - -unsigned long long notrace sched_clock(void) -{ -	return arch_timer_read_counter() * sched_clock_mult; -} -  void __init time_init(void)  {  	u32 arch_timer_rate; +	of_clk_init(NULL);  	clocksource_of_init(); +	tick_setup_hrtimer_broadcast(); +  	arch_timer_rate = arch_timer_get_rate();  	if (!arch_timer_rate)  		panic("Unable to initialise architected timer.\n"); -	/* Cache the sched_clock multiplier to save a divide in the hot path. */ -	sched_clock_mult = NSEC_PER_SEC / arch_timer_rate; -  	/* Calibrate the delay loop directly */  	lpj_fine = arch_timer_rate / HZ;  } diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c new file mode 100644 index 00000000000..43514f90591 --- /dev/null +++ b/arch/arm64/kernel/topology.c @@ -0,0 +1,283 @@ +/* + * arch/arm64/kernel/topology.c + * + * Copyright (C) 2011,2013,2014 Linaro Limited. + * + * Based on the arm32 version written by Vincent Guittot in turn based on + * arch/sh/kernel/topology.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License.  See the file "COPYING" in the main directory of this archive + * for more details. 
+ */ + +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/node.h> +#include <linux/nodemask.h> +#include <linux/of.h> +#include <linux/sched.h> + +#include <asm/topology.h> + +static int __init get_cpu_for_node(struct device_node *node) +{ +	struct device_node *cpu_node; +	int cpu; + +	cpu_node = of_parse_phandle(node, "cpu", 0); +	if (!cpu_node) +		return -1; + +	for_each_possible_cpu(cpu) { +		if (of_get_cpu_node(cpu, NULL) == cpu_node) { +			of_node_put(cpu_node); +			return cpu; +		} +	} + +	pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name); + +	of_node_put(cpu_node); +	return -1; +} + +static int __init parse_core(struct device_node *core, int cluster_id, +			     int core_id) +{ +	char name[10]; +	bool leaf = true; +	int i = 0; +	int cpu; +	struct device_node *t; + +	do { +		snprintf(name, sizeof(name), "thread%d", i); +		t = of_get_child_by_name(core, name); +		if (t) { +			leaf = false; +			cpu = get_cpu_for_node(t); +			if (cpu >= 0) { +				cpu_topology[cpu].cluster_id = cluster_id; +				cpu_topology[cpu].core_id = core_id; +				cpu_topology[cpu].thread_id = i; +			} else { +				pr_err("%s: Can't get CPU for thread\n", +				       t->full_name); +				of_node_put(t); +				return -EINVAL; +			} +			of_node_put(t); +		} +		i++; +	} while (t); + +	cpu = get_cpu_for_node(core); +	if (cpu >= 0) { +		if (!leaf) { +			pr_err("%s: Core has both threads and CPU\n", +			       core->full_name); +			return -EINVAL; +		} + +		cpu_topology[cpu].cluster_id = cluster_id; +		cpu_topology[cpu].core_id = core_id; +	} else if (leaf) { +		pr_err("%s: Can't get CPU for leaf core\n", core->full_name); +		return -EINVAL; +	} + +	return 0; +} + +static int __init parse_cluster(struct device_node *cluster, int depth) +{ +	char name[10]; +	bool leaf = true; +	bool has_cores = false; +	struct device_node *c; +	static int cluster_id __initdata; +	int core_id = 0; +	int i, ret; + +	/* +	 * First check for child clusters; we currently ignore any +	 * information about the nesting of clusters and present the +	 * scheduler with a flat list of them. +	 */ +	i = 0; +	do { +		snprintf(name, sizeof(name), "cluster%d", i); +		c = of_get_child_by_name(cluster, name); +		if (c) { +			leaf = false; +			ret = parse_cluster(c, depth + 1); +			of_node_put(c); +			if (ret != 0) +				return ret; +		} +		i++; +	} while (c); + +	/* Now check for cores */ +	i = 0; +	do { +		snprintf(name, sizeof(name), "core%d", i); +		c = of_get_child_by_name(cluster, name); +		if (c) { +			has_cores = true; + +			if (depth == 0) { +				pr_err("%s: cpu-map children should be clusters\n", +				       c->full_name); +				of_node_put(c); +				return -EINVAL; +			} + +			if (leaf) { +				ret = parse_core(c, cluster_id, core_id++); +			} else { +				pr_err("%s: Non-leaf cluster with core %s\n", +				       cluster->full_name, name); +				ret = -EINVAL; +			} + +			of_node_put(c); +			if (ret != 0) +				return ret; +		} +		i++; +	} while (c); + +	if (leaf && !has_cores) +		pr_warn("%s: empty cluster\n", cluster->full_name); + +	if (leaf) +		cluster_id++; + +	return 0; +} + +static int __init parse_dt_topology(void) +{ +	struct device_node *cn, *map; +	int ret = 0; +	int cpu; + +	cn = of_find_node_by_path("/cpus"); +	if (!cn) { +		pr_err("No CPU information found in DT\n"); +		return 0; +	} + +	/* +	 * When topology is provided cpu-map is essentially a root +	 * cluster with restricted subnodes. 
+	 */ +	map = of_get_child_by_name(cn, "cpu-map"); +	if (!map) +		goto out; + +	ret = parse_cluster(map, 0); +	if (ret != 0) +		goto out_map; + +	/* +	 * Check that all cores are in the topology; the SMP code will +	 * only mark cores described in the DT as possible. +	 */ +	for_each_possible_cpu(cpu) { +		if (cpu_topology[cpu].cluster_id == -1) { +			pr_err("CPU%d: No topology information specified\n", +			       cpu); +			ret = -EINVAL; +		} +	} + +out_map: +	of_node_put(map); +out: +	of_node_put(cn); +	return ret; +} + +/* + * cpu topology table + */ +struct cpu_topology cpu_topology[NR_CPUS]; +EXPORT_SYMBOL_GPL(cpu_topology); + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ +	return &cpu_topology[cpu].core_sibling; +} + +static void update_siblings_masks(unsigned int cpuid) +{ +	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; +	int cpu; + +	if (cpuid_topo->cluster_id == -1) { +		/* +		 * DT does not contain topology information for this cpu. +		 */ +		pr_debug("CPU%u: No topology information configured\n", cpuid); +		return; +	} + +	/* update core and thread sibling masks */ +	for_each_possible_cpu(cpu) { +		cpu_topo = &cpu_topology[cpu]; + +		if (cpuid_topo->cluster_id != cpu_topo->cluster_id) +			continue; + +		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); +		if (cpu != cpuid) +			cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + +		if (cpuid_topo->core_id != cpu_topo->core_id) +			continue; + +		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); +		if (cpu != cpuid) +			cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); +	} +} + +void store_cpu_topology(unsigned int cpuid) +{ +	update_siblings_masks(cpuid); +} + +static void __init reset_cpu_topology(void) +{ +	unsigned int cpu; + +	for_each_possible_cpu(cpu) { +		struct cpu_topology *cpu_topo = &cpu_topology[cpu]; + +		cpu_topo->thread_id = -1; +		cpu_topo->core_id = 0; +		cpu_topo->cluster_id = -1; + +		cpumask_clear(&cpu_topo->core_sibling); +		cpumask_set_cpu(cpu, &cpu_topo->core_sibling); +		cpumask_clear(&cpu_topo->thread_sibling); +		cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); +	} +} + +void __init init_cpu_topology(void) +{ +	reset_cpu_topology(); + +	/* +	 * Discard anything that was parsed if we hit an error so we +	 * don't use partial information. 
+	 */ +	if (parse_dt_topology()) +		reset_cpu_topology(); +} diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 7ffadddb645..c43cfa9b830 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -251,10 +251,13 @@ void die(const char *str, struct pt_regs *regs, int err)  void arm64_notify_die(const char *str, struct pt_regs *regs,  		      struct siginfo *info, int err)  { -	if (user_mode(regs)) +	if (user_mode(regs)) { +		current->thread.fault_address = 0; +		current->thread.fault_code = err;  		force_sig_info(info->si_signo, info, current); -	else +	} else {  		die(str, regs, err); +	}  }  asmlinkage void __exception do_undefinstr(struct pt_regs *regs) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 6a389dc1bd4..50384fec56c 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -58,7 +58,10 @@ static struct page *vectors_page[1];  static int alloc_vectors_page(void)  {  	extern char __kuser_helper_start[], __kuser_helper_end[]; +	extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; +  	int kuser_sz = __kuser_helper_end - __kuser_helper_start; +	int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;  	unsigned long vpage;  	vpage = get_zeroed_page(GFP_ATOMIC); @@ -72,7 +75,7 @@ static int alloc_vectors_page(void)  	/* sigreturn code */  	memcpy((void *)vpage + AARCH32_KERN_SIGRET_CODE_OFFSET, -		aarch32_sigret_code, sizeof(aarch32_sigret_code)); +               __aarch32_sigret_code_start, sigret_sz);  	flush_icache_range(vpage, vpage + PAGE_SIZE);  	vectors_page[0] = virt_to_page(vpage); @@ -103,49 +106,31 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp)  static int __init vdso_init(void)  { -	struct page *pg; -	char *vbase; -	int i, ret = 0; +	int i; + +	if (memcmp(&vdso_start, "\177ELF", 4)) { +		pr_err("vDSO is not a valid ELF object!\n"); +		return -EINVAL; +	}  	vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT;  	pr_info("vdso: %ld pages (%ld code, %ld data) at base %p\n",  		vdso_pages + 1, vdso_pages, 1L, &vdso_start);  	/* Allocate the vDSO pagelist, plus a page for the data. */ -	vdso_pagelist = kzalloc(sizeof(struct page *) * (vdso_pages + 1), +	vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *),  				GFP_KERNEL); -	if (vdso_pagelist == NULL) { -		pr_err("Failed to allocate vDSO pagelist!\n"); +	if (vdso_pagelist == NULL)  		return -ENOMEM; -	}  	/* Grab the vDSO code pages. */ -	for (i = 0; i < vdso_pages; i++) { -		pg = virt_to_page(&vdso_start + i*PAGE_SIZE); -		ClearPageReserved(pg); -		get_page(pg); -		vdso_pagelist[i] = pg; -	} - -	/* Sanity check the shared object header. */ -	vbase = vmap(vdso_pagelist, 1, 0, PAGE_KERNEL); -	if (vbase == NULL) { -		pr_err("Failed to map vDSO pagelist!\n"); -		return -ENOMEM; -	} else if (memcmp(vbase, "\177ELF", 4)) { -		pr_err("vDSO is not a valid ELF object!\n"); -		ret = -EINVAL; -		goto unmap; -	} +	for (i = 0; i < vdso_pages; i++) +		vdso_pagelist[i] = virt_to_page(&vdso_start + i * PAGE_SIZE);  	/* Grab the vDSO data page. 
*/ -	pg = virt_to_page(vdso_data); -	get_page(pg); -	vdso_pagelist[i] = pg; +	vdso_pagelist[i] = virt_to_page(vdso_data); -unmap: -	vunmap(vbase); -	return ret; +	return 0;  }  arch_initcall(vdso_init); @@ -235,6 +220,8 @@ void update_vsyscall(struct timekeeper *tk)  	vdso_data->use_syscall			= use_syscall;  	vdso_data->xtime_coarse_sec		= xtime_coarse.tv_sec;  	vdso_data->xtime_coarse_nsec		= xtime_coarse.tv_nsec; +	vdso_data->wtm_clock_sec		= tk->wall_to_monotonic.tv_sec; +	vdso_data->wtm_clock_nsec		= tk->wall_to_monotonic.tv_nsec;  	if (!use_syscall) {  		vdso_data->cs_cycle_last	= tk->clock->cycle_last; @@ -242,8 +229,6 @@ void update_vsyscall(struct timekeeper *tk)  		vdso_data->xtime_clock_nsec	= tk->xtime_nsec;  		vdso_data->cs_mult		= tk->mult;  		vdso_data->cs_shift		= tk->shift; -		vdso_data->wtm_clock_sec	= tk->wall_to_monotonic.tv_sec; -		vdso_data->wtm_clock_nsec	= tk->wall_to_monotonic.tv_nsec;  	}  	smp_wmb(); diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index d8064af42e6..6d20b7d162d 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -48,7 +48,7 @@ $(obj-vdso): %.o: %.S  # Actual build commands  quiet_cmd_vdsold = VDSOL $@ -      cmd_vdsold = $(CC) $(c_flags) -Wl,-T $^ -o $@ +      cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@  quiet_cmd_vdsoas = VDSOA $@        cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index f0a6d10b521..fe652ffd34c 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -103,6 +103,8 @@ ENTRY(__kernel_clock_gettime)  	bl	__do_get_tspec  	seqcnt_check w9, 1b +	mov	x30, x2 +  	cmp	w0, #CLOCK_MONOTONIC  	b.ne	6f @@ -118,6 +120,9 @@ ENTRY(__kernel_clock_gettime)  	ccmp	w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne  	b.ne	8f +	/* xtime_coarse_nsec is already right-shifted */ +	mov	x12, #0 +  	/* Get coarse timespec. */  	adr	vdso_data, _vdso_data  3:	seqcnt_acquire @@ -156,7 +161,7 @@ ENTRY(__kernel_clock_gettime)  	lsr	x11, x11, x12  	stp	x10, x11, [x1, #TSPEC_TV_SEC]  	mov	x0, xzr -	ret	x2 +	ret  7:  	mov	x30, x2  8:	/* Syscall fallback. */ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index f8ab9d8e2ea..f1e6d5c032e 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -13,7 +13,7 @@  #define ARM_EXIT_DISCARD(x)	x  OUTPUT_ARCH(aarch64) -ENTRY(stext) +ENTRY(_text)  jiffies = jiffies_64; @@ -54,7 +54,6 @@ SECTIONS  	}  	.text : {			/* Real text segment		*/  		_stext = .;		/* Text and read-only data	*/ -			*(.smp.pen.text)  			__exception_text_start = .;  			*(.exception.text)  			__exception_text_end = .; @@ -97,37 +96,17 @@ SECTIONS  	PERCPU_SECTION(64)  	__init_end = .; -	. = ALIGN(THREAD_SIZE); -	__data_loc = .; - -	.data : AT(__data_loc) { -		_data = .;		/* address in memory */ -		_sdata = .; - -		/* -		 * first, the init task union, aligned -		 * to an 8192 byte boundary. -		 */ -		INIT_TASK_DATA(THREAD_SIZE) -		NOSAVE_DATA -		CACHELINE_ALIGNED_DATA(64) -		READ_MOSTLY_DATA(64) - -		/* -		 * and the usual data section -		 */ -		DATA_DATA -		CONSTRUCTORS - -		_edata = .; -	} -	_edata_loc = __data_loc + SIZEOF(.data); + +	. = ALIGN(PAGE_SIZE); +	_data = .; +	_sdata = .; +	RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) +	_edata = .;  	BSS_SECTION(0, 0, 0)  	_end = .;  	STABS_DEBUG -	.comment 0 : { *(.comment) }  }  /*  | 
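A data-flow note on the gettimeofday.S hunk above: the added "mov x12, #0" is needed because xtime_coarse_nsec is published by update_vsyscall() as plain nanoseconds, whereas the fine-grained path carries nanoseconds scaled by cs_shift and shifts them right ("lsr x11, x11, x12") in the shared tail. As an illustration only (the helper below is not part of the patch; field names follow the vdso_data updates in vdso.c above), the CLOCK_MONOTONIC_COARSE result amounts to:

#include <linux/time.h>
#include <asm/vdso_datapage.h>

/* Illustrative C model of the coarse monotonic path, not patch code. */
static void coarse_monotonic(const struct vdso_data *vd, struct timespec *ts)
{
	u64 sec  = vd->xtime_coarse_sec + vd->wtm_clock_sec;
	u64 nsec = vd->xtime_coarse_nsec + vd->wtm_clock_nsec;

	/* xtime_coarse_nsec is already in ns, so no cs_shift is applied */
	while (nsec >= NSEC_PER_SEC) {
		nsec -= NSEC_PER_SEC;
		sec++;
	}

	ts->tv_sec  = sec;
	ts->tv_nsec = nsec;
}

This is also why update_vsyscall() now writes wtm_clock_sec/nsec outside the !use_syscall branch: the coarse clocks are serviced in the vDSO even when the cycle-counter path has to fall back to the syscall, so they need a valid wall-to-monotonic offset in the data page.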