From b86040a59feb255a8193173caa4d5199464433d5 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 24 Jul 2009 12:32:54 +0100
Subject: Thumb-2: Implementation of the unified start-up and exceptions code

This patch implements the ARM/Thumb-2 unified kernel start-up and
exception handling code. The assembly files are annotated with the
ARM()/THUMB() macros so that instructions specific to one instruction
set are only assembled for the corresponding configuration, and the
Thumb-2 restrictions are handled explicitly:

- BSYM() marks code symbols whose address is taken with adr, so that
  bit 0 is set for Thumb entry points.
- Explicit IT instructions cover conditional instructions that carry a
  label referenced from the exception table.
- A new setmode macro replaces "msr cpsr_c, #imm", which cannot be
  encoded in Thumb-2.
- The SVC exception entry/exit paths move to the new svc_exit,
  restore_user_regs and store/load_user_sp_lr macros, since Thumb-2 has
  neither the user-register "ldm/stm {..}^" forms nor the
  "ldmia sp, {r0 - pc}^" exception return.
- Kernel threads and the vector stubs set PSR_ISETSTATE so the kernel
  always runs in the instruction set it was built for, and the stack
  unwinder uses r7 as the frame pointer on Thumb-2.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm/include/asm/assembler.h |  11 +++
 arch/arm/include/asm/futex.h     |   1 +
 arch/arm/kernel/entry-armv.S     | 165 +++++++++++++++++++++++----------------
 arch/arm/kernel/entry-common.S   |  28 ++-----
 arch/arm/kernel/entry-header.S   |  92 ++++++++++++++++++++--
 arch/arm/kernel/head-common.S    |  13 +--
 arch/arm/kernel/head-nommu.S     |  11 ++-
 arch/arm/kernel/head.S           |  28 ++++---
 arch/arm/kernel/process.c        |   2 +-
 arch/arm/kernel/setup.c          |  28 +++++--
 arch/arm/kernel/unwind.c         |   4 +
 11 files changed, 263 insertions(+), 120 deletions(-)

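For reference, the ARM()/THUMB()/W()/BSYM() annotations used throughout
the diff come from asm/unified.h and are not part of this patch. As a
rough sketch of their behaviour (see the header for the exact
definitions):

#ifdef CONFIG_THUMB2_KERNEL
#define ARM(x...)			/* dropped from Thumb-2 builds */
#define THUMB(x...)	x
#define W(instr)	instr.w		/* force the wide 32-bit encoding */
#define BSYM(sym)	sym + 1		/* set bit 0: symbol is Thumb code */
#else
#define ARM(x...)	x
#define THUMB(x...)			/* dropped from ARM builds */
#define W(instr)	instr
#define BSYM(sym)	sym
#endif

W() keeps the vector table and the call_fpe branch table entries the
same size in both instruction sets, and BSYM() is needed wherever a
code address is taken with adr (return addresses, branch tables): adr
alone does not set bit 0 of a Thumb symbol, so a later return through
that address would switch to ARM state by mistake.
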
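On the explicit IT instructions: Thumb-2 only executes an instruction
conditionally when it is covered by a preceding IT ("If-Then")
instruction. The assembler can generate these implicitly, but not when
a label must point at the conditional instruction itself, as with the
exception table entries here. Paraphrasing the futex.h hunk:

	teq	r0, r1
	it	eq		@ next instruction is conditional on EQ
2:	streqt	r2, [r3]	@ the ex_table entry needs 2b to label
				@ the store, not an implicit IT

The itet block added to __und_usr works the same way, covering three
conditional instructions (Then/Else/Then for eq/ne/eq) up to and
including the one at the 1: label.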

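The setmode macro added to assembler.h exists because the immediate
form "msr cpsr_c, #imm" has no Thumb-2 encoding; the value must be
moved through a scratch register first. A usage sketch mirroring the
head.S changes:

	@ ARM can do this directly:
	msr	cpsr_c, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
	@ Thumb-2 needs a register, so the callers now say:
	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9
	@ ...which expands to "mov r9, #...; msr cpsr_c, r9" there.

The same restriction is why cpu_init() in setup.c switches the msr
operand constraint from "I" (immediate) to "r" for Thumb-2 builds.
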
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 15f8a092b70..1acd1da36d0 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -127,3 +127,14 @@
 #endif
 #endif
 	.endm
+
+#ifdef CONFIG_THUMB2_KERNEL
+	.macro	setmode, mode, reg
+	mov	\reg, #\mode
+	msr	cpsr_c, \reg
+	.endm
+#else
+	.macro	setmode, mode, reg
+	msr	cpsr_c, #\mode
+	.endm
+#endif
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index 9ee743b95de..bfcc15929a7 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -99,6 +99,7 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 	__asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
 	"1:	ldrt	%0, [%3]\n"
 	"	teq	%0, %1\n"
+	"	it	eq	@ explicit IT needed for the 2b label\n"
 	"2:	streqt	%2, [%3]\n"
 	"3:\n"
 	"	.section __ex_table,\"a\"\n"
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0befd1cabf4..468425f937d 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -34,7 +34,7 @@
 	@
 	@ routine called with r0 = irq number, r1 = struct pt_regs *
 	@
-	adrne	lr, 1b
+	adrne	lr, BSYM(1b)
 	bne	asm_do_IRQ
 
 #ifdef CONFIG_SMP
@@ -46,13 +46,13 @@
 	 */
 	test_for_ipi r0, r6, r5, lr
 	movne	r0, sp
-	adrne	lr, 1b
+	adrne	lr, BSYM(1b)
 	bne	do_IPI
 
 #ifdef CONFIG_LOCAL_TIMERS
 	test_for_ltirq r0, r6, r5, lr
 	movne	r0, sp
-	adrne	lr, 1b
+	adrne	lr, BSYM(1b)
 	bne	do_local_timer
 #endif
 #endif
@@ -70,7 +70,10 @@
  */
 	.macro	inv_entry, reason
 	sub	sp, sp, #S_FRAME_SIZE
-	stmib	sp, {r1 - lr}
+ ARM(	stmib	sp, {r1 - lr}		)
+ THUMB(	stmia	sp, {r0 - r12}		)
+ THUMB(	str	sp, [sp, #S_SP]		)
+ THUMB(	str	lr, [sp, #S_LR]		)
 	mov	r1, #\reason
 	.endm
 
@@ -126,17 +129,24 @@ ENDPROC(__und_invalid)
 	.macro	svc_entry, stack_hole=0
  UNWIND(.fnstart		)
  UNWIND(.save {r0 - pc}		)
-	sub	sp, sp, #(S_FRAME_SIZE + \stack_hole)
+	sub	sp, sp, #(S_FRAME_SIZE + \stack_hole - 4)
+#ifdef CONFIG_THUMB2_KERNEL
+ SPFIX(	str	r0, [sp]	)	@ temporarily saved
+ SPFIX(	mov	r0, sp		)
+ SPFIX(	tst	r0, #4		)	@ test original stack alignment
+ SPFIX(	ldr	r0, [sp]	)	@ restored
+#else
  SPFIX(	tst	sp, #4		)
- SPFIX(	bicne	sp, sp, #4	)
-	stmib	sp, {r1 - r12}
+#endif
+ SPFIX(	subeq	sp, sp, #4	)
+	stmia	sp, {r1 - r12}
 
 	ldmia	r0, {r1 - r3}
-	add	r5, sp, #S_SP		@ here for interlock avoidance
+	add	r5, sp, #S_SP - 4	@ here for interlock avoidance
 	mov	r4, #-1			@  ""  ""      ""       ""
-	add	r0, sp, #(S_FRAME_SIZE + \stack_hole)
- SPFIX(	addne	r0, r0, #4	)
-	str	r1, [sp]		@ save the "real" r0 copied
+	add	r0, sp, #(S_FRAME_SIZE + \stack_hole - 4)
+ SPFIX(	addeq	r0, r0, #4	)
+	str	r1, [sp, #-4]!		@ save the "real" r0 copied
 					@ from the exception stack
 
 	mov	r1, lr
@@ -196,9 +206,8 @@ __dabt_svc:
 	@
 	@ restore SPSR and restart the instruction
 	@
-	ldr	r0, [sp, #S_PSR]
-	msr	spsr_cxsf, r0
-	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
+	ldr	r2, [sp, #S_PSR]
+	svc_exit r2				@ return from exception
  UNWIND(.fnend		)
 ENDPROC(__dabt_svc)
 
@@ -225,13 +234,12 @@ __irq_svc:
 	tst	r0, #_TIF_NEED_RESCHED
 	blne	svc_preempt
 #endif
-	ldr	r0, [sp, #S_PSR]		@ irqs are already disabled
-	msr	spsr_cxsf, r0
+	ldr	r4, [sp, #S_PSR]		@ irqs are already disabled
 #ifdef CONFIG_TRACE_IRQFLAGS
-	tst	r0, #PSR_I_BIT
+	tst	r4, #PSR_I_BIT
 	bleq	trace_hardirqs_on
 #endif
-	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
+	svc_exit r4				@ return from exception
  UNWIND(.fnend		)
 ENDPROC(__irq_svc)
 
@@ -266,7 +274,7 @@ __und_svc:
 	@  r0 - instruction
 	@
 	ldr	r0, [r2, #-4]
-	adr	r9, 1f
+	adr	r9, BSYM(1f)
 	bl	call_fpe
 
 	mov	r0, sp				@ struct pt_regs *regs
@@ -280,9 +288,8 @@ __und_svc:
 	@
 	@ restore SPSR and restart the instruction
 	@
-	ldr	lr, [sp, #S_PSR]		@ Get SVC cpsr
-	msr	spsr_cxsf, lr
-	ldmia	sp, {r0 - pc}^			@ Restore SVC registers
+	ldr	r2, [sp, #S_PSR]		@ Get SVC cpsr
+	svc_exit r2				@ return from exception
  UNWIND(.fnend		)
 ENDPROC(__und_svc)
 
@@ -323,9 +330,8 @@ __pabt_svc:
 	@
 	@ restore SPSR and restart the instruction
 	@
-	ldr	r0, [sp, #S_PSR]
-	msr	spsr_cxsf, r0
-	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
+	ldr	r2, [sp, #S_PSR]
+	svc_exit r2				@ return from exception
  UNWIND(.fnend		)
 ENDPROC(__pabt_svc)
 
@@ -353,7 +359,8 @@ ENDPROC(__pabt_svc)
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)	@ don't unwind the user space
 	sub	sp, sp, #S_FRAME_SIZE
-	stmib	sp, {r1 - r12}
+ ARM(	stmib	sp, {r1 - r12}	)
+ THUMB(	stmia	sp, {r0 - r12}	)
 
 	ldmia	r0, {r1 - r3}
 	add	r0, sp, #S_PC		@ here for interlock avoidance
@@ -372,7 +379,8 @@ ENDPROC(__pabt_svc)
 	@ Also, separately save sp_usr and lr_usr
 	@
 	stmia	r0, {r2 - r4}
-	stmdb	r0, {sp, lr}^
+ ARM(	stmdb	r0, {sp, lr}^			)
+ THUMB(	store_user_sp_lr r0, r1, S_SP - S_PC	)
 
 	@
 	@ Enable the alignment trap while in kernel mode
@@ -427,7 +435,7 @@ __dabt_usr:
 	@
 	enable_irq
 	mov	r2, sp
-	adr	lr, ret_from_exception
+	adr	lr, BSYM(ret_from_exception)
 	b	do_DataAbort
  UNWIND(.fnend		)
 ENDPROC(__dabt_usr)
@@ -452,7 +460,9 @@ __irq_usr:
 	ldr	r0, [tsk, #TI_PREEMPT]
 	str	r8, [tsk, #TI_PREEMPT]
 	teq	r0, r7
-	strne	r0, [r0, -r0]
+ ARM(	strne	r0, [r0, -r0]	)
+ THUMB(	movne	r0, #0		)
+ THUMB(	strne	r0, [r0]	)
 #endif
 #ifdef CONFIG_TRACE_IRQFLAGS
 	bl	trace_hardirqs_on
@@ -476,9 +486,10 @@ __und_usr:
 	@
 	@  r0 - instruction
 	@
-	adr	r9, ret_from_exception
-	adr	lr, __und_usr_unknown
+	adr	r9, BSYM(ret_from_exception)
+	adr	lr, BSYM(__und_usr_unknown)
 	tst	r3, #PSR_T_BIT			@ Thumb mode?
+	itet	eq				@ explicit IT needed for the 1f label
 	subeq	r4, r2, #4			@ ARM instr at LR - 4
 	subne	r4, r2, #2			@ Thumb instr at LR - 2
 1:	ldreqt	r0, [r4]
@@ -488,7 +499,10 @@ __und_usr:
 	beq	call_fpe
 	@ Thumb instruction
 #if __LINUX_ARM_ARCH__ >= 7
-2:	ldrht	r5, [r4], #2
+2:
+ ARM(	ldrht	r5, [r4], #2	)
+ THUMB(	ldrht	r5, [r4]	)
+ THUMB(	add	r4, r4, #2	)
 	and	r0, r5, #0xf800			@ mask bits 111x x... .... ....
 	cmp	r0, #0xe800			@ 32bit instruction if xx != 0
 	blo	__und_usr_unknown
@@ -577,9 +591,11 @@ call_fpe:
 	moveq	pc, lr
 	get_thread_info r10			@ get current thread
 	and	r8, r0, #0x00000f00		@ mask out CP number
+ THUMB(	lsr	r8, r8, #8		)
 	mov	r7, #1
 	add	r6, r10, #TI_USED_CP
-	strb	r7, [r6, r8, lsr #8]		@ set appropriate used_cp[]
+ ARM(	strb	r7, [r6, r8, lsr #8]	)	@ set appropriate used_cp[]
+ THUMB(	strb	r7, [r6, r8]		)	@ set appropriate used_cp[]
 #ifdef CONFIG_IWMMXT
 	@ Test if we need to give access to iWMMXt coprocessors
 	ldr	r5, [r10, #TI_FLAGS]
@@ -587,36 +603,38 @@ call_fpe:
 	movcss	r7, r5, lsr #(TIF_USING_IWMMXT + 1)
 	bcs	iwmmxt_task_enable
 #endif
-	add	pc, pc, r8, lsr #6
-	mov	r0, r0
-
-	mov	pc, lr				@ CP#0
-	b	do_fpe				@ CP#1 (FPE)
-	b	do_fpe				@ CP#2 (FPE)
-	mov	pc, lr				@ CP#3
+ ARM(	add	pc, pc, r8, lsr #6	)
+ THUMB(	lsl	r8, r8, #2		)
+ THUMB(	add	pc, r8			)
+	nop
+
+	W(mov)	pc, lr				@ CP#0
+	W(b)	do_fpe				@ CP#1 (FPE)
+	W(b)	do_fpe				@ CP#2 (FPE)
+	W(mov)	pc, lr				@ CP#3
 #ifdef CONFIG_CRUNCH
 	b	crunch_task_enable		@ CP#4 (MaverickCrunch)
 	b	crunch_task_enable		@ CP#5 (MaverickCrunch)
 	b	crunch_task_enable		@ CP#6 (MaverickCrunch)
 #else
-	mov	pc, lr				@ CP#4
-	mov	pc, lr				@ CP#5
-	mov	pc, lr				@ CP#6
+	W(mov)	pc, lr				@ CP#4
+	W(mov)	pc, lr				@ CP#5
+	W(mov)	pc, lr				@ CP#6
 #endif
-	mov	pc, lr				@ CP#7
-	mov	pc, lr				@ CP#8
-	mov	pc, lr				@ CP#9
+	W(mov)	pc, lr				@ CP#7
+	W(mov)	pc, lr				@ CP#8
+	W(mov)	pc, lr				@ CP#9
 #ifdef CONFIG_VFP
-	b	do_vfp				@ CP#10 (VFP)
-	b	do_vfp				@ CP#11 (VFP)
+	W(b)	do_vfp				@ CP#10 (VFP)
+	W(b)	do_vfp				@ CP#11 (VFP)
 #else
-	mov	pc, lr				@ CP#10 (VFP)
-	mov	pc, lr				@ CP#11 (VFP)
+	W(mov)	pc, lr				@ CP#10 (VFP)
+	W(mov)	pc, lr				@ CP#11 (VFP)
 #endif
-	mov	pc, lr				@ CP#12
-	mov	pc, lr				@ CP#13
-	mov	pc, lr				@ CP#14 (Debug)
-	mov	pc, lr				@ CP#15 (Control)
+	W(mov)	pc, lr				@ CP#12
+	W(mov)	pc, lr				@ CP#13
+	W(mov)	pc, lr				@ CP#14 (Debug)
+	W(mov)	pc, lr				@ CP#15 (Control)
 
 #ifdef CONFIG_NEON
 	.align	6
@@ -667,7 +685,7 @@ no_fp:	mov	pc, lr
 __und_usr_unknown:
 	enable_irq
 	mov	r0, sp
-	adr	lr, ret_from_exception
+	adr	lr, BSYM(ret_from_exception)
 	b	do_undefinstr
 ENDPROC(__und_usr_unknown)
 
@@ -711,7 +729,10 @@ ENTRY(__switch_to)
  UNWIND(.cantunwind	)
 	add	ip, r1, #TI_CPU_SAVE
 	ldr	r3, [r2, #TI_TP_VALUE]
-	stmia	ip!, {r4 - sl, fp, sp, lr}	@ Store most regs on stack
+ ARM(	stmia	ip!, {r4 - sl, fp, sp, lr} )	@ Store most regs on stack
+ THUMB(	stmia	ip!, {r4 - sl, fp}	   )	@ Store most regs on stack
+ THUMB(	str	sp, [ip], #4		   )
+ THUMB(	str	lr, [ip], #4		   )
 #ifdef CONFIG_MMU
 	ldr	r6, [r2, #TI_CPU_DOMAIN]
 #endif
@@ -736,8 +757,12 @@ ENTRY(__switch_to)
 	ldr	r0, =thread_notify_head
 	mov	r1, #THREAD_NOTIFY_SWITCH
 	bl	atomic_notifier_call_chain
+ THUMB(	mov	ip, r4			   )
 	mov	r0, r5
-	ldmia	r4, {r4 - sl, fp, sp, pc}	@ Load all regs saved previously
+ ARM(	ldmia	r4, {r4 - sl, fp, sp, pc}  )	@ Load all regs saved previously
+ THUMB(	ldmia	ip!, {r4 - sl, fp}	   )	@ Load all regs saved previously
+ THUMB(	ldr	sp, [ip], #4		   )
+ THUMB(	ldr	pc, [ip]		   )
  UNWIND(.fnend		)
 ENDPROC(__switch_to)
 
@@ -772,6 +797,7 @@ ENDPROC(__switch_to)
  * if your compiled code is not going to use the new instructions for other
  * purpose.
  */
+ THUMB(	.arm	)
 
 	.macro	usr_ret, reg
 #ifdef CONFIG_ARM_THUMB
@@ -1020,6 +1046,7 @@ __kuser_helper_version:				@ 0xffff0ffc
 	.globl	__kuser_helper_end
 __kuser_helper_end:
 
+ THUMB(	.thumb	)
 
 /*
  * Vector stubs.
@@ -1054,15 +1081,17 @@ vector_\name:
 	@ Prepare for SVC32 mode.  IRQs remain disabled.
 	@
 	mrs	r0, cpsr
-	eor	r0, r0, #(\mode ^ SVC_MODE)
+	eor	r0, r0, #(\mode ^ SVC_MODE | PSR_ISETSTATE)
 	msr	spsr_cxsf, r0
 
 	@
 	@ the branch table must immediately follow this code
 	@
 	and	lr, lr, #0x0f
+ THUMB(	adr	r0, 1f			)
+ THUMB(	ldr	lr, [r0, lr, lsl #2]	)
 	mov	r0, sp
-	ldr	lr, [pc, lr, lsl #2]
+ ARM(	ldr	lr, [pc, lr, lsl #2]	)
 	movs	pc, lr			@ branch to handler in SVC mode
 ENDPROC(vector_\name)
 
@@ -1206,14 +1235,16 @@ __stubs_end:
 
 	.globl	__vectors_start
 __vectors_start:
-	swi	SYS_ERROR0
-	b	vector_und + stubs_offset
-	ldr	pc, .LCvswi + stubs_offset
-	b	vector_pabt + stubs_offset
-	b	vector_dabt + stubs_offset
-	b	vector_addrexcptn + stubs_offset
-	b	vector_irq + stubs_offset
-	b	vector_fiq + stubs_offset
+ ARM(	swi	SYS_ERROR0	)
+ THUMB(	svc	#0		)
+ THUMB(	nop			)
+	W(b)	vector_und + stubs_offset
+	W(ldr)	pc, .LCvswi + stubs_offset
+	W(b)	vector_pabt + stubs_offset
+	W(b)	vector_dabt + stubs_offset
+	W(b)	vector_addrexcptn + stubs_offset
+	W(b)	vector_irq + stubs_offset
+	W(b)	vector_fiq + stubs_offset
 
 	.globl	__vectors_end
 __vectors_end:
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 366e5097a41..a0540c9f1f0 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -33,14 +33,7 @@ ret_fast_syscall:
 	/* perform architecture specific actions before user return */
 	arch_ret_to_user r1, lr
 
-	@ fast_restore_user_regs
-	ldr	r1, [sp, #S_OFF + S_PSR]	@ get calling cpsr
-	ldr	lr, [sp, #S_OFF + S_PC]!	@ get pc
-	msr	spsr_cxsf, r1			@ save in spsr_svc
-	ldmdb	sp, {r1 - lr}^			@ get calling r1 - lr
-	mov	r0, r0
-	add	sp, sp, #S_FRAME_SIZE - S_PC
-	movs	pc, lr				@ return & move spsr_svc into cpsr
+	restore_user_regs fast = 1, offset = S_OFF
  UNWIND(.fnend		)
 
 /*
@@ -73,14 +66,7 @@ no_work_pending:
 	/* perform architecture specific actions before user return */
 	arch_ret_to_user r1, lr
 
-	@ slow_restore_user_regs
-	ldr	r1, [sp, #S_PSR]		@ get calling cpsr
-	ldr	lr, [sp, #S_PC]!		@ get pc
-	msr	spsr_cxsf, r1			@ save in spsr_svc
-	ldmdb	sp, {r0 - lr}^			@ get calling r0 - lr
-	mov	r0, r0
-	add	sp, sp, #S_FRAME_SIZE - S_PC
-	movs	pc, lr				@ return & move spsr_svc into cpsr
+	restore_user_regs fast = 0, offset = 0
 ENDPROC(ret_to_user)
 
 /*
@@ -182,8 +168,10 @@ ftrace_stub:
 ENTRY(vector_swi)
 	sub	sp, sp, #S_FRAME_SIZE
 	stmia	sp, {r0 - r12}			@ Calling r0 - r12
-	add	r8, sp, #S_PC
-	stmdb	r8, {sp, lr}^			@ Calling sp, lr
+ ARM(	add	r8, sp, #S_PC		)
+ ARM(	stmdb	r8, {sp, lr}^		)	@ Calling sp, lr
+ THUMB(	mov	r8, sp			)
+ THUMB(	store_user_sp_lr r8, r10, S_SP	)	@ calling sp, lr
 	mrs	r8, spsr			@ called from non-FIQ mode, so ok.
 	str	lr, [sp, #S_PC]			@ Save calling PC
 	str	r8, [sp, #S_PSR]		@ Save CPSR
@@ -272,7 +260,7 @@ ENTRY(vector_swi)
 	bne	__sys_trace
 
 	cmp	scno, #NR_syscalls		@ check upper syscall limit
-	adr	lr, ret_fast_syscall		@ return address
+	adr	lr, BSYM(ret_fast_syscall)	@ return address
 	ldrcc	pc, [tbl, scno, lsl #2]		@ call sys_* routine
 
 	add	r1, sp, #S_OFF
@@ -293,7 +281,7 @@ __sys_trace:
 	mov	r0, #0				@ trace entry [IP = 0]
 	bl	syscall_trace
 
-	adr	lr, __sys_trace_return		@ return address
+	adr	lr, BSYM(__sys_trace_return)	@ return address
 	mov	scno, r0			@ syscall number (possibly new)
 	add	r1, sp, #S_R0 + S_OFF		@ pointer to regs
 	cmp	scno, #NR_syscalls		@ check upper syscall limit
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 87ab4e15799..a4eaf4f920c 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -36,11 +36,6 @@
 #endif
 	.endm
 
-	.macro	get_thread_info, rd
-	mov	\rd, sp, lsr #13
-	mov	\rd, \rd, lsl #13
-	.endm
-
 	.macro	alignment_trap, rtemp
 #ifdef CONFIG_ALIGNMENT_TRAP
 	ldr	\rtemp, .LCcralign
@@ -49,6 +44,93 @@
 #endif
 	.endm
 
+	@
+	@ Store/load the USER SP and LR registers by switching to the SYS
+	@ mode. Useful in Thumb-2 mode where "stm/ldm rd, {sp, lr}^" is not
+	@ available. Should only be called from SVC mode
+	@
+	.macro	store_user_sp_lr, rd, rtemp, offset = 0
+	mrs	\rtemp, cpsr
+	eor	\rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE)
+	msr	cpsr_c, \rtemp			@ switch to the SYS mode
+
+	str	sp, [\rd, #\offset]		@ save sp_usr
+	str	lr, [\rd, #\offset + 4]		@ save lr_usr
+
+	eor	\rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE)
+	msr	cpsr_c, \rtemp			@ switch back to the SVC mode
+	.endm
+
+	.macro	load_user_sp_lr, rd, rtemp, offset = 0
+	mrs	\rtemp, cpsr
+	eor	\rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE)
+	msr	cpsr_c, \rtemp			@ switch to the SYS mode
+
+	ldr	sp, [\rd, #\offset]		@ load sp_usr
+	ldr	lr, [\rd, #\offset + 4]		@ load lr_usr
+
+	eor	\rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE)
+	msr	cpsr_c, \rtemp			@ switch back to the SVC mode
+	.endm
+
+#ifndef CONFIG_THUMB2_KERNEL
+	.macro	svc_exit, rpsr
+	msr	spsr_cxsf, \rpsr
+	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
+	.endm
+
+	.macro	restore_user_regs, fast = 0, offset = 0
+	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
+	ldr	lr, [sp, #\offset + S_PC]!	@ get pc
+	msr	spsr_cxsf, r1			@ save in spsr_svc
+	.if	\fast
+	ldmdb	sp, {r1 - lr}^			@ get calling r1 - lr
+	.else
+	ldmdb	sp, {r0 - lr}^			@ get calling r0 - lr
+	.endif
+	add	sp, sp, #S_FRAME_SIZE - S_PC
+	movs	pc, lr				@ return & move spsr_svc into cpsr
+	.endm
+
+	.macro	get_thread_info, rd
+	mov	\rd, sp, lsr #13
+	mov	\rd, \rd, lsl #13
+	.endm
+#else	/* CONFIG_THUMB2_KERNEL */
+	.macro	svc_exit, rpsr
+	ldr	r0, [sp, #S_SP]			@ top of the stack
+	ldr	r1, [sp, #S_PC]			@ return address
+	tst	r0, #4				@ orig stack 8-byte aligned?
+	stmdb	r0, {r1, \rpsr}			@ rfe context
+	ldmia	sp, {r0 - r12}
+	ldr	lr, [sp, #S_LR]
+	addeq	sp, sp, #S_FRAME_SIZE - 8	@ aligned
+	addne	sp, sp, #S_FRAME_SIZE - 4	@ not aligned
+	rfeia	sp!
+	.endm
+
+	.macro	restore_user_regs, fast = 0, offset = 0
+	mov	r2, sp
+	load_user_sp_lr r2, r3, \offset + S_SP	@ calling sp, lr
+	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
+	ldr	lr, [sp, #\offset + S_PC]	@ get pc
+	add	sp, sp, #\offset + S_SP
+	msr	spsr_cxsf, r1			@ save in spsr_svc
+	.if	\fast
+	ldmdb	sp, {r1 - r12}			@ get calling r1 - r12
+	.else
+	ldmdb	sp, {r0 - r12}			@ get calling r0 - r12
+	.endif
+	add	sp, sp, #S_FRAME_SIZE - S_SP
+	movs	pc, lr				@ return & move spsr_svc into cpsr
+	.endm
+
+	.macro	get_thread_info, rd
+	mov	\rd, sp
+	lsr	\rd, \rd, #13
+	mov	\rd, \rd, lsl #13
+	.endm
+#endif	/* !CONFIG_THUMB2_KERNEL */
 
 /*
  * These are the registers used in the syscall handler, and allow us to
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index dbfaef07a1f..93ad576b2d7 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -52,7 +52,9 @@ __mmap_switched:
 	strcc	fp, [r6],#4
 	bcc	1b
 
-	ldmia	r3, {r4, r5, r6, r7, sp}
+ ARM(	ldmia	r3, {r4, r5, r6, r7, sp})
+ THUMB(	ldmia	r3, {r4, r5, r6, r7}	)
+ THUMB(	ldr	sp, [r3, #16]		)
 	str	r9, [r4]			@ Save processor ID
 	str	r1, [r5]			@ Save machine type
 	str	r2, [r6]			@ Save atags pointer
@@ -156,7 +158,8 @@ ENDPROC(__error)
  */
 __lookup_processor_type:
 	adr	r3, 3f
-	ldmda	r3, {r5 - r7}
+	ldmia	r3, {r5 - r7}
+	add	r3, r3, #8
 	sub	r3, r3, r7			@ get offset between virt&phys
 	add	r5, r5, r3			@ convert virt addresses to
 	add	r6, r6, r3			@ physical address space
@@ -187,9 +190,9 @@ ENDPROC(lookup_processor_type)
  * more information about the __proc_info and __arch_info structures.
  */
 	.align	2
-	.long	__proc_info_begin
+3:	.long	__proc_info_begin
 	.long	__proc_info_end
-3:	.long	.
+4:	.long	.
 	.long	__arch_info_begin
 	.long	__arch_info_end
 
@@ -205,7 +208,7 @@ ENDPROC(lookup_processor_type)
  *  r5 = mach_info pointer in physical address space
  */
 __lookup_machine_type:
-	adr	r3, 3b
+	adr	r3, 4b
 	ldmia	r3, {r4, r5, r6}
 	sub	r3, r3, r4			@ get offset between virt&phys
 	add	r5, r5, r3			@ convert virt addresses to
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index cc87e1765ed..b16393d2b71 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -34,7 +34,7 @@
  */
 	.section ".text.head", "ax"
 ENTRY(stext)
-	msr	cpsr_c, #PSR_F_BIT | PSR_I_BIT | SVC_MODE @ ensure svc mode
+	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode
 						@ and irqs disabled
 #ifndef CONFIG_CPU_CP15
 	ldr	r9, =CONFIG_PROCESSOR_ID
@@ -50,8 +50,10 @@ ENTRY(stext)
 
 	ldr	r13, __switch_data		@ address to jump to after
 						@ the initialization is done
-	adr	lr, __after_proc_init		@ return (PIC) address
-	add	pc, r10, #PROCINFO_INITFUNC
+	adr	lr, BSYM(__after_proc_init)	@ return (PIC) address
+ ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
+ THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
+ THUMB(	mov	pc, r12				)
 ENDPROC(stext)
 
 /*
@@ -82,7 +84,8 @@ __after_proc_init:
 	mcr	p15, 0, r0, c1, c0, 0		@ write control reg
 #endif /* CONFIG_CPU_CP15 */
 
-	mov	pc, r13				@ clear the BSS and jump
+	mov	r3, r13
+	mov	pc, r3				@ clear the BSS and jump
 						@ to start_kernel
 ENDPROC(__after_proc_init)
 	.ltorg
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 21e17dc94cb..38ccbe1d3b2 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -76,7 +76,7 @@
  */
 	.section ".text.head", "ax"
 ENTRY(stext)
-	msr	cpsr_c, #PSR_F_BIT | PSR_I_BIT | SVC_MODE @ ensure svc mode
+	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode
 						@ and irqs disabled
 	mrc	p15, 0, r9, c0, c0		@ get processor id
 	bl	__lookup_processor_type		@ r5=procinfo r9=cpuid
@@ -97,8 +97,10 @@ ENTRY(stext)
 	 */
 	ldr	r13, __switch_data		@ address to jump to after
 						@ mmu has been enabled
-	adr	lr, __enable_mmu		@ return (PIC) address
-	add	pc, r10, #PROCINFO_INITFUNC
+	adr	lr, BSYM(__enable_mmu)		@ return (PIC) address
+ ARM(	add	pc, r10, #PROCINFO_INITFUNC	)
+ THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
+ THUMB(	mov	pc, r12				)
 ENDPROC(stext)
 
 #if defined(CONFIG_SMP)
@@ -110,7 +112,7 @@ ENTRY(secondary_startup)
 	 * the processor type - there is no need to check the machine type
 	 * as it has already been validated by the primary processor.
 	 */
-	msr	cpsr_c, #PSR_F_BIT | PSR_I_BIT | SVC_MODE
+	setmode	PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9
 	mrc	p15, 0, r9, c0, c0		@ get processor id
 	bl	__lookup_processor_type
 	movs	r10, r5				@ invalid processor?
@@ -121,12 +123,15 @@ ENTRY(secondary_startup)
 	 * Use the page tables supplied from  __cpu_up.
 	 */
 	adr	r4, __secondary_data
-	ldmia	r4, {r5, r7, r13}		@ address to jump to after
+	ldmia	r4, {r5, r7, r12}		@ address to jump to after
 	sub	r4, r4, r5			@ mmu has been enabled
 	ldr	r4, [r7, r4]			@ get secondary_data.pgdir
-	adr	lr, __enable_mmu		@ return address
-	add	pc, r10, #PROCINFO_INITFUNC	@ initialise processor
-						@ (return control reg)
+	adr	lr, BSYM(__enable_mmu)		@ return address
+	mov	r13, r12			@ __secondary_switched address
+ ARM(	add	pc, r10, #PROCINFO_INITFUNC	) @ initialise processor
+						  @ (return control reg)
+ THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
+ THUMB(	mov	pc, r12				)
 ENDPROC(secondary_startup)
 
 	/*
@@ -193,8 +198,8 @@ __turn_mmu_on:
 	mcr	p15, 0, r0, c1, c0, 0		@ write control reg
 	mrc	p15, 0, r3, c0, c0, 0		@ read id reg
 	mov	r3, r3
-	mov	r3, r3
-	mov	pc, r13
+	mov	r3, r13
+	mov	pc, r3
 ENDPROC(__turn_mmu_on)
 
 
@@ -235,7 +240,8 @@ __create_page_tables:
 	 * will be removed by paging_init().  We use our current program
 	 * counter to determine corresponding section base address.
 	 */
-	mov	r6, pc, lsr #20			@ start of kernel section
+	mov	r6, pc
+	mov	r6, r6, lsr #20			@ start of kernel section
 	orr	r3, r7, r6, lsl #20		@ flags + kernel base
 	str	r3, [r4, r6, lsl #2]		@ identity mapping
 
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 39196dff478..790fbee92ec 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -388,7 +388,7 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 	regs.ARM_r2 = (unsigned long)fn;
 	regs.ARM_r3 = (unsigned long)kernel_thread_exit;
 	regs.ARM_pc = (unsigned long)kernel_thread_helper;
-	regs.ARM_cpsr = SVC_MODE | PSR_ENDSTATE;
+	regs.ARM_cpsr = SVC_MODE | PSR_ENDSTATE | PSR_ISETSTATE;
 
 	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
 }
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index bc5e4128f9f..d4d4f77c91b 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -25,6 +25,7 @@
 #include <linux/smp.h>
 #include <linux/fs.h>
 
+#include <asm/unified.h>
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/elf.h>
@@ -326,26 +327,39 @@ void cpu_init(void)
 		BUG();
 	}
 
+	/*
+	 * Define the placement constraint for the inline asm directive below.
+	 * In Thumb-2, msr with an immediate value is not allowed.
+	 */
+#ifdef CONFIG_THUMB2_KERNEL
+#define PLC	"r"
+#else
+#define PLC	"I"
+#endif
+
 	/*
 	 * setup stacks for re-entrant exception handlers
 	 */
 	__asm__ (
 	"msr	cpsr_c, %1\n\t"
-	"add	sp, %0, %2\n\t"
+	"add	r14, %0, %2\n\t"
+	"mov	sp, r14\n\t"
 	"msr	cpsr_c, %3\n\t"
-	"add	sp, %0, %4\n\t"
+	"add	r14, %0, %4\n\t"
+	"mov	sp, r14\n\t"
 	"msr	cpsr_c, %5\n\t"
-	"add	sp, %0, %6\n\t"
+	"add	r14, %0, %6\n\t"
+	"mov	sp, r14\n\t"
 	"msr	cpsr_c, %7"
 	    :
 	    : "r" (stk),
-	      "I" (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
+	      PLC (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
 	      "I" (offsetof(struct stack, irq[0])),
-	      "I" (PSR_F_BIT | PSR_I_BIT | ABT_MODE),
+	      PLC (PSR_F_BIT | PSR_I_BIT | ABT_MODE),
 	      "I" (offsetof(struct stack, abt[0])),
-	      "I" (PSR_F_BIT | PSR_I_BIT | UND_MODE),
+	      PLC (PSR_F_BIT | PSR_I_BIT | UND_MODE),
 	      "I" (offsetof(struct stack, und[0])),
-	      "I" (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
+	      PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
 	    : "r14");
 }
 
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index dd56e11f339..39baf1128bf 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -62,7 +62,11 @@ struct unwind_ctrl_block {
 };
 
 enum regs {
+#ifdef CONFIG_THUMB2_KERNEL
+	FP = 7,
+#else
 	FP = 11,
+#endif
 	SP = 13,
 	LR = 14,
 	PC = 15