34 files changed, 3793 insertions, 550 deletions
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 309d54cceda..34e8a256765 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -34,8 +34,11 @@ obj-$(CONFIG_CPU_R6000)		+= r6000_fpu.o r4k_switch.o
 
 obj-$(CONFIG_SMP)		+= smp.o
 
-obj-$(CONFIG_MIPS_MT_SMP)	+= smp_mt.o
+obj-$(CONFIG_MIPS_MT)		+= mips-mt.o
+obj-$(CONFIG_MIPS_MT_SMTC)	+= smtc.o smtc-asm.o smtc-proc.o
+obj-$(CONFIG_MIPS_MT_SMP)	+= smp-mt.o
 
+obj-$(CONFIG_MIPS_APSP_KSPD)	+= kspd.o
 obj-$(CONFIG_MIPS_VPE_LOADER)	+= vpe.o
 obj-$(CONFIG_MIPS_VPE_APSP_API)	+= rtlx.o
 
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index ca6b03c773b..92b28b674d6 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -69,6 +69,9 @@ void output_ptreg_defines(void)
 	offset("#define PT_BVADDR ", struct pt_regs, cp0_badvaddr);
 	offset("#define PT_STATUS ", struct pt_regs, cp0_status);
 	offset("#define PT_CAUSE  ", struct pt_regs, cp0_cause);
+#ifdef CONFIG_MIPS_MT_SMTC
+	offset("#define PT_TCSTATUS  ", struct pt_regs, cp0_tcstatus);
+#endif /* CONFIG_MIPS_MT_SMTC */
 	size("#define PT_SIZE   ", struct pt_regs);
 	linefeed;
 }
diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c
index 374de839558..b6232d9033c 100644
--- a/arch/mips/kernel/branch.c
+++ b/arch/mips/kernel/branch.c
@@ -184,7 +184,7 @@ int __compute_return_epc(struct pt_regs *regs)
 		bit = (insn.i_format.rt >> 2);
 		bit += (bit != 0);
 		bit += 23;
-		switch (insn.i_format.rt) {
+		switch (insn.i_format.rt & 3) {
 		case 0:	/* bc1f */
 		case 2:	/* bc1fl */
 			if (~fcr31 & (1 << bit))
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index 83c87fe4ee4..d101d2fb24c 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -17,6 +17,9 @@
 #include <asm/isadep.h>
 #include <asm/thread_info.h>
 #include <asm/war.h>
+#ifdef CONFIG_MIPS_MT_SMTC
+#include <asm/mipsmtregs.h>
+#endif
 
 #ifdef CONFIG_PREEMPT
 	.macro	preempt_stop
@@ -75,6 +78,37 @@ FEXPORT(syscall_exit)
 	bnez	t0, syscall_exit_work
 
 FEXPORT(restore_all)			# restore full frame
+#ifdef CONFIG_MIPS_MT_SMTC
+/* Detect and execute deferred IPI "interrupts" */
+	move	a0,sp
+	jal	deferred_smtc_ipi
+/* Re-arm any temporarily masked interrupts not explicitly "acked" */
+	mfc0	v0, CP0_TCSTATUS
+	ori	v1, v0, TCSTATUS_IXMT
+	mtc0	v1, CP0_TCSTATUS
+	andi	v0, TCSTATUS_IXMT
+	ehb
+	mfc0	t0, CP0_TCCONTEXT
+	DMT	9				# dmt t1
+	jal	mips_ihb
+	mfc0	t2, CP0_STATUS
+	andi	t3, t0, 0xff00
+	or	t2, t2, t3
+	mtc0	t2, CP0_STATUS
+	ehb
+	andi	t1, t1, VPECONTROL_TE
+	beqz	t1, 1f
+	EMT
+1:
+	mfc0	v1, CP0_TCSTATUS
+	/* We set IXMT above, XOR should cler it here */
+	xori	v1, v1, TCSTATUS_IXMT
+	or	v1, v0, v1
+	mtc0	v1, CP0_TCSTATUS
+	ehb
+	xor	t0, t0, t3
+	mtc0	t0, CP0_TCCONTEXT
+#endif /* CONFIG_MIPS_MT_SMTC */
 	.set	noat
 	RESTORE_TEMP
 	RESTORE_AT
@@ -120,28 +154,17 @@ syscall_exit_work:
 	jal	do_syscall_trace
 	b	resume_userspace
 
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_MIPS_MT)
+
 /*
- * Common spurious interrupt handler.
+ * MIPS32R2 Instruction Hazard Barrier - must be called
+ *
+ * For C code use the inline version named instruction_hazard().
  */
-LEAF(spurious_interrupt)
-	/*
-	 * Someone tried to fool us by sending an interrupt but we
-	 * couldn't find a cause for it.
-	 */
-	PTR_LA	t1, irq_err_count
-#ifdef CONFIG_SMP
-1:	ll      t0, (t1)
-	addiu   t0, 1
-	sc      t0, (t1)
-#if R10000_LLSC_WAR
-	beqzl	t0, 1b
-#else
-	beqz	t0, 1b
-#endif
-#else
-	lw      t0, (t1)
-	addiu   t0, 1
-	sw      t0, (t1)
-#endif
-	j	ret_from_irq
-	END(spurious_interrupt)
+LEAF(mips_ihb)
+	.set	mips32r2
+	jr.hb	ra
+	nop
+	END(mips_ihb)
+
+#endif /* CONFIG_CPU_MIPSR2 or CONFIG_MIPS_MT */
diff --git a/arch/mips/kernel/gdb-low.S b/arch/mips/kernel/gdb-low.S
index 235ad9f6bd3..10f28fb9f00 100644
--- a/arch/mips/kernel/gdb-low.S
+++ b/arch/mips/kernel/gdb-low.S
@@ -283,11 +283,33 @@
  */
 
 3:
+#ifdef CONFIG_MIPS_MT_SMTC
+		/* Read-modify write of Status must be atomic */
+		mfc0	t2, CP0_TCSTATUS
+		ori	t1, t2, TCSTATUS_IXMT
+		mtc0	t1, CP0_TCSTATUS
+		andi	t2, t2, TCSTATUS_IXMT
+		ehb
+		DMT	9				# dmt	t1
+		jal	mips_ihb
+		nop
+#endif /* CONFIG_MIPS_MT_SMTC */
 		mfc0	t0, CP0_STATUS
 		ori	t0, 0x1f
 		xori	t0, 0x1f
 		mtc0	t0, CP0_STATUS
-
+#ifdef CONFIG_MIPS_MT_SMTC
+        	andi    t1, t1, VPECONTROL_TE
+        	beqz    t1, 9f
+		nop
+        	EMT					# emt
+9:
+		mfc0	t1, CP0_TCSTATUS
+		xori	t1, t1, TCSTATUS_IXMT
+		or	t1, t1, t2
+		mtc0	t1, CP0_TCSTATUS
+		ehb
+#endif /* CONFIG_MIPS_MT_SMTC */
 		LONG_L	v0, GDB_FR_STATUS(sp)
 		LONG_L	v1, GDB_FR_EPC(sp)
 		mtc0	v0, CP0_STATUS
diff --git a/arch/mips/kernel/gdb-stub.c b/arch/mips/kernel/gdb-stub.c
index d4f88e0af24..6ecbdc1fefd 100644
--- a/arch/mips/kernel/gdb-stub.c
+++ b/arch/mips/kernel/gdb-stub.c
@@ -140,6 +140,7 @@
 #include <asm/system.h>
 #include <asm/gdb-stub.h>
 #include <asm/inst.h>
+#include <asm/smp.h>
 
 /*
  * external low-level support routines
@@ -669,6 +670,64 @@ static void kgdb_wait(void *arg)
 	local_irq_restore(flags);
 }
 
+/*
+ * GDB stub needs to call kgdb_wait on all processor with interrupts
+ * disabled, so it uses it's own special variant.
+ */
+static int kgdb_smp_call_kgdb_wait(void)
+{
+#ifdef CONFIG_SMP
+	struct call_data_struct data;
+	int i, cpus = num_online_cpus() - 1;
+	int cpu = smp_processor_id();
+
+	/*
+	 * Can die spectacularly if this CPU isn't yet marked online
+	 */
+	BUG_ON(!cpu_online(cpu));
+
+	if (!cpus)
+		return 0;
+
+	if (spin_is_locked(&smp_call_lock)) {
+		/*
+		 * Some other processor is trying to make us do something
+		 * but we're not going to respond... give up
+		 */
+		return -1;
+		}
+
+	/*
+	 * We will continue here, accepting the fact that
+	 * the kernel may deadlock if another CPU attempts
+	 * to call smp_call_function now...
+	 */
+
+	data.func = kgdb_wait;
+	data.info = NULL;
+	atomic_set(&data.started, 0);
+	data.wait = 0;
+
+	spin_lock(&smp_call_lock);
+	call_data = &data;
+	mb();
+
+	/* Send a message to all other CPUs and wait for them to respond */
+	for (i = 0; i < NR_CPUS; i++)
+		if (cpu_online(i) && i != cpu)
+			core_send_ipi(i, SMP_CALL_FUNCTION);
+
+	/* Wait for response */
+	/* FIXME: lock-up detection, backtrace on lock-up */
+	while (atomic_read(&data.started) != cpus)
+		barrier();
+
+	call_data = NULL;
+	spin_unlock(&smp_call_lock);
+#endif
+
+	return 0;
+}
 
 /*
  * This function does all command processing for interfacing to gdb.  It
@@ -718,7 +777,7 @@ void handle_exception (struct gdb_regs *regs)
 	/*
 	 * force other cpus to enter kgdb
 	 */
-	smp_call_function(kgdb_wait, NULL, 0, 0);
+	kgdb_smp_call_kgdb_wait();
 
 	/*
 	 * If we're in breakpoint() increment the PC
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index 13f22d1d0e8..ff7af369f28 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 
 #include <asm/asm.h>
+#include <asm/asmmacro.h>
 #include <asm/cacheops.h>
 #include <asm/regdef.h>
 #include <asm/fpregdef.h>
@@ -122,6 +123,20 @@ handle_vcei:
 	.set	pop
 	END(except_vec3_r4000)
 
+	__FINIT
+
+	.align  5
+NESTED(handle_int, PT_SIZE, sp)
+	SAVE_ALL
+	CLI
+
+	PTR_LA	ra, ret_from_irq
+	move	a0, sp
+	j	plat_irq_dispatch
+	END(handle_int)
+
+	__INIT
+
 /*
  * Special interrupt vector for MIPS64 ISA & embedded MIPS processors.
  * This is a dedicated interrupt exception vector which reduces the
@@ -157,6 +172,15 @@ NESTED(except_vec_vi, 0, sp)
 	SAVE_AT
 	.set	push
 	.set	noreorder
+#ifdef CONFIG_MIPS_MT_SMTC
+	/*
+	 * To keep from blindly blocking *all* interrupts
+	 * during service by SMTC kernel, we also want to
+	 * pass the IM value to be cleared.
+	 */
+EXPORT(except_vec_vi_mori)
+	ori	a0, $0, 0
+#endif /* CONFIG_MIPS_MT_SMTC */
 EXPORT(except_vec_vi_lui)
 	lui	v0, 0		/* Patched */
 	j	except_vec_vi_handler
@@ -173,6 +197,25 @@ EXPORT(except_vec_vi_end)
 NESTED(except_vec_vi_handler, 0, sp)
 	SAVE_TEMP
 	SAVE_STATIC
+#ifdef CONFIG_MIPS_MT_SMTC
+	/*
+	 * SMTC has an interesting problem that interrupts are level-triggered,
+	 * and the CLI macro will clear EXL, potentially causing a duplicate
+	 * interrupt service invocation. So we need to clear the associated
+	 * IM bit of Status prior to doing CLI, and restore it after the
+	 * service routine has been invoked - we must assume that the
+	 * service routine will have cleared the state, and any active
+	 * level represents a new or otherwised unserviced event...
+	 */
+	mfc0	t1, CP0_STATUS
+	and	t0, a0, t1
+	mfc0	t2, CP0_TCCONTEXT
+	or	t0, t0, t2
+	mtc0	t0, CP0_TCCONTEXT
+	xor	t1, t1, t0
+	mtc0	t1, CP0_STATUS
+	ehb
+#endif /* CONFIG_MIPS_MT_SMTC */
 	CLI
 	move	a0, sp
 	jalr	v0
diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S
index 2e9122a4213..bdf6f6eff72 100644
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S
@@ -18,6 +18,7 @@
 #include <linux/threads.h>
 
 #include <asm/asm.h>
+#include <asm/asmmacro.h>
 #include <asm/regdef.h>
 #include <asm/page.h>
 #include <asm/mipsregs.h>
@@ -82,12 +83,33 @@
 	 */
 	.macro	setup_c0_status set clr
 	.set	push
+#ifdef CONFIG_MIPS_MT_SMTC
+	/*
+	 * For SMTC, we need to set privilege and disable interrupts only for
+	 * the current TC, using the TCStatus register.
+	 */
+	mfc0	t0, CP0_TCSTATUS
+	/* Fortunately CU 0 is in the same place in both registers */
+	/* Set TCU0, TMX, TKSU (for later inversion) and IXMT */
+	li	t1, ST0_CU0 | 0x08001c00
+	or	t0, t1
+	/* Clear TKSU, leave IXMT */
+	xori	t0, 0x00001800
+	mtc0	t0, CP0_TCSTATUS
+	ehb
+	/* We need to leave the global IE bit set, but clear EXL...*/
+	mfc0	t0, CP0_STATUS
+	or	t0, ST0_CU0 | ST0_EXL | ST0_ERL | \set | \clr
+	xor	t0, ST0_EXL | ST0_ERL | \clr
+	mtc0	t0, CP0_STATUS
+#else
 	mfc0	t0, CP0_STATUS
 	or	t0, ST0_CU0|\set|0x1f|\clr
 	xor	t0, 0x1f|\clr
 	mtc0	t0, CP0_STATUS
 	.set	noreorder
 	sll	zero,3				# ehb
+#endif
 	.set	pop
 	.endm
 
@@ -134,6 +156,24 @@ NESTED(kernel_entry, 16, sp)			# kernel entry point
 
 	ARC64_TWIDDLE_PC
 
+#ifdef CONFIG_MIPS_MT_SMTC
+	/*
+	 * In SMTC kernel, "CLI" is thread-specific, in TCStatus.
+	 * We still need to enable interrupts globally in Status,
+	 * and clear EXL/ERL.
+	 *
+	 * TCContext is used to track interrupt levels under
+	 * service in SMTC kernel. Clear for boot TC before
+	 * allowing any interrupts.
+	 */
+	mtc0	zero, CP0_TCCONTEXT
+
+	mfc0	t0, CP0_STATUS
+	ori	t0, t0, 0xff1f
+	xori	t0, t0, 0x001e
+	mtc0	t0, CP0_STATUS
+#endif /* CONFIG_MIPS_MT_SMTC */
+
 	PTR_LA		t0, __bss_start		# clear .bss
 	LONG_S		zero, (t0)
 	PTR_LA		t1, __bss_stop - LONGSIZE
@@ -166,8 +206,25 @@ NESTED(kernel_entry, 16, sp)			# kernel entry point
  * function after setting up the stack and gp registers.
  */
 NESTED(smp_bootstrap, 16, sp)
+#ifdef CONFIG_MIPS_MT_SMTC
+	/*
+	 * Read-modify-writes of Status must be atomic, and this
+	 * is one case where CLI is invoked without EXL being
+	 * necessarily set. The CLI and setup_c0_status will
+	 * in fact be redundant for all but the first TC of
+	 * each VPE being booted.
+	 */
+	DMT	10	# dmt t2 /* t0, t1 are used by CLI and setup_c0_status() */
+	jal	mips_ihb
+#endif /* CONFIG_MIPS_MT_SMTC */
 	setup_c0_status_sec
 	smp_slave_setup
+#ifdef CONFIG_MIPS_MT_SMTC
+	andi	t2, t2, VPECONTROL_TE
+	beqz	t2, 2f
+	EMT		# emt
+2:
+#endif /* CONFIG_MIPS_MT_SMTC */
 	j	start_secondary
 	END(smp_bootstrap)
 #endif /* CONFIG_SMP */
diff --git a/arch/mips/kernel/i8259.c b/arch/mips/kernel/i8259.c
index b974ac9057f..2125ba5f1d9 100644
--- a/arch/mips/kernel/i8259.c
+++ b/arch/mips/kernel/i8259.c
@@ -187,6 +187,10 @@ handle_real_irq:
 		outb(cached_21,0x21);
 		outb(0x60+irq,0x20);	/* 'Specific EOI' to master */
 	}
+#ifdef CONFIG_MIPS_MT_SMTC
+        if (irq_hwmask[irq] & ST0_IM)
+        	set_c0_status(irq_hwmask[irq] & ST0_IM);
+#endif /* CONFIG_MIPS_MT_SMTC */
 	spin_unlock_irqrestore(&i8259A_lock, flags);
 	return;
 
diff --git a/arch/mips/kernel/irq-msc01.c b/arch/mips/kernel/irq-msc01.c
index 3f653c7cfbf..97ebdc754b9 100644
--- a/arch/mips/kernel/irq-msc01.c
+++ b/arch/mips/kernel/irq-msc01.c
@@ -76,6 +76,11 @@ static void level_mask_and_ack_msc_irq(unsigned int irq)
 	mask_msc_irq(irq);
 	if (!cpu_has_veic)
 		MSCIC_WRITE(MSC01_IC_EOI, 0);
+#ifdef CONFIG_MIPS_MT_SMTC
+	/* This actually needs to be a call into platform code */
+	if (irq_hwmask[irq] & ST0_IM)
+		set_c0_status(irq_hwmask[irq] & ST0_IM);
+#endif /* CONFIG_MIPS_MT_SMTC */
 }
 
 /*
@@ -92,6 +97,10 @@ static void edge_mask_and_ack_msc_irq(unsigned int irq)
 		MSCIC_WRITE(MSC01_IC_SUP+irq*8, r | ~MSC01_IC_SUP_EDGE_BIT);
 		MSCIC_WRITE(MSC01_IC_SUP+irq*8, r);
 	}
+#ifdef CONFIG_MIPS_MT_SMTC
+	if (irq_hwmask[irq] & ST0_IM)
+		set_c0_status(irq_hwmask[irq] & ST0_IM);
+#endif /* CONFIG_MIPS_MT_SMTC */
 }
 
 /*
diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c
index 3dd76b3d296..3dce742e716 100644
--- a/arch/mips/kernel/irq.c
+++ b/arch/mips/kernel/irq.c
@@ -38,6 +38,15 @@ void ack_bad_irq(unsigned int irq)
 
 atomic_t irq_err_count;
 
+#ifdef CONFIG_MIPS_MT_SMTC
+/*
+ * SMTC Kernel needs to manipulate low-level CPU interrupt mask
+ * in do_IRQ. These are passed in setup_irq_smtc() and stored
+ * in this table.
+ */
+unsigned long irq_hwmask[NR_IRQS];
+#endif /* CONFIG_MIPS_MT_SMTC */
+
 #undef do_IRQ
 
 /*
@@ -49,6 +58,7 @@ asmlinkage unsigned int do_IRQ(unsigned int irq, struct pt_regs *regs)
 {
 	irq_enter();
 
+	__DO_IRQ_SMTC_HOOK();
 	__do_IRQ(irq, regs);
 
 	irq_exit();
@@ -101,6 +111,11 @@ skip:
 	return 0;
 }
 
+asmlinkage void spurious_interrupt(struct pt_regs *regs)
+{
+	atomic_inc(&irq_err_count);
+}
+
 #ifdef CONFIG_KGDB
 extern void breakpoint(void);
 extern void set_debug_traps(void);
@@ -124,6 +139,9 @@ void __init init_IRQ(void)
 		irq_desc[i].depth   = 1;
 		irq_desc[i].handler = &no_irq_type;
 		spin_lock_init(&irq_desc[i].lock);
+#ifdef CONFIG_MIPS_MT_SMTC
+		irq_hwmask[i] = 0;
+#endif /* CONFIG_MIPS_MT_SMTC */
 	}
 
 	arch_init_irq();
diff --git a/arch/mips/kernel/kspd.c b/arch/mips/kernel/kspd.c
new file mode 100644
index 00000000000..f06a144c788
--- /dev/null
+++ b/arch/mips/kernel/kspd.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright (C) 2005 MIPS Technologies, Inc.  All rights reserved.
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/unistd.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/syscalls.h>
+#include <linux/workqueue.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+
+#include <asm/vpe.h>
+#include <asm/rtlx.h>
+#include <asm/kspd.h>
+
+static struct workqueue_struct *workqueue = NULL;
+static struct work_struct work;
+
+extern unsigned long cpu_khz;
+
+struct mtsp_syscall {
+	int cmd;
+	unsigned char abi;
+	unsigned char size;
+};
+
+struct mtsp_syscall_ret {
+	int retval;
+	int errno;
+};
+
+struct mtsp_syscall_generic {
+	int arg0;
+	int arg1;
+	int arg2;
+	int arg3;
+	int arg4;
+	int arg5;
+	int arg6;
+};
+
+static struct list_head kspd_notifylist;
+static int sp_stopping = 0;
+
+/* these should match with those in the SDE kit */
+#define MTSP_SYSCALL_BASE	0
+#define MTSP_SYSCALL_EXIT	(MTSP_SYSCALL_BASE + 0)
+#define MTSP_SYSCALL_OPEN	(MTSP_SYSCALL_BASE + 1)
+#define MTSP_SYSCALL_READ	(MTSP_SYSCALL_BASE + 2)
+#define MTSP_SYSCALL_WRITE	(MTSP_SYSCALL_BASE + 3)
+#define MTSP_SYSCALL_CLOSE	(MTSP_SYSCALL_BASE + 4)
+#define MTSP_SYSCALL_LSEEK32	(MTSP_SYSCALL_BASE + 5)
+#define MTSP_SYSCALL_ISATTY	(MTSP_SYSCALL_BASE + 6)
+#define MTSP_SYSCALL_GETTIME	(MTSP_SYSCALL_BASE + 7)
+#define MTSP_SYSCALL_PIPEFREQ	(MTSP_SYSCALL_BASE + 8)
+#define MTSP_SYSCALL_GETTOD	(MTSP_SYSCALL_BASE + 9)
+
+#define MTSP_O_RDONLY		0x0000
+#define MTSP_O_WRONLY		0x0001
+#define MTSP_O_RDWR		0x0002
+#define MTSP_O_NONBLOCK		0x0004
+#define MTSP_O_APPEND		0x0008
+#define MTSP_O_SHLOCK		0x0010
+#define MTSP_O_EXLOCK		0x0020
+#define MTSP_O_ASYNC		0x0040
+#define MTSP_O_FSYNC		O_SYNC
+#define MTSP_O_NOFOLLOW		0x0100
+#define MTSP_O_SYNC		0x0080
+#define MTSP_O_CREAT		0x0200
+#define MTSP_O_TRUNC		0x0400
+#define MTSP_O_EXCL		0x0800
+#define MTSP_O_BINARY		0x8000
+
+#define SP_VPE 1
+
+struct apsp_table  {
+	int sp;
+	int ap;
+};
+
+/* we might want to do the mode flags too */
+struct apsp_table open_flags_table[] = {
+	{ MTSP_O_RDWR, O_RDWR },
+	{ MTSP_O_WRONLY, O_WRONLY },
+	{ MTSP_O_CREAT, O_CREAT },
+	{ MTSP_O_TRUNC, O_TRUNC },
+	{ MTSP_O_NONBLOCK, O_NONBLOCK },
+	{ MTSP_O_APPEND, O_APPEND },
+	{ MTSP_O_NOFOLLOW, O_NOFOLLOW }
+};
+
+struct apsp_table syscall_command_table[] = {
+	{ MTSP_SYSCALL_OPEN, __NR_open },
+	{ MTSP_SYSCALL_CLOSE, __NR_close },
+	{ MTSP_SYSCALL_READ, __NR_read },
+	{ MTSP_SYSCALL_WRITE, __NR_write },
+	{ MTSP_SYSCALL_LSEEK32, __NR_lseek }
+};
+
+static int sp_syscall(int num, int arg0, int arg1, int arg2, int arg3)
+{
+	register long int _num  __asm__ ("$2") = num;
+	register long int _arg0  __asm__ ("$4") = arg0;
+	register long int _arg1  __asm__ ("$5") = arg1;
+	register long int _arg2  __asm__ ("$6") = arg2;
+	register long int _arg3  __asm__ ("$7") = arg3;
+
+	mm_segment_t old_fs;
+
+	old_fs = get_fs();
+ 	set_fs(KERNEL_DS);
+
+  	__asm__ __volatile__ (
+ 	"	syscall					\n"
+ 	: "=r" (_num), "=r" (_arg3)
+ 	: "r" (_num), "r" (_arg0), "r" (_arg1), "r" (_arg2), "r" (_arg3));
+
+	set_fs(old_fs);
+
+	/* $a3 is error flag */
+	if (_arg3)
+		return -_num;
+
+	return _num;
+}
+
+static int translate_syscall_command(int cmd)
+{
+	int i;
+	int ret = -1;
+
+	for (i = 0; i < ARRAY_SIZE(syscall_command_table); i++) {
+		if ((cmd == syscall_command_table[i].sp))
+			return syscall_command_table[i].ap;
+	}
+
+	return ret;
+}
+
+static unsigned int translate_open_flags(int flags)
+{
+	int i;
+	unsigned int ret = 0;
+
+	for (i = 0; i < (sizeof(open_flags_table) / sizeof(struct apsp_table));
+	     i++) {
+		if( (flags & open_flags_table[i].sp) ) {
+			ret |= open_flags_table[i].ap;
+		}
+	}
+
+	return ret;
+}
+
+
+static void sp_setfsuidgid( uid_t uid, gid_t gid)
+{
+	current->fsuid = uid;
+	current->fsgid = gid;
+
+	key_fsuid_changed(current);
+	key_fsgid_changed(current);
+}
+
+/*
+ * Expects a request to be on the sysio channel. Reads it.  Decides whether
+ * its a linux syscall and runs it, or whatever.  Puts the return code back
+ * into the request and sends the whole thing back.
+ */
+void sp_work_handle_request(void)
+{
+	struct mtsp_syscall sc;
+	struct mtsp_syscall_generic generic;
+	struct mtsp_syscall_ret ret;
+	struct kspd_notifications *n;
+	struct timeval tv;
+	struct timezone tz;
+	int cmd;
+
+	char *vcwd;
+	mm_segment_t old_fs;
+	int size;
+
+	ret.retval = -1;
+
+	if (!rtlx_read(RTLX_CHANNEL_SYSIO, &sc, sizeof(struct mtsp_syscall), 0)) {
+		printk(KERN_ERR "Expected request but nothing to read\n");
+		return;
+	}
+
+	size = sc.size;
+
+	if (size) {
+		if (!rtlx_read(RTLX_CHANNEL_SYSIO, &generic, size, 0)) {
+			printk(KERN_ERR "Expected request but nothing to read\n");
+			return;
+		}
+	}
+
+	/* Run the syscall at the priviledge of the user who loaded the
+	   SP program */
+
+	if (vpe_getuid(SP_VPE))
+		sp_setfsuidgid( vpe_getuid(SP_VPE), vpe_getgid(SP_VPE));
+
+	switch (sc.cmd) {
+	/* needs the flags argument translating from SDE kit to
+	   linux */
+ 	case MTSP_SYSCALL_PIPEFREQ:
+ 		ret.retval = cpu_khz * 1000;
+ 		ret.errno = 0;
+ 		break;
+
+ 	case MTSP_SYSCALL_GETTOD:
+ 		memset(&tz, 0, sizeof(tz));
+ 		if ((ret.retval = sp_syscall(__NR_gettimeofday, (int)&tv,
+ 		                             (int)&tz, 0,0)) == 0)
+		ret.retval = tv.tv_sec;
+
+		ret.errno = errno;
+		break;
+
+ 	case MTSP_SYSCALL_EXIT:
+		list_for_each_entry(n, &kspd_notifylist, list)
+ 			n->kspd_sp_exit(SP_VPE);
+		sp_stopping = 1;
+
+		printk(KERN_DEBUG "KSPD got exit syscall from SP exitcode %d\n",
+		       generic.arg0);
+ 		break;
+
+ 	case MTSP_SYSCALL_OPEN:
+ 		generic.arg1 = translate_open_flags(generic.arg1);
+
+ 		vcwd = vpe_getcwd(SP_VPE);
+
+ 		/* change to the cwd of the process that loaded the SP program */
+		old_fs = get_fs();
+		set_fs(KERNEL_DS);
+		sys_chdir(vcwd);
+		set_fs(old_fs);
+
+ 		sc.cmd = __NR_open;
+
+		/* fall through */
+
+  	default:
+ 		if ((sc.cmd >= __NR_Linux) &&
+		    (sc.cmd <= (__NR_Linux +  __NR_Linux_syscalls)) )
+			cmd = sc.cmd;
+		else
+			cmd = translate_syscall_command(sc.cmd);
+
+		if (cmd >= 0) {
+			ret.retval = sp_syscall(cmd, generic.arg0, generic.arg1,
+			                        generic.arg2, generic.arg3);
+			ret.errno = errno;
+		} else
+ 			printk(KERN_WARNING
+			       "KSPD: Unknown SP syscall number %d\n", sc.cmd);
+		break;
+ 	} /* switch */
+
+	if (vpe_getuid(SP_VPE))
+		sp_setfsuidgid( 0, 0);
+
+	if ((rtlx_write(RTLX_CHANNEL_SYSIO, &ret, sizeof(struct mtsp_syscall_ret), 0))
+	    < sizeof(struct mtsp_syscall_ret))
+		printk("KSPD: sp_work_handle_request failed to send to SP\n");
+}
+
+static void sp_cleanup(void)
+{
+	struct files_struct *files = current->files;
+	int i, j;
+	struct fdtable *fdt;
+
+	j = 0;
+
+	/*
+	 * It is safe to dereference the fd table without RCU or
+	 * ->file_lock
+	 */
+	fdt = files_fdtable(files);
+	for (;;) {
+		unsigned long set;
+		i = j * __NFDBITS;
+		if (i >= fdt->max_fdset || i >= fdt->max_fds)
+			break;
+		set = fdt->open_fds->fds_bits[j++];
+		while (set) {
+			if (set & 1) {
+				struct file * file = xchg(&fdt->fd[i], NULL);
+				if (file)
+					filp_close(file, files);
+			}
+			i++;
+			set >>= 1;
+		}
+	}
+}
+
+static int channel_open = 0;
+
+/* the work handler */
+static void sp_work(void *data)
+{
+	if (!channel_open) {
+		if( rtlx_open(RTLX_CHANNEL_SYSIO, 1) != 0) {
+			printk("KSPD: unable to open sp channel\n");
+			sp_stopping = 1;
+		} else {
+			channel_open++;
+			printk(KERN_DEBUG "KSPD: SP channel opened\n");
+		}
+	} else {
+		/* wait for some data, allow it to sleep */
+		rtlx_read_poll(RTLX_CHANNEL_SYSIO, 1);
+
+		/* Check we haven't been woken because we are stopping */
+		if (!sp_stopping)
+			sp_work_handle_request();
+	}
+
+	if (!sp_stopping)
+		queue_work(workqueue, &work);
+	else
+		sp_cleanup();
+}
+
+static void startwork(int vpe)
+{
+	sp_stopping = channel_open = 0;
+
+	if (workqueue == NULL) {
+		if ((workqueue = create_singlethread_workqueue("kspd")) == NULL) {
+			printk(KERN_ERR "unable to start kspd\n");
+			return;
+		}
+
+		INIT_WORK(&work, sp_work, NULL);
+		queue_work(workqueue, &work);
+	} else
+		queue_work(workqueue, &work);
+
+}
+
+static void stopwork(int vpe)
+{
+	sp_stopping = 1;
+
+	printk(KERN_DEBUG "KSPD: SP stopping\n");
+}
+
+void kspd_notify(struct kspd_notifications *notify)
+{
+	list_add(&notify->list, &kspd_notifylist);
+}
+
+static struct vpe_notifications notify;
+static int kspd_module_init(void)
+{
+	INIT_LIST_HEAD(&kspd_notifylist);
+
+	notify.start = startwork;
+	notify.stop = stopwork;
+	vpe_notify(SP_VPE, &notify);
+
+	return 0;
+}
+
+static void kspd_module_exit(void)
+{
+
+}
+
+module_init(kspd_module_init);
+module_exit(kspd_module_exit);
+
+MODULE_DESCRIPTION("MIPS KSPD");
+MODULE_AUTHOR("Elizabeth Oldham, MIPS Technologies, Inc.");
+MODULE_LICENSE("GPL");
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index 3f40c37a9ee..a7d2bb3cf83 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -356,73 +356,13 @@ asmlinkage int sys32_llseek(unsigned int fd, unsigned int offset_high,
 asmlinkage ssize_t sys32_pread(unsigned int fd, char __user * buf,
 			       size_t count, u32 unused, u64 a4, u64 a5)
 {
-	ssize_t ret;
-	struct file * file;
-	ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
-	loff_t pos;
-
-	ret = -EBADF;
-	file = fget(fd);
-	if (!file)
-		goto bad_file;
-	if (!(file->f_mode & FMODE_READ))
-		goto out;
-	pos = merge_64(a4, a5);
-	ret = rw_verify_area(READ, file, &pos, count);
-	if (ret < 0)
-		goto out;
-	ret = -EINVAL;
-	if (!file->f_op || !(read = file->f_op->read))
-		goto out;
-	if (pos < 0)
-		goto out;
-	ret = -ESPIPE;
-	if (!(file->f_mode & FMODE_PREAD))
-		goto out;
-	ret = read(file, buf, count, &pos);
-	if (ret > 0)
-		dnotify_parent(file->f_dentry, DN_ACCESS);
-out:
-	fput(file);
-bad_file:
-	return ret;
+	return sys_pread64(fd, buf, count, merge_64(a4, a5));
 }
 
 asmlinkage ssize_t sys32_pwrite(unsigned int fd, const char __user * buf,
 			        size_t count, u32 unused, u64 a4, u64 a5)
 {
-	ssize_t ret;
-	struct file * file;
-	ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
-	loff_t pos;
-
-	ret = -EBADF;
-	file = fget(fd);
-	if (!file)
-		goto bad_file;
-	if (!(file->f_mode & FMODE_WRITE))
-		goto out;
-	pos = merge_64(a4, a5);
-	ret = rw_verify_area(WRITE, file, &pos, count);
-	if (ret < 0)
-		goto out;
-	ret = -EINVAL;
-	if (!file->f_op || !(write = file->f_op->write))
-		goto out;
-	if (pos < 0)
-		goto out;
-
-	ret = -ESPIPE;
-	if (!(file->f_mode & FMODE_PWRITE))
-		goto out;
-
-	ret = write(file, buf, count, &pos);
-	if (ret > 0)
-		dnotify_parent(file->f_dentry, DN_MODIFY);
-out:
-	fput(file);
-bad_file:
-	return ret;
+	return sys_pwrite64(fd, buf, count, merge_64(a4, a5));
 }
 
 asmlinkage int sys32_sched_rr_get_interval(compat_pid_t pid,
@@ -1182,6 +1122,16 @@ asmlinkage ssize_t sys32_readahead(int fd, u32 pad0, u64 a2, u64 a3,
 	return sys_readahead(fd, merge_64(a2, a3), count);
 }
 
+asmlinkage long sys32_sync_file_range(int fd, int __pad,
+	unsigned long a2, unsigned long a3,
+	unsigned long a4, unsigned long a5,
+	int flags)
+{
+	return sys_sync_file_range(fd,
+			merge_64(a2, a3), merge_64(a4, a5),
+			flags);
+}
+
 /* Argument list sizes for sys_socketcall */
 #define AL(x) ((x) * sizeof(unsigned int))
 static unsigned char socketcall_nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
diff --git a/arch/mips/kernel/mips-mt.c b/arch/mips/kernel/mips-mt.c
new file mode 100644
index 00000000000..02237a685ec
--- /dev/null
+++ b/arch/mips/kernel/mips-mt.c
@@ -0,0 +1,449 @@
+/*
+ * General MIPS MT support routines, usable in AP/SP, SMVP, or SMTC kernels
+ * Copyright (C) 2005 Mips Technologies, Inc
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+
+#include <asm/cpu.h>
+#include <asm/processor.h>
+#include <asm/atomic.h>
+#include <asm/system.h>
+#include <asm/hardirq.h>
+#include <asm/mmu_context.h>
+#include <asm/smp.h>
+#include <asm/mipsmtregs.h>
+#include <asm/r4kcache.h>
+#include <asm/cacheflush.h>
+
+/*
+ * CPU mask used to set process affinity for MT VPEs/TCs with FPUs
+ */
+
+cpumask_t mt_fpu_cpumask;
+
+#ifdef CONFIG_MIPS_MT_FPAFF
+
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <asm/uaccess.h>
+
+unsigned long mt_fpemul_threshold = 0;
+
+/*
+ * Replacement functions for the sys_sched_setaffinity() and
+ * sys_sched_getaffinity() system calls, so that we can integrate
+ * FPU affinity with the user's requested processor affinity.
+ * This code is 98% identical with the sys_sched_setaffinity()
+ * and sys_sched_getaffinity() system calls, and should be
+ * updated when kernel/sched.c changes.
+ */
+
+/*
+ * find_process_by_pid - find a process with a matching PID value.
+ * used in sys_sched_set/getaffinity() in kernel/sched.c, so
+ * cloned here.
+ */
+static inline task_t *find_process_by_pid(pid_t pid)
+{
+	return pid ? find_task_by_pid(pid) : current;
+}
+
+
+/*
+ * mipsmt_sys_sched_setaffinity - set the cpu affinity of a process
+ */
+asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len,
+				      unsigned long __user *user_mask_ptr)
+{
+	cpumask_t new_mask;
+	cpumask_t effective_mask;
+	int retval;
+	task_t *p;
+
+	if (len < sizeof(new_mask))
+		return -EINVAL;
+
+	if (copy_from_user(&new_mask, user_mask_ptr, sizeof(new_mask)))
+		return -EFAULT;
+
+	lock_cpu_hotplug();
+	read_lock(&tasklist_lock);
+
+	p = find_process_by_pid(pid);
+	if (!p) {
+		read_unlock(&tasklist_lock);
+		unlock_cpu_hotplug();
+		return -ESRCH;
+	}
+
+	/*
+	 * It is not safe to call set_cpus_allowed with the
+	 * tasklist_lock held.  We will bump the task_struct's
+	 * usage count and drop tasklist_lock before invoking
+	 * set_cpus_allowed.
+	 */
+	get_task_struct(p);
+
+	retval = -EPERM;
+	if ((current->euid != p->euid) && (current->euid != p->uid) &&
+			!capable(CAP_SYS_NICE)) {
+		read_unlock(&tasklist_lock);
+		goto out_unlock;
+	}
+
+	/* Record new user-specified CPU set for future reference */
+	p->thread.user_cpus_allowed = new_mask;
+
+	/* Unlock the task list */
+	read_unlock(&tasklist_lock);
+
+	/* Compute new global allowed CPU set if necessary */
+	if( (p->thread.mflags & MF_FPUBOUND)
+	&& cpus_intersects(new_mask, mt_fpu_cpumask)) {
+		cpus_and(effective_mask, new_mask, mt_fpu_cpumask);
+		retval = set_cpus_allowed(p, effective_mask);
+	} else {
+		p->thread.mflags &= ~MF_FPUBOUND;
+		retval = set_cpus_allowed(p, new_mask);
+	}
+
+
+out_unlock:
+	put_task_struct(p);
+	unlock_cpu_hotplug();
+	return retval;
+}
+
+/*
+ * mipsmt_sys_sched_getaffinity - get the cpu affinity of a process
+ */
+asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len,
+				      unsigned long __user *user_mask_ptr)
+{
+	unsigned int real_len;
+	cpumask_t mask;
+	int retval;
+	task_t *p;
+
+	real_len = sizeof(mask);
+	if (len < real_len)
+		return -EINVAL;
+
+	lock_cpu_hotplug();
+	read_lock(&tasklist_lock);
+
+	retval = -ESRCH;
+	p = find_process_by_pid(pid);
+	if (!p)
+		goto out_unlock;
+
+	retval = 0;
+
+	cpus_and(mask, p->thread.user_cpus_allowed, cpu_possible_map);
+
+out_unlock:
+	read_unlock(&tasklist_lock);
+	unlock_cpu_hotplug();
+	if (retval)
+		return retval;
+	if (copy_to_user(user_mask_ptr, &mask, real_len))
+		return -EFAULT;
+	return real_len;
+}
+
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
+/*
+ * Dump new MIPS MT state for the core. Does not leave TCs halted.
+ * Takes an argument which taken to be a pre-call MVPControl value.
+ */
+
+void mips_mt_regdump(unsigned long mvpctl)
+{
+	unsigned long flags;
+	unsigned long vpflags;
+	unsigned long mvpconf0;
+	int nvpe;
+	int ntc;
+	int i;
+	int tc;
+	unsigned long haltval;
+	unsigned long tcstatval;
+#ifdef CONFIG_MIPS_MT_SMTC
+	void smtc_soft_dump(void);
+#endif /* CONFIG_MIPT_MT_SMTC */
+
+	local_irq_save(flags);
+	vpflags = dvpe();
+	printk("=== MIPS MT State Dump ===\n");
+	printk("-- Global State --\n");
+	printk("   MVPControl Passed: %08lx\n", mvpctl);
+	printk("   MVPControl Read: %08lx\n", vpflags);
+	printk("   MVPConf0 : %08lx\n", (mvpconf0 = read_c0_mvpconf0()));
+	nvpe = ((mvpconf0 & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1;
+	ntc = ((mvpconf0 & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
+	printk("-- per-VPE State --\n");
+	for(i = 0; i < nvpe; i++) {
+	    for(tc = 0; tc < ntc; tc++) {
+			settc(tc);
+		if((read_tc_c0_tcbind() & TCBIND_CURVPE) == i) {
+		    printk("  VPE %d\n", i);
+		    printk("   VPEControl : %08lx\n", read_vpe_c0_vpecontrol());
+		    printk("   VPEConf0 : %08lx\n", read_vpe_c0_vpeconf0());
+		    printk("   VPE%d.Status : %08lx\n",
+				i, read_vpe_c0_status());
+		    printk("   VPE%d.EPC : %08lx\n", i, read_vpe_c0_epc());
+		    printk("   VPE%d.Cause : %08lx\n", i, read_vpe_c0_cause());
+		    printk("   VPE%d.Config7 : %08lx\n",
+				i, read_vpe_c0_config7());
+		    break; /* Next VPE */
+		}
+	    }
+	}
+	printk("-- per-TC State --\n");
+	for(tc = 0; tc < ntc; tc++) {
+		settc(tc);
+		if(read_tc_c0_tcbind() == read_c0_tcbind()) {
+			/* Are we dumping ourself?  */
+			haltval = 0; /* Then we're not halted, and mustn't be */
+			tcstatval = flags; /* And pre-dump TCStatus is flags */
+			printk("  TC %d (current TC with VPE EPC above)\n", tc);
+		} else {
+			haltval = read_tc_c0_tchalt();
+			write_tc_c0_tchalt(1);
+			tcstatval = read_tc_c0_tcstatus();
+			printk("  TC %d\n", tc);
+		}
+		printk("   TCStatus : %08lx\n", tcstatval);
+		printk("   TCBind : %08lx\n", read_tc_c0_tcbind());
+		printk("   TCRestart : %08lx\n", read_tc_c0_tcrestart());
+		printk("   TCHalt : %08lx\n", haltval);
+		printk("   TCContext : %08lx\n", read_tc_c0_tccontext());
+		if (!haltval)
+			write_tc_c0_tchalt(0);
+	}
+#ifdef CONFIG_MIPS_MT_SMTC
+	smtc_soft_dump();
+#endif /* CONFIG_MIPT_MT_SMTC */
+	printk("===========================\n");
+	evpe(vpflags);
+	local_irq_restore(flags);
+}
+
+static int mt_opt_norps = 0;
+static int mt_opt_rpsctl = -1;
+static int mt_opt_nblsu = -1;
+static int mt_opt_forceconfig7 = 0;
+static int mt_opt_config7 = -1;
+
+static int __init rps_disable(char *s)
+{
+	mt_opt_norps = 1;
+	return 1;
+}
+__setup("norps", rps_disable);
+
+static int __init rpsctl_set(char *str)
+{
+	get_option(&str, &mt_opt_rpsctl);
+	return 1;
+}
+__setup("rpsctl=", rpsctl_set);
+
+static int __init nblsu_set(char *str)
+{
+	get_option(&str, &mt_opt_nblsu);
+	return 1;
+}
+__setup("nblsu=", nblsu_set);
+
+static int __init config7_set(char *str)
+{
+	get_option(&str, &mt_opt_config7);
+	mt_opt_forceconfig7 = 1;
+	return 1;
+}
+__setup("config7=", config7_set);
+
+/* Experimental cache flush control parameters that should go away some day */
+int mt_protiflush = 0;
+int mt_protdflush = 0;
+int mt_n_iflushes = 1;
+int mt_n_dflushes = 1;
+
+static int __init set_protiflush(char *s)
+{
+	mt_protiflush = 1;
+	return 1;
+}
+__setup("protiflush", set_protiflush);
+
+static int __init set_protdflush(char *s)
+{
+	mt_protdflush = 1;
+	return 1;
+}
+__setup("protdflush", set_protdflush);
+
+static int __init niflush(char *s)
+{
+	get_option(&s, &mt_n_iflushes);
+	return 1;
+}
+__setup("niflush=", niflush);
+
+static int __init ndflush(char *s)
+{
+	get_option(&s, &mt_n_dflushes);
+	return 1;
+}
+__setup("ndflush=", ndflush);
+#ifdef CONFIG_MIPS_MT_FPAFF
+static int fpaff_threshold = -1;
+
+static int __init fpaff_thresh(char *str)
+{
+	get_option(&str, &fpaff_threshold);
+	return 1;
+}
+
+__setup("fpaff=", fpaff_thresh);
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
+static unsigned int itc_base = 0;
+
+static int __init set_itc_base(char *str)
+{
+	get_option(&str, &itc_base);
+	return 1;
+}
+
+__setup("itcbase=", set_itc_base);
+
+void mips_mt_set_cpuoptions(void)
+{
+	unsigned int oconfig7 = read_c0_config7();
+	unsigned int nconfig7 = oconfig7;
+
+	if (mt_opt_norps) {
+		printk("\"norps\" option deprectated: use \"rpsctl=\"\n");
+	}
+	if (mt_opt_rpsctl >= 0) {
+		printk("34K return prediction stack override set to %d.\n",
+			mt_opt_rpsctl);
+		if (mt_opt_rpsctl)
+			nconfig7 |= (1 << 2);
+		else
+			nconfig7 &= ~(1 << 2);
+	}
+	if (mt_opt_nblsu >= 0) {
+		printk("34K ALU/LSU sync override set to %d.\n", mt_opt_nblsu);
+		if (mt_opt_nblsu)
+			nconfig7 |= (1 << 5);
+		else
+			nconfig7 &= ~(1 << 5);
+	}
+	if (mt_opt_forceconfig7) {
+		printk("CP0.Config7 forced to 0x%08x.\n", mt_opt_config7);
+		nconfig7 = mt_opt_config7;
+	}
+	if (oconfig7 != nconfig7) {
+		__asm__ __volatile("sync");
+		write_c0_config7(nconfig7);
+		ehb ();
+		printk("Config7: 0x%08x\n", read_c0_config7());
+	}
+
+	/* Report Cache management debug options */
+	if (mt_protiflush)
+		printk("I-cache flushes single-threaded\n");
+	if (mt_protdflush)
+		printk("D-cache flushes single-threaded\n");
+	if (mt_n_iflushes != 1)
+		printk("I-Cache Flushes Repeated %d times\n", mt_n_iflushes);
+	if (mt_n_dflushes != 1)
+		printk("D-Cache Flushes Repeated %d times\n", mt_n_dflushes);
+
+#ifdef CONFIG_MIPS_MT_FPAFF
+	/* FPU Use Factor empirically derived from experiments on 34K */
+#define FPUSEFACTOR 333
+
+	if (fpaff_threshold >= 0) {
+		mt_fpemul_threshold = fpaff_threshold;
+	} else {
+		mt_fpemul_threshold =
+			(FPUSEFACTOR * (loops_per_jiffy/(500000/HZ))) / HZ;
+	}
+	printk("FPU Affinity set after %ld emulations\n",
+			mt_fpemul_threshold);
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
+	if (itc_base != 0) {
+		/*
+		 * Configure ITC mapping.  This code is very
+		 * specific to the 34K core family, which uses
+		 * a special mode bit ("ITC") in the ErrCtl
+		 * register to enable access to ITC control
+		 * registers via cache "tag" operations.
+		 */
+		unsigned long ectlval;
+		unsigned long itcblkgrn;
+
+		/* ErrCtl register is known as "ecc" to Linux */
+		ectlval = read_c0_ecc();
+		write_c0_ecc(ectlval | (0x1 << 26));
+		ehb();
+#define INDEX_0 (0x80000000)
+#define INDEX_8 (0x80000008)
+		/* Read "cache tag" for Dcache pseudo-index 8 */
+		cache_op(Index_Load_Tag_D, INDEX_8);
+		ehb();
+		itcblkgrn = read_c0_dtaglo();
+		itcblkgrn &= 0xfffe0000;
+		/* Set for 128 byte pitch of ITC cells */
+		itcblkgrn |= 0x00000c00;
+		/* Stage in Tag register */
+		write_c0_dtaglo(itcblkgrn);
+		ehb();
+		/* Write out to ITU with CACHE op */
+		cache_op(Index_Store_Tag_D, INDEX_8);
+		/* Now set base address, and turn ITC on with 0x1 bit */
+		write_c0_dtaglo((itc_base & 0xfffffc00) | 0x1 );
+		ehb();
+		/* Write out to ITU with CACHE op */
+		cache_op(Index_Store_Tag_D, INDEX_0);
+		write_c0_ecc(ectlval);
+		ehb();
+		printk("Mapped %ld ITC cells starting at 0x%08x\n",
+			((itcblkgrn & 0x7fe00000) >> 20), itc_base);
+	}
+}
+
+/*
+ * Function to protect cache flushes from concurrent execution
+ * depends on MP software model chosen.
+ */
+
+void mt_cflush_lockdown(void)
+{
+#ifdef CONFIG_MIPS_MT_SMTC
+	void smtc_cflush_lockdown(void);
+
+	smtc_cflush_lockdown();
+#endif /* CONFIG_MIPS_MT_SMTC */
+	/* FILL IN VSMP and AP/SP VERSIONS HERE */
+}
+
+void mt_cflush_release(void)
+{
+#ifdef CONFIG_MIPS_MT_SMTC
+	void smtc_cflush_release(void);
+
+	smtc_cflush_release();
+#endif /* CONFIG_MIPS_MT_SMTC */
+	/* FILL IN VSMP and AP/SP VERSIONS HERE */
+}
diff --git a/arch/mips/kernel/mips_ksyms.c b/arch/mips/kernel/mips_ksyms.c
index e042f9d2ba3..0a71a4c3371 100644
--- a/arch/mips/kernel/mips_ksyms.c
+++ b/arch/mips/kernel/mips_ksyms.c
@@ -28,21 +28,9 @@ extern long __strnlen_user_asm(const char *s);
 /*
  * String functions
  */
-EXPORT_SYMBOL(memchr);
-EXPORT_SYMBOL(memcmp);
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memmove);
-EXPORT_SYMBOL(strcat);
-EXPORT_SYMBOL(strchr);
-#ifdef CONFIG_64BIT
-EXPORT_SYMBOL(strncmp);
-#endif
-EXPORT_SYMBOL(strlen);
-EXPORT_SYMBOL(strncat);
-EXPORT_SYMBOL(strnlen);
-EXPORT_SYMBOL(strrchr);
-EXPORT_SYMBOL(strstr);
 
 EXPORT_SYMBOL(kernel_thread);
 
@@ -61,6 +49,3 @@ EXPORT_SYMBOL(__strnlen_user_asm);
 EXPORT_SYMBOL(csum_partial);
 
 EXPORT_SYMBOL(invalid_pte_table);
-#ifdef CONFIG_GENERIC_IRQ_PROBE
-EXPORT_SYMBOL(probe_irq_mask);
-#endif
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index c66db5e5ab6..199a06e873c 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -41,6 +41,10 @@
 #include <asm/elf.h>
 #include <asm/isadep.h>
 #include <asm/inst.h>
+#ifdef CONFIG_MIPS_MT_SMTC
+#include <asm/mipsmtregs.h>
+extern void smtc_idle_loop_hook(void);
+#endif /* CONFIG_MIPS_MT_SMTC */
 
 /*
  * The idle thread. There's no useful work to be done, so just try to conserve
@@ -51,9 +55,13 @@ ATTRIB_NORET void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
-		while (!need_resched())
+		while (!need_resched()) {
+#ifdef CONFIG_MIPS_MT_SMTC
+			smtc_idle_loop_hook();
+#endif /* CONFIG_MIPS_MT_SMTC */
 			if (cpu_wait)
 				(*cpu_wait)();
+		}
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
@@ -177,6 +185,17 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
 	childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);
 	clear_tsk_thread_flag(p, TIF_USEDFPU);
 
+#ifdef CONFIG_MIPS_MT_FPAFF
+	/*
+	 * FPU affinity support is cleaner if we track the
+	 * user-visible CPU affinity from the very beginning.
+	 * The generic cpus_allowed mask will already have
+	 * been copied from the parent before copy_thread
+	 * is invoked.
+	 */
+	p->thread.user_cpus_allowed = p->cpus_allowed;
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
 	if (clone_flags & CLONE_SETTLS)
 		ti->tp_value = regs->regs[7];
 
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index f838b36cc76..9b4733c1239 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -248,10 +248,20 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			break;
 		case FPC_EIR: {	/* implementation / version register */
 			unsigned int flags;
+#ifdef CONFIG_MIPS_MT_SMTC
+			unsigned int irqflags;
+			unsigned int mtflags;
+#endif /* CONFIG_MIPS_MT_SMTC */
 
 			if (!cpu_has_fpu)
 				break;
 
+#ifdef CONFIG_MIPS_MT_SMTC
+			/* Read-modify-write of Status must be atomic */
+			local_irq_save(irqflags);
+			mtflags = dmt();
+#endif /* CONFIG_MIPS_MT_SMTC */
+
 			preempt_disable();
 			if (cpu_has_mipsmt) {
 				unsigned int vpflags = dvpe();
@@ -266,6 +276,10 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 				__asm__ __volatile__("cfc1\t%0,$0": "=r" (tmp));
 				write_c0_status(flags);
 			}
+#ifdef CONFIG_MIPS_MT_SMTC
+			emt(mtflags);
+			local_irq_restore(irqflags);
+#endif /* CONFIG_MIPS_MT_SMTC */
 			preempt_enable();
 			break;
 		}
@@ -469,7 +483,7 @@ static inline int audit_arch(void)
 asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
 {
 	if (unlikely(current->audit_context) && entryexit)
-		audit_syscall_exit(current, AUDITSC_RESULT(regs->regs[2]),
+		audit_syscall_exit(AUDITSC_RESULT(regs->regs[2]),
 		                   regs->regs[2]);
 
 	if (!(current->ptrace & PT_PTRACED))
@@ -493,7 +507,7 @@ asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
 	}
  out:
 	if (unlikely(current->audit_context) && !entryexit)
-		audit_syscall_entry(current, audit_arch(), regs->regs[2],
+		audit_syscall_entry(audit_arch(), regs->regs[2],
 				    regs->regs[4], regs->regs[5],
 				    regs->regs[6], regs->regs[7]);
 }
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c
index 0d5cf97af72..8704dc0496e 100644
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -173,12 +173,22 @@ asmlinkage int sys32_ptrace(int request, int pid, int addr, int data)
 			break;
 		case FPC_EIR: {	/* implementation / version register */
 			unsigned int flags;
+#ifdef CONFIG_MIPS_MT_SMTC
+			unsigned int irqflags;
+			unsigned int mtflags;
+#endif /* CONFIG_MIPS_MT_SMTC */
 
 			if (!cpu_has_fpu) {
 				tmp = 0;
 				break;
 			}
 
+#ifdef CONFIG_MIPS_MT_SMTC
+			/* Read-modify-write of Status must be atomic */
+			local_irq_save(irqflags);
+			mtflags = dmt();
+#endif /* CONFIG_MIPS_MT_SMTC */
+
 			preempt_disable();
 			if (cpu_has_mipsmt) {
 				unsigned int vpflags = dvpe();
@@ -193,6 +203,10 @@ asmlinkage int sys32_ptrace(int request, int pid, int addr, int data)
 				__asm__ __volatile__("cfc1\t%0,$0": "=r" (tmp));
 				write_c0_status(flags);
 			}
+#ifdef CONFIG_MIPS_MT_SMTC
+			emt(mtflags);
+			local_irq_restore(irqflags);
+#endif /* CONFIG_MIPS_MT_SMTC */
 			preempt_enable();
 			break;
 		}
diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index d2afbd19a9c..0b1b54acee9 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -88,7 +88,18 @@
 
 	PTR_ADDIU	t0, $28, _THREAD_SIZE - 32
 	set_saved_sp	t0, t1, t2
-
+#ifdef CONFIG_MIPS_MT_SMTC
+	/* Read-modify-writes of Status must be atomic on a VPE */
+	mfc0	t2, CP0_TCSTATUS
+	ori	t1, t2, TCSTATUS_IXMT
+	mtc0	t1, CP0_TCSTATUS
+	andi	t2, t2, TCSTATUS_IXMT
+	ehb
+	DMT	8				# dmt	t0
+	move	t1,ra
+	jal	mips_ihb
+	move	ra,t1
+#endif /* CONFIG_MIPS_MT_SMTC */
 	mfc0	t1, CP0_STATUS		/* Do we really need this? */
 	li	a3, 0xff01
 	and	t1, a3
@@ -97,6 +108,18 @@
 	and	a2, a3
 	or	a2, t1
 	mtc0	a2, CP0_STATUS
+#ifdef CONFIG_MIPS_MT_SMTC
+	ehb
+	andi	t0, t0, VPECONTROL_TE
+	beqz	t0, 1f
+	emt
+1:
+	mfc0	t1, CP0_TCSTATUS
+	xori	t1, t1, TCSTATUS_IXMT
+	or	t1, t1, t2
+	mtc0	t1, CP0_TCSTATUS
+	ehb
+#endif /* CONFIG_MIPS_MT_SMTC */
 	move	v0, a0
 	jr	ra
 	END(resume)
@@ -131,10 +154,19 @@ LEAF(_restore_fp)
 #define FPU_DEFAULT  0x00000000
 
 LEAF(_init_fpu)
+#ifdef CONFIG_MIPS_MT_SMTC
+	/* Rather than manipulate per-VPE Status, set per-TC bit in TCStatus */
+	mfc0	t0, CP0_TCSTATUS
+	/* Bit position is the same for Status, TCStatus */
+	li	t1, ST0_CU1
+	or	t0, t1
+	mtc0	t0, CP0_TCSTATUS
+#else /* Normal MIPS CU1 enable */
 	mfc0	t0, CP0_STATUS
 	li	t1, ST0_CU1
 	or	t0, t1
 	mtc0	t0, CP0_STATUS
+#endif /* CONFIG_MIPS_MT_SMTC */
 	fpu_enable_hazard
 
 	li	t1, FPU_DEFAULT
diff --git a/arch/mips/kernel/rtlx.c b/arch/mips/kernel/rtlx.c
index 986a9cf2306..caf777f8328 100644
--- a/arch/mips/kernel/rtlx.c
+++ b/arch/mips/kernel/rtlx.c
@@ -21,45 +21,44 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/init.h>
+#include <asm/uaccess.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/vmalloc.h>
+#include <linux/elf.h>
+#include <linux/seq_file.h>
+#include <linux/syscalls.h>
+#include <linux/moduleloader.h>
 #include <linux/interrupt.h>
-#include <linux/irq.h>
 #include <linux/poll.h>
 #include <linux/sched.h>
 #include <linux/wait.h>
-
 #include <asm/mipsmtregs.h>
-#include <asm/bitops.h>
+#include <asm/cacheflush.h>
+#include <asm/atomic.h>
 #include <asm/cpu.h>
 #include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/vpe.h>
 #include <asm/rtlx.h>
-#include <asm/uaccess.h>
 
 #define RTLX_TARG_VPE 1
 
 static struct rtlx_info *rtlx;
 static int major;
 static char module_name[] = "rtlx";
-static struct irqaction irq;
-static int irq_num;
-
-static inline int spacefree(int read, int write, int size)
-{
-	if (read == write) {
-		/*
-		 * never fill the buffer completely, so indexes are always
-		 * equal if empty and only empty, or !equal if data available
-		 */
-		return size - 1;
-	}
-
-	return ((read + size - write) % size) - 1;
-}
 
 static struct chan_waitqueues {
 	wait_queue_head_t rt_queue;
 	wait_queue_head_t lx_queue;
+	int in_open;
 } channel_wqs[RTLX_CHANNELS];
 
+static struct irqaction irq;
+static int irq_num;
+static struct vpe_notifications notify;
+static int sp_stopping = 0;
+
 extern void *vpe_get_shared(int index);
 
 static void rtlx_dispatch(struct pt_regs *regs)
@@ -67,174 +66,298 @@ static void rtlx_dispatch(struct pt_regs *regs)
 	do_IRQ(MIPSCPU_INT_BASE + MIPS_CPU_RTLX_IRQ, regs);
 }
 
+
+/* Interrupt handler may be called before rtlx_init has otherwise had
+   a chance to run.
+*/
 static irqreturn_t rtlx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
 	int i;
 
 	for (i = 0; i < RTLX_CHANNELS; i++) {
-		struct rtlx_channel *chan = &rtlx->channel[i];
-
-		if (chan->lx_read != chan->lx_write)
-			wake_up_interruptible(&channel_wqs[i].lx_queue);
+			wake_up(&channel_wqs[i].lx_queue);
+			wake_up(&channel_wqs[i].rt_queue);
 	}
 
 	return IRQ_HANDLED;
 }
 
-/* call when we have the address of the shared structure from the SP side. */
-static int rtlx_init(struct rtlx_info *rtlxi)
+static __attribute_used__ void dump_rtlx(void)
 {
 	int i;
 
-	if (rtlxi->id != RTLX_ID) {
-		printk(KERN_WARNING "no valid RTLX id at 0x%p\n", rtlxi);
-		return -ENOEXEC;
-	}
+	printk("id 0x%lx state %d\n", rtlx->id, rtlx->state);
 
-	/* initialise the wait queues */
 	for (i = 0; i < RTLX_CHANNELS; i++) {
-		init_waitqueue_head(&channel_wqs[i].rt_queue);
-		init_waitqueue_head(&channel_wqs[i].lx_queue);
-	}
+		struct rtlx_channel *chan = &rtlx->channel[i];
 
-	/* set up for interrupt handling */
-	memset(&irq, 0, sizeof(struct irqaction));
+		printk(" rt_state %d lx_state %d buffer_size %d\n",
+		       chan->rt_state, chan->lx_state, chan->buffer_size);
 
-	if (cpu_has_vint)
-		set_vi_handler(MIPS_CPU_RTLX_IRQ, rtlx_dispatch);
+		printk(" rt_read %d rt_write %d\n",
+		       chan->rt_read, chan->rt_write);
+
+		printk(" lx_read %d lx_write %d\n",
+		       chan->lx_read, chan->lx_write);
 
-	irq_num = MIPSCPU_INT_BASE + MIPS_CPU_RTLX_IRQ;
-	irq.handler = rtlx_interrupt;
-	irq.flags = SA_INTERRUPT;
-	irq.name = "RTLX";
-	irq.dev_id = rtlx;
-	setup_irq(irq_num, &irq);
+		printk(" rt_buffer <%s>\n", chan->rt_buffer);
+		printk(" lx_buffer <%s>\n", chan->lx_buffer);
+	}
+}
+
+/* call when we have the address of the shared structure from the SP side. */
+static int rtlx_init(struct rtlx_info *rtlxi)
+{
+	if (rtlxi->id != RTLX_ID) {
+		printk(KERN_ERR "no valid RTLX id at 0x%p 0x%x\n", rtlxi, rtlxi->id);
+		return -ENOEXEC;
+	}
 
 	rtlx = rtlxi;
 
 	return 0;
 }
 
-/* only allow one open process at a time to open each channel */
-static int rtlx_open(struct inode *inode, struct file *filp)
+/* notifications */
+static void starting(int vpe)
+{
+	int i;
+	sp_stopping = 0;
+
+	/* force a reload of rtlx */
+	rtlx=NULL;
+
+	/* wake up any sleeping rtlx_open's */
+	for (i = 0; i < RTLX_CHANNELS; i++)
+		wake_up_interruptible(&channel_wqs[i].lx_queue);
+}
+
+static void stopping(int vpe)
 {
-	int minor, ret;
+	int i;
+
+	sp_stopping = 1;
+	for (i = 0; i < RTLX_CHANNELS; i++)
+		wake_up_interruptible(&channel_wqs[i].lx_queue);
+}
+
+
+int rtlx_open(int index, int can_sleep)
+{
+	int ret;
 	struct rtlx_channel *chan;
+	volatile struct rtlx_info **p;
 
-	/* assume only 1 device at the mo. */
-	minor = MINOR(inode->i_rdev);
+	if (index >= RTLX_CHANNELS) {
+		printk(KERN_DEBUG "rtlx_open index out of range\n");
+		return -ENOSYS;
+	}
+
+	if (channel_wqs[index].in_open) {
+		printk(KERN_DEBUG "rtlx_open channel %d already opened\n", index);
+		return -EBUSY;
+	}
+
+	channel_wqs[index].in_open++;
 
 	if (rtlx == NULL) {
-		struct rtlx_info **p;
 		if( (p = vpe_get_shared(RTLX_TARG_VPE)) == NULL) {
-			printk(KERN_ERR "vpe_get_shared is NULL. "
-			       "Has an SP program been loaded?\n");
-			return -EFAULT;
+			if (can_sleep) {
+				DECLARE_WAITQUEUE(wait, current);
+
+				/* go to sleep */
+				add_wait_queue(&channel_wqs[index].lx_queue, &wait);
+
+				set_current_state(TASK_INTERRUPTIBLE);
+				while ((p = vpe_get_shared(RTLX_TARG_VPE)) == NULL) {
+					schedule();
+					set_current_state(TASK_INTERRUPTIBLE);
+				}
+
+				set_current_state(TASK_RUNNING);
+				remove_wait_queue(&channel_wqs[index].lx_queue, &wait);
+
+				/* back running */
+			} else {
+				printk( KERN_DEBUG "No SP program loaded, and device "
+					"opened with O_NONBLOCK\n");
+				channel_wqs[index].in_open = 0;
+				return -ENOSYS;
+			}
 		}
 
 		if (*p == NULL) {
-			printk(KERN_ERR "vpe_shared %p %p\n", p, *p);
-			return -EFAULT;
+			if (can_sleep) {
+				DECLARE_WAITQUEUE(wait, current);
+
+				/* go to sleep */
+				add_wait_queue(&channel_wqs[index].lx_queue, &wait);
+
+				set_current_state(TASK_INTERRUPTIBLE);
+				while (*p == NULL) {
+					schedule();
+
+					/* reset task state to interruptable otherwise
+					   we'll whizz round here like a very fast loopy
+					   thing. schedule() appears to return with state
+					   set to TASK_RUNNING.
+
+					   If the loaded SP program, for whatever reason,
+					   doesn't set up the shared structure *p will never
+					   become true. So whoever connected to either /dev/rt?
+					   or if it was kspd, will then take up rather a lot of
+					   processor cycles.
+					*/
+
+					set_current_state(TASK_INTERRUPTIBLE);
+				}
+
+				set_current_state(TASK_RUNNING);
+				remove_wait_queue(&channel_wqs[index].lx_queue, &wait);
+
+				/* back running */
+			}
+			else {
+				printk(" *vpe_get_shared is NULL. "
+				       "Has an SP program been loaded?\n");
+				channel_wqs[index].in_open = 0;
+				return -ENOSYS;
+			}
 		}
 
-		if ((ret = rtlx_init(*p)) < 0)
-			return ret;
+		if ((unsigned int)*p < KSEG0) {
+			printk(KERN_WARNING "vpe_get_shared returned an invalid pointer "
+			       "maybe an error code %d\n", (int)*p);
+ 			channel_wqs[index].in_open = 0;
+			return -ENOSYS;
+		}
+
+ 		if ((ret = rtlx_init(*p)) < 0) {
+ 			channel_wqs[index].in_open = 0;
+  			return ret;
+ 		}
 	}
 
-	chan = &rtlx->channel[minor];
+	chan = &rtlx->channel[index];
 
-	if (test_and_set_bit(RTLX_STATE_OPENED, &chan->lx_state))
-		return -EBUSY;
+ 	if (chan->lx_state == RTLX_STATE_OPENED) {
+ 		channel_wqs[index].in_open = 0;
+  		return -EBUSY;
+ 	}
 
+  	chan->lx_state = RTLX_STATE_OPENED;
+ 	channel_wqs[index].in_open = 0;
 	return 0;
 }
 
-static int rtlx_release(struct inode *inode, struct file *filp)
+int rtlx_release(int index)
 {
-	int minor = MINOR(inode->i_rdev);
-
-	clear_bit(RTLX_STATE_OPENED, &rtlx->channel[minor].lx_state);
-	smp_mb__after_clear_bit();
-
+	rtlx->channel[index].lx_state = RTLX_STATE_UNUSED;
 	return 0;
 }
 
-static unsigned int rtlx_poll(struct file *file, poll_table * wait)
+unsigned int rtlx_read_poll(int index, int can_sleep)
 {
-	int minor;
-	unsigned int mask = 0;
-	struct rtlx_channel *chan;
+ 	struct rtlx_channel *chan;
 
-	minor = MINOR(file->f_dentry->d_inode->i_rdev);
-	chan = &rtlx->channel[minor];
+ 	if (rtlx == NULL)
+ 		return 0;
 
-	poll_wait(file, &channel_wqs[minor].rt_queue, wait);
-	poll_wait(file, &channel_wqs[minor].lx_queue, wait);
+ 	chan = &rtlx->channel[index];
 
 	/* data available to read? */
-	if (chan->lx_read != chan->lx_write)
-		mask |= POLLIN | POLLRDNORM;
+	if (chan->lx_read == chan->lx_write) {
+		if (can_sleep) {
+			DECLARE_WAITQUEUE(wait, current);
 
-	/* space to write */
-	if (spacefree(chan->rt_read, chan->rt_write, chan->buffer_size))
-		mask |= POLLOUT | POLLWRNORM;
+			/* go to sleep */
+			add_wait_queue(&channel_wqs[index].lx_queue, &wait);
 
-	return mask;
+			set_current_state(TASK_INTERRUPTIBLE);
+			while (chan->lx_read == chan->lx_write) {
+				schedule();
+
+				set_current_state(TASK_INTERRUPTIBLE);
+
+				if (sp_stopping) {
+					set_current_state(TASK_RUNNING);
+					remove_wait_queue(&channel_wqs[index].lx_queue, &wait);
+					return 0;
+				}
+			}
+
+			set_current_state(TASK_RUNNING);
+			remove_wait_queue(&channel_wqs[index].lx_queue, &wait);
+
+			/* back running */
+		}
+		else
+			return 0;
+	}
+
+	return (chan->lx_write + chan->buffer_size - chan->lx_read)
+	       % chan->buffer_size;
 }
 
-static ssize_t rtlx_read(struct file *file, char __user * buffer, size_t count,
-			 loff_t * ppos)
+static inline int write_spacefree(int read, int write, int size)
 {
-	unsigned long failed;
-	size_t fl = 0L;
-	int minor;
-	struct rtlx_channel *lx;
-	DECLARE_WAITQUEUE(wait, current);
+	if (read == write) {
+		/*
+		 * Never fill the buffer completely, so indexes are always
+		 * equal if empty and only empty, or !equal if data available
+		 */
+		return size - 1;
+	}
 
-	minor = MINOR(file->f_dentry->d_inode->i_rdev);
-	lx = &rtlx->channel[minor];
+	return ((read + size - write) % size) - 1;
+}
 
-	/* data available? */
-	if (lx->lx_write == lx->lx_read) {
-		if (file->f_flags & O_NONBLOCK)
-			return 0;	/* -EAGAIN makes cat whinge */
+unsigned int rtlx_write_poll(int index)
+{
+	struct rtlx_channel *chan = &rtlx->channel[index];
+	return write_spacefree(chan->rt_read, chan->rt_write, chan->buffer_size);
+}
 
-		/* go to sleep */
-		add_wait_queue(&channel_wqs[minor].lx_queue, &wait);
-		set_current_state(TASK_INTERRUPTIBLE);
+static inline void copy_to(void *dst, void *src, size_t count, int user)
+{
+	if (user)
+		copy_to_user(dst, src, count);
+	else
+		memcpy(dst, src, count);
+}
 
-		while (lx->lx_write == lx->lx_read)
-			schedule();
+static inline void copy_from(void *dst, void *src, size_t count, int user)
+{
+	if (user)
+		copy_from_user(dst, src, count);
+	else
+		memcpy(dst, src, count);
+}
 
-		set_current_state(TASK_RUNNING);
-		remove_wait_queue(&channel_wqs[minor].lx_queue, &wait);
+ssize_t rtlx_read(int index, void *buff, size_t count, int user)
+{
+	size_t fl = 0L;
+	struct rtlx_channel *lx;
 
-		/* back running */
-	}
+	if (rtlx == NULL)
+		return -ENOSYS;
+
+	lx = &rtlx->channel[index];
 
 	/* find out how much in total */
 	count = min(count,
-		    (size_t)(lx->lx_write + lx->buffer_size - lx->lx_read) % lx->buffer_size);
+		     (size_t)(lx->lx_write + lx->buffer_size - lx->lx_read)
+		     % lx->buffer_size);
 
 	/* then how much from the read pointer onwards */
-	fl = min(count, (size_t)lx->buffer_size - lx->lx_read);
+	fl = min( count, (size_t)lx->buffer_size - lx->lx_read);
 
-	failed = copy_to_user (buffer, &lx->lx_buffer[lx->lx_read], fl);
-	if (failed) {
-		count = fl - failed;
-		goto out;
-	}
+	copy_to(buff, &lx->lx_buffer[lx->lx_read], fl, user);
 
 	/* and if there is anything left at the beginning of the buffer */
-	if (count - fl) {
-		failed = copy_to_user (buffer + fl, lx->lx_buffer, count - fl);
-		if (failed) {
-			count -= failed;
-			goto out;
-		}
-	}
+	if ( count - fl )
+		copy_to (buff + fl, lx->lx_buffer, count - fl, user);
 
-out:
 	/* update the index */
 	lx->lx_read += count;
 	lx->lx_read %= lx->buffer_size;
@@ -242,20 +365,100 @@ out:
 	return count;
 }
 
-static ssize_t rtlx_write(struct file *file, const char __user * buffer,
+ssize_t rtlx_write(int index, void *buffer, size_t count, int user)
+{
+	struct rtlx_channel *rt;
+	size_t fl;
+
+	if (rtlx == NULL)
+		return(-ENOSYS);
+
+	rt = &rtlx->channel[index];
+
+	/* total number of bytes to copy */
+	count = min(count,
+		    (size_t)write_spacefree(rt->rt_read, rt->rt_write,
+					    rt->buffer_size));
+
+	/* first bit from write pointer to the end of the buffer, or count */
+	fl = min(count, (size_t) rt->buffer_size - rt->rt_write);
+
+	copy_from (&rt->rt_buffer[rt->rt_write], buffer, fl, user);
+
+	/* if there's any left copy to the beginning of the buffer */
+	if( count - fl )
+		copy_from (rt->rt_buffer, buffer + fl, count - fl, user);
+
+	rt->rt_write += count;
+	rt->rt_write %= rt->buffer_size;
+
+	return(count);
+}
+
+
+static int file_open(struct inode *inode, struct file *filp)
+{
+	int minor = iminor(inode);
+
+	return rtlx_open(minor, (filp->f_flags & O_NONBLOCK) ? 0 : 1);
+}
+
+static int file_release(struct inode *inode, struct file *filp)
+{
+	int minor = iminor(inode);
+
+	return rtlx_release(minor);
+}
+
+static unsigned int file_poll(struct file *file, poll_table * wait)
+{
+	int minor;
+	unsigned int mask = 0;
+
+	minor = iminor(file->f_dentry->d_inode);
+
+	poll_wait(file, &channel_wqs[minor].rt_queue, wait);
+	poll_wait(file, &channel_wqs[minor].lx_queue, wait);
+
+	if (rtlx == NULL)
+		return 0;
+
+	/* data available to read? */
+	if (rtlx_read_poll(minor, 0))
+		mask |= POLLIN | POLLRDNORM;
+
+	/* space to write */
+	if (rtlx_write_poll(minor))
+		mask |= POLLOUT | POLLWRNORM;
+
+	return mask;
+}
+
+static ssize_t file_read(struct file *file, char __user * buffer, size_t count,
+			 loff_t * ppos)
+{
+	int minor = iminor(file->f_dentry->d_inode);
+
+	/* data available? */
+	if (!rtlx_read_poll(minor, (file->f_flags & O_NONBLOCK) ? 0 : 1)) {
+		return 0;	// -EAGAIN makes cat whinge
+	}
+
+	return rtlx_read(minor, buffer, count, 1);
+}
+
+static ssize_t file_write(struct file *file, const char __user * buffer,
 			  size_t count, loff_t * ppos)
 {
-	unsigned long failed;
 	int minor;
 	struct rtlx_channel *rt;
-	size_t fl;
 	DECLARE_WAITQUEUE(wait, current);
 
-	minor = MINOR(file->f_dentry->d_inode->i_rdev);
+	minor = iminor(file->f_dentry->d_inode);
 	rt = &rtlx->channel[minor];
 
 	/* any space left... */
-	if (!spacefree(rt->rt_read, rt->rt_write, rt->buffer_size)) {
+	if (!rtlx_write_poll(minor)) {
 
 		if (file->f_flags & O_NONBLOCK)
 			return -EAGAIN;
@@ -263,61 +466,64 @@ static ssize_t rtlx_write(struct file *file, const char __user * buffer,
 		add_wait_queue(&channel_wqs[minor].rt_queue, &wait);
 		set_current_state(TASK_INTERRUPTIBLE);
 
-		while (!spacefree(rt->rt_read, rt->rt_write, rt->buffer_size))
+		while (!rtlx_write_poll(minor))
 			schedule();
 
 		set_current_state(TASK_RUNNING);
 		remove_wait_queue(&channel_wqs[minor].rt_queue, &wait);
 	}
 
-	/* total number of bytes to copy */
-	count = min(count, (size_t)spacefree(rt->rt_read, rt->rt_write, rt->buffer_size) );
-
-	/* first bit from write pointer to the end of the buffer, or count */
-	fl = min(count, (size_t) rt->buffer_size - rt->rt_write);
-
-	failed = copy_from_user(&rt->rt_buffer[rt->rt_write], buffer, fl);
-	if (failed) {
-		count = fl - failed;
-		goto out;
-	}
-
-	/* if there's any left copy to the beginning of the buffer */
-	if (count - fl) {
-		failed = copy_from_user(rt->rt_buffer, buffer + fl, count - fl);
-		if (failed) {
-			count -= failed;
-			goto out;
-		}
-	}
-
-out:
-	rt->rt_write += count;
-	rt->rt_write %= rt->buffer_size;
-
-	return count;
+	return rtlx_write(minor, (void *)buffer, count, 1);
 }
 
 static struct file_operations rtlx_fops = {
-	.owner		= THIS_MODULE,
-	.open		= rtlx_open,
-	.release	= rtlx_release,
-	.write		= rtlx_write,
-	.read		= rtlx_read,
-	.poll		= rtlx_poll
+	.owner =   THIS_MODULE,
+	.open =    file_open,
+	.release = file_release,
+	.write =   file_write,
+	.read =    file_read,
+	.poll =    file_poll
+};
+
+static struct irqaction rtlx_irq = {
+	.handler	= rtlx_interrupt,
+	.flags		= SA_INTERRUPT,
+	.name		= "RTLX",
 };
 
+static int rtlx_irq_num = MIPSCPU_INT_BASE + MIPS_CPU_RTLX_IRQ;
+
 static char register_chrdev_failed[] __initdata =
 	KERN_ERR "rtlx_module_init: unable to register device\n";
 
-static int __init rtlx_module_init(void)
+static int rtlx_module_init(void)
 {
+	int i;
+
 	major = register_chrdev(0, module_name, &rtlx_fops);
 	if (major < 0) {
 		printk(register_chrdev_failed);
 		return major;
 	}
 
+	/* initialise the wait queues */
+	for (i = 0; i < RTLX_CHANNELS; i++) {
+		init_waitqueue_head(&channel_wqs[i].rt_queue);
+		init_waitqueue_head(&channel_wqs[i].lx_queue);
+		channel_wqs[i].in_open = 0;
+	}
+
+	/* set up notifiers */
+	notify.start = starting;
+	notify.stop = stopping;
+	vpe_notify(RTLX_TARG_VPE, &notify);
+
+	if (cpu_has_vint)
+		set_vi_handler(MIPS_CPU_RTLX_IRQ, rtlx_dispatch);
+
+	rtlx_irq.dev_id = rtlx;
+	setup_irq(rtlx_irq_num, &rtlx_irq);
+
 	return 0;
 }
 
@@ -330,5 +536,5 @@ module_init(rtlx_module_init);
 module_exit(rtlx_module_exit);
 
 MODULE_DESCRIPTION("MIPS RTLX");
-MODULE_AUTHOR("Elizabeth Clarke, MIPS Technologies, Inc.");
+MODULE_AUTHOR("Elizabeth Oldham, MIPS Technologies, Inc.");
 MODULE_LICENSE("GPL");
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 2f2dc54b2e2..a0ac0e5f61a 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -569,8 +569,19 @@ einval:	li	v0, -EINVAL
 	sys	sys_tkill		2
 	sys	sys_sendfile64		5
 	sys	sys_futex		6
+#ifdef CONFIG_MIPS_MT_FPAFF
+	/*
+	 * For FPU affinity scheduling on MIPS MT processors, we need to
+	 * intercept sys_sched_xxxaffinity() calls until we get a proper hook
+	 * in kernel/sched.c.  Considered only temporary we only support these
+	 * hooks for the 32-bit kernel - there is no MIPS64 MT processor atm.
+	 */
+	sys	mipsmt_sys_sched_setaffinity	3
+	sys	mipsmt_sys_sched_getaffinity	3
+#else
 	sys	sys_sched_setaffinity	3
 	sys	sys_sched_getaffinity	3	/* 4240 */
+#endif /* CONFIG_MIPS_MT_FPAFF */
 	sys	sys_io_setup		2
 	sys	sys_io_destroy		1
 	sys	sys_io_getevents	5
@@ -634,6 +645,8 @@ einval:	li	v0, -EINVAL
 	sys	sys_pselect6		6
 	sys	sys_ppoll		5
 	sys	sys_unshare		1
+	sys	sys_splice		4
+	sys	sys_sync_file_range	7	/* 4305 */
 	.endm
 
 	/* We pre-compute the number of _instruction_ bytes needed to
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 98bf25df56f..9ba75088737 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -460,3 +460,5 @@ sys_call_table:
 	PTR	sys_pselect6			/* 5260 */
 	PTR	sys_ppoll
 	PTR	sys_unshare
+	PTR	sys_splice
+	PTR	sys_sync_file_range
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 05a2c0567da..942aca26f9c 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -386,3 +386,5 @@ EXPORT(sysn32_call_table)
 	PTR	sys_pselect6
 	PTR	sys_ppoll			/* 6265 */
 	PTR	sys_unshare
+	PTR	sys_splice
+	PTR	sys_sync_file_range
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 19c4ca481b0..b53a9207f53 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -508,4 +508,6 @@ sys_call_table:
 	PTR	sys_pselect6
 	PTR	sys_ppoll
 	PTR	sys_unshare
+	PTR	sys_splice
+	PTR	sys32_sync_file_range		/* 4305 */
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index dcbfd27071f..bcf1b10e518 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -529,7 +529,10 @@ void __init setup_arch(char **cmdline_p)
 
 int __init fpu_disable(char *s)
 {
-	cpu_data[0].options &= ~MIPS_CPU_FPU;
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++)
+		cpu_data[i].options &= ~MIPS_CPU_FPU;
 
 	return 1;
 }
diff --git a/arch/mips/kernel/smp_mt.c b/arch/mips/kernel/smp-mt.c
index 993b8bf56aa..57770902b9a 100644
--- a/arch/mips/kernel/smp_mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -1,8 +1,4 @@
 /*
- * Copyright (C) 2004, 2005 MIPS Technologies, Inc.  All rights reserved.
- *
- *  Elizabeth Clarke (beth@mips.com)
- *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
  *  published by the Free Software Foundation.
@@ -16,6 +12,10 @@
  *  with this program; if not, write to the Free Software Foundation, Inc.,
  *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
  *
+ * Copyright (C) 2004, 05, 06 MIPS Technologies, Inc.
+ *    Elizabeth Clarke (beth@mips.com)
+ *    Ralf Baechle (ralf@linux-mips.org)
+ * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org)
  */
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -24,6 +24,7 @@
 #include <linux/compiler.h>
 
 #include <asm/atomic.h>
+#include <asm/cacheflush.h>
 #include <asm/cpu.h>
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -33,8 +34,8 @@
 #include <asm/time.h>
 #include <asm/mipsregs.h>
 #include <asm/mipsmtregs.h>
-#include <asm/cacheflush.h>
-#include <asm/mips-boards/maltaint.h>
+#include <asm/mips_mt.h>
+#include <asm/mips-boards/maltaint.h>  /* This is f*cking wrong */
 
 #define MIPS_CPU_IPI_RESCHED_IRQ 0
 #define MIPS_CPU_IPI_CALL_IRQ 1
@@ -66,6 +67,7 @@ void __init sanitize_tlb_entries(void)
 	if (!cpu_has_mipsmt)
 		return;
 
+	/* Enable VPC */
 	set_c0_mvpcontrol(MVPCONTROL_VPC);
 
 	back_to_back_c0_hazard();
@@ -106,12 +108,12 @@ void __init sanitize_tlb_entries(void)
 
 static void ipi_resched_dispatch (struct pt_regs *regs)
 {
-	do_IRQ(MIPS_CPU_IPI_RESCHED_IRQ, regs);
+	do_IRQ(MIPSCPU_INT_BASE + MIPS_CPU_IPI_RESCHED_IRQ, regs);
 }
 
 static void ipi_call_dispatch (struct pt_regs *regs)
 {
-	do_IRQ(MIPS_CPU_IPI_CALL_IRQ, regs);
+	do_IRQ(MIPSCPU_INT_BASE + MIPS_CPU_IPI_CALL_IRQ, regs);
 }
 
 irqreturn_t ipi_resched_interrupt(int irq, void *dev_id, struct pt_regs *regs)
@@ -148,6 +150,11 @@ void plat_smp_setup(void)
 	unsigned long val;
 	int i, num;
 
+#ifdef CONFIG_MIPS_MT_FPAFF
+	/* If we have an FPU, enroll ourselves in the FPU-full mask */
+	if (cpu_has_fpu)
+		cpu_set(0, mt_fpu_cpumask);
+#endif /* CONFIG_MIPS_MT_FPAFF */
 	if (!cpu_has_mipsmt)
 		return;
 
@@ -155,6 +162,8 @@ void plat_smp_setup(void)
 	dvpe();
 	dmt();
 
+	mips_mt_set_cpuoptions();
+
 	/* Put MVPE's into 'configuration state' */
 	set_c0_mvpcontrol(MVPCONTROL_VPC);
 
@@ -189,11 +198,13 @@ void plat_smp_setup(void)
 
 			if (i != 0) {
 				write_vpe_c0_status((read_c0_status() & ~(ST0_IM | ST0_IE | ST0_KSU)) | ST0_CU0);
-				write_vpe_c0_cause(read_vpe_c0_cause() & ~CAUSEF_IP);
 
 				/* set config to be the same as vpe0, particularly kseg0 coherency alg */
 				write_vpe_c0_config( read_c0_config());
 
+				/* make sure there are no software interrupts pending */
+				write_vpe_c0_cause(read_vpe_c0_cause() & ~(C_SW1|C_SW0));
+
 				/* Propagate Config7 */
 				write_vpe_c0_config7(read_c0_config7());
 			}
@@ -233,16 +244,16 @@ void plat_smp_setup(void)
 	/* We'll wait until starting the secondaries before starting MVPE */
 
 	printk(KERN_INFO "Detected %i available secondary CPU(s)\n", num);
+}
 
+void __init plat_prepare_cpus(unsigned int max_cpus)
+{
 	/* set up ipi interrupts */
 	if (cpu_has_vint) {
 		set_vi_handler (MIPS_CPU_IPI_RESCHED_IRQ, ipi_resched_dispatch);
 		set_vi_handler (MIPS_CPU_IPI_CALL_IRQ, ipi_call_dispatch);
 	}
-}
 
-void __init plat_prepare_cpus(unsigned int max_cpus)
-{
 	cpu_ipi_resched_irq = MIPSCPU_INT_BASE + MIPS_CPU_IPI_RESCHED_IRQ;
 	cpu_ipi_call_irq = MIPSCPU_INT_BASE + MIPS_CPU_IPI_CALL_IRQ;
 
@@ -287,7 +298,8 @@ void prom_boot_secondary(int cpu, struct task_struct *idle)
 	/* global pointer */
 	write_tc_gpr_gp((unsigned long)gp);
 
-	flush_icache_range((unsigned long)gp, (unsigned long)(gp + 1));
+	flush_icache_range((unsigned long)gp,
+	                   (unsigned long)(gp + sizeof(struct thread_info)));
 
 	/* finally out of configuration and into chaos */
 	clear_c0_mvpcontrol(MVPCONTROL_VPC);
@@ -305,6 +317,12 @@ void prom_smp_finish(void)
 {
 	write_c0_compare(read_c0_count() + (8* mips_hpt_frequency/HZ));
 
+#ifdef CONFIG_MIPS_MT_FPAFF
+	/* If we have an FPU, enroll ourselves in the FPU-full mask */
+	if (cpu_has_fpu)
+		cpu_set(smp_processor_id(), mt_fpu_cpumask);
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
 	local_irq_enable();
 }
 
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 78d171bfa33..d42f358754a 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -38,6 +38,10 @@
 #include <asm/mmu_context.h>
 #include <asm/smp.h>
 
+#ifdef CONFIG_MIPS_MT_SMTC
+#include <asm/mipsmtregs.h>
+#endif /* CONFIG_MIPS_MT_SMTC */
+
 cpumask_t phys_cpu_present_map;		/* Bitmask of available CPUs */
 volatile cpumask_t cpu_callin_map;	/* Bitmask of started secondaries */
 cpumask_t cpu_online_map;		/* Bitmask of currently online CPUs */
@@ -85,6 +89,10 @@ asmlinkage void start_secondary(void)
 {
 	unsigned int cpu;
 
+#ifdef CONFIG_MIPS_MT_SMTC
+	/* Only do cpu_probe for first TC of CPU */
+	if ((read_c0_tcbind() & TCBIND_CURTC) == 0)
+#endif /* CONFIG_MIPS_MT_SMTC */
 	cpu_probe();
 	cpu_report();
 	per_cpu_trap_init();
@@ -179,11 +187,13 @@ int smp_call_function (void (*func) (void *info), void *info, int retry,
 	if (wait)
 		while (atomic_read(&data.finished) != cpus)
 			barrier();
+	call_data = NULL;
 	spin_unlock(&smp_call_lock);
 
 	return 0;
 }
 
+
 void smp_call_function_interrupt(void)
 {
 	void (*func) (void *info) = call_data->func;
@@ -446,5 +456,3 @@ subsys_initcall(topology_init);
 
 EXPORT_SYMBOL(flush_tlb_page);
 EXPORT_SYMBOL(flush_tlb_one);
-EXPORT_SYMBOL(cpu_data);
-EXPORT_SYMBOL(synchronize_irq);
diff --git a/arch/mips/kernel/smtc-asm.S b/arch/mips/kernel/smtc-asm.S
new file mode 100644
index 00000000000..c9d65196d91
--- /dev/null
+++ b/arch/mips/kernel/smtc-asm.S
@@ -0,0 +1,130 @@
+/*
+ * Assembly Language Functions for MIPS MT SMTC support
+ */
+
+/*
+ * This file should be built into the kernel only if CONFIG_MIPS_MT_SMTC is set. */
+
+#include <asm/regdef.h>
+#include <asm/asmmacro.h>
+#include <asm/stackframe.h>
+#include <asm/stackframe.h>
+
+/*
+ * "Software Interrupt" linkage.
+ *
+ * This is invoked when an "Interrupt" is sent from one TC to another,
+ * where the TC to be interrupted is halted, has it's Restart address
+ * and Status values saved by the "remote control" thread, then modified
+ * to cause execution to begin here, in kenel mode. This code then
+ * disguises the TC state as that of an exception and transfers
+ * control to the general exception or vectored interrupt handler.
+ */
+	.set noreorder
+
+/*
+The __smtc_ipi_vector would use k0 and k1 as temporaries and
+1) Set EXL (this is per-VPE, so this can't be done by proxy!)
+2) Restore the K/CU and IXMT bits to the pre "exception" state
+   (EXL means no interrupts and access to the kernel map).
+3) Set EPC to be the saved value of TCRestart.
+4) Jump to the exception handler entry point passed by the sender.
+
+CAN WE PROVE THAT WE WON'T DO THIS IF INTS DISABLED??
+*/
+
+/*
+ * Reviled and slandered vision: Set EXL and restore K/CU/IXMT
+ * state of pre-halt thread, then save everything and call
+ * thought some function pointer to imaginary_exception, which
+ * will parse a register value or memory message queue to
+ * deliver things like interprocessor interrupts. On return
+ * from that function, jump to the global ret_from_irq code
+ * to invoke the scheduler and return as appropriate.
+ */
+
+#define PT_PADSLOT4 (PT_R0-8)
+#define PT_PADSLOT5 (PT_R0-4)
+
+	.text
+	.align 5
+FEXPORT(__smtc_ipi_vector)
+	.set	noat
+	/* Disable thread scheduling to make Status update atomic */
+	DMT	27					# dmt	k1
+	ehb
+	/* Set EXL */
+	mfc0	k0,CP0_STATUS
+	ori	k0,k0,ST0_EXL
+	mtc0	k0,CP0_STATUS
+	ehb
+	/* Thread scheduling now inhibited by EXL. Restore TE state. */
+	andi	k1,k1,VPECONTROL_TE
+	beqz	k1,1f
+	emt
+1:
+	/*
+	 * The IPI sender has put some information on the anticipated
+	 * kernel stack frame.  If we were in user mode, this will be
+	 * built above the saved kernel SP.  If we were already in the
+	 * kernel, it will be built above the current CPU SP.
+	 *
+	 * Were we in kernel mode, as indicated by CU0?
+	 */
+	sll	k1,k0,3
+	.set noreorder
+	bltz	k1,2f
+	move	k1,sp
+	.set reorder
+	/*
+	 * If previously in user mode, set CU0 and use kernel stack.
+	 */
+	li	k1,ST0_CU0
+	or	k1,k1,k0
+	mtc0	k1,CP0_STATUS
+	ehb
+	get_saved_sp
+	/* Interrupting TC will have pre-set values in slots in the new frame */
+2:	subu	k1,k1,PT_SIZE
+	/* Load TCStatus Value */
+	lw	k0,PT_TCSTATUS(k1)
+	/* Write it to TCStatus to restore CU/KSU/IXMT state */
+	mtc0	k0,$2,1
+	ehb
+	lw	k0,PT_EPC(k1)
+	mtc0	k0,CP0_EPC
+	/* Save all will redundantly recompute the SP, but use it for now */
+	SAVE_ALL
+	CLI
+	move	a0,sp
+	/* Function to be invoked passed stack pad slot 5 */
+	lw	t0,PT_PADSLOT5(sp)
+	/* Argument from sender passed in stack pad slot 4 */
+	lw	a1,PT_PADSLOT4(sp)
+	jalr	t0
+	nop
+	j	ret_from_irq
+	nop
+
+/*
+ * Called from idle loop to provoke processing of queued IPIs
+ * First IPI message in queue passed as argument.
+ */
+
+LEAF(self_ipi)
+	/* Before anything else, block interrupts */
+	mfc0	t0,CP0_TCSTATUS
+	ori	t1,t0,TCSTATUS_IXMT
+	mtc0	t1,CP0_TCSTATUS
+	ehb
+	/* We know we're in kernel mode, so prepare stack frame */
+	subu	t1,sp,PT_SIZE
+	sw	ra,PT_EPC(t1)
+	sw	a0,PT_PADSLOT4(t1)
+	la	t2,ipi_decode
+	sw	t2,PT_PADSLOT5(t1)
+	/* Save pre-disable value of TCStatus */
+	sw	t0,PT_TCSTATUS(t1)
+	j	__smtc_ipi_vector
+	nop
+END(self_ipi)
diff --git a/arch/mips/kernel/smtc-proc.c b/arch/mips/kernel/smtc-proc.c
new file mode 100644
index 00000000000..6f370999617
--- /dev/null
+++ b/arch/mips/kernel/smtc-proc.c
@@ -0,0 +1,93 @@
+/*
+ * /proc hooks for SMTC kernel
+ * Copyright (C) 2005 Mips Technologies, Inc
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+
+#include <asm/cpu.h>
+#include <asm/processor.h>
+#include <asm/atomic.h>
+#include <asm/system.h>
+#include <asm/hardirq.h>
+#include <asm/mmu_context.h>
+#include <asm/smp.h>
+#include <asm/mipsregs.h>
+#include <asm/cacheflush.h>
+#include <linux/proc_fs.h>
+
+#include <asm/smtc_proc.h>
+
+/*
+ * /proc diagnostic and statistics hooks
+ */
+
+/*
+ * Statistics gathered
+ */
+unsigned long selfipis[NR_CPUS];
+
+struct smtc_cpu_proc smtc_cpu_stats[NR_CPUS];
+
+static struct proc_dir_entry *smtc_stats;
+
+atomic_t smtc_fpu_recoveries;
+
+static int proc_read_smtc(char *page, char **start, off_t off,
+                          int count, int *eof, void *data)
+{
+	int totalen = 0;
+	int len;
+	int i;
+	extern unsigned long ebase;
+
+	len = sprintf(page, "SMTC Status Word: 0x%08x\n", smtc_status);
+	totalen += len;
+	page += len;
+	len = sprintf(page, "Config7: 0x%08x\n", read_c0_config7());
+	totalen += len;
+	page += len;
+	len = sprintf(page, "EBASE: 0x%08lx\n", ebase);
+	totalen += len;
+	page += len;
+	len = sprintf(page, "Counter Interrupts taken per CPU (TC)\n");
+	totalen += len;
+	page += len;
+	for (i=0; i < NR_CPUS; i++) {
+		len = sprintf(page, "%d: %ld\n", i, smtc_cpu_stats[i].timerints);
+		totalen += len;
+		page += len;
+	}
+	len = sprintf(page, "Self-IPIs by CPU:\n");
+	totalen += len;
+	page += len;
+	for(i = 0; i < NR_CPUS; i++) {
+		len = sprintf(page, "%d: %ld\n", i, smtc_cpu_stats[i].selfipis);
+		totalen += len;
+		page += len;
+	}
+	len = sprintf(page, "%d Recoveries of \"stolen\" FPU\n",
+	              atomic_read(&smtc_fpu_recoveries));
+	totalen += len;
+	page += len;
+
+	return totalen;
+}
+
+void init_smtc_stats(void)
+{
+	int i;
+
+	for (i=0; i<NR_CPUS; i++) {
+		smtc_cpu_stats[i].timerints = 0;
+		smtc_cpu_stats[i].selfipis = 0;
+	}
+
+	atomic_set(&smtc_fpu_recoveries, 0);
+
+	smtc_stats = create_proc_read_entry("smtc", 0444, NULL,
+	                                    proc_read_smtc, NULL);
+}
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
new file mode 100644
index 00000000000..2e8e52c135e
--- /dev/null
+++ b/arch/mips/kernel/smtc.c
@@ -0,0 +1,1322 @@
+/* Copyright (C) 2004 Mips Technologies, Inc */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+
+#include <asm/cpu.h>
+#include <asm/processor.h>
+#include <asm/atomic.h>
+#include <asm/system.h>
+#include <asm/hardirq.h>
+#include <asm/hazards.h>
+#include <asm/mmu_context.h>
+#include <asm/smp.h>
+#include <asm/mipsregs.h>
+#include <asm/cacheflush.h>
+#include <asm/time.h>
+#include <asm/addrspace.h>
+#include <asm/smtc.h>
+#include <asm/smtc_ipi.h>
+#include <asm/smtc_proc.h>
+
+/*
+ * This file should be built into the kernel only if CONFIG_MIPS_MT_SMTC is set.
+ */
+
+/*
+ * MIPSCPU_INT_BASE is identically defined in both
+ * asm-mips/mips-boards/maltaint.h and asm-mips/mips-boards/simint.h,
+ * but as yet there's no properly organized include structure that
+ * will ensure that the right *int.h file will be included for a
+ * given platform build.
+ */
+
+#define MIPSCPU_INT_BASE	16
+
+#define MIPS_CPU_IPI_IRQ	1
+
+#define LOCK_MT_PRA() \
+	local_irq_save(flags); \
+	mtflags = dmt()
+
+#define UNLOCK_MT_PRA() \
+	emt(mtflags); \
+	local_irq_restore(flags)
+
+#define LOCK_CORE_PRA() \
+	local_irq_save(flags); \
+	mtflags = dvpe()
+
+#define UNLOCK_CORE_PRA() \
+	evpe(mtflags); \
+	local_irq_restore(flags)
+
+/*
+ * Data structures purely associated with SMTC parallelism
+ */
+
+
+/*
+ * Table for tracking ASIDs whose lifetime is prolonged.
+ */
+
+asiduse smtc_live_asid[MAX_SMTC_TLBS][MAX_SMTC_ASIDS];
+
+/*
+ * Clock interrupt "latch" buffers, per "CPU"
+ */
+
+unsigned int ipi_timer_latch[NR_CPUS];
+
+/*
+ * Number of InterProcessor Interupt (IPI) message buffers to allocate
+ */
+
+#define IPIBUF_PER_CPU 4
+
+struct smtc_ipi_q IPIQ[NR_CPUS];
+struct smtc_ipi_q freeIPIq;
+
+
+/* Forward declarations */
+
+void ipi_decode(struct pt_regs *, struct smtc_ipi *);
+void post_direct_ipi(int cpu, struct smtc_ipi *pipi);
+void setup_cross_vpe_interrupts(void);
+void init_smtc_stats(void);
+
+/* Global SMTC Status */
+
+unsigned int smtc_status = 0;
+
+/* Boot command line configuration overrides */
+
+static int vpelimit = 0;
+static int tclimit = 0;
+static int ipibuffers = 0;
+static int nostlb = 0;
+static int asidmask = 0;
+unsigned long smtc_asid_mask = 0xff;
+
+static int __init maxvpes(char *str)
+{
+	get_option(&str, &vpelimit);
+	return 1;
+}
+
+static int __init maxtcs(char *str)
+{
+	get_option(&str, &tclimit);
+	return 1;
+}
+
+static int __init ipibufs(char *str)
+{
+	get_option(&str, &ipibuffers);
+	return 1;
+}
+
+static int __init stlb_disable(char *s)
+{
+	nostlb = 1;
+	return 1;
+}
+
+static int __init asidmask_set(char *str)
+{
+	get_option(&str, &asidmask);
+	switch(asidmask) {
+	case 0x1:
+	case 0x3:
+	case 0x7:
+	case 0xf:
+	case 0x1f:
+	case 0x3f:
+	case 0x7f:
+	case 0xff:
+		smtc_asid_mask = (unsigned long)asidmask;
+		break;
+	default:
+		printk("ILLEGAL ASID mask 0x%x from command line\n", asidmask);
+	}
+	return 1;
+}
+
+__setup("maxvpes=", maxvpes);
+__setup("maxtcs=", maxtcs);
+__setup("ipibufs=", ipibufs);
+__setup("nostlb", stlb_disable);
+__setup("asidmask=", asidmask_set);
+
+/* Enable additional debug checks before going into CPU idle loop */
+#define SMTC_IDLE_HOOK_DEBUG
+
+#ifdef SMTC_IDLE_HOOK_DEBUG
+
+static int hang_trig = 0;
+
+static int __init hangtrig_enable(char *s)
+{
+	hang_trig = 1;
+	return 1;
+}
+
+
+__setup("hangtrig", hangtrig_enable);
+
+#define DEFAULT_BLOCKED_IPI_LIMIT 32
+
+static int timerq_limit = DEFAULT_BLOCKED_IPI_LIMIT;
+
+static int __init tintq(char *str)
+{
+	get_option(&str, &timerq_limit);
+	return 1;
+}
+
+__setup("tintq=", tintq);
+
+int imstuckcount[2][8];
+/* vpemask represents IM/IE bits of per-VPE Status registers, low-to-high */
+int vpemask[2][8] = {{0,1,1,0,0,0,0,1},{0,1,0,0,0,0,0,1}};
+int tcnoprog[NR_CPUS];
+static atomic_t idle_hook_initialized = {0};
+static int clock_hang_reported[NR_CPUS];
+
+#endif /* SMTC_IDLE_HOOK_DEBUG */
+
+/* Initialize shared TLB - the should probably migrate to smtc_setup_cpus() */
+
+void __init sanitize_tlb_entries(void)
+{
+	printk("Deprecated sanitize_tlb_entries() invoked\n");
+}
+
+
+/*
+ * Configure shared TLB - VPC configuration bit must be set by caller
+ */
+
+void smtc_configure_tlb(void)
+{
+	int i,tlbsiz,vpes;
+	unsigned long mvpconf0;
+	unsigned long config1val;
+
+	/* Set up ASID preservation table */
+	for (vpes=0; vpes<MAX_SMTC_TLBS; vpes++) {
+	    for(i = 0; i < MAX_SMTC_ASIDS; i++) {
+		smtc_live_asid[vpes][i] = 0;
+	    }
+	}
+	mvpconf0 = read_c0_mvpconf0();
+
+	if ((vpes = ((mvpconf0 & MVPCONF0_PVPE)
+			>> MVPCONF0_PVPE_SHIFT) + 1) > 1) {
+	    /* If we have multiple VPEs, try to share the TLB */
+	    if ((mvpconf0 & MVPCONF0_TLBS) && !nostlb) {
+		/*
+		 * If TLB sizing is programmable, shared TLB
+		 * size is the total available complement.
+		 * Otherwise, we have to take the sum of all
+		 * static VPE TLB entries.
+		 */
+		if ((tlbsiz = ((mvpconf0 & MVPCONF0_PTLBE)
+				>> MVPCONF0_PTLBE_SHIFT)) == 0) {
+		    /*
+		     * If there's more than one VPE, there had better
+		     * be more than one TC, because we need one to bind
+		     * to each VPE in turn to be able to read
+		     * its configuration state!
+		     */
+		    settc(1);
+		    /* Stop the TC from doing anything foolish */
+		    write_tc_c0_tchalt(TCHALT_H);
+		    mips_ihb();
+		    /* No need to un-Halt - that happens later anyway */
+		    for (i=0; i < vpes; i++) {
+		    	write_tc_c0_tcbind(i);
+			/*
+			 * To be 100% sure we're really getting the right
+			 * information, we exit the configuration state
+			 * and do an IHB after each rebinding.
+			 */
+			write_c0_mvpcontrol(
+				read_c0_mvpcontrol() & ~ MVPCONTROL_VPC );
+			mips_ihb();
+			/*
+			 * Only count if the MMU Type indicated is TLB
+			 */
+			if(((read_vpe_c0_config() & MIPS_CONF_MT) >> 7) == 1) {
+				config1val = read_vpe_c0_config1();
+				tlbsiz += ((config1val >> 25) & 0x3f) + 1;
+			}
+
+			/* Put core back in configuration state */
+			write_c0_mvpcontrol(
+				read_c0_mvpcontrol() | MVPCONTROL_VPC );
+			mips_ihb();
+		    }
+		}
+		write_c0_mvpcontrol(read_c0_mvpcontrol() | MVPCONTROL_STLB);
+
+		/*
+		 * Setup kernel data structures to use software total,
+		 * rather than read the per-VPE Config1 value. The values
+		 * for "CPU 0" gets copied to all the other CPUs as part
+		 * of their initialization in smtc_cpu_setup().
+		 */
+
+		tlbsiz = tlbsiz & 0x3f;	/* MIPS32 limits TLB indices to 64 */
+		cpu_data[0].tlbsize = tlbsiz;
+		smtc_status |= SMTC_TLB_SHARED;
+
+		printk("TLB of %d entry pairs shared by %d VPEs\n",
+			tlbsiz, vpes);
+	    } else {
+		printk("WARNING: TLB Not Sharable on SMTC Boot!\n");
+	    }
+	}
+}
+
+
+/*
+ * Incrementally build the CPU map out of constituent MIPS MT cores,
+ * using the specified available VPEs and TCs.  Plaform code needs
+ * to ensure that each MIPS MT core invokes this routine on reset,
+ * one at a time(!).
+ *
+ * This version of the build_cpu_map and prepare_cpus routines assumes
+ * that *all* TCs of a MIPS MT core will be used for Linux, and that
+ * they will be spread across *all* available VPEs (to minimise the
+ * loss of efficiency due to exception service serialization).
+ * An improved version would pick up configuration information and
+ * possibly leave some TCs/VPEs as "slave" processors.
+ *
+ * Use c0_MVPConf0 to find out how many TCs are available, setting up
+ * phys_cpu_present_map and the logical/physical mappings.
+ */
+
+int __init mipsmt_build_cpu_map(int start_cpu_slot)
+{
+	int i, ntcs;
+
+	/*
+	 * The CPU map isn't actually used for anything at this point,
+	 * so it's not clear what else we should do apart from set
+	 * everything up so that "logical" = "physical".
+	 */
+	ntcs = ((read_c0_mvpconf0() & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
+	for (i=start_cpu_slot; i<NR_CPUS && i<ntcs; i++) {
+		cpu_set(i, phys_cpu_present_map);
+		__cpu_number_map[i] = i;
+		__cpu_logical_map[i] = i;
+	}
+	/* Initialize map of CPUs with FPUs */
+	cpus_clear(mt_fpu_cpumask);
+
+	/* One of those TC's is the one booting, and not a secondary... */
+	printk("%i available secondary CPU TC(s)\n", i - 1);
+
+	return i;
+}
+
+/*
+ * Common setup before any secondaries are started
+ * Make sure all CPU's are in a sensible state before we boot any of the
+ * secondaries.
+ *
+ * For MIPS MT "SMTC" operation, we set up all TCs, spread as evenly
+ * as possible across the available VPEs.
+ */
+
+static void smtc_tc_setup(int vpe, int tc, int cpu)
+{
+	settc(tc);
+	write_tc_c0_tchalt(TCHALT_H);
+	mips_ihb();
+	write_tc_c0_tcstatus((read_tc_c0_tcstatus()
+			& ~(TCSTATUS_TKSU | TCSTATUS_DA | TCSTATUS_IXMT))
+			| TCSTATUS_A);
+	write_tc_c0_tccontext(0);
+	/* Bind tc to vpe */
+	write_tc_c0_tcbind(vpe);
+	/* In general, all TCs should have the same cpu_data indications */
+	memcpy(&cpu_data[cpu], &cpu_data[0], sizeof(struct cpuinfo_mips));
+	/* For 34Kf, start with TC/CPU 0 as sole owner of single FPU context */
+	if (cpu_data[0].cputype == CPU_34K)
+		cpu_data[cpu].options &= ~MIPS_CPU_FPU;
+	cpu_data[cpu].vpe_id = vpe;
+	cpu_data[cpu].tc_id = tc;
+}
+
+
+void mipsmt_prepare_cpus(void)
+{
+	int i, vpe, tc, ntc, nvpe, tcpervpe, slop, cpu;
+	unsigned long flags;
+	unsigned long val;
+	int nipi;
+	struct smtc_ipi *pipi;
+
+	/* disable interrupts so we can disable MT */
+	local_irq_save(flags);
+	/* disable MT so we can configure */
+	dvpe();
+	dmt();
+
+	freeIPIq.lock = SPIN_LOCK_UNLOCKED;
+
+	/*
+	 * We probably don't have as many VPEs as we do SMP "CPUs",
+	 * but it's possible - and in any case we'll never use more!
+	 */
+	for (i=0; i<NR_CPUS; i++) {
+		IPIQ[i].head = IPIQ[i].tail = NULL;
+		IPIQ[i].lock = SPIN_LOCK_UNLOCKED;
+		IPIQ[i].depth = 0;
+		ipi_timer_latch[i] = 0;
+	}
+
+	/* cpu_data index starts at zero */
+	cpu = 0;
+	cpu_data[cpu].vpe_id = 0;
+	cpu_data[cpu].tc_id = 0;
+	cpu++;
+
+	/* Report on boot-time options */
+	mips_mt_set_cpuoptions ();
+	if (vpelimit > 0)
+		printk("Limit of %d VPEs set\n", vpelimit);
+	if (tclimit > 0)
+		printk("Limit of %d TCs set\n", tclimit);
+	if (nostlb) {
+		printk("Shared TLB Use Inhibited - UNSAFE for Multi-VPE Operation\n");
+	}
+	if (asidmask)
+		printk("ASID mask value override to 0x%x\n", asidmask);
+
+	/* Temporary */
+#ifdef SMTC_IDLE_HOOK_DEBUG
+	if (hang_trig)
+		printk("Logic Analyser Trigger on suspected TC hang\n");
+#endif /* SMTC_IDLE_HOOK_DEBUG */
+
+	/* Put MVPE's into 'configuration state' */
+	write_c0_mvpcontrol( read_c0_mvpcontrol() | MVPCONTROL_VPC );
+
+	val = read_c0_mvpconf0();
+	nvpe = ((val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1;
+	if (vpelimit > 0 && nvpe > vpelimit)
+		nvpe = vpelimit;
+	ntc = ((val & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
+	if (ntc > NR_CPUS)
+		ntc = NR_CPUS;
+	if (tclimit > 0 && ntc > tclimit)
+		ntc = tclimit;
+	tcpervpe = ntc / nvpe;
+	slop = ntc % nvpe;	/* Residual TCs, < NVPE */
+
+	/* Set up shared TLB */
+	smtc_configure_tlb();
+
+	for (tc = 0, vpe = 0 ; (vpe < nvpe) && (tc < ntc) ; vpe++) {
+		/*
+		 * Set the MVP bits.
+		 */
+		settc(tc);
+		write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() | VPECONF0_MVP);
+		if (vpe != 0)
+			printk(", ");
+		printk("VPE %d: TC", vpe);
+		for (i = 0; i < tcpervpe; i++) {
+			/*
+			 * TC 0 is bound to VPE 0 at reset,
+			 * and is presumably executing this
+			 * code.  Leave it alone!
+			 */
+			if (tc != 0) {
+				smtc_tc_setup(vpe,tc, cpu);
+				cpu++;
+			}
+			printk(" %d", tc);
+			tc++;
+		}
+		if (slop) {
+			if (tc != 0) {
+				smtc_tc_setup(vpe,tc, cpu);
+				cpu++;
+			}
+			printk(" %d", tc);
+			tc++;
+			slop--;
+		}
+		if (vpe != 0) {
+			/*
+			 * Clear any stale software interrupts from VPE's Cause
+			 */
+			write_vpe_c0_cause(0);
+
+			/*
+			 * Clear ERL/EXL of VPEs other than 0
+			 * and set restricted interrupt enable/mask.
+			 */
+			write_vpe_c0_status((read_vpe_c0_status()
+				& ~(ST0_BEV | ST0_ERL | ST0_EXL | ST0_IM))
+				| (STATUSF_IP0 | STATUSF_IP1 | STATUSF_IP7
+				| ST0_IE));
+			/*
+			 * set config to be the same as vpe0,
+			 *  particularly kseg0 coherency alg
+			 */
+			write_vpe_c0_config(read_c0_config());
+			/* Clear any pending timer interrupt */
+			write_vpe_c0_compare(0);
+			/* Propagate Config7 */
+			write_vpe_c0_config7(read_c0_config7());
+		}
+		/* enable multi-threading within VPE */
+		write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() | VPECONTROL_TE);
+		/* enable the VPE */
+		write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() | VPECONF0_VPA);
+	}
+
+	/*
+	 * Pull any physically present but unused TCs out of circulation.
+	 */
+	while (tc < (((val & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1)) {
+		cpu_clear(tc, phys_cpu_present_map);
+		cpu_clear(tc, cpu_present_map);
+		tc++;
+	}
+
+	/* release config state */
+	write_c0_mvpcontrol( read_c0_mvpcontrol() & ~ MVPCONTROL_VPC );
+
+	printk("\n");
+
+	/* Set up coprocessor affinity CPU mask(s) */
+
+	for (tc = 0; tc < ntc; tc++) {
+		if(cpu_data[tc].options & MIPS_CPU_FPU)
+			cpu_set(tc, mt_fpu_cpumask);
+	}
+
+	/* set up ipi interrupts... */
+
+	/* If we have multiple VPEs running, set up the cross-VPE interrupt */
+
+	if (nvpe > 1)
+		setup_cross_vpe_interrupts();
+
+	/* Set up queue of free IPI "messages". */
+	nipi = NR_CPUS * IPIBUF_PER_CPU;
+	if (ipibuffers > 0)
+		nipi = ipibuffers;
+
+	pipi = kmalloc(nipi *sizeof(struct smtc_ipi), GFP_KERNEL);
+	if (pipi == NULL)
+		panic("kmalloc of IPI message buffers failed\n");
+	else
+		printk("IPI buffer pool of %d buffers\n", nipi);
+	for (i = 0; i < nipi; i++) {
+		smtc_ipi_nq(&freeIPIq, pipi);
+		pipi++;
+	}
+
+	/* Arm multithreading and enable other VPEs - but all TCs are Halted */
+	emt(EMT_ENABLE);
+	evpe(EVPE_ENABLE);
+	local_irq_restore(flags);
+	/* Initialize SMTC /proc statistics/diagnostics */
+	init_smtc_stats();
+}
+
+
+/*
+ * Setup the PC, SP, and GP of a secondary processor and start it
+ * running!
+ * smp_bootstrap is the place to resume from
+ * __KSTK_TOS(idle) is apparently the stack pointer
+ * (unsigned long)idle->thread_info the gp
+ *
+ */
+void smtc_boot_secondary(int cpu, struct task_struct *idle)
+{
+	extern u32 kernelsp[NR_CPUS];
+	long flags;
+	int mtflags;
+
+	LOCK_MT_PRA();
+	if (cpu_data[cpu].vpe_id != cpu_data[smp_processor_id()].vpe_id) {
+		dvpe();
+	}
+	settc(cpu_data[cpu].tc_id);
+
+	/* pc */
+	write_tc_c0_tcrestart((unsigned long)&smp_bootstrap);
+
+	/* stack pointer */
+	kernelsp[cpu] = __KSTK_TOS(idle);
+	write_tc_gpr_sp(__KSTK_TOS(idle));
+
+	/* global pointer */
+	write_tc_gpr_gp((unsigned long)idle->thread_info);
+
+	smtc_status |= SMTC_MTC_ACTIVE;
+	write_tc_c0_tchalt(0);
+	if (cpu_data[cpu].vpe_id != cpu_data[smp_processor_id()].vpe_id) {
+		evpe(EVPE_ENABLE);
+	}
+	UNLOCK_MT_PRA();
+}
+
+void smtc_init_secondary(void)
+{
+	/*
+	 * Start timer on secondary VPEs if necessary.
+	 * mips_timer_setup should already have been invoked by init/main
+	 * on "boot" TC.  Like per_cpu_trap_init() hack, this assumes that
+	 * SMTC init code assigns TCs consdecutively and in ascending order
+	 * to across available VPEs.
+	 */
+	if(((read_c0_tcbind() & TCBIND_CURTC) != 0)
+	&& ((read_c0_tcbind() & TCBIND_CURVPE)
+	    != cpu_data[smp_processor_id() - 1].vpe_id)){
+		write_c0_compare (read_c0_count() + mips_hpt_frequency/HZ);
+	}
+
+	local_irq_enable();
+}
+
+void smtc_smp_finish(void)
+{
+	printk("TC %d going on-line as CPU %d\n",
+		cpu_data[smp_processor_id()].tc_id, smp_processor_id());
+}
+
+void smtc_cpus_done(void)
+{
+}
+
+/*
+ * Support for SMTC-optimized driver IRQ registration
+ */
+
+/*
+ * SMTC Kernel needs to manipulate low-level CPU interrupt mask
+ * in do_IRQ. These are passed in setup_irq_smtc() and stored
+ * in this table.
+ */
+
+int setup_irq_smtc(unsigned int irq, struct irqaction * new,
+			unsigned long hwmask)
+{
+	irq_hwmask[irq] = hwmask;
+
+	return setup_irq(irq, new);
+}
+
+/*
+ * IPI model for SMTC is tricky, because interrupts aren't TC-specific.
+ * Within a VPE one TC can interrupt another by different approaches.
+ * The easiest to get right would probably be to make all TCs except
+ * the target IXMT and set a software interrupt, but an IXMT-based
+ * scheme requires that a handler must run before a new IPI could
+ * be sent, which would break the "broadcast" loops in MIPS MT.
+ * A more gonzo approach within a VPE is to halt the TC, extract
+ * its Restart, Status, and a couple of GPRs, and program the Restart
+ * address to emulate an interrupt.
+ *
+ * Within a VPE, one can be confident that the target TC isn't in
+ * a critical EXL state when halted, since the write to the Halt
+ * register could not have issued on the writing thread if the
+ * halting thread had EXL set. So k0 and k1 of the target TC
+ * can be used by the injection code.  Across VPEs, one can't
+ * be certain that the target TC isn't in a critical exception
+ * state. So we try a two-step process of sending a software
+ * interrupt to the target VPE, which either handles the event
+ * itself (if it was the target) or injects the event within
+ * the VPE.
+ */
+
+void smtc_ipi_qdump(void)
+{
+	int i;
+
+	for (i = 0; i < NR_CPUS ;i++) {
+		printk("IPIQ[%d]: head = 0x%x, tail = 0x%x, depth = %d\n",
+			i, (unsigned)IPIQ[i].head, (unsigned)IPIQ[i].tail,
+			IPIQ[i].depth);
+	}
+}
+
+/*
+ * The standard atomic.h primitives don't quite do what we want
+ * here: We need an atomic add-and-return-previous-value (which
+ * could be done with atomic_add_return and a decrement) and an
+ * atomic set/zero-and-return-previous-value (which can't really
+ * be done with the atomic.h primitives). And since this is
+ * MIPS MT, we can assume that we have LL/SC.
+ */
+static __inline__ int atomic_postincrement(unsigned int *pv)
+{
+	unsigned long result;
+
+	unsigned long temp;
+
+	__asm__ __volatile__(
+	"1:	ll	%0, %2					\n"
+	"	addu	%1, %0, 1				\n"
+	"	sc	%1, %2					\n"
+	"	beqz	%1, 1b					\n"
+	"	sync						\n"
+	: "=&r" (result), "=&r" (temp), "=m" (*pv)
+	: "m" (*pv)
+	: "memory");
+
+	return result;
+}
+
+/* No longer used in IPI dispatch, but retained for future recycling */
+
+static __inline__ int atomic_postclear(unsigned int *pv)
+{
+	unsigned long result;
+
+	unsigned long temp;
+
+	__asm__ __volatile__(
+	"1:	ll	%0, %2					\n"
+	"	or	%1, $0, $0				\n"
+	"	sc	%1, %2					\n"
+	"	beqz	%1, 1b					\n"
+	"	sync						\n"
+	: "=&r" (result), "=&r" (temp), "=m" (*pv)
+	: "m" (*pv)
+	: "memory");
+
+	return result;
+}
+
+
+void smtc_send_ipi(int cpu, int type, unsigned int action)
+{
+	int tcstatus;
+	struct smtc_ipi *pipi;
+	long flags;
+	int mtflags;
+
+	if (cpu == smp_processor_id()) {
+		printk("Cannot Send IPI to self!\n");
+		return;
+	}
+	/* Set up a descriptor, to be delivered either promptly or queued */
+	pipi = smtc_ipi_dq(&freeIPIq);
+	if (pipi == NULL) {
+		bust_spinlocks(1);
+		mips_mt_regdump(dvpe());
+		panic("IPI Msg. Buffers Depleted\n");
+	}
+	pipi->type = type;
+	pipi->arg = (void *)action;
+	pipi->dest = cpu;
+	if (cpu_data[cpu].vpe_id != cpu_data[smp_processor_id()].vpe_id) {
+		/* If not on same VPE, enqueue and send cross-VPE interupt */
+		smtc_ipi_nq(&IPIQ[cpu], pipi);
+		LOCK_CORE_PRA();
+		settc(cpu_data[cpu].tc_id);
+		write_vpe_c0_cause(read_vpe_c0_cause() | C_SW1);
+		UNLOCK_CORE_PRA();
+	} else {
+		/*
+		 * Not sufficient to do a LOCK_MT_PRA (dmt) here,
+		 * since ASID shootdown on the other VPE may
+		 * collide with this operation.
+		 */
+		LOCK_CORE_PRA();
+		settc(cpu_data[cpu].tc_id);
+		/* Halt the targeted TC */
+		write_tc_c0_tchalt(TCHALT_H);
+		mips_ihb();
+
+		/*
+	 	 * Inspect TCStatus - if IXMT is set, we have to queue
+		 * a message. Otherwise, we set up the "interrupt"
+		 * of the other TC
+	 	 */
+		tcstatus = read_tc_c0_tcstatus();
+
+		if ((tcstatus & TCSTATUS_IXMT) != 0) {
+			/*
+			 * Spin-waiting here can deadlock,
+			 * so we queue the message for the target TC.
+			 */
+			write_tc_c0_tchalt(0);
+			UNLOCK_CORE_PRA();
+			/* Try to reduce redundant timer interrupt messages */
+			if(type == SMTC_CLOCK_TICK) {
+			    if(atomic_postincrement(&ipi_timer_latch[cpu])!=0) {
+				smtc_ipi_nq(&freeIPIq, pipi);
+				return;
+			    }
+			}
+			smtc_ipi_nq(&IPIQ[cpu], pipi);
+		} else {
+			post_direct_ipi(cpu, pipi);
+			write_tc_c0_tchalt(0);
+			UNLOCK_CORE_PRA();
+		}
+	}
+}
+
+/*
+ * Send IPI message to Halted TC, TargTC/TargVPE already having been set
+ */
+void post_direct_ipi(int cpu, struct smtc_ipi *pipi)
+{
+	struct pt_regs *kstack;
+	unsigned long tcstatus;
+	unsigned long tcrestart;
+	extern u32 kernelsp[NR_CPUS];
+	extern void __smtc_ipi_vector(void);
+
+	/* Extract Status, EPC from halted TC */
+	tcstatus = read_tc_c0_tcstatus();
+	tcrestart = read_tc_c0_tcrestart();
+	/* If TCRestart indicates a WAIT instruction, advance the PC */
+	if ((tcrestart & 0x80000000)
+	    && ((*(unsigned int *)tcrestart & 0xfe00003f) == 0x42000020)) {
+		tcrestart += 4;
+	}
+	/*
+	 * Save on TC's future kernel stack
+	 *
+	 * CU bit of Status is indicator that TC was
+	 * already running on a kernel stack...
+	 */
+	if(tcstatus & ST0_CU0)  {
+		/* Note that this "- 1" is pointer arithmetic */
+		kstack = ((struct pt_regs *)read_tc_gpr_sp()) - 1;
+	} else {
+		kstack = ((struct pt_regs *)kernelsp[cpu]) - 1;
+	}
+
+	kstack->cp0_epc = (long)tcrestart;
+	/* Save TCStatus */
+	kstack->cp0_tcstatus = tcstatus;
+	/* Pass token of operation to be performed kernel stack pad area */
+	kstack->pad0[4] = (unsigned long)pipi;
+	/* Pass address of function to be called likewise */
+	kstack->pad0[5] = (unsigned long)&ipi_decode;
+	/* Set interrupt exempt and kernel mode */
+	tcstatus |= TCSTATUS_IXMT;
+	tcstatus &= ~TCSTATUS_TKSU;
+	write_tc_c0_tcstatus(tcstatus);
+	ehb();
+	/* Set TC Restart address to be SMTC IPI vector */
+	write_tc_c0_tcrestart(__smtc_ipi_vector);
+}
+
+void ipi_resched_interrupt(struct pt_regs *regs)
+{
+	/* Return from interrupt should be enough to cause scheduler check */
+}
+
+
+void ipi_call_interrupt(struct pt_regs *regs)
+{
+	/* Invoke generic function invocation code in smp.c */
+	smp_call_function_interrupt();
+}
+
+void ipi_decode(struct pt_regs *regs, struct smtc_ipi *pipi)
+{
+	void *arg_copy = pipi->arg;
+	int type_copy = pipi->type;
+	int dest_copy = pipi->dest;
+
+	smtc_ipi_nq(&freeIPIq, pipi);
+	switch (type_copy) {
+		case SMTC_CLOCK_TICK:
+			/* Invoke Clock "Interrupt" */
+			ipi_timer_latch[dest_copy] = 0;
+#ifdef SMTC_IDLE_HOOK_DEBUG
+			clock_hang_reported[dest_copy] = 0;
+#endif /* SMTC_IDLE_HOOK_DEBUG */
+			local_timer_interrupt(0, NULL, regs);
+			break;
+		case LINUX_SMP_IPI:
+			switch ((int)arg_copy) {
+			case SMP_RESCHEDULE_YOURSELF:
+				ipi_resched_interrupt(regs);
+				break;
+			case SMP_CALL_FUNCTION:
+				ipi_call_interrupt(regs);
+				break;
+			default:
+				printk("Impossible SMTC IPI Argument 0x%x\n",
+					(int)arg_copy);
+				break;
+			}
+			break;
+		default:
+			printk("Impossible SMTC IPI Type 0x%x\n", type_copy);
+			break;
+	}
+}
+
+void deferred_smtc_ipi(struct pt_regs *regs)
+{
+	struct smtc_ipi *pipi;
+	unsigned long flags;
+/* DEBUG */
+	int q = smp_processor_id();
+
+	/*
+	 * Test is not atomic, but much faster than a dequeue,
+	 * and the vast majority of invocations will have a null queue.
+	 */
+	if(IPIQ[q].head != NULL) {
+		while((pipi = smtc_ipi_dq(&IPIQ[q])) != NULL) {
+			/* ipi_decode() should be called with interrupts off */
+			local_irq_save(flags);
+			ipi_decode(regs, pipi);
+			local_irq_restore(flags);
+		}
+	}
+}
+
+/*
+ * Send clock tick to all TCs except the one executing the funtion
+ */
+
+void smtc_timer_broadcast(int vpe)
+{
+	int cpu;
+	int myTC = cpu_data[smp_processor_id()].tc_id;
+	int myVPE = cpu_data[smp_processor_id()].vpe_id;
+
+	smtc_cpu_stats[smp_processor_id()].timerints++;
+
+	for_each_online_cpu(cpu) {
+		if (cpu_data[cpu].vpe_id == myVPE &&
+		    cpu_data[cpu].tc_id != myTC)
+			smtc_send_ipi(cpu, SMTC_CLOCK_TICK, 0);
+	}
+}
+
+/*
+ * Cross-VPE interrupts in the SMTC prototype use "software interrupts"
+ * set via cross-VPE MTTR manipulation of the Cause register. It would be
+ * in some regards preferable to have external logic for "doorbell" hardware
+ * interrupts.
+ */
+
+static int cpu_ipi_irq = MIPSCPU_INT_BASE + MIPS_CPU_IPI_IRQ;
+
+static irqreturn_t ipi_interrupt(int irq, void *dev_idm, struct pt_regs *regs)
+{
+	int my_vpe = cpu_data[smp_processor_id()].vpe_id;
+	int my_tc = cpu_data[smp_processor_id()].tc_id;
+	int cpu;
+	struct smtc_ipi *pipi;
+	unsigned long tcstatus;
+	int sent;
+	long flags;
+	unsigned int mtflags;
+	unsigned int vpflags;
+
+	/*
+	 * So long as cross-VPE interrupts are done via
+	 * MFTR/MTTR read-modify-writes of Cause, we need
+	 * to stop other VPEs whenever the local VPE does
+	 * anything similar.
+	 */
+	local_irq_save(flags);
+	vpflags = dvpe();
+	clear_c0_cause(0x100 << MIPS_CPU_IPI_IRQ);
+	set_c0_status(0x100 << MIPS_CPU_IPI_IRQ);
+	irq_enable_hazard();
+	evpe(vpflags);
+	local_irq_restore(flags);
+
+	/*
+	 * Cross-VPE Interrupt handler: Try to directly deliver IPIs
+	 * queued for TCs on this VPE other than the current one.
+	 * Return-from-interrupt should cause us to drain the queue
+	 * for the current TC, so we ought not to have to do it explicitly here.
+	 */
+
+	for_each_online_cpu(cpu) {
+		if (cpu_data[cpu].vpe_id != my_vpe)
+			continue;
+
+		pipi = smtc_ipi_dq(&IPIQ[cpu]);
+		if (pipi != NULL) {
+			if (cpu_data[cpu].tc_id != my_tc) {
+				sent = 0;
+				LOCK_MT_PRA();
+				settc(cpu_data[cpu].tc_id);
+				write_tc_c0_tchalt(TCHALT_H);
+				mips_ihb();
+				tcstatus = read_tc_c0_tcstatus();
+				if ((tcstatus & TCSTATUS_IXMT) == 0) {
+					post_direct_ipi(cpu, pipi);
+					sent = 1;
+				}
+				write_tc_c0_tchalt(0);
+				UNLOCK_MT_PRA();
+				if (!sent) {
+					smtc_ipi_req(&IPIQ[cpu], pipi);
+				}
+			} else {
+				/*
+				 * ipi_decode() should be called
+				 * with interrupts off
+				 */
+				local_irq_save(flags);
+				ipi_decode(regs, pipi);
+				local_irq_restore(flags);
+			}
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void ipi_irq_dispatch(struct pt_regs *regs)
+{
+	do_IRQ(cpu_ipi_irq, regs);
+}
+
+static struct irqaction irq_ipi;
+
+void setup_cross_vpe_interrupts(void)
+{
+	if (!cpu_has_vint)
+		panic("SMTC Kernel requires Vectored Interupt support");
+
+	set_vi_handler(MIPS_CPU_IPI_IRQ, ipi_irq_dispatch);
+
+	irq_ipi.handler = ipi_interrupt;
+	irq_ipi.flags = SA_INTERRUPT;
+	irq_ipi.name = "SMTC_IPI";
+
+	setup_irq_smtc(cpu_ipi_irq, &irq_ipi, (0x100 << MIPS_CPU_IPI_IRQ));
+
+	irq_desc[cpu_ipi_irq].status |= IRQ_PER_CPU;
+}
+
+/*
+ * SMTC-specific hacks invoked from elsewhere in the kernel.
+ */
+
+void smtc_idle_loop_hook(void)
+{
+#ifdef SMTC_IDLE_HOOK_DEBUG
+	int im;
+	int flags;
+	int mtflags;
+	int bit;
+	int vpe;
+	int tc;
+	int hook_ntcs;
+	/*
+	 * printk within DMT-protected regions can deadlock,
+	 * so buffer diagnostic messages for later output.
+	 */
+	char *pdb_msg;
+	char id_ho_db_msg[768]; /* worst-case use should be less than 700 */
+
+	if (atomic_read(&idle_hook_initialized) == 0) { /* fast test */
+		if (atomic_add_return(1, &idle_hook_initialized) == 1) {
+			int mvpconf0;
+			/* Tedious stuff to just do once */
+			mvpconf0 = read_c0_mvpconf0();
+			hook_ntcs = ((mvpconf0 & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
+			if (hook_ntcs > NR_CPUS)
+				hook_ntcs = NR_CPUS;
+			for (tc = 0; tc < hook_ntcs; tc++) {
+				tcnoprog[tc] = 0;
+				clock_hang_reported[tc] = 0;
+	    		}
+			for (vpe = 0; vpe < 2; vpe++)
+				for (im = 0; im < 8; im++)
+					imstuckcount[vpe][im] = 0;
+			printk("Idle loop test hook initialized for %d TCs\n", hook_ntcs);
+			atomic_set(&idle_hook_initialized, 1000);
+		} else {
+			/* Someone else is initializing in parallel - let 'em finish */
+			while (atomic_read(&idle_hook_initialized) < 1000)
+				;
+		}
+	}
+
+	/* Have we stupidly left IXMT set somewhere? */
+	if (read_c0_tcstatus() & 0x400) {
+		write_c0_tcstatus(read_c0_tcstatus() & ~0x400);
+		ehb();
+		printk("Dangling IXMT in cpu_idle()\n");
+	}
+
+	/* Have we stupidly left an IM bit turned off? */
+#define IM_LIMIT 2000
+	local_irq_save(flags);
+	mtflags = dmt();
+	pdb_msg = &id_ho_db_msg[0];
+	im = read_c0_status();
+	vpe = cpu_data[smp_processor_id()].vpe_id;
+	for (bit = 0; bit < 8; bit++) {
+		/*
+		 * In current prototype, I/O interrupts
+		 * are masked for VPE > 0
+		 */
+		if (vpemask[vpe][bit]) {
+			if (!(im & (0x100 << bit)))
+				imstuckcount[vpe][bit]++;
+			else
+				imstuckcount[vpe][bit] = 0;
+			if (imstuckcount[vpe][bit] > IM_LIMIT) {
+				set_c0_status(0x100 << bit);
+				ehb();
+				imstuckcount[vpe][bit] = 0;
+				pdb_msg += sprintf(pdb_msg,
+					"Dangling IM %d fixed for VPE %d\n", bit,
+					vpe);
+			}
+		}
+	}
+
+	/*
+	 * Now that we limit outstanding timer IPIs, check for hung TC
+	 */
+	for (tc = 0; tc < NR_CPUS; tc++) {
+		/* Don't check ourself - we'll dequeue IPIs just below */
+		if ((tc != smp_processor_id()) &&
+		    ipi_timer_latch[tc] > timerq_limit) {
+		    if (clock_hang_reported[tc] == 0) {
+			pdb_msg += sprintf(pdb_msg,
+				"TC %d looks hung with timer latch at %d\n",
+				tc, ipi_timer_latch[tc]);
+			clock_hang_reported[tc]++;
+			}
+		}
+	}
+	emt(mtflags);
+	local_irq_restore(flags);
+	if (pdb_msg != &id_ho_db_msg[0])
+		printk("CPU%d: %s", smp_processor_id(), id_ho_db_msg);
+#endif /* SMTC_IDLE_HOOK_DEBUG */
+	/*
+	 * To the extent that we've ever turned interrupts off,
+	 * we may have accumulated deferred IPIs.  This is subtle.
+	 * If we use the smtc_ipi_qdepth() macro, we'll get an
+	 * exact number - but we'll also disable interrupts
+	 * and create a window of failure where a new IPI gets
+	 * queued after we test the depth but before we re-enable
+	 * interrupts. So long as IXMT never gets set, however,
+	 * we should be OK:  If we pick up something and dispatch
+	 * it here, that's great. If we see nothing, but concurrent
+	 * with this operation, another TC sends us an IPI, IXMT
+	 * is clear, and we'll handle it as a real pseudo-interrupt
+	 * and not a pseudo-pseudo interrupt.
+	 */
+	if (IPIQ[smp_processor_id()].depth > 0) {
+		struct smtc_ipi *pipi;
+		extern void self_ipi(struct smtc_ipi *);
+
+		if ((pipi = smtc_ipi_dq(&IPIQ[smp_processor_id()])) != NULL) {
+			self_ipi(pipi);
+			smtc_cpu_stats[smp_processor_id()].selfipis++;
+		}
+	}
+}
+
+void smtc_soft_dump(void)
+{
+	int i;
+
+	printk("Counter Interrupts taken per CPU (TC)\n");
+	for (i=0; i < NR_CPUS; i++) {
+		printk("%d: %ld\n", i, smtc_cpu_stats[i].timerints);
+	}
+	printk("Self-IPI invocations:\n");
+	for (i=0; i < NR_CPUS; i++) {
+		printk("%d: %ld\n", i, smtc_cpu_stats[i].selfipis);
+	}
+	smtc_ipi_qdump();
+	printk("Timer IPI Backlogs:\n");
+	for (i=0; i < NR_CPUS; i++) {
+		printk("%d: %d\n", i, ipi_timer_latch[i]);
+	}
+	printk("%d Recoveries of \"stolen\" FPU\n",
+	       atomic_read(&smtc_fpu_recoveries));
+}
+
+
+/*
+ * TLB management routines special to SMTC
+ */
+
+void smtc_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu)
+{
+	unsigned long flags, mtflags, tcstat, prevhalt, asid;
+	int tlb, i;
+
+	/*
+	 * It would be nice to be able to use a spinlock here,
+	 * but this is invoked from within TLB flush routines
+	 * that protect themselves with DVPE, so if a lock is
+         * held by another TC, it'll never be freed.
+	 *
+	 * DVPE/DMT must not be done with interrupts enabled,
+	 * so even so most callers will already have disabled
+	 * them, let's be really careful...
+	 */
+
+	local_irq_save(flags);
+	if (smtc_status & SMTC_TLB_SHARED) {
+		mtflags = dvpe();
+		tlb = 0;
+	} else {
+		mtflags = dmt();
+		tlb = cpu_data[cpu].vpe_id;
+	}
+	asid = asid_cache(cpu);
+
+	do {
+		if (!((asid += ASID_INC) & ASID_MASK) ) {
+			if (cpu_has_vtag_icache)
+				flush_icache_all();
+			/* Traverse all online CPUs (hack requires contigous range) */
+			for (i = 0; i < num_online_cpus(); i++) {
+				/*
+				 * We don't need to worry about our own CPU, nor those of
+				 * CPUs who don't share our TLB.
+				 */
+				if ((i != smp_processor_id()) &&
+				    ((smtc_status & SMTC_TLB_SHARED) ||
+				     (cpu_data[i].vpe_id == cpu_data[cpu].vpe_id))) {
+					settc(cpu_data[i].tc_id);
+					prevhalt = read_tc_c0_tchalt() & TCHALT_H;
+					if (!prevhalt) {
+						write_tc_c0_tchalt(TCHALT_H);
+						mips_ihb();
+					}
+					tcstat = read_tc_c0_tcstatus();
+					smtc_live_asid[tlb][(tcstat & ASID_MASK)] |= (asiduse)(0x1 << i);
+					if (!prevhalt)
+						write_tc_c0_tchalt(0);
+				}
+			}
+			if (!asid)		/* fix version if needed */
+				asid = ASID_FIRST_VERSION;
+			local_flush_tlb_all();	/* start new asid cycle */
+		}
+	} while (smtc_live_asid[tlb][(asid & ASID_MASK)]);
+
+	/*
+	 * SMTC shares the TLB within VPEs and possibly across all VPEs.
+	 */
+	for (i = 0; i < num_online_cpus(); i++) {
+		if ((smtc_status & SMTC_TLB_SHARED) ||
+		    (cpu_data[i].vpe_id == cpu_data[cpu].vpe_id))
+			cpu_context(i, mm) = asid_cache(i) = asid;
+	}
+
+	if (smtc_status & SMTC_TLB_SHARED)
+		evpe(mtflags);
+	else
+		emt(mtflags);
+	local_irq_restore(flags);
+}
+
+/*
+ * Invoked from macros defined in mmu_context.h
+ * which must already have disabled interrupts
+ * and done a DVPE or DMT as appropriate.
+ */
+
+void smtc_flush_tlb_asid(unsigned long asid)
+{
+	int entry;
+	unsigned long ehi;
+
+	entry = read_c0_wired();
+
+	/* Traverse all non-wired entries */
+	while (entry < current_cpu_data.tlbsize) {
+		write_c0_index(entry);
+		ehb();
+		tlb_read();
+		ehb();
+		ehi = read_c0_entryhi();
+		if((ehi & ASID_MASK) == asid) {
+		    /*
+		     * Invalidate only entries with specified ASID,
+		     * makiing sure all entries differ.
+		     */
+		    write_c0_entryhi(CKSEG0 + (entry << (PAGE_SHIFT + 1)));
+		    write_c0_entrylo0(0);
+		    write_c0_entrylo1(0);
+		    mtc0_tlbw_hazard();
+		    tlb_write_indexed();
+		}
+		entry++;
+	}
+	write_c0_index(PARKED_INDEX);
+	tlbw_use_hazard();
+}
+
+/*
+ * Support for single-threading cache flush operations.
+ */
+
+int halt_state_save[NR_CPUS];
+
+/*
+ * To really, really be sure that nothing is being done
+ * by other TCs, halt them all.  This code assumes that
+ * a DVPE has already been done, so while their Halted
+ * state is theoretically architecturally unstable, in
+ * practice, it's not going to change while we're looking
+ * at it.
+ */
+
+void smtc_cflush_lockdown(void)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (cpu != smp_processor_id()) {
+			settc(cpu_data[cpu].tc_id);
+			halt_state_save[cpu] = read_tc_c0_tchalt();
+			write_tc_c0_tchalt(TCHALT_H);
+		}
+	}
+	mips_ihb();
+}
+
+/* It would be cheating to change the cpu_online states during a flush! */
+
+void smtc_cflush_release(void)
+{
+	int cpu;
+
+	/*
+	 * Start with a hazard barrier to ensure
+	 * that all CACHE ops have played through.
+	 */
+	mips_ihb();
+
+	for_each_online_cpu(cpu) {
+		if (cpu != smp_processor_id()) {
+			settc(cpu_data[cpu].tc_id);
+			write_tc_c0_tchalt(halt_state_save[cpu]);
+		}
+	}
+	mips_ihb();
+}
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index 5e51a2d8f3f..13ff4da598c 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -116,8 +116,7 @@ static void c0_timer_ack(void)
 	write_c0_compare(expirelo);
 
 	/* Check to see if we have missed any timer interrupts.  */
-	count = read_c0_count();
-	if ((count - expirelo) < 0x7fffffff) {
+	while (((count = read_c0_count()) - expirelo) < 0x7fffffff) {
 		/* missed_timer_count++; */
 		expirelo = count + cycles_per_jiffy;
 		write_c0_compare(expirelo);
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index bed0eb6cf55..4901f0a37fc 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -42,6 +42,7 @@
 #include <asm/watch.h>
 #include <asm/types.h>
 
+extern asmlinkage void handle_int(void);
 extern asmlinkage void handle_tlbm(void);
 extern asmlinkage void handle_tlbl(void);
 extern asmlinkage void handle_tlbs(void);
@@ -279,9 +280,16 @@ static DEFINE_SPINLOCK(die_lock);
 NORET_TYPE void ATTRIB_NORET die(const char * str, struct pt_regs * regs)
 {
 	static int die_counter;
+#ifdef CONFIG_MIPS_MT_SMTC
+	unsigned long dvpret = dvpe();
+#endif /* CONFIG_MIPS_MT_SMTC */
 
 	console_verbose();
 	spin_lock_irq(&die_lock);
+	bust_spinlocks(1);
+#ifdef CONFIG_MIPS_MT_SMTC
+	mips_mt_regdump(dvpret);
+#endif /* CONFIG_MIPS_MT_SMTC */
 	printk("%s[#%d]:\n", str, ++die_counter);
 	show_registers(regs);
 	spin_unlock_irq(&die_lock);
@@ -750,12 +758,43 @@ asmlinkage void do_cpu(struct pt_regs *regs)
 						&current->thread.fpu.soft);
 			if (sig)
 				force_sig(sig, current);
+#ifdef CONFIG_MIPS_MT_FPAFF
+			else {
+			/*
+			 * MIPS MT processors may have fewer FPU contexts
+			 * than CPU threads. If we've emulated more than
+			 * some threshold number of instructions, force
+			 * migration to a "CPU" that has FP support.
+			 */
+			 if(mt_fpemul_threshold > 0
+			 && ((current->thread.emulated_fp++
+			    > mt_fpemul_threshold))) {
+			  /*
+			   * If there's no FPU present, or if the
+			   * application has already restricted
+			   * the allowed set to exclude any CPUs
+			   * with FPUs, we'll skip the procedure.
+			   */
+			  if (cpus_intersects(current->cpus_allowed,
+			  			mt_fpu_cpumask)) {
+			    cpumask_t tmask;
+
+			    cpus_and(tmask,
+					current->thread.user_cpus_allowed,
+					mt_fpu_cpumask);
+			    set_cpus_allowed(current, tmask);
+			    current->thread.mflags |= MF_FPUBOUND;
+			  }
+			 }
+			}
+#endif /* CONFIG_MIPS_MT_FPAFF */
 		}
 
 		return;
 
 	case 2:
 	case 3:
+		die_if_kernel("do_cpu invoked from kernel context!", regs);
 		break;
 	}
 
@@ -793,6 +832,36 @@ asmlinkage void do_mcheck(struct pt_regs *regs)
 
 asmlinkage void do_mt(struct pt_regs *regs)
 {
+	int subcode;
+
+	die_if_kernel("MIPS MT Thread exception in kernel", regs);
+
+	subcode = (read_vpe_c0_vpecontrol() & VPECONTROL_EXCPT)
+			>> VPECONTROL_EXCPT_SHIFT;
+	switch (subcode) {
+	case 0:
+		printk(KERN_ERR "Thread Underflow\n");
+		break;
+	case 1:
+		printk(KERN_ERR "Thread Overflow\n");
+		break;
+	case 2:
+		printk(KERN_ERR "Invalid YIELD Qualifier\n");
+		break;
+	case 3:
+		printk(KERN_ERR "Gating Storage Exception\n");
+		break;
+	case 4:
+		printk(KERN_ERR "YIELD Scheduler Exception\n");
+		break;
+	case 5:
+		printk(KERN_ERR "Gating Storage Schedulier Exception\n");
+		break;
+	default:
+		printk(KERN_ERR "*** UNKNOWN THREAD EXCEPTION %d ***\n",
+			subcode);
+		break;
+	}
 	die_if_kernel("MIPS MT Thread exception in kernel", regs);
 
 	force_sig(SIGILL, current);
@@ -928,7 +997,15 @@ void ejtag_exception_handler(struct pt_regs *regs)
  */
 void nmi_exception_handler(struct pt_regs *regs)
 {
+#ifdef CONFIG_MIPS_MT_SMTC
+	unsigned long dvpret = dvpe();
+	bust_spinlocks(1);
+	printk("NMI taken!!!!\n");
+	mips_mt_regdump(dvpret);
+#else
+	bust_spinlocks(1);
 	printk("NMI taken!!!!\n");
+#endif /* CONFIG_MIPS_MT_SMTC */
 	die("NMI", regs);
 	while(1) ;
 }
@@ -960,27 +1037,29 @@ void *set_except_vector(int n, void *addr)
 
 #ifdef CONFIG_CPU_MIPSR2
 /*
- * Shadow register allocation
+ * MIPSR2 shadow register set allocation
  * FIXME: SMP...
  */
 
-/* MIPSR2 shadow register sets */
-struct shadow_registers {
-	spinlock_t sr_lock;	/*  */
-	int sr_supported;	/* Number of shadow register sets supported */
-	int sr_allocated;	/* Bitmap of allocated shadow registers */
+static struct shadow_registers {
+	/*
+	 * Number of shadow register sets supported
+	 */
+	unsigned long sr_supported;
+	/*
+	 * Bitmap of allocated shadow registers
+	 */
+	unsigned long sr_allocated;
 } shadow_registers;
 
-void mips_srs_init(void)
+static void mips_srs_init(void)
 {
 #ifdef CONFIG_CPU_MIPSR2_SRS
 	shadow_registers.sr_supported = ((read_c0_srsctl() >> 26) & 0x0f) + 1;
-	printk ("%d MIPSR2 register sets available\n", shadow_registers.sr_supported);
-#else
-	shadow_registers.sr_supported = 1;
+	printk(KERN_INFO "%d MIPSR2 register sets available\n",
+	       shadow_registers.sr_supported);
 #endif
 	shadow_registers.sr_allocated = 1;	/* Set 0 used by kernel */
-	spin_lock_init(&shadow_registers.sr_lock);
 }
 
 int mips_srs_max(void)
@@ -988,38 +1067,30 @@ int mips_srs_max(void)
 	return shadow_registers.sr_supported;
 }
 
-int mips_srs_alloc (void)
+int mips_srs_alloc(void)
 {
 	struct shadow_registers *sr = &shadow_registers;
-	unsigned long flags;
 	int set;
 
-	spin_lock_irqsave(&sr->sr_lock, flags);
+again:
+	set = find_first_zero_bit(&sr->sr_allocated, sr->sr_supported);
+	if (set >= sr->sr_supported)
+		return -1;
 
-	for (set = 0; set < sr->sr_supported; set++) {
-		if ((sr->sr_allocated & (1 << set)) == 0) {
-			sr->sr_allocated |= 1 << set;
-			spin_unlock_irqrestore(&sr->sr_lock, flags);
-			return set;
-		}
-	}
+	if (test_and_set_bit(set, &sr->sr_allocated))
+		goto again;
 
-	/* None available */
-	spin_unlock_irqrestore(&sr->sr_lock, flags);
-	return -1;
+	return set;
 }
 
-void mips_srs_free (int set)
+void mips_srs_free(int set)
 {
 	struct shadow_registers *sr = &shadow_registers;
-	unsigned long flags;
 
-	spin_lock_irqsave(&sr->sr_lock, flags);
-	sr->sr_allocated &= ~(1 << set);
-	spin_unlock_irqrestore(&sr->sr_lock, flags);
+	clear_bit(set, &sr->sr_allocated);
 }
 
-void *set_vi_srs_handler (int n, void *addr, int srs)
+static void *set_vi_srs_handler(int n, void *addr, int srs)
 {
 	unsigned long handler;
 	unsigned long old_handler = vi_handlers[n];
@@ -1032,8 +1103,7 @@ void *set_vi_srs_handler (int n, void *addr, int srs)
 	if (addr == NULL) {
 		handler = (unsigned long) do_default_vi;
 		srs = 0;
-	}
-	else
+	} else
 		handler = (unsigned long) addr;
 	vi_handlers[n] = (unsigned long) addr;
 
@@ -1045,8 +1115,7 @@ void *set_vi_srs_handler (int n, void *addr, int srs)
 	if (cpu_has_veic) {
 		if (board_bind_eic_interrupt)
 			board_bind_eic_interrupt (n, srs);
-	}
-	else if (cpu_has_vint) {
+	} else if (cpu_has_vint) {
 		/* SRSMap is only defined if shadow sets are implemented */
 		if (mips_srs_max() > 1)
 			change_c0_srsmap (0xf << n*4, srs << n*4);
@@ -1060,6 +1129,15 @@ void *set_vi_srs_handler (int n, void *addr, int srs)
 
 		extern char except_vec_vi, except_vec_vi_lui;
 		extern char except_vec_vi_ori, except_vec_vi_end;
+#ifdef CONFIG_MIPS_MT_SMTC
+		/*
+		 * We need to provide the SMTC vectored interrupt handler
+		 * not only with the address of the handler, but with the
+		 * Status.IM bit to be masked before going there.
+		 */
+		extern char except_vec_vi_mori;
+		const int mori_offset = &except_vec_vi_mori - &except_vec_vi;
+#endif /* CONFIG_MIPS_MT_SMTC */
 		const int handler_len = &except_vec_vi_end - &except_vec_vi;
 		const int lui_offset = &except_vec_vi_lui - &except_vec_vi;
 		const int ori_offset = &except_vec_vi_ori - &except_vec_vi;
@@ -1073,6 +1151,12 @@ void *set_vi_srs_handler (int n, void *addr, int srs)
 		}
 
 		memcpy (b, &except_vec_vi, handler_len);
+#ifdef CONFIG_MIPS_MT_SMTC
+		if (n > 7)
+			printk("Vector index %d exceeds SMTC maximum\n", n);
+		w = (u32 *)(b + mori_offset);
+		*w = (*w & 0xffff0000) | (0x100 << n);
+#endif /* CONFIG_MIPS_MT_SMTC */
 		w = (u32 *)(b + lui_offset);
 		*w = (*w & 0xffff0000) | (((u32)handler >> 16) & 0xffff);
 		w = (u32 *)(b + ori_offset);
@@ -1095,9 +1179,9 @@ void *set_vi_srs_handler (int n, void *addr, int srs)
 	return (void *)old_handler;
 }
 
-void *set_vi_handler (int n, void *addr)
+void *set_vi_handler(int n, void *addr)
 {
-	return set_vi_srs_handler (n, addr, 0);
+	return set_vi_srs_handler(n, addr, 0);
 }
 #endif
 
@@ -1113,8 +1197,29 @@ extern asmlinkage int _restore_fp_context(struct sigcontext *sc);
 extern asmlinkage int fpu_emulator_save_context(struct sigcontext *sc);
 extern asmlinkage int fpu_emulator_restore_context(struct sigcontext *sc);
 
+#ifdef CONFIG_SMP
+static int smp_save_fp_context(struct sigcontext *sc)
+{
+	return cpu_has_fpu
+	       ? _save_fp_context(sc)
+	       : fpu_emulator_save_context(sc);
+}
+
+static int smp_restore_fp_context(struct sigcontext *sc)
+{
+	return cpu_has_fpu
+	       ? _restore_fp_context(sc)
+	       : fpu_emulator_restore_context(sc);
+}
+#endif
+
 static inline void signal_init(void)
 {
+#ifdef CONFIG_SMP
+	/* For now just do the cpu_has_fpu check when the functions are invoked */
+	save_fp_context = smp_save_fp_context;
+	restore_fp_context = smp_restore_fp_context;
+#else
 	if (cpu_has_fpu) {
 		save_fp_context = _save_fp_context;
 		restore_fp_context = _restore_fp_context;
@@ -1122,6 +1227,7 @@ static inline void signal_init(void)
 		save_fp_context = fpu_emulator_save_context;
 		restore_fp_context = fpu_emulator_restore_context;
 	}
+#endif
 }
 
 #ifdef CONFIG_MIPS32_COMPAT
@@ -1158,6 +1264,20 @@ void __init per_cpu_trap_init(void)
 {
 	unsigned int cpu = smp_processor_id();
 	unsigned int status_set = ST0_CU0;
+#ifdef CONFIG_MIPS_MT_SMTC
+	int secondaryTC = 0;
+	int bootTC = (cpu == 0);
+
+	/*
+	 * Only do per_cpu_trap_init() for first TC of Each VPE.
+	 * Note that this hack assumes that the SMTC init code
+	 * assigns TCs consecutively and in ascending order.
+	 */
+
+	if (((read_c0_tcbind() & TCBIND_CURTC) != 0) &&
+	    ((read_c0_tcbind() & TCBIND_CURVPE) == cpu_data[cpu - 1].vpe_id))
+		secondaryTC = 1;
+#endif /* CONFIG_MIPS_MT_SMTC */
 
 	/*
 	 * Disable coprocessors and select 32-bit or 64-bit addressing
@@ -1180,6 +1300,10 @@ void __init per_cpu_trap_init(void)
 	write_c0_hwrena (0x0000000f); /* Allow rdhwr to all registers */
 #endif
 
+#ifdef CONFIG_MIPS_MT_SMTC
+	if (!secondaryTC) {
+#endif /* CONFIG_MIPS_MT_SMTC */
+
 	/*
 	 * Interrupt handling.
 	 */
@@ -1196,6 +1320,9 @@ void __init per_cpu_trap_init(void)
 		} else
 			set_c0_cause(CAUSEF_IV);
 	}
+#ifdef CONFIG_MIPS_MT_SMTC
+	}
+#endif /* CONFIG_MIPS_MT_SMTC */
 
 	cpu_data[cpu].asid_cache = ASID_FIRST_VERSION;
 	TLBMISS_HANDLER_SETUP();
@@ -1205,8 +1332,14 @@ void __init per_cpu_trap_init(void)
 	BUG_ON(current->mm);
 	enter_lazy_tlb(&init_mm, current);
 
-	cpu_cache_init();
-	tlb_init();
+#ifdef CONFIG_MIPS_MT_SMTC
+	if (bootTC) {
+#endif /* CONFIG_MIPS_MT_SMTC */
+		cpu_cache_init();
+		tlb_init();
+#ifdef CONFIG_MIPS_MT_SMTC
+	}
+#endif /* CONFIG_MIPS_MT_SMTC */
 }
 
 /* Install CPU exception handler */
@@ -1278,7 +1411,7 @@ void __init trap_init(void)
 	if (cpu_has_veic || cpu_has_vint) {
 		int nvec = cpu_has_veic ? 64 : 8;
 		for (i = 0; i < nvec; i++)
-			set_vi_handler (i, NULL);
+			set_vi_handler(i, NULL);
 	}
 	else if (cpu_has_divec)
 		set_handler(0x200, &except_vec4, 0x8);
@@ -1297,6 +1430,7 @@ void __init trap_init(void)
 	if (board_be_init)
 		board_be_init();
 
+	set_except_vector(0, handle_int);
 	set_except_vector(1, handle_tlbm);
 	set_except_vector(2, handle_tlbl);
 	set_except_vector(3, handle_tlbs);
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index 2ad0cedf29f..14fa00e3cdf 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -2,7 +2,7 @@
 #include <asm/asm-offsets.h>
 #include <asm-generic/vmlinux.lds.h>
 
-#undef mips		/* CPP really sucks for this job  */
+#undef mips
 #define mips mips
 OUTPUT_ARCH(mips)
 ENTRY(kernel_entry)
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index ae83b755cf4..85d7df7b18e 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -13,7 +13,6 @@
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
  *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
  */
 
 /*
@@ -27,11 +26,8 @@
  *
  * To load and run, simply cat a SP 'program file' to /dev/vpe1.
  * i.e cat spapp >/dev/vpe1.
- *
- * You'll need to have the following device files.
- * mknod /dev/vpe0 c 63 0
- * mknod /dev/vpe1 c 63 1
  */
+
 #include <linux/config.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -55,6 +51,8 @@
 #include <asm/cpu.h>
 #include <asm/processor.h>
 #include <asm/system.h>
+#include <asm/vpe.h>
+#include <asm/kspd.h>
 
 typedef void *vpe_handle;
 
@@ -68,6 +66,11 @@ typedef void *vpe_handle;
 static char module_name[] = "vpe";
 static int major;
 
+#ifdef CONFIG_MIPS_APSP_KSPD
+ static struct kspd_notifications kspd_events;
+static int kspd_events_reqd = 0;
+#endif
+
 /* grab the likely amount of memory we will need. */
 #ifdef CONFIG_MIPS_VPE_LOADER_TOM
 #define P_SIZE (2 * 1024 * 1024)
@@ -76,7 +79,10 @@ static int major;
 #define P_SIZE (256 * 1024)
 #endif
 
+extern unsigned long physical_memsize;
+
 #define MAX_VPES 16
+#define VPE_PATH_MAX 256
 
 enum vpe_state {
 	VPE_STATE_UNUSED = 0,
@@ -102,6 +108,8 @@ struct vpe {
 	unsigned long len;
 	char *pbuffer;
 	unsigned long plen;
+	unsigned int uid, gid;
+	char cwd[VPE_PATH_MAX];
 
 	unsigned long __start;
 
@@ -113,6 +121,9 @@ struct vpe {
 
 	/* shared symbol address */
 	void *shared_ptr;
+
+	/* the list of who wants to know when something major happens */
+	struct list_head notify;
 };
 
 struct tc {
@@ -138,7 +149,7 @@ struct vpecontrol_ {
 } vpecontrol;
 
 static void release_progmem(void *ptr);
-static void dump_vpe(struct vpe * v);
+/* static __attribute_used__ void dump_vpe(struct vpe * v); */
 extern void save_gp_address(unsigned int secbase, unsigned int rel);
 
 /* get the vpe associated with this minor */
@@ -146,12 +157,14 @@ struct vpe *get_vpe(int minor)
 {
 	struct vpe *v;
 
+	if (!cpu_has_mipsmt)
+		return NULL;
+
 	list_for_each_entry(v, &vpecontrol.vpe_list, list) {
 		if (v->minor == minor)
 			return v;
 	}
 
-	printk(KERN_DEBUG "VPE: get_vpe minor %d not found\n", minor);
 	return NULL;
 }
 
@@ -165,8 +178,6 @@ struct tc *get_tc(int index)
 			return t;
 	}
 
-	printk(KERN_DEBUG "VPE: get_tc index %d not found\n", index);
-
 	return NULL;
 }
 
@@ -179,8 +190,6 @@ struct tc *get_tc_unused(void)
 			return t;
 	}
 
-	printk(KERN_DEBUG "VPE: All TC's are in use\n");
-
 	return NULL;
 }
 
@@ -190,13 +199,13 @@ struct vpe *alloc_vpe(int minor)
 	struct vpe *v;
 
 	if ((v = kzalloc(sizeof(struct vpe), GFP_KERNEL)) == NULL) {
-		printk(KERN_WARNING "VPE: alloc_vpe no mem\n");
 		return NULL;
 	}
 
 	INIT_LIST_HEAD(&v->tc);
 	list_add_tail(&v->list, &vpecontrol.vpe_list);
 
+	INIT_LIST_HEAD(&v->notify);
 	v->minor = minor;
 	return v;
 }
@@ -207,7 +216,6 @@ struct tc *alloc_tc(int index)
 	struct tc *t;
 
 	if ((t = kzalloc(sizeof(struct tc), GFP_KERNEL)) == NULL) {
-		printk(KERN_WARNING "VPE: alloc_tc no mem\n");
 		return NULL;
 	}
 
@@ -236,20 +244,16 @@ void dump_mtregs(void)
 	printk("config3 0x%lx MT %ld\n", val,
 	       (val & CONFIG3_MT) >> CONFIG3_MT_SHIFT);
 
-	val = read_c0_mvpconf0();
-	printk("mvpconf0 0x%lx, PVPE %ld PTC %ld M %ld\n", val,
-	       (val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT,
-	       val & MVPCONF0_PTC, (val & MVPCONF0_M) >> MVPCONF0_M_SHIFT);
-
 	val = read_c0_mvpcontrol();
 	printk("MVPControl 0x%lx, STLB %ld VPC %ld EVP %ld\n", val,
 	       (val & MVPCONTROL_STLB) >> MVPCONTROL_STLB_SHIFT,
 	       (val & MVPCONTROL_VPC) >> MVPCONTROL_VPC_SHIFT,
 	       (val & MVPCONTROL_EVP));
 
-	val = read_c0_vpeconf0();
-	printk("VPEConf0 0x%lx MVP %ld\n", val,
-	       (val & VPECONF0_MVP) >> VPECONF0_MVP_SHIFT);
+	val = read_c0_mvpconf0();
+	printk("mvpconf0 0x%lx, PVPE %ld PTC %ld M %ld\n", val,
+	       (val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT,
+	       val & MVPCONF0_PTC, (val & MVPCONF0_M) >> MVPCONF0_M_SHIFT);
 }
 
 /* Find some VPE program space  */
@@ -354,9 +358,9 @@ static int apply_r_mips_gprel16(struct module *me, uint32_t *location,
 	}
 
 	if( (rel > 32768) || (rel < -32768) ) {
-		printk(KERN_ERR
-		       "apply_r_mips_gprel16: relative address out of range 0x%x %d\n",
-		       rel, rel);
+		printk(KERN_DEBUG "VPE loader: apply_r_mips_gprel16: "
+		       "relative address 0x%x out of range of gp register\n",
+		       rel);
 		return -ENOEXEC;
 	}
 
@@ -374,8 +378,8 @@ static int apply_r_mips_pc16(struct module *me, uint32_t *location,
 	rel -= 1;		// and one instruction less due to the branch delay slot.
 
 	if( (rel > 32768) || (rel < -32768) ) {
-		printk(KERN_ERR
-		       "apply_r_mips_pc16: relative address out of range 0x%x\n", rel);
+		printk(KERN_DEBUG "VPE loader: "
+ 		       "apply_r_mips_pc16: relative address out of range 0x%x\n", rel);
 		return -ENOEXEC;
 	}
 
@@ -396,7 +400,8 @@ static int apply_r_mips_26(struct module *me, uint32_t *location,
 			   Elf32_Addr v)
 {
 	if (v % 4) {
-		printk(KERN_ERR "module %s: dangerous relocation mod4\n", me->name);
+		printk(KERN_DEBUG "VPE loader: apply_r_mips_26 "
+		       " unaligned relocation\n");
 		return -ENOEXEC;
 	}
 
@@ -459,12 +464,13 @@ static int apply_r_mips_lo16(struct module *me, uint32_t *location,
 			/*
 			 * The value for the HI16 had best be the same.
 			 */
-			if (v != l->value) {
-				printk("%d != %d\n", v, l->value);
-				goto out_danger;
+ 			if (v != l->value) {
+				printk(KERN_DEBUG "VPE loader: "
+				       "apply_r_mips_lo16/hi16: 	"
+				       "inconsistent value information\n");
+				return -ENOEXEC;
 			}
 
-
 			/*
 			 * Do the HI16 relocation.  Note that we actually don't
 			 * need to know anything about the LO16 itself, except
@@ -500,11 +506,6 @@ static int apply_r_mips_lo16(struct module *me, uint32_t *location,
 	*location = insnlo;
 
 	return 0;
-
-out_danger:
-	printk(KERN_ERR "module %s: dangerous " "relocation\n", me->name);
-
-	return -ENOEXEC;
 }
 
 static int (*reloc_handlers[]) (struct module *me, uint32_t *location,
@@ -518,6 +519,15 @@ static int (*reloc_handlers[]) (struct module *me, uint32_t *location,
 	[R_MIPS_PC16] = apply_r_mips_pc16
 };
 
+static char *rstrs[] = {
+    	[R_MIPS_NONE]	= "MIPS_NONE",
+	[R_MIPS_32]	= "MIPS_32",
+	[R_MIPS_26]	= "MIPS_26",
+	[R_MIPS_HI16]	= "MIPS_HI16",
+	[R_MIPS_LO16]	= "MIPS_LO16",
+	[R_MIPS_GPREL16] = "MIPS_GPREL16",
+	[R_MIPS_PC16] = "MIPS_PC16"
+};
 
 int apply_relocations(Elf32_Shdr *sechdrs,
 		      const char *strtab,
@@ -552,15 +562,13 @@ int apply_relocations(Elf32_Shdr *sechdrs,
 
 		res = reloc_handlers[ELF32_R_TYPE(r_info)](me, location, v);
 		if( res ) {
-			printk(KERN_DEBUG
-			       "relocation error 0x%x sym refer <%s> value 0x%x "
-			       "type 0x%x r_info 0x%x\n",
-			       (unsigned int)location, strtab + sym->st_name, v,
-			       r_info, ELF32_R_TYPE(r_info));
-		}
-
-		if (res)
+			char *r = rstrs[ELF32_R_TYPE(r_info)];
+		    	printk(KERN_WARNING "VPE loader: .text+0x%x "
+			       "relocation type %s for symbol \"%s\" failed\n",
+			       rel[i].r_offset, r ? r : "UNKNOWN",
+			       strtab + sym->st_name);
 			return res;
+		}
 	}
 
 	return 0;
@@ -576,7 +584,7 @@ void save_gp_address(unsigned int secbase, unsigned int rel)
 
 
 /* Change all symbols so that sh_value encodes the pointer directly. */
-static int simplify_symbols(Elf_Shdr * sechdrs,
+static void simplify_symbols(Elf_Shdr * sechdrs,
 			    unsigned int symindex,
 			    const char *strtab,
 			    const char *secstrings,
@@ -585,18 +593,21 @@ static int simplify_symbols(Elf_Shdr * sechdrs,
 	Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr;
 	unsigned long secbase, bssbase = 0;
 	unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym);
-	int ret = 0, size;
+	int size;
 
 	/* find the .bss section for COMMON symbols */
 	for (i = 0; i < nsecs; i++) {
-		if (strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) == 0)
+		if (strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) == 0) {
 			bssbase = sechdrs[i].sh_addr;
+			break;
+		}
 	}
 
 	for (i = 1; i < n; i++) {
 		switch (sym[i].st_shndx) {
 		case SHN_COMMON:
-			/* Allocate space for the symbol in the .bss section. st_value is currently size.
+			/* Allocate space for the symbol in the .bss section.
+			   st_value is currently size.
 			   We want it to have the address of the symbol. */
 
 			size = sym[i].st_value;
@@ -614,11 +625,9 @@ static int simplify_symbols(Elf_Shdr * sechdrs,
 			break;
 
 		case SHN_MIPS_SCOMMON:
-
-			printk(KERN_DEBUG
-			       "simplify_symbols: ignoring SHN_MIPS_SCOMMON symbol <%s> st_shndx %d\n",
-			       strtab + sym[i].st_name, sym[i].st_shndx);
-
+			printk(KERN_DEBUG "simplify_symbols: ignoring SHN_MIPS_SCOMMON"
+			       "symbol <%s> st_shndx %d\n", strtab + sym[i].st_name,
+			       sym[i].st_shndx);
 			// .sbss section
 			break;
 
@@ -632,10 +641,7 @@ static int simplify_symbols(Elf_Shdr * sechdrs,
 			sym[i].st_value += secbase;
 			break;
 		}
-
 	}
-
-	return ret;
 }
 
 #ifdef DEBUG_ELFLOADER
@@ -655,9 +661,26 @@ static void dump_elfsymbols(Elf_Shdr * sechdrs, unsigned int symindex,
 
 static void dump_tc(struct tc *t)
 {
-	printk(KERN_WARNING "VPE: TC index %d TCStatus 0x%lx halt 0x%lx\n",
-	       t->index, read_tc_c0_tcstatus(), read_tc_c0_tchalt());
-	printk(KERN_WARNING "VPE: tcrestart 0x%lx\n", read_tc_c0_tcrestart());
+  	unsigned long val;
+
+  	settc(t->index);
+ 	printk(KERN_DEBUG "VPE loader: TC index %d targtc %ld "
+ 	       "TCStatus 0x%lx halt 0x%lx\n",
+  	       t->index, read_c0_vpecontrol() & VPECONTROL_TARGTC,
+  	       read_tc_c0_tcstatus(), read_tc_c0_tchalt());
+
+ 	printk(KERN_DEBUG " tcrestart 0x%lx\n", read_tc_c0_tcrestart());
+ 	printk(KERN_DEBUG " tcbind 0x%lx\n", read_tc_c0_tcbind());
+
+  	val = read_c0_vpeconf0();
+ 	printk(KERN_DEBUG " VPEConf0 0x%lx MVP %ld\n", val,
+  	       (val & VPECONF0_MVP) >> VPECONF0_MVP_SHIFT);
+
+ 	printk(KERN_DEBUG " c0 status 0x%lx\n", read_vpe_c0_status());
+ 	printk(KERN_DEBUG " c0 cause 0x%lx\n", read_vpe_c0_cause());
+
+ 	printk(KERN_DEBUG " c0 badvaddr 0x%lx\n", read_vpe_c0_badvaddr());
+ 	printk(KERN_DEBUG " c0 epc 0x%lx\n", read_vpe_c0_epc());
 }
 
 static void dump_tclist(void)
@@ -672,96 +695,108 @@ static void dump_tclist(void)
 /* We are prepared so configure and start the VPE... */
 int vpe_run(struct vpe * v)
 {
-	unsigned long val;
+	struct vpe_notifications *n;
+	unsigned long val, dmt_flag;
 	struct tc *t;
 
 	/* check we are the Master VPE */
 	val = read_c0_vpeconf0();
 	if (!(val & VPECONF0_MVP)) {
 		printk(KERN_WARNING
-		       "VPE: only Master VPE's are allowed to configure MT\n");
+		       "VPE loader: only Master VPE's are allowed to configure MT\n");
 		return -1;
 	}
 
 	/* disable MT (using dvpe) */
 	dvpe();
 
+	if (!list_empty(&v->tc)) {
+                if ((t = list_entry(v->tc.next, struct tc, tc)) == NULL) {
+                        printk(KERN_WARNING "VPE loader: TC %d is already in use.\n",
+                               t->index);
+                        return -ENOEXEC;
+                }
+        } else {
+                printk(KERN_WARNING "VPE loader: No TC's associated with VPE %d\n",
+                       v->minor);
+                return -ENOEXEC;
+        }
+
 	/* Put MVPE's into 'configuration state' */
 	set_c0_mvpcontrol(MVPCONTROL_VPC);
 
-	if (!list_empty(&v->tc)) {
-		if ((t = list_entry(v->tc.next, struct tc, tc)) == NULL) {
-			printk(KERN_WARNING "VPE: TC %d is already in use.\n",
-			       t->index);
-			return -ENOEXEC;
-		}
-	} else {
-		printk(KERN_WARNING "VPE: No TC's associated with VPE %d\n",
-		       v->minor);
-		return -ENOEXEC;
-	}
-
 	settc(t->index);
 
-	val = read_vpe_c0_vpeconf0();
-
 	/* should check it is halted, and not activated */
 	if ((read_tc_c0_tcstatus() & TCSTATUS_A) || !(read_tc_c0_tchalt() & TCHALT_H)) {
-		printk(KERN_WARNING "VPE: TC %d is already doing something!\n",
+		printk(KERN_WARNING "VPE loader: TC %d is already doing something!\n",
 		       t->index);
-
 		dump_tclist();
 		return -ENOEXEC;
 	}
 
+	/*
+	 * Disable multi-threaded execution whilst we activate, clear the
+	 * halt bit and bound the tc to the other VPE...
+	 */
+	dmt_flag = dmt();
+
 	/* Write the address we want it to start running from in the TCPC register. */
 	write_tc_c0_tcrestart((unsigned long)v->__start);
-
-	/* write the sivc_info address to tccontext */
 	write_tc_c0_tccontext((unsigned long)0);
-
-	/* Set up the XTC bit in vpeconf0 to point at our tc */
-	write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() | (t->index << VPECONF0_XTC_SHIFT));
-
-	/* mark the TC as activated, not interrupt exempt and not dynamically allocatable */
+	/*
+	 * Mark the TC as activated, not interrupt exempt and not dynamically
+	 * allocatable
+	 */
 	val = read_tc_c0_tcstatus();
 	val = (val & ~(TCSTATUS_DA | TCSTATUS_IXMT)) | TCSTATUS_A;
 	write_tc_c0_tcstatus(val);
 
 	write_tc_c0_tchalt(read_tc_c0_tchalt() & ~TCHALT_H);
 
-	/* set up VPE1 */
-	write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() & ~VPECONTROL_TE);	// no multiple TC's
-	write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() | VPECONF0_VPA);	// enable this VPE
-
 	/*
 	 * The sde-kit passes 'memsize' to __start in $a3, so set something
-	 * here...
-	 * Or set $a3 (register 7) to zero and define DFLT_STACK_SIZE and
+	 * here...  Or set $a3 to zero and define DFLT_STACK_SIZE and
 	 * DFLT_HEAP_SIZE when you compile your program
 	 */
+ 	mttgpr(7, physical_memsize);
+
+
+	/* set up VPE1 */
+	/*
+	 * bind the TC to VPE 1 as late as possible so we only have the final
+	 * VPE registers to set up, and so an EJTAG probe can trigger on it
+	 */
+ 	write_tc_c0_tcbind((read_tc_c0_tcbind() & ~TCBIND_CURVPE) | v->minor);
 
-	mttgpr(7, 0);
+        /* Set up the XTC bit in vpeconf0 to point at our tc */
+        write_vpe_c0_vpeconf0( (read_vpe_c0_vpeconf0() & ~(VPECONF0_XTC))
+                               | (t->index << VPECONF0_XTC_SHIFT));
 
-	/* set config to be the same as vpe0, particularly kseg0 coherency alg */
-	write_vpe_c0_config(read_c0_config());
+        /* enable this VPE */
+        write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() | VPECONF0_VPA);
 
 	/* clear out any left overs from a previous program */
+	write_vpe_c0_status(0);
 	write_vpe_c0_cause(0);
 
 	/* take system out of configuration state */
 	clear_c0_mvpcontrol(MVPCONTROL_VPC);
 
-	/* clear interrupts enabled IE, ERL, EXL, and KSU from c0 status */
-	write_vpe_c0_status(read_vpe_c0_status() & ~(ST0_ERL | ST0_KSU | ST0_IE | ST0_EXL));
+	/* now safe to re-enable multi-threading */
+	emt(dmt_flag);
 
 	/* set it running */
 	evpe(EVPE_ENABLE);
 
+	list_for_each_entry(n, &v->notify, list) {
+		n->start(v->minor);
+	}
+
 	return 0;
 }
 
-static unsigned long find_vpe_symbols(struct vpe * v, Elf_Shdr * sechdrs,
+static int find_vpe_symbols(struct vpe * v, Elf_Shdr * sechdrs,
 				      unsigned int symindex, const char *strtab,
 				      struct module *mod)
 {
@@ -778,26 +813,28 @@ static unsigned long find_vpe_symbols(struct vpe * v, Elf_Shdr * sechdrs,
 		}
 	}
 
+	if ( (v->__start == 0) || (v->shared_ptr == NULL))
+		return -1;
+
 	return 0;
 }
 
 /*
- * Allocates a VPE with some program code space(the load address), copies
- * the contents of the program (p)buffer performing relocatations/etc,
- * free's it when finished.
-*/
+ * Allocates a VPE with some program code space(the load address), copies the
+ * contents of the program (p)buffer performing relocatations/etc, free's it
+ * when finished.
+ */
 int vpe_elfload(struct vpe * v)
 {
 	Elf_Ehdr *hdr;
 	Elf_Shdr *sechdrs;
 	long err = 0;
 	char *secstrings, *strtab = NULL;
-	unsigned int len, i, symindex = 0, strindex = 0;
-
+	unsigned int len, i, symindex = 0, strindex = 0, relocate = 0;
 	struct module mod;	// so we can re-use the relocations code
 
 	memset(&mod, 0, sizeof(struct module));
-	strcpy(mod.name, "VPE dummy prog module");
+	strcpy(mod.name, "VPE loader");
 
 	hdr = (Elf_Ehdr *) v->pbuffer;
 	len = v->plen;
@@ -805,16 +842,22 @@ int vpe_elfload(struct vpe * v)
 	/* Sanity checks against insmoding binaries or wrong arch,
 	   weird elf version */
 	if (memcmp(hdr->e_ident, ELFMAG, 4) != 0
-	    || hdr->e_type != ET_REL || !elf_check_arch(hdr)
+	    || (hdr->e_type != ET_REL && hdr->e_type != ET_EXEC)
+	    || !elf_check_arch(hdr)
 	    || hdr->e_shentsize != sizeof(*sechdrs)) {
 		printk(KERN_WARNING
-		       "VPE program, wrong arch or weird elf version\n");
+		       "VPE loader: program wrong arch or weird elf version\n");
 
 		return -ENOEXEC;
 	}
 
+	if (hdr->e_type == ET_REL)
+		relocate = 1;
+
 	if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr)) {
-		printk(KERN_ERR "VPE program length %u truncated\n", len);
+		printk(KERN_ERR "VPE loader: program length %u truncated\n",
+		       len);
+
 		return -ENOEXEC;
 	}
 
@@ -826,82 +869,126 @@ int vpe_elfload(struct vpe * v)
 	/* And these should exist, but gcc whinges if we don't init them */
 	symindex = strindex = 0;
 
-	for (i = 1; i < hdr->e_shnum; i++) {
-
-		if (sechdrs[i].sh_type != SHT_NOBITS
-		    && len < sechdrs[i].sh_offset + sechdrs[i].sh_size) {
-			printk(KERN_ERR "VPE program length %u truncated\n",
-			       len);
-			return -ENOEXEC;
-		}
+	if (relocate) {
+		for (i = 1; i < hdr->e_shnum; i++) {
+			if (sechdrs[i].sh_type != SHT_NOBITS
+			    && len < sechdrs[i].sh_offset + sechdrs[i].sh_size) {
+				printk(KERN_ERR "VPE program length %u truncated\n",
+				       len);
+				return -ENOEXEC;
+			}
 
-		/* Mark all sections sh_addr with their address in the
-		   temporary image. */
-		sechdrs[i].sh_addr = (size_t) hdr + sechdrs[i].sh_offset;
+			/* Mark all sections sh_addr with their address in the
+			   temporary image. */
+			sechdrs[i].sh_addr = (size_t) hdr + sechdrs[i].sh_offset;
 
-		/* Internal symbols and strings. */
-		if (sechdrs[i].sh_type == SHT_SYMTAB) {
-			symindex = i;
-			strindex = sechdrs[i].sh_link;
-			strtab = (char *)hdr + sechdrs[strindex].sh_offset;
+			/* Internal symbols and strings. */
+			if (sechdrs[i].sh_type == SHT_SYMTAB) {
+				symindex = i;
+				strindex = sechdrs[i].sh_link;
+				strtab = (char *)hdr + sechdrs[strindex].sh_offset;
+			}
 		}
+		layout_sections(&mod, hdr, sechdrs, secstrings);
 	}
 
-	layout_sections(&mod, hdr, sechdrs, secstrings);
-
 	v->load_addr = alloc_progmem(mod.core_size);
 	memset(v->load_addr, 0, mod.core_size);
 
-	printk("VPE elf_loader: loading to %p\n", v->load_addr);
+	printk("VPE loader: loading to %p\n", v->load_addr);
 
-	for (i = 0; i < hdr->e_shnum; i++) {
-		void *dest;
+	if (relocate) {
+		for (i = 0; i < hdr->e_shnum; i++) {
+			void *dest;
 
-		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
-			continue;
+			if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+				continue;
 
-		dest = v->load_addr + sechdrs[i].sh_entsize;
+			dest = v->load_addr + sechdrs[i].sh_entsize;
 
-		if (sechdrs[i].sh_type != SHT_NOBITS)
-			memcpy(dest, (void *)sechdrs[i].sh_addr,
-			       sechdrs[i].sh_size);
-		/* Update sh_addr to point to copy in image. */
-		sechdrs[i].sh_addr = (unsigned long)dest;
-	}
+			if (sechdrs[i].sh_type != SHT_NOBITS)
+				memcpy(dest, (void *)sechdrs[i].sh_addr,
+				       sechdrs[i].sh_size);
+			/* Update sh_addr to point to copy in image. */
+			sechdrs[i].sh_addr = (unsigned long)dest;
 
-	/* Fix up syms, so that st_value is a pointer to location. */
-	err =
-		simplify_symbols(sechdrs, symindex, strtab, secstrings,
-				 hdr->e_shnum, &mod);
-	if (err < 0) {
-		printk(KERN_WARNING "VPE: unable to simplify symbols\n");
-		goto cleanup;
-	}
+			printk(KERN_DEBUG " section sh_name %s sh_addr 0x%x\n",
+			       secstrings + sechdrs[i].sh_name, sechdrs[i].sh_addr);
+		}
 
-	/* Now do relocations. */
-	for (i = 1; i < hdr->e_shnum; i++) {
-		const char *strtab = (char *)sechdrs[strindex].sh_addr;
-		unsigned int info = sechdrs[i].sh_info;
-
-		/* Not a valid relocation section? */
-		if (info >= hdr->e_shnum)
-			continue;
-
-		/* Don't bother with non-allocated sections */
-		if (!(sechdrs[info].sh_flags & SHF_ALLOC))
-			continue;
-
-		if (sechdrs[i].sh_type == SHT_REL)
-			err =
-				apply_relocations(sechdrs, strtab, symindex, i, &mod);
-		else if (sechdrs[i].sh_type == SHT_RELA)
-			err = apply_relocate_add(sechdrs, strtab, symindex, i,
-						 &mod);
-		if (err < 0) {
-			printk(KERN_WARNING
-			       "vpe_elfload: error in relocations err %ld\n",
-			       err);
-			goto cleanup;
+ 		/* Fix up syms, so that st_value is a pointer to location. */
+ 		simplify_symbols(sechdrs, symindex, strtab, secstrings,
+ 				 hdr->e_shnum, &mod);
+
+ 		/* Now do relocations. */
+ 		for (i = 1; i < hdr->e_shnum; i++) {
+ 			const char *strtab = (char *)sechdrs[strindex].sh_addr;
+ 			unsigned int info = sechdrs[i].sh_info;
+
+ 			/* Not a valid relocation section? */
+ 			if (info >= hdr->e_shnum)
+ 				continue;
+
+ 			/* Don't bother with non-allocated sections */
+ 			if (!(sechdrs[info].sh_flags & SHF_ALLOC))
+ 				continue;
+
+ 			if (sechdrs[i].sh_type == SHT_REL)
+ 				err = apply_relocations(sechdrs, strtab, symindex, i,
+ 							&mod);
+ 			else if (sechdrs[i].sh_type == SHT_RELA)
+ 				err = apply_relocate_add(sechdrs, strtab, symindex, i,
+ 							 &mod);
+ 			if (err < 0)
+ 				return err;
+
+  		}
+  	} else {
+  		for (i = 0; i < hdr->e_shnum; i++) {
+
+ 			/* Internal symbols and strings. */
+ 			if (sechdrs[i].sh_type == SHT_SYMTAB) {
+ 				symindex = i;
+ 				strindex = sechdrs[i].sh_link;
+ 				strtab = (char *)hdr + sechdrs[strindex].sh_offset;
+
+ 				/* mark the symtab's address for when we try to find the
+ 				   magic symbols */
+ 				sechdrs[i].sh_addr = (size_t) hdr + sechdrs[i].sh_offset;
+ 			}
+
+ 			/* filter sections we dont want in the final image */
+ 			if (!(sechdrs[i].sh_flags & SHF_ALLOC) ||
+ 			    (sechdrs[i].sh_type == SHT_MIPS_REGINFO)) {
+ 				printk( KERN_DEBUG " ignoring section, "
+ 					"name %s type %x address 0x%x \n",
+ 					secstrings + sechdrs[i].sh_name,
+ 					sechdrs[i].sh_type, sechdrs[i].sh_addr);
+ 				continue;
+ 			}
+
+  			if (sechdrs[i].sh_addr < (unsigned int)v->load_addr) {
+ 				printk( KERN_WARNING "VPE loader: "
+ 					"fully linked image has invalid section, "
+ 					"name %s type %x address 0x%x, before load "
+ 					"address of 0x%x\n",
+ 					secstrings + sechdrs[i].sh_name,
+ 					sechdrs[i].sh_type, sechdrs[i].sh_addr,
+ 					(unsigned int)v->load_addr);
+  				return -ENOEXEC;
+  			}
+
+ 			printk(KERN_DEBUG " copying section sh_name %s, sh_addr 0x%x "
+			       "size 0x%x0 from x%p\n",
+			       secstrings + sechdrs[i].sh_name, sechdrs[i].sh_addr,
+			       sechdrs[i].sh_size, hdr + sechdrs[i].sh_offset);
+
+  			if (sechdrs[i].sh_type != SHT_NOBITS)
+				memcpy((void *)sechdrs[i].sh_addr,
+				       (char *)hdr + sechdrs[i].sh_offset,
+ 				       sechdrs[i].sh_size);
+			else
+				memset((void *)sechdrs[i].sh_addr, 0, sechdrs[i].sh_size);
 		}
 	}
 
@@ -910,71 +997,104 @@ int vpe_elfload(struct vpe * v)
 			   (unsigned long)v->load_addr + v->len);
 
 	if ((find_vpe_symbols(v, sechdrs, symindex, strtab, &mod)) < 0) {
+		if (v->__start == 0) {
+			printk(KERN_WARNING "VPE loader: program does not contain "
+			       "a __start symbol\n");
+			return -ENOEXEC;
+		}
 
-		printk(KERN_WARNING
-		       "VPE: program doesn't contain __start or vpe_shared symbols\n");
-		err = -ENOEXEC;
+		if (v->shared_ptr == NULL)
+			printk(KERN_WARNING "VPE loader: "
+			       "program does not contain vpe_shared symbol.\n"
+			       " Unable to use AMVP (AP/SP) facilities.\n");
 	}
 
 	printk(" elf loaded\n");
-
-cleanup:
-	return err;
+	return 0;
 }
 
-static void dump_vpe(struct vpe * v)
+__attribute_used__ void dump_vpe(struct vpe * v)
 {
 	struct tc *t;
 
+	settc(v->minor);
+
 	printk(KERN_DEBUG "VPEControl 0x%lx\n", read_vpe_c0_vpecontrol());
 	printk(KERN_DEBUG "VPEConf0 0x%lx\n", read_vpe_c0_vpeconf0());
 
-	list_for_each_entry(t, &vpecontrol.tc_list, list) {
+	list_for_each_entry(t, &vpecontrol.tc_list, list)
 		dump_tc(t);
-	}
 }
 
-/* checks for VPE is unused and gets ready to load program	 */
+static void cleanup_tc(struct tc *tc)
+{
+	int tmp;
+
+	/* Put MVPE's into 'configuration state' */
+	set_c0_mvpcontrol(MVPCONTROL_VPC);
+
+	settc(tc->index);
+	tmp = read_tc_c0_tcstatus();
+
+	/* mark not allocated and not dynamically allocatable */
+	tmp &= ~(TCSTATUS_A | TCSTATUS_DA);
+	tmp |= TCSTATUS_IXMT;	/* interrupt exempt */
+	write_tc_c0_tcstatus(tmp);
+
+	write_tc_c0_tchalt(TCHALT_H);
+
+	/* bind it to anything other than VPE1 */
+	write_tc_c0_tcbind(read_tc_c0_tcbind() & ~TCBIND_CURVPE); // | TCBIND_CURVPE
+
+	clear_c0_mvpcontrol(MVPCONTROL_VPC);
+}
+
+static int getcwd(char *buff, int size)
+{
+	mm_segment_t old_fs;
+	int ret;
+
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+
+	ret = sys_getcwd(buff,size);
+
+	set_fs(old_fs);
+
+	return ret;
+}
+
+/* checks VPE is unused and gets ready to load program  */
 static int vpe_open(struct inode *inode, struct file *filp)
 {
-	int minor;
+	int minor, ret;
 	struct vpe *v;
+	struct vpe_notifications *not;
 
 	/* assume only 1 device at the mo. */
-	if ((minor = MINOR(inode->i_rdev)) != 1) {
-		printk(KERN_WARNING "VPE: only vpe1 is supported\n");
+	if ((minor = iminor(inode)) != 1) {
+		printk(KERN_WARNING "VPE loader: only vpe1 is supported\n");
 		return -ENODEV;
 	}
 
 	if ((v = get_vpe(minor)) == NULL) {
-		printk(KERN_WARNING "VPE: unable to get vpe\n");
+		printk(KERN_WARNING "VPE loader: unable to get vpe\n");
 		return -ENODEV;
 	}
 
 	if (v->state != VPE_STATE_UNUSED) {
-		unsigned long tmp;
-		struct tc *t;
-
-		printk(KERN_WARNING "VPE: device %d already in use\n", minor);
-
 		dvpe();
-		dump_vpe(v);
-
-		printk(KERN_WARNING "VPE: re-initialising %d\n", minor);
-
-		release_progmem(v->load_addr);
 
-		t = get_tc(minor);
-		settc(minor);
-		tmp = read_tc_c0_tcstatus();
+		printk(KERN_DEBUG "VPE loader: tc in use dumping regs\n");
 
-		/* mark not allocated and not dynamically allocatable */
-		tmp &= ~(TCSTATUS_A | TCSTATUS_DA);
-		tmp |= TCSTATUS_IXMT;	/* interrupt exempt */
-		write_tc_c0_tcstatus(tmp);
+		dump_tc(get_tc(minor));
 
-		write_tc_c0_tchalt(TCHALT_H);
+		list_for_each_entry(not, &v->notify, list) {
+			not->stop(minor);
+		}
 
+		release_progmem(v->load_addr);
+		cleanup_tc(get_tc(minor));
 	}
 
 	// allocate it so when we get write ops we know it's expected.
@@ -986,6 +1106,24 @@ static int vpe_open(struct inode *inode, struct file *filp)
 	v->load_addr = NULL;
 	v->len = 0;
 
+	v->uid = filp->f_uid;
+	v->gid = filp->f_gid;
+
+#ifdef CONFIG_MIPS_APSP_KSPD
+	/* get kspd to tell us when a syscall_exit happens */
+	if (!kspd_events_reqd) {
+		kspd_notify(&kspd_events);
+		kspd_events_reqd++;
+	}
+#endif
+
+	v->cwd[0] = 0;
+	ret = getcwd(v->cwd, VPE_PATH_MAX);
+	if (ret < 0)
+		printk(KERN_WARNING "VPE loader: open, getcwd returned %d\n", ret);
+
+	v->shared_ptr = NULL;
+	v->__start = 0;
 	return 0;
 }
 
@@ -995,7 +1133,7 @@ static int vpe_release(struct inode *inode, struct file *filp)
 	struct vpe *v;
 	Elf_Ehdr *hdr;
 
-	minor = MINOR(inode->i_rdev);
+	minor = iminor(inode);
 	if ((v = get_vpe(minor)) == NULL)
 		return -ENODEV;
 
@@ -1006,14 +1144,22 @@ static int vpe_release(struct inode *inode, struct file *filp)
 		if (vpe_elfload(v) >= 0)
 			vpe_run(v);
 		else {
-			printk(KERN_WARNING "VPE: ELF load failed.\n");
+ 			printk(KERN_WARNING "VPE loader: ELF load failed.\n");
 			ret = -ENOEXEC;
 		}
 	} else {
-		printk(KERN_WARNING "VPE: only elf files are supported\n");
+ 		printk(KERN_WARNING "VPE loader: only elf files are supported\n");
 		ret = -ENOEXEC;
 	}
 
+	/* It's good to be able to run the SP and if it chokes have a look at
+	   the /dev/rt?. But if we reset the pointer to the shared struct we
+	   loose what has happened. So perhaps if garbage is sent to the vpe
+	   device, use it as a trigger for the reset. Hopefully a nice
+	   executable will be along shortly. */
+	if (ret < 0)
+		v->shared_ptr = NULL;
+
 	// cleanup any temp buffers
 	if (v->pbuffer)
 		vfree(v->pbuffer);
@@ -1028,26 +1174,24 @@ static ssize_t vpe_write(struct file *file, const char __user * buffer,
 	size_t ret = count;
 	struct vpe *v;
 
-	minor = MINOR(file->f_dentry->d_inode->i_rdev);
+	minor = iminor(file->f_dentry->d_inode);
 	if ((v = get_vpe(minor)) == NULL)
 		return -ENODEV;
 
 	if (v->pbuffer == NULL) {
-		printk(KERN_ERR "vpe_write: no pbuffer\n");
+		printk(KERN_ERR "VPE loader: no buffer for program\n");
 		return -ENOMEM;
 	}
 
 	if ((count + v->len) > v->plen) {
 		printk(KERN_WARNING
-		       "VPE Loader: elf size too big. Perhaps strip uneeded symbols\n");
+		       "VPE loader: elf size too big. Perhaps strip uneeded symbols\n");
 		return -ENOMEM;
 	}
 
 	count -= copy_from_user(v->pbuffer + v->len, buffer, count);
-	if (!count) {
-		printk("vpe_write: copy_to_user failed\n");
+	if (!count)
 		return -EFAULT;
-	}
 
 	v->len += count;
 	return ret;
@@ -1149,16 +1293,70 @@ void *vpe_get_shared(int index)
 {
 	struct vpe *v;
 
-	if ((v = get_vpe(index)) == NULL) {
-		printk(KERN_WARNING "vpe: invalid vpe index %d\n", index);
+	if ((v = get_vpe(index)) == NULL)
 		return NULL;
-	}
 
 	return v->shared_ptr;
 }
 
 EXPORT_SYMBOL(vpe_get_shared);
 
+int vpe_getuid(int index)
+{
+	struct vpe *v;
+
+	if ((v = get_vpe(index)) == NULL)
+		return -1;
+
+	return v->uid;
+}
+
+EXPORT_SYMBOL(vpe_getuid);
+
+int vpe_getgid(int index)
+{
+	struct vpe *v;
+
+	if ((v = get_vpe(index)) == NULL)
+		return -1;
+
+	return v->gid;
+}
+
+EXPORT_SYMBOL(vpe_getgid);
+
+int vpe_notify(int index, struct vpe_notifications *notify)
+{
+	struct vpe *v;
+
+	if ((v = get_vpe(index)) == NULL)
+		return -1;
+
+	list_add(&notify->list, &v->notify);
+	return 0;
+}
+
+EXPORT_SYMBOL(vpe_notify);
+
+char *vpe_getcwd(int index)
+{
+	struct vpe *v;
+
+	if ((v = get_vpe(index)) == NULL)
+		return NULL;
+
+	return v->cwd;
+}
+
+EXPORT_SYMBOL(vpe_getcwd);
+
+#ifdef CONFIG_MIPS_APSP_KSPD
+static void kspd_sp_exit( int sp_id)
+{
+	cleanup_tc(get_tc(sp_id));
+}
+#endif
+
 static int __init vpe_module_init(void)
 {
 	struct vpe *v = NULL;
@@ -1201,7 +1399,8 @@ static int __init vpe_module_init(void)
 				return -ENODEV;
 			}
 
-			list_add(&t->tc, &v->tc);	/* add the tc to the list of this vpe's tc's. */
+			/* add the tc to the list of this vpe's tc's. */
+			list_add(&t->tc, &v->tc);
 
 			/* deactivate all but vpe0 */
 			if (i != 0) {
@@ -1222,10 +1421,12 @@ static int __init vpe_module_init(void)
 						     ~(ST0_IM | ST0_IE | ST0_KSU))
 						    | ST0_CU0);
 
-				/* set config to be the same as vpe0, particularly kseg0 coherency alg */
+				/*
+				 * Set config to be the same as vpe0,
+				 * particularly kseg0 coherency alg
+				 */
 				write_vpe_c0_config(read_c0_config());
 			}
-
 		}
 
 		/* TC's */
@@ -1234,23 +1435,28 @@ static int __init vpe_module_init(void)
 		if (i != 0) {
 			unsigned long tmp;
 
-			/* tc 0 will of course be running.... */
-			if (i == 0)
-				t->state = TC_STATE_RUNNING;
-
 			settc(i);
 
-			/* bind a TC to each VPE, May as well put all excess TC's
-			   on the last VPE */
-			if (i >= (((val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1))
-				write_tc_c0_tcbind(read_tc_c0_tcbind() |
-						   ((val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT));
-			else
-				write_tc_c0_tcbind(read_tc_c0_tcbind() | i);
+			/* Any TC that is bound to VPE0 gets left as is - in case
+			   we are running SMTC on VPE0. A TC that is bound to any
+			   other VPE gets bound to VPE0, ideally I'd like to make
+			   it homeless but it doesn't appear to let me bind a TC
+			   to a non-existent VPE. Which is perfectly reasonable.
+
+			   The (un)bound state is visible to an EJTAG probe so may
+			   notify GDB...
+			*/
+
+			if (((tmp = read_tc_c0_tcbind()) & TCBIND_CURVPE)) {
+				/* tc is bound >vpe0 */
+				write_tc_c0_tcbind(tmp & ~TCBIND_CURVPE);
+
+				t->pvpe = get_vpe(0);	/* set the parent vpe */
+			}
 
 			tmp = read_tc_c0_tcstatus();
 
-			/* mark not allocated and not dynamically allocatable */
+			/* mark not activated and not dynamically allocatable */
 			tmp &= ~(TCSTATUS_A | TCSTATUS_DA);
 			tmp |= TCSTATUS_IXMT;	/* interrupt exempt */
 			write_tc_c0_tcstatus(tmp);
@@ -1262,6 +1468,9 @@ static int __init vpe_module_init(void)
 	/* release config state */
 	clear_c0_mvpcontrol(MVPCONTROL_VPC);
 
+#ifdef CONFIG_MIPS_APSP_KSPD
+	kspd_events.kspd_sp_exit = kspd_sp_exit;
+#endif
 	return 0;
 }
 
@@ -1281,5 +1490,5 @@ static void __exit vpe_module_exit(void)
 module_init(vpe_module_init);
 module_exit(vpe_module_exit);
 MODULE_DESCRIPTION("MIPS VPE Loader");
-MODULE_AUTHOR("Elizabeth Clarke, MIPS Technologies, Inc");
+MODULE_AUTHOR("Elizabeth Oldham, MIPS Technologies, Inc.");
 MODULE_LICENSE("GPL");