1 files changed, 150 insertions, 106 deletions
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index f295b4ac941..6c0571216a9 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -24,7 +24,6 @@
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/tty.h>
@@ -35,8 +34,8 @@
 #include <linux/hugetlb.h>
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
+#include <linux/kdebug.h>
 
-#include <asm/system.h>
 #include <asm/pgalloc.h>
 #include <asm/sections.h>
 #include <asm/traps.h>
@@ -44,15 +43,18 @@
 
 #include <arch/interrupts.h>
 
-static noinline void force_sig_info_fault(int si_signo, int si_code,
-	unsigned long address, int fault_num, struct task_struct *tsk)
+static noinline void force_sig_info_fault(const char *type, int si_signo,
+					  int si_code, unsigned long address,
+					  int fault_num,
+					  struct task_struct *tsk,
+					  struct pt_regs *regs)
 {
 	siginfo_t info;
 
 	if (unlikely(tsk->pid < 2)) {
 		panic("Signal %d (code %d) at %#lx sent to %s!",
 		      si_signo, si_code & 0xffff, address,
-		      tsk->pid ? "init" : "the idle task");
+		      is_idle_task(tsk) ? "the idle task" : "init");
 	}
 
 	info.si_signo = si_signo;
@@ -60,6 +62,7 @@ static noinline void force_sig_info_fault(int si_signo, int si_code,
 	info.si_code = si_code;
 	info.si_addr = (void __user *)address;
 	info.si_trapno = fault_num;
+	trace_unhandled_signal(type, regs, address, si_signo);
 	force_sig_info(si_signo, &info, tsk);
 }
 
@@ -68,15 +71,17 @@ static noinline void force_sig_info_fault(int si_signo, int si_code,
  * Synthesize the fault a PL0 process would get by doing a word-load of
  * an unaligned address or a high kernel address.
  */
-SYSCALL_DEFINE2(cmpxchg_badaddr, unsigned long, address,
-		struct pt_regs *, regs)
+SYSCALL_DEFINE1(cmpxchg_badaddr, unsigned long, address)
 {
+	struct pt_regs *regs = current_pt_regs();
+
 	if (address >= PAGE_OFFSET)
-		force_sig_info_fault(SIGSEGV, SEGV_MAPERR, address,
-				     INT_DTLB_MISS, current);
+		force_sig_info_fault("atomic segfault", SIGSEGV, SEGV_MAPERR,
+				     address, INT_DTLB_MISS, current, regs);
 	else
-		force_sig_info_fault(SIGBUS, BUS_ADRALN, address,
-				     INT_UNALIGN_DATA, current);
+		force_sig_info_fault("atomic alignment fault", SIGBUS,
+				     BUS_ADRALN, address,
+				     INT_UNALIGN_DATA, current, regs);
 
 	/*
 	 * Adjust pc to point at the actual instruction, which is unusual
@@ -118,16 +123,15 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 	pmd_k = pmd_offset(pud_k, address);
 	if (!pmd_present(*pmd_k))
 		return NULL;
-	if (!pmd_present(*pmd)) {
+	if (!pmd_present(*pmd))
 		set_pmd(pmd, *pmd_k);
-		arch_flush_lazy_mmu_mode();
-	} else
+	else
 		BUG_ON(pmd_ptfn(*pmd) != pmd_ptfn(*pmd_k));
 	return pmd_k;
 }
 
 /*
- * Handle a fault on the vmalloc or module mapping area
+ * Handle a fault on the vmalloc area.
  */
 static inline int vmalloc_fault(pgd_t *pgd, unsigned long address)
 {
@@ -145,8 +149,6 @@ static inline int vmalloc_fault(pgd_t *pgd, unsigned long address)
 	pmd_k = vmalloc_sync_one(pgd, address);
 	if (!pmd_k)
 		return -1;
-	if (pmd_huge(*pmd_k))
-		return 0;   /* support TILE huge_vmap() API */
 	pte_k = pte_offset_kernel(pmd_k, address);
 	if (!pte_present(*pte_k))
 		return -1;
@@ -184,7 +186,7 @@ static pgd_t *get_current_pgd(void)
 	HV_Context ctx = hv_inquire_context();
 	unsigned long pgd_pfn = ctx.page_table >> PAGE_SHIFT;
 	struct page *pgd_page = pfn_to_page(pgd_pfn);
-	BUG_ON(PageHighMem(pgd_page));   /* oops, HIGHPTE? */
+	BUG_ON(PageHighMem(pgd_page));
 	return (pgd_t *) __va(ctx.page_table);
 }
 
@@ -200,9 +202,14 @@ static pgd_t *get_current_pgd(void)
  * interrupt or a critical region, and must do as little as possible.
  * Similarly, we can't use atomic ops here, since we may be handling a
  * fault caused by an atomic op access.
+ *
+ * If we find a migrating PTE while we're in an NMI context, and we're
+ * at a PC that has a registered exception handler, we don't wait,
+ * since this thread may (e.g.) have been interrupted while migrating
+ * its own stack, which would then cause us to self-deadlock.
  */
 static int handle_migrating_pte(pgd_t *pgd, int fault_num,
-				unsigned long address,
+				unsigned long address, unsigned long pc,
 				int is_kernel_mode, int write)
 {
 	pud_t *pud;
@@ -224,6 +231,8 @@ static int handle_migrating_pte(pgd_t *pgd, int fault_num,
 		pte_offset_kernel(pmd, address);
 	pteval = *pte;
 	if (pte_migrating(pteval)) {
+		if (in_nmi() && search_exception_tables(pc))
+			return 0;
 		wait_for_migration(pte);
 		return 1;
 	}
@@ -263,12 +272,15 @@ static int handle_page_fault(struct pt_regs *regs,
 	int si_code;
 	int is_kernel_mode;
 	pgd_t *pgd;
+	unsigned int flags;
 
 	/* on TILE, protection faults are always writes */
 	if (!is_page_fault)
 		write = 1;
 
-	is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL);
+	flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+
+	is_kernel_mode = !user_mode(regs);
 
 	tsk = validate_current();
 
@@ -291,13 +303,13 @@ static int handle_page_fault(struct pt_regs *regs,
 	/*
 	 * Early on, we need to check for migrating PTE entries;
 	 * see homecache.c.  If we find a migrating PTE, we wait until
-	 * the backing page claims to be done migrating, then we procede.
+	 * the backing page claims to be done migrating, then we proceed.
 	 * For kernel PTEs, we rewrite the PTE and return and retry.
 	 * Otherwise, we treat the fault like a normal "no PTE" fault,
 	 * rather than trying to patch up the existing PTE.
 	 */
 	pgd = get_current_pgd();
-	if (handle_migrating_pte(pgd, fault_num, address,
+	if (handle_migrating_pte(pgd, fault_num, address, regs->pc,
 				 is_kernel_mode, write))
 		return 1;
 
@@ -332,9 +344,12 @@ static int handle_page_fault(struct pt_regs *regs,
 	/*
 	 * If we're trying to touch user-space addresses, we must
 	 * be either at PL0, or else with interrupts enabled in the
-	 * kernel, so either way we can re-enable interrupts here.
+	 * kernel, so either way we can re-enable interrupts here
+	 * unless we are doing atomic access to user space with
+	 * interrupts disabled.
 	 */
-	local_irq_enable();
+	if (!(regs->flags & PT_FLAGS_DISABLE_IRQ))
+		local_irq_enable();
 
 	mm = tsk->mm;
 
@@ -347,6 +362,9 @@ static int handle_page_fault(struct pt_regs *regs,
 		goto bad_area_nosemaphore;
 	}
 
+	if (!is_kernel_mode)
+		flags |= FAULT_FLAG_USER;
+
 	/*
 	 * When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
@@ -369,6 +387,8 @@ static int handle_page_fault(struct pt_regs *regs,
 			vma = NULL;  /* happy compiler */
 			goto bad_area_nosemaphore;
 		}
+
+retry:
 		down_read(&mm->mmap_sem);
 	}
 
@@ -405,18 +425,22 @@ good_area:
 #endif
 		if (!(vma->vm_flags & VM_WRITE))
 			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
 	} else {
 		if (!is_page_fault || !(vma->vm_flags & VM_READ))
 			goto bad_area;
 	}
 
- survive:
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(mm, vma, address, write);
+	fault = handle_mm_fault(mm, vma, address, flags);
+
+	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+		return 0;
+
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
@@ -424,33 +448,33 @@ good_area:
 			goto do_sigbus;
 		BUG();
 	}
-	if (fault & VM_FAULT_MAJOR)
-		tsk->maj_flt++;
-	else
-		tsk->min_flt++;
+	if (flags & FAULT_FLAG_ALLOW_RETRY) {
+		if (fault & VM_FAULT_MAJOR)
+			tsk->maj_flt++;
+		else
+			tsk->min_flt++;
+		if (fault & VM_FAULT_RETRY) {
+			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
+
+			 /*
+			  * No need to up_read(&mm->mmap_sem) as we would
+			  * have already released it in __lock_page_or_retry
+			  * in mm/filemap.c.
+			  */
+			goto retry;
+		}
+	}
 
-#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
-	/*
-	 * If this was an asynchronous fault,
-	 * restart the appropriate engine.
-	 */
-	switch (fault_num) {
 #if CHIP_HAS_TILE_DMA()
+	/* If this was a DMA TLB fault, restart the DMA engine. */
+	switch (fault_num) {
 	case INT_DMATLB_MISS:
 	case INT_DMATLB_MISS_DWNCL:
 	case INT_DMATLB_ACCESS:
 	case INT_DMATLB_ACCESS_DWNCL:
 		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
 		break;
-#endif
-#if CHIP_HAS_SN_PROC()
-	case INT_SNITLB_MISS:
-	case INT_SNITLB_MISS_DWNCL:
-		__insn_mtspr(SPR_SNCTL,
-			     __insn_mfspr(SPR_SNCTL) &
-			     ~SPR_SNCTL__FRZPROC_MASK);
-		break;
-#endif
 	}
 #endif
 
@@ -472,8 +496,8 @@ bad_area_nosemaphore:
 		 */
 		local_irq_enable();
 
-		force_sig_info_fault(SIGSEGV, si_code, address,
-				     fault_num, tsk);
+		force_sig_info_fault("segfault", SIGSEGV, si_code, address,
+				     fault_num, tsk, regs);
 		return 0;
 	}
 
@@ -511,7 +535,7 @@ no_context:
 
 	if (unlikely(tsk->pid < 2)) {
 		panic("Kernel page fault running %s!",
-		      tsk->pid ? "init" : "the idle task");
+		      is_idle_task(tsk) ? "the idle task" : "init");
 	}
 
 	/*
@@ -531,15 +555,10 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_global_init(tsk)) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	pr_alert("VM: killing process %s\n", tsk->comm);
-	if (!is_kernel_mode)
-		do_group_exit(SIGKILL);
-	goto no_context;
+	if (is_kernel_mode)
+		goto no_context;
+	pagefault_out_of_memory();
+	return 0;
 
 do_sigbus:
 	up_read(&mm->mmap_sem);
@@ -548,7 +567,8 @@ do_sigbus:
 	if (is_kernel_mode)
 		goto no_context;
 
-	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, fault_num, tsk);
+	force_sig_info_fault("bus error", SIGBUS, BUS_ADRERR, address,
+			     fault_num, tsk, regs);
 	return 0;
 }
 
@@ -656,20 +676,12 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
 	}
 
 	/*
-	 * NOTE: the one other type of access that might bring us here
-	 * are the memory ops in __tns_atomic_acquire/__tns_atomic_release,
-	 * but we don't have to check specially for them since we can
-	 * always safely return to the address of the fault and retry,
-	 * since no separate atomic locks are involved.
-	 */
-
-	/*
 	 * Now that we have released the atomic lock (if necessary),
 	 * it's safe to spin if the PTE that caused the fault was migrating.
 	 */
 	if (fault_num == INT_DTLB_ACCESS)
 		write = 1;
-	if (handle_migrating_pte(pgd, fault_num, address, 1, write))
+	if (handle_migrating_pte(pgd, fault_num, address, pc, 1, write))
 		return state;
 
 	/* Return zero so that we continue on with normal fault handling. */
@@ -692,8 +704,60 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 {
 	int is_page_fault;
 
+#ifdef CONFIG_KPROBES
+	/*
+	 * This is to notify the fault handler of the kprobes.  The
+	 * exception code is redundant as it is also carried in REGS,
+	 * but we pass it anyhow.
+	 */
+	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1,
+		       regs->faultnum, SIGSEGV) == NOTIFY_STOP)
+		return;
+#endif
+
+#ifdef __tilegx__
+	/*
+	 * We don't need early do_page_fault_ics() support, since unlike
+	 * Pro we don't need to worry about unlocking the atomic locks.
+	 * There is only one current case in GX where we touch any memory
+	 * under ICS other than our own kernel stack, and we handle that
+	 * here.  (If we crash due to trying to touch our own stack,
+	 * we're in too much trouble for C code to help out anyway.)
+	 */
+	if (write & ~1) {
+		unsigned long pc = write & ~1;
+		if (pc >= (unsigned long) __start_unalign_asm_code &&
+		    pc < (unsigned long) __end_unalign_asm_code) {
+			struct thread_info *ti = current_thread_info();
+			/*
+			 * Our EX_CONTEXT is still what it was from the
+			 * initial unalign exception, but now we've faulted
+			 * on the JIT page.  We would like to complete the
+			 * page fault however is appropriate, and then retry
+			 * the instruction that caused the unalign exception.
+			 * Our state has been "corrupted" by setting the low
+			 * bit in "sp", and stashing r0..r3 in the
+			 * thread_info area, so we revert all of that, then
+			 * continue as if this were a normal page fault.
+			 */
+			regs->sp &= ~1UL;
+			regs->regs[0] = ti->unalign_jit_tmp[0];
+			regs->regs[1] = ti->unalign_jit_tmp[1];
+			regs->regs[2] = ti->unalign_jit_tmp[2];
+			regs->regs[3] = ti->unalign_jit_tmp[3];
+			write &= 1;
+		} else {
+			pr_alert("%s/%d: ICS set at page fault at %#lx: %#lx\n",
+				 current->comm, current->pid, pc, address);
+			show_regs(regs);
+			do_group_exit(SIGKILL);
+			return;
+		}
+	}
+#else
 	/* This case should have been handled by do_page_fault_ics(). */
 	BUG_ON(write & ~1);
+#endif
 
 #if CHIP_HAS_TILE_DMA()
 	/*
@@ -722,10 +786,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 	case INT_DMATLB_MISS:
 	case INT_DMATLB_MISS_DWNCL:
 #endif
-#if CHIP_HAS_SN_PROC()
-	case INT_SNITLB_MISS:
-	case INT_SNITLB_MISS_DWNCL:
-#endif
 		is_page_fault = 1;
 		break;
 
@@ -741,7 +801,8 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 		panic("Bad fault number %d in do_page_fault", fault_num);
 	}
 
-	if (EX1_PL(regs->ex1) != USER_PL) {
+#if CHIP_HAS_TILE_DMA()
+	if (!user_mode(regs)) {
 		struct async_tlb *async;
 		switch (fault_num) {
 #if CHIP_HAS_TILE_DMA()
@@ -752,12 +813,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 			async = &current->thread.dma_async_tlb;
 			break;
 #endif
-#if CHIP_HAS_SN_PROC()
-		case INT_SNITLB_MISS:
-		case INT_SNITLB_MISS_DWNCL:
-			async = &current->thread.sn_async_tlb;
-			break;
-#endif
 		default:
 			async = NULL;
 		}
@@ -784,19 +839,28 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 			return;
 		}
 	}
+#endif
 
 	handle_page_fault(regs, fault_num, is_page_fault, address, write);
 }
 
 
-#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+#if CHIP_HAS_TILE_DMA()
 /*
- * Check an async_tlb structure to see if a deferred fault is waiting,
- * and if so pass it to the page-fault code.
+ * This routine effectively re-issues asynchronous page faults
+ * when we are returning to user space.
  */
-static void handle_async_page_fault(struct pt_regs *regs,
-				    struct async_tlb *async)
+void do_async_page_fault(struct pt_regs *regs)
 {
+	struct async_tlb *async = &current->thread.dma_async_tlb;
+
+	/*
+	 * Clear thread flag early.  If we re-interrupt while processing
+	 * code here, we will reset it and recall this routine before
+	 * returning to user space.
+	 */
+	clear_thread_flag(TIF_ASYNC_TLB);
+
 	if (async->fault_num) {
 		/*
 		 * Clear async->fault_num before calling the page-fault
@@ -810,35 +874,15 @@ static void handle_async_page_fault(struct pt_regs *regs,
 				  async->address, async->is_write);
 	}
 }
-#endif /* CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() */
-
-
-/*
- * This routine effectively re-issues asynchronous page faults
- * when we are returning to user space.
- */
-void do_async_page_fault(struct pt_regs *regs)
-{
-	/*
-	 * Clear thread flag early.  If we re-interrupt while processing
-	 * code here, we will reset it and recall this routine before
-	 * returning to user space.
-	 */
-	clear_thread_flag(TIF_ASYNC_TLB);
+#endif /* CHIP_HAS_TILE_DMA() */
 
-#if CHIP_HAS_TILE_DMA()
-	handle_async_page_fault(regs, &current->thread.dma_async_tlb);
-#endif
-#if CHIP_HAS_SN_PROC()
-	handle_async_page_fault(regs, &current->thread.sn_async_tlb);
-#endif
-}
 
 void vmalloc_sync_all(void)
 {
 #ifdef __tilegx__
 	/* Currently all L1 kernel pmd's are static and shared. */
-	BUG_ON(pgd_index(VMALLOC_END) != pgd_index(VMALLOC_START));
+	BUILD_BUG_ON(pgd_index(VMALLOC_END - PAGE_SIZE) !=
+		     pgd_index(VMALLOC_START));
 #else
 	/*
 	 * Note that races in the updates of insync and start aren't