aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/mm/fault.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm/fault.c')
-rw-r--r-- arch/powerpc/mm/fault.c 323
1 files changed, 216 insertions, 107 deletions
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 7b251079926..51ab9e7e6c3 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -29,16 +29,23 @@
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
+#include <linux/perf_event.h>
+#include <linux/magic.h>
+#include <linux/ratelimit.h>
+#include <linux/context_tracking.h>
+#include <asm/firmware.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/siginfo.h>
+#include <asm/debug.h>
+#include <mm/mmu_decl.h>
+#include "icswx.h"
#ifdef CONFIG_KPROBES
static inline int notify_page_fault(struct pt_regs *regs)
@@ -99,31 +106,80 @@ static int store_updates_sp(struct pt_regs *regs)
}
return 0;
}
+/*
+ * do_page_fault error handling helpers
+ */
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-static void do_dabr(struct pt_regs *regs, unsigned long address,
- unsigned long error_code)
+#define MM_FAULT_RETURN 0
+#define MM_FAULT_CONTINUE -1
+#define MM_FAULT_ERR(sig) (sig)
+
+static int do_sigbus(struct pt_regs *regs, unsigned long address)
{
siginfo_t info;
- if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
- 11, SIGSEGV) == NOTIFY_STOP)
- return;
+ up_read(&current->mm->mmap_sem);
- if (debugger_dabr_match(regs))
- return;
+ if (user_mode(regs)) {
+ current->thread.trap_nr = BUS_ADRERR;
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = BUS_ADRERR;
+ info.si_addr = (void __user *)address;
+ force_sig_info(SIGBUS, &info, current);
+ return MM_FAULT_RETURN;
+ }
+ return MM_FAULT_ERR(SIGBUS);
+}
+
+static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
+{
+ /*
+ * Pagefault was interrupted by SIGKILL. We have no reason to
+ * continue the pagefault.
+ */
+ if (fatal_signal_pending(current)) {
+ /*
+ * If we have retry set, the mmap semaphore will have
+ * already been released in __lock_page_or_retry(). Else
+ * we release it now.
+ */
+ if (!(fault & VM_FAULT_RETRY))
+ up_read(&current->mm->mmap_sem);
+ /* Coming from kernel, we need to deal with uaccess fixups */
+ if (user_mode(regs))
+ return MM_FAULT_RETURN;
+ return MM_FAULT_ERR(SIGKILL);
+ }
+
+ /* No fault: be happy */
+ if (!(fault & VM_FAULT_ERROR))
+ return MM_FAULT_CONTINUE;
+
+ /* Out of memory */
+ if (fault & VM_FAULT_OOM) {
+ up_read(&current->mm->mmap_sem);
- /* Clear the DABR */
- set_dabr(0);
+ /*
+ * We ran out of memory, or some other thing happened to us that
+ * made us unable to handle the page fault gracefully.
+ */
+ if (!user_mode(regs))
+ return MM_FAULT_ERR(SIGKILL);
+ pagefault_out_of_memory();
+ return MM_FAULT_RETURN;
+ }
+
+ /* Bus error. x86 handles HWPOISON here, we'll add this if/when
+ * we support the feature in HW
+ */
+ if (fault & VM_FAULT_SIGBUS)
+ return do_sigbus(regs, addr);
- /* Deliver the signal to userspace */
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_HWBKPT;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGTRAP, &info, current);
+ /* We don't understand the fault code, this is fatal */
+ BUG();
+ return MM_FAULT_CONTINUE;
}
-#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
/*
* For 600- and 800-family processors, the error_code parameter is DSISR
@@ -141,13 +197,16 @@ static void do_dabr(struct pt_regs *regs, unsigned long address,
int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
unsigned long error_code)
{
+ enum ctx_state prev_state = exception_enter();
struct vm_area_struct * vma;
struct mm_struct *mm = current->mm;
- siginfo_t info;
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
int code = SEGV_MAPERR;
- int is_write = 0, ret;
+ int is_write = 0;
int trap = TRAP(regs);
int is_exec = trap == 0x400;
+ int fault;
+ int rc = 0, store_update_sp = 0;
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
/*
@@ -164,27 +223,49 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
is_write = error_code & ESR_DST;
#endif /* CONFIG_4xx || CONFIG_BOOKE */
+#ifdef CONFIG_PPC_ICSWX
+ /*
+ * we need to do this early because this "data storage
+ * interrupt" does not update the DAR/DEAR so we don't want to
+ * look at it
+ */
+ if (error_code & ICSWX_DSI_UCT) {
+ rc = acop_handle_fault(regs, address, error_code);
+ if (rc)
+ goto bail;
+ }
+#endif /* CONFIG_PPC_ICSWX */
+
if (notify_page_fault(regs))
- return 0;
+ goto bail;
if (unlikely(debugger_fault_handler(regs)))
- return 0;
+ goto bail;
/* On a kernel SLB miss we can only check for a valid exception entry */
- if (!user_mode(regs) && (address >= TASK_SIZE))
- return SIGSEGV;
+ if (!user_mode(regs) && (address >= TASK_SIZE)) {
+ rc = SIGSEGV;
+ goto bail;
+ }
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \
+ defined(CONFIG_PPC_BOOK3S_64))
if (error_code & DSISR_DABRMATCH) {
- /* DABR match */
- do_dabr(regs, address, error_code);
- return 0;
+ /* breakpoint match */
+ do_break(regs, address, error_code);
+ goto bail;
}
-#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
+#endif
+
+ /* We restore the interrupt state now */
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
if (in_atomic() || mm == NULL) {
- if (!user_mode(regs))
- return SIGSEGV;
+ if (!user_mode(regs)) {
+ rc = SIGSEGV;
+ goto bail;
+ }
/* in_atomic() in user mode is really bad,
as is current->mm == NULL. */
printk(KERN_EMERG "Page fault in user mode with "
@@ -194,6 +275,19 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
die("Weird page fault", regs, SIGSEGV);
}
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
+ /*
+ * We want to do this outside mmap_sem, because reading code around nip
+ * can result in fault, which will cause a deadlock when called with
+ * mmap_sem held
+ */
+ if (user_mode(regs))
+ store_update_sp = store_updates_sp(regs);
+
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
@@ -213,7 +307,15 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
if (!user_mode(regs) && !search_exception_tables(regs->nip))
goto bad_area_nosemaphore;
+retry:
down_read(&mm->mmap_sem);
+ } else {
+ /*
+ * The above down_read_trylock() might have succeeded in
+ * which case we'll have missed the might_sleep() from
+ * down_read():
+ */
+ might_sleep();
}
vma = find_vma(mm, address);
@@ -251,8 +353,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
* between the last mapped region and the stack will
* expand the stack rather than segfaulting.
*/
- if (address + 2048 < uregs->gpr[1]
- && (!user_mode(regs) || !store_updates_sp(regs)))
+ if (address + 2048 < uregs->gpr[1] && !store_update_sp)
goto bad_area;
}
if (expand_stack(vma, address))
@@ -267,6 +368,12 @@ good_area:
goto bad_area;
#endif /* CONFIG_6xx */
#if defined(CONFIG_8xx)
+ /* 8xx sometimes needs to load invalid/non-present TLB entries.
+ * These must be invalidated separately, as the Linux mm does not do so.
+ */
+ if (error_code & 0x40000000) /* no translation? */
+ _tlbil_va(address, 0, 0, 0);
+
/* The MPC8xx seems to always set 0x80000000, which is
* "undefined". Of those that can be set, this is the only
* one which seems bad.
@@ -277,48 +384,38 @@ good_area:
#endif /* CONFIG_8xx */
if (is_exec) {
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
- /* protection fault */
+#ifdef CONFIG_PPC_STD_MMU
+ /* Protection faults on exec go straight to failure on
+ * Hash based MMUs as they either don't support per-page
+ * execute permission, or if they do, it's handled already
+ * at the hash level. This test would probably have to
+ * be removed if we change the way this works to make hash
+ * processors use the same I/D cache coherency mechanism
+ * as embedded.
+ */
if (error_code & DSISR_PROTFAULT)
goto bad_area;
+#endif /* CONFIG_PPC_STD_MMU */
+
/*
* Allow execution from readable areas if the MMU does not
* provide separate controls over reading and executing.
+ *
+ * Note: That code used to not be enabled for 4xx/BookE.
+ * It is now, as I/D cache coherency for these is done at
+ * set_pte_at() time and I see no reason why the test
+ * below wouldn't be valid on those processors. This -may-
+ * break programs compiled with a really old ABI though.
*/
if (!(vma->vm_flags & VM_EXEC) &&
(cpu_has_feature(CPU_FTR_NOEXECUTE) ||
!(vma->vm_flags & (VM_READ | VM_WRITE))))
goto bad_area;
-#else
- pte_t *ptep;
- pmd_t *pmdp;
-
- /* Since 4xx/Book-E supports per-page execute permission,
- * we lazily flush dcache to icache. */
- ptep = NULL;
- if (get_pteptr(mm, address, &ptep, &pmdp)) {
- spinlock_t *ptl = pte_lockptr(mm, pmdp);
- spin_lock(ptl);
- if (pte_present(*ptep)) {
- struct page *page = pte_page(*ptep);
-
- if (!test_bit(PG_arch_1, &page->flags)) {
- flush_dcache_icache_page(page);
- set_bit(PG_arch_1, &page->flags);
- }
- pte_update(ptep, 0, _PAGE_HWEXEC);
- _tlbie(address, mm->context.id);
- pte_unmap_unlock(ptep, ptl);
- up_read(&mm->mmap_sem);
- return 0;
- }
- pte_unmap_unlock(ptep, ptl);
- }
-#endif
/* a write */
} else if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
+ flags |= FAULT_FLAG_WRITE;
/* a read */
} else {
/* protection fault */
@@ -333,21 +430,52 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- survive:
- ret = handle_mm_fault(mm, vma, address, is_write);
- if (unlikely(ret & VM_FAULT_ERROR)) {
- if (ret & VM_FAULT_OOM)
- goto out_of_memory;
- else if (ret & VM_FAULT_SIGBUS)
- goto do_sigbus;
- BUG();
+ fault = handle_mm_fault(mm, vma, address, flags);
+ if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
+ rc = mm_fault_error(regs, address, fault);
+ if (rc >= MM_FAULT_RETURN)
+ goto bail;
+ else
+ rc = 0;
}
- if (ret & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
+
+ /*
+ * Major/minor page fault accounting is only done on the
+ * initial attempt. If we go through a retry, it is extremely
+ * likely that the page will be found in page cache at that point.
+ */
+ if (flags & FAULT_FLAG_ALLOW_RETRY) {
+ if (fault & VM_FAULT_MAJOR) {
+ current->maj_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
+ regs, address);
+#ifdef CONFIG_PPC_SMLPAR
+ if (firmware_has_feature(FW_FEATURE_CMO)) {
+ u32 page_ins;
+
+ preempt_disable();
+ page_ins = be32_to_cpu(get_lppaca()->page_ins);
+ page_ins += 1 << PAGE_FACTOR;
+ get_lppaca()->page_ins = cpu_to_be32(page_ins);
+ preempt_enable();
+ }
+#endif /* CONFIG_PPC_SMLPAR */
+ } else {
+ current->min_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
+ regs, address);
+ }
+ if (fault & VM_FAULT_RETRY) {
+ /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
+ * of starvation. */
+ flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
+ goto retry;
+ }
+ }
+
up_read(&mm->mmap_sem);
- return 0;
+ goto bail;
bad_area:
up_read(&mm->mmap_sem);
@@ -356,44 +484,20 @@ bad_area_nosemaphore:
/* User mode accesses cause a SIGSEGV */
if (user_mode(regs)) {
_exception(SIGSEGV, regs, code, address);
- return 0;
+ goto bail;
}
- if (is_exec && (error_code & DSISR_PROTFAULT)
- && printk_ratelimit())
- printk(KERN_CRIT "kernel tried to execute NX-protected"
- " page (%lx) - exploit attempt? (uid: %d)\n",
- address, current->uid);
+ if (is_exec && (error_code & DSISR_PROTFAULT))
+ printk_ratelimited(KERN_CRIT "kernel tried to execute NX-protected"
+ " page (%lx) - exploit attempt? (uid: %d)\n",
+ address, from_kuid(&init_user_ns, current_uid()));
- return SIGSEGV;
+ rc = SIGSEGV;
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
-out_of_memory:
- up_read(&mm->mmap_sem);
- if (is_global_init(current)) {
- yield();
- down_read(&mm->mmap_sem);
- goto survive;
- }
- printk("VM: killing process %s\n", current->comm);
- if (user_mode(regs))
- do_group_exit(SIGKILL);
- return SIGKILL;
+bail:
+ exception_exit(prev_state);
+ return rc;
-do_sigbus:
- up_read(&mm->mmap_sem);
- if (user_mode(regs)) {
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGBUS, &info, current);
- return 0;
- }
- return SIGBUS;
}
/*
@@ -404,6 +508,7 @@ do_sigbus:
void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
{
const struct exception_table_entry *entry;
+ unsigned long *stackend;
/* Are we prepared to handle this fault? */
if ((entry = search_exception_tables(regs->nip)) != NULL) {
@@ -432,5 +537,9 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",
regs->nip);
+ stackend = end_of_stack(current);
+ if (current != &init_task && *stackend != STACK_END_MAGIC)
+ printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
+
die("Kernel access of bad area", regs, sig);
}