aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/mm/fault.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm/fault.c')
-rw-r--r--arch/powerpc/mm/fault.c250
1 files changed, 186 insertions, 64 deletions
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 54f4fb994e9..51ab9e7e6c3 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -31,18 +31,22 @@
#include <linux/kdebug.h>
#include <linux/perf_event.h>
#include <linux/magic.h>
+#include <linux/ratelimit.h>
+#include <linux/context_tracking.h>
#include <asm/firmware.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/siginfo.h>
+#include <asm/debug.h>
#include <mm/mmu_decl.h>
+#include "icswx.h"
+
#ifdef CONFIG_KPROBES
static inline int notify_page_fault(struct pt_regs *regs)
{
@@ -102,6 +106,80 @@ static int store_updates_sp(struct pt_regs *regs)
}
return 0;
}
+/*
+ * do_page_fault error handling helpers
+ */
+
+#define MM_FAULT_RETURN 0
+#define MM_FAULT_CONTINUE -1
+#define MM_FAULT_ERR(sig) (sig)
+
+static int do_sigbus(struct pt_regs *regs, unsigned long address)
+{
+ siginfo_t info;
+
+ up_read(&current->mm->mmap_sem);
+
+ if (user_mode(regs)) {
+ current->thread.trap_nr = BUS_ADRERR;
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = BUS_ADRERR;
+ info.si_addr = (void __user *)address;
+ force_sig_info(SIGBUS, &info, current);
+ return MM_FAULT_RETURN;
+ }
+ return MM_FAULT_ERR(SIGBUS);
+}
+
+static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
+{
+ /*
+ * Pagefault was interrupted by SIGKILL. We have no reason to
+ * continue the pagefault.
+ */
+ if (fatal_signal_pending(current)) {
+ /*
+ * If we have retry set, the mmap semaphore will have
+ * alrady been released in __lock_page_or_retry(). Else
+ * we release it now.
+ */
+ if (!(fault & VM_FAULT_RETRY))
+ up_read(&current->mm->mmap_sem);
+ /* Coming from kernel, we need to deal with uaccess fixups */
+ if (user_mode(regs))
+ return MM_FAULT_RETURN;
+ return MM_FAULT_ERR(SIGKILL);
+ }
+
+ /* No fault: be happy */
+ if (!(fault & VM_FAULT_ERROR))
+ return MM_FAULT_CONTINUE;
+
+ /* Out of memory */
+ if (fault & VM_FAULT_OOM) {
+ up_read(&current->mm->mmap_sem);
+
+ /*
+ * We ran out of memory, or some other thing happened to us that
+ * made us unable to handle the page fault gracefully.
+ */
+ if (!user_mode(regs))
+ return MM_FAULT_ERR(SIGKILL);
+ pagefault_out_of_memory();
+ return MM_FAULT_RETURN;
+ }
+
+ /* Bus error. x86 handles HWPOISON here, we'll add this if/when
+ * we support the feature in HW
+ */
+ if (fault & VM_FAULT_SIGBUS)
+ return do_sigbus(regs, addr);
+
+ /* We don't understand the fault code, this is fatal */
+ BUG();
+ return MM_FAULT_CONTINUE;
+}
/*
* For 600- and 800-family processors, the error_code parameter is DSISR
@@ -119,13 +197,16 @@ static int store_updates_sp(struct pt_regs *regs)
int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
unsigned long error_code)
{
+ enum ctx_state prev_state = exception_enter();
struct vm_area_struct * vma;
struct mm_struct *mm = current->mm;
- siginfo_t info;
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
int code = SEGV_MAPERR;
- int is_write = 0, ret;
+ int is_write = 0;
int trap = TRAP(regs);
int is_exec = trap == 0x400;
+ int fault;
+ int rc = 0, store_update_sp = 0;
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
/*
@@ -142,28 +223,49 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
is_write = error_code & ESR_DST;
#endif /* CONFIG_4xx || CONFIG_BOOKE */
+#ifdef CONFIG_PPC_ICSWX
+ /*
+ * we need to do this early because this "data storage
+ * interrupt" does not update the DAR/DEAR so we don't want to
+ * look at it
+ */
+ if (error_code & ICSWX_DSI_UCT) {
+ rc = acop_handle_fault(regs, address, error_code);
+ if (rc)
+ goto bail;
+ }
+#endif /* CONFIG_PPC_ICSWX */
+
if (notify_page_fault(regs))
- return 0;
+ goto bail;
if (unlikely(debugger_fault_handler(regs)))
- return 0;
+ goto bail;
/* On a kernel SLB miss we can only check for a valid exception entry */
- if (!user_mode(regs) && (address >= TASK_SIZE))
- return SIGSEGV;
+ if (!user_mode(regs) && (address >= TASK_SIZE)) {
+ rc = SIGSEGV;
+ goto bail;
+ }
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \
defined(CONFIG_PPC_BOOK3S_64))
if (error_code & DSISR_DABRMATCH) {
- /* DABR match */
- do_dabr(regs, address, error_code);
- return 0;
+ /* breakpoint match */
+ do_break(regs, address, error_code);
+ goto bail;
}
#endif
+ /* We restore the interrupt state now */
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
+
if (in_atomic() || mm == NULL) {
- if (!user_mode(regs))
- return SIGSEGV;
+ if (!user_mode(regs)) {
+ rc = SIGSEGV;
+ goto bail;
+ }
/* in_atomic() in user mode is really bad,
as is current->mm == NULL. */
printk(KERN_EMERG "Page fault in user mode with "
@@ -173,7 +275,18 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
die("Weird page fault", regs, SIGSEGV);
}
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
+ /*
+ * We want to do this outside mmap_sem, because reading code around nip
+ * can result in fault, which will cause a deadlock when called with
+ * mmap_sem held
+ */
+ if (user_mode(regs))
+ store_update_sp = store_updates_sp(regs);
+
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
@@ -194,7 +307,15 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
if (!user_mode(regs) && !search_exception_tables(regs->nip))
goto bad_area_nosemaphore;
+retry:
down_read(&mm->mmap_sem);
+ } else {
+ /*
+ * The above down_read_trylock() might have succeeded in
+ * which case we'll have missed the might_sleep() from
+ * down_read():
+ */
+ might_sleep();
}
vma = find_vma(mm, address);
@@ -232,8 +353,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
* between the last mapped region and the stack will
* expand the stack rather than segfaulting.
*/
- if (address + 2048 < uregs->gpr[1]
- && (!user_mode(regs) || !store_updates_sp(regs)))
+ if (address + 2048 < uregs->gpr[1] && !store_update_sp)
goto bad_area;
}
if (expand_stack(vma, address))
@@ -295,6 +415,7 @@ good_area:
} else if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
goto bad_area;
+ flags |= FAULT_FLAG_WRITE;
/* a read */
} else {
/* protection fault */
@@ -309,32 +430,52 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- ret = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0);
- if (unlikely(ret & VM_FAULT_ERROR)) {
- if (ret & VM_FAULT_OOM)
- goto out_of_memory;
- else if (ret & VM_FAULT_SIGBUS)
- goto do_sigbus;
- BUG();
+ fault = handle_mm_fault(mm, vma, address, flags);
+ if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
+ rc = mm_fault_error(regs, address, fault);
+ if (rc >= MM_FAULT_RETURN)
+ goto bail;
+ else
+ rc = 0;
}
- if (ret & VM_FAULT_MAJOR) {
- current->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
- regs, address);
+
+ /*
+ * Major/minor page fault accounting is only done on the
+ * initial attempt. If we go through a retry, it is extremely
+ * likely that the page will be found in page cache at that point.
+ */
+ if (flags & FAULT_FLAG_ALLOW_RETRY) {
+ if (fault & VM_FAULT_MAJOR) {
+ current->maj_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
+ regs, address);
#ifdef CONFIG_PPC_SMLPAR
- if (firmware_has_feature(FW_FEATURE_CMO)) {
- preempt_disable();
- get_lppaca()->page_ins += (1 << PAGE_FACTOR);
- preempt_enable();
+ if (firmware_has_feature(FW_FEATURE_CMO)) {
+ u32 page_ins;
+
+ preempt_disable();
+ page_ins = be32_to_cpu(get_lppaca()->page_ins);
+ page_ins += 1 << PAGE_FACTOR;
+ get_lppaca()->page_ins = cpu_to_be32(page_ins);
+ preempt_enable();
+ }
+#endif /* CONFIG_PPC_SMLPAR */
+ } else {
+ current->min_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
+ regs, address);
+ }
+ if (fault & VM_FAULT_RETRY) {
+ /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
+ * of starvation. */
+ flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ flags |= FAULT_FLAG_TRIED;
+ goto retry;
}
-#endif
- } else {
- current->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
- regs, address);
}
+
up_read(&mm->mmap_sem);
- return 0;
+ goto bail;
bad_area:
up_read(&mm->mmap_sem);
@@ -343,39 +484,20 @@ bad_area_nosemaphore:
/* User mode accesses cause a SIGSEGV */
if (user_mode(regs)) {
_exception(SIGSEGV, regs, code, address);
- return 0;
+ goto bail;
}
- if (is_exec && (error_code & DSISR_PROTFAULT)
- && printk_ratelimit())
- printk(KERN_CRIT "kernel tried to execute NX-protected"
- " page (%lx) - exploit attempt? (uid: %d)\n",
- address, current_uid());
+ if (is_exec && (error_code & DSISR_PROTFAULT))
+ printk_ratelimited(KERN_CRIT "kernel tried to execute NX-protected"
+ " page (%lx) - exploit attempt? (uid: %d)\n",
+ address, from_kuid(&init_user_ns, current_uid()));
- return SIGSEGV;
+ rc = SIGSEGV;
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
-out_of_memory:
- up_read(&mm->mmap_sem);
- if (!user_mode(regs))
- return SIGKILL;
- pagefault_out_of_memory();
- return 0;
+bail:
+ exception_exit(prev_state);
+ return rc;
-do_sigbus:
- up_read(&mm->mmap_sem);
- if (user_mode(regs)) {
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGBUS, &info, current);
- return 0;
- }
- return SIGBUS;
}
/*