diff options
Diffstat (limited to 'arch/powerpc/mm/fault.c')
| -rw-r--r-- | arch/powerpc/mm/fault.c | 235 |
1 files changed, 170 insertions, 65 deletions
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 2f0d1b032a8..51ab9e7e6c3 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -32,16 +32,17 @@ #include <linux/perf_event.h> #include <linux/magic.h> #include <linux/ratelimit.h> +#include <linux/context_tracking.h> #include <asm/firmware.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/mmu.h> #include <asm/mmu_context.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/tlbflush.h> #include <asm/siginfo.h> +#include <asm/debug.h> #include <mm/mmu_decl.h> #include "icswx.h" @@ -105,6 +106,80 @@ static int store_updates_sp(struct pt_regs *regs) } return 0; } +/* + * do_page_fault error handling helpers + */ + +#define MM_FAULT_RETURN 0 +#define MM_FAULT_CONTINUE -1 +#define MM_FAULT_ERR(sig) (sig) + +static int do_sigbus(struct pt_regs *regs, unsigned long address) +{ + siginfo_t info; + + up_read(¤t->mm->mmap_sem); + + if (user_mode(regs)) { + current->thread.trap_nr = BUS_ADRERR; + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)address; + force_sig_info(SIGBUS, &info, current); + return MM_FAULT_RETURN; + } + return MM_FAULT_ERR(SIGBUS); +} + +static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) +{ + /* + * Pagefault was interrupted by SIGKILL. We have no reason to + * continue the pagefault. + */ + if (fatal_signal_pending(current)) { + /* + * If we have retry set, the mmap semaphore will have + * alrady been released in __lock_page_or_retry(). Else + * we release it now. + */ + if (!(fault & VM_FAULT_RETRY)) + up_read(¤t->mm->mmap_sem); + /* Coming from kernel, we need to deal with uaccess fixups */ + if (user_mode(regs)) + return MM_FAULT_RETURN; + return MM_FAULT_ERR(SIGKILL); + } + + /* No fault: be happy */ + if (!(fault & VM_FAULT_ERROR)) + return MM_FAULT_CONTINUE; + + /* Out of memory */ + if (fault & VM_FAULT_OOM) { + up_read(¤t->mm->mmap_sem); + + /* + * We ran out of memory, or some other thing happened to us that + * made us unable to handle the page fault gracefully. + */ + if (!user_mode(regs)) + return MM_FAULT_ERR(SIGKILL); + pagefault_out_of_memory(); + return MM_FAULT_RETURN; + } + + /* Bus error. x86 handles HWPOISON here, we'll add this if/when + * we support the feature in HW + */ + if (fault & VM_FAULT_SIGBUS) + return do_sigbus(regs, addr); + + /* We don't understand the fault code, this is fatal */ + BUG(); + return MM_FAULT_CONTINUE; +} /* * For 600- and 800-family processors, the error_code parameter is DSISR @@ -122,13 +197,16 @@ static int store_updates_sp(struct pt_regs *regs) int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { + enum ctx_state prev_state = exception_enter(); struct vm_area_struct * vma; struct mm_struct *mm = current->mm; - siginfo_t info; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; int code = SEGV_MAPERR; - int is_write = 0, ret; + int is_write = 0; int trap = TRAP(regs); int is_exec = trap == 0x400; + int fault; + int rc = 0, store_update_sp = 0; #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) /* @@ -152,36 +230,42 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, * look at it */ if (error_code & ICSWX_DSI_UCT) { - int ret; - - ret = acop_handle_fault(regs, address, error_code); - if (ret) - return ret; + rc = acop_handle_fault(regs, address, error_code); + if (rc) + goto bail; } -#endif +#endif /* CONFIG_PPC_ICSWX */ if (notify_page_fault(regs)) - return 0; + goto bail; if (unlikely(debugger_fault_handler(regs))) - return 0; + goto bail; /* On a kernel SLB miss we can only check for a valid exception entry */ - if (!user_mode(regs) && (address >= TASK_SIZE)) - return SIGSEGV; + if (!user_mode(regs) && (address >= TASK_SIZE)) { + rc = SIGSEGV; + goto bail; + } #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \ defined(CONFIG_PPC_BOOK3S_64)) if (error_code & DSISR_DABRMATCH) { - /* DABR match */ - do_dabr(regs, address, error_code); - return 0; + /* breakpoint match */ + do_break(regs, address, error_code); + goto bail; } #endif + /* We restore the interrupt state now */ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); + if (in_atomic() || mm == NULL) { - if (!user_mode(regs)) - return SIGSEGV; + if (!user_mode(regs)) { + rc = SIGSEGV; + goto bail; + } /* in_atomic() in user mode is really bad, as is current->mm == NULL. */ printk(KERN_EMERG "Page fault in user mode with " @@ -193,6 +277,17 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + /* + * We want to do this outside mmap_sem, because reading code around nip + * can result in fault, which will cause a deadlock when called with + * mmap_sem held + */ + if (user_mode(regs)) + store_update_sp = store_updates_sp(regs); + + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the * kernel and should generate an OOPS. Unfortunately, in the case of an @@ -212,7 +307,15 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, if (!user_mode(regs) && !search_exception_tables(regs->nip)) goto bad_area_nosemaphore; +retry: down_read(&mm->mmap_sem); + } else { + /* + * The above down_read_trylock() might have succeeded in + * which case we'll have missed the might_sleep() from + * down_read(): + */ + might_sleep(); } vma = find_vma(mm, address); @@ -250,8 +353,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, * between the last mapped region and the stack will * expand the stack rather than segfaulting. */ - if (address + 2048 < uregs->gpr[1] - && (!user_mode(regs) || !store_updates_sp(regs))) + if (address + 2048 < uregs->gpr[1] && !store_update_sp) goto bad_area; } if (expand_stack(vma, address)) @@ -313,6 +415,7 @@ good_area: } else if (is_write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; /* a read */ } else { /* protection fault */ @@ -327,32 +430,52 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - ret = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0); - if (unlikely(ret & VM_FAULT_ERROR)) { - if (ret & VM_FAULT_OOM) - goto out_of_memory; - else if (ret & VM_FAULT_SIGBUS) - goto do_sigbus; - BUG(); + fault = handle_mm_fault(mm, vma, address, flags); + if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { + rc = mm_fault_error(regs, address, fault); + if (rc >= MM_FAULT_RETURN) + goto bail; + else + rc = 0; } - if (ret & VM_FAULT_MAJOR) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); + + /* + * Major/minor page fault accounting is only done on the + * initial attempt. If we go through a retry, it is extremely + * likely that the page will be found in page cache at that point. + */ + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + current->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + regs, address); #ifdef CONFIG_PPC_SMLPAR - if (firmware_has_feature(FW_FEATURE_CMO)) { - preempt_disable(); - get_lppaca()->page_ins += (1 << PAGE_FACTOR); - preempt_enable(); + if (firmware_has_feature(FW_FEATURE_CMO)) { + u32 page_ins; + + preempt_disable(); + page_ins = be32_to_cpu(get_lppaca()->page_ins); + page_ins += 1 << PAGE_FACTOR; + get_lppaca()->page_ins = cpu_to_be32(page_ins); + preempt_enable(); + } +#endif /* CONFIG_PPC_SMLPAR */ + } else { + current->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + regs, address); + } + if (fault & VM_FAULT_RETRY) { + /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk + * of starvation. */ + flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; + goto retry; } -#endif - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); } + up_read(&mm->mmap_sem); - return 0; + goto bail; bad_area: up_read(&mm->mmap_sem); @@ -361,38 +484,20 @@ bad_area_nosemaphore: /* User mode accesses cause a SIGSEGV */ if (user_mode(regs)) { _exception(SIGSEGV, regs, code, address); - return 0; + goto bail; } if (is_exec && (error_code & DSISR_PROTFAULT)) printk_ratelimited(KERN_CRIT "kernel tried to execute NX-protected" " page (%lx) - exploit attempt? (uid: %d)\n", - address, current_uid()); + address, from_kuid(&init_user_ns, current_uid())); - return SIGSEGV; + rc = SIGSEGV; -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ -out_of_memory: - up_read(&mm->mmap_sem); - if (!user_mode(regs)) - return SIGKILL; - pagefault_out_of_memory(); - return 0; +bail: + exception_exit(prev_state); + return rc; -do_sigbus: - up_read(&mm->mmap_sem); - if (user_mode(regs)) { - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, current); - return 0; - } - return SIGBUS; } /* |
