diff options
Diffstat (limited to 'arch/powerpc/mm/fault.c')
| -rw-r--r-- | arch/powerpc/mm/fault.c | 250 | 
1 files changed, 186 insertions, 64 deletions
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 54f4fb994e9..51ab9e7e6c3 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -31,18 +31,22 @@  #include <linux/kdebug.h>  #include <linux/perf_event.h>  #include <linux/magic.h> +#include <linux/ratelimit.h> +#include <linux/context_tracking.h>  #include <asm/firmware.h>  #include <asm/page.h>  #include <asm/pgtable.h>  #include <asm/mmu.h>  #include <asm/mmu_context.h> -#include <asm/system.h>  #include <asm/uaccess.h>  #include <asm/tlbflush.h>  #include <asm/siginfo.h> +#include <asm/debug.h>  #include <mm/mmu_decl.h> +#include "icswx.h" +  #ifdef CONFIG_KPROBES  static inline int notify_page_fault(struct pt_regs *regs)  { @@ -102,6 +106,80 @@ static int store_updates_sp(struct pt_regs *regs)  	}  	return 0;  } +/* + * do_page_fault error handling helpers + */ + +#define MM_FAULT_RETURN		0 +#define MM_FAULT_CONTINUE	-1 +#define MM_FAULT_ERR(sig)	(sig) + +static int do_sigbus(struct pt_regs *regs, unsigned long address) +{ +	siginfo_t info; + +	up_read(&current->mm->mmap_sem); + +	if (user_mode(regs)) { +		current->thread.trap_nr = BUS_ADRERR; +		info.si_signo = SIGBUS; +		info.si_errno = 0; +		info.si_code = BUS_ADRERR; +		info.si_addr = (void __user *)address; +		force_sig_info(SIGBUS, &info, current); +		return MM_FAULT_RETURN; +	} +	return MM_FAULT_ERR(SIGBUS); +} + +static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) +{ +	/* +	 * Pagefault was interrupted by SIGKILL. We have no reason to +	 * continue the pagefault. +	 */ +	if (fatal_signal_pending(current)) { +		/* +		 * If we have retry set, the mmap semaphore will have +		 * already been released in __lock_page_or_retry(). Else +		 * we release it now. 
+		 */ +		if (!(fault & VM_FAULT_RETRY)) +			up_read(&current->mm->mmap_sem); +		/* Coming from kernel, we need to deal with uaccess fixups */ +		if (user_mode(regs)) +			return MM_FAULT_RETURN; +		return MM_FAULT_ERR(SIGKILL); +	} + +	/* No fault: be happy */ +	if (!(fault & VM_FAULT_ERROR)) +		return MM_FAULT_CONTINUE; + +	/* Out of memory */ +	if (fault & VM_FAULT_OOM) { +		up_read(&current->mm->mmap_sem); + +		/* +		 * We ran out of memory, or some other thing happened to us that +		 * made us unable to handle the page fault gracefully. +		 */ +		if (!user_mode(regs)) +			return MM_FAULT_ERR(SIGKILL); +		pagefault_out_of_memory(); +		return MM_FAULT_RETURN; +	} + +	/* Bus error. x86 handles HWPOISON here, we'll add this if/when +	 * we support the feature in HW +	 */ +	if (fault & VM_FAULT_SIGBUS) +		return do_sigbus(regs, addr); + +	/* We don't understand the fault code, this is fatal */ +	BUG(); +	return MM_FAULT_CONTINUE; +}  /*   * For 600- and 800-family processors, the error_code parameter is DSISR @@ -119,13 +197,16 @@ static int store_updates_sp(struct pt_regs *regs)  int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,  			    unsigned long error_code)  { +	enum ctx_state prev_state = exception_enter();  	struct vm_area_struct * vma;  	struct mm_struct *mm = current->mm; -	siginfo_t info; +	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;  	int code = SEGV_MAPERR; -	int is_write = 0, ret; +	int is_write = 0;  	int trap = TRAP(regs);   	int is_exec = trap == 0x400; +	int fault; +	int rc = 0, store_update_sp = 0;  #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))  	/* @@ -142,28 +223,49 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,  	is_write = error_code & ESR_DST;  #endif /* CONFIG_4xx || CONFIG_BOOKE */ +#ifdef CONFIG_PPC_ICSWX +	/* +	 * we need to do this early because this "data storage +	 * interrupt" does not update the DAR/DEAR so we don't want to +	 * look at it +	 */ +	if (error_code & 
ICSWX_DSI_UCT) { +		rc = acop_handle_fault(regs, address, error_code); +		if (rc) +			goto bail; +	} +#endif /* CONFIG_PPC_ICSWX */ +  	if (notify_page_fault(regs)) -		return 0; +		goto bail;  	if (unlikely(debugger_fault_handler(regs))) -		return 0; +		goto bail;  	/* On a kernel SLB miss we can only check for a valid exception entry */ -	if (!user_mode(regs) && (address >= TASK_SIZE)) -		return SIGSEGV; +	if (!user_mode(regs) && (address >= TASK_SIZE)) { +		rc = SIGSEGV; +		goto bail; +	}  #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \  			     defined(CONFIG_PPC_BOOK3S_64))    	if (error_code & DSISR_DABRMATCH) { -		/* DABR match */ -		do_dabr(regs, address, error_code); -		return 0; +		/* breakpoint match */ +		do_break(regs, address, error_code); +		goto bail;  	}  #endif +	/* We restore the interrupt state now */ +	if (!arch_irq_disabled_regs(regs)) +		local_irq_enable(); +  	if (in_atomic() || mm == NULL) { -		if (!user_mode(regs)) -			return SIGSEGV; +		if (!user_mode(regs)) { +			rc = SIGSEGV; +			goto bail; +		}  		/* in_atomic() in user mode is really bad,  		   as is current->mm == NULL. */  		printk(KERN_EMERG "Page fault in user mode with " @@ -173,7 +275,18 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,  		die("Weird page fault", regs, SIGSEGV);  	} -	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); +	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + +	/* +	 * We want to do this outside mmap_sem, because reading code around nip +	 * can result in fault, which will cause a deadlock when called with +	 * mmap_sem held +	 */ +	if (user_mode(regs)) +		store_update_sp = store_updates_sp(regs); + +	if (user_mode(regs)) +		flags |= FAULT_FLAG_USER;  	/* When running in the kernel we expect faults to occur only to  	 * addresses in user space.  
All other faults represent errors in the @@ -194,7 +307,15 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,  		if (!user_mode(regs) && !search_exception_tables(regs->nip))  			goto bad_area_nosemaphore; +retry:  		down_read(&mm->mmap_sem); +	} else { +		/* +		 * The above down_read_trylock() might have succeeded in +		 * which case we'll have missed the might_sleep() from +		 * down_read(): +		 */ +		might_sleep();  	}  	vma = find_vma(mm, address); @@ -232,8 +353,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,  		 * between the last mapped region and the stack will  		 * expand the stack rather than segfaulting.  		 */ -		if (address + 2048 < uregs->gpr[1] -		    && (!user_mode(regs) || !store_updates_sp(regs))) +		if (address + 2048 < uregs->gpr[1] && !store_update_sp)  			goto bad_area;  	}  	if (expand_stack(vma, address)) @@ -295,6 +415,7 @@ good_area:  	} else if (is_write) {  		if (!(vma->vm_flags & VM_WRITE))  			goto bad_area; +		flags |= FAULT_FLAG_WRITE;  	/* a read */  	} else {  		/* protection fault */ @@ -309,32 +430,52 @@ good_area:  	 * make sure we exit gracefully rather than endlessly redo  	 * the fault.  	 */ -	ret = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0); -	if (unlikely(ret & VM_FAULT_ERROR)) { -		if (ret & VM_FAULT_OOM) -			goto out_of_memory; -		else if (ret & VM_FAULT_SIGBUS) -			goto do_sigbus; -		BUG(); +	fault = handle_mm_fault(mm, vma, address, flags); +	if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { +		rc = mm_fault_error(regs, address, fault); +		if (rc >= MM_FAULT_RETURN) +			goto bail; +		else +			rc = 0;  	} -	if (ret & VM_FAULT_MAJOR) { -		current->maj_flt++; -		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, -				     regs, address); + +	/* +	 * Major/minor page fault accounting is only done on the +	 * initial attempt. If we go through a retry, it is extremely +	 * likely that the page will be found in page cache at that point. 
+	 */ +	if (flags & FAULT_FLAG_ALLOW_RETRY) { +		if (fault & VM_FAULT_MAJOR) { +			current->maj_flt++; +			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, +				      regs, address);  #ifdef CONFIG_PPC_SMLPAR -		if (firmware_has_feature(FW_FEATURE_CMO)) { -			preempt_disable(); -			get_lppaca()->page_ins += (1 << PAGE_FACTOR); -			preempt_enable(); +			if (firmware_has_feature(FW_FEATURE_CMO)) { +				u32 page_ins; + +				preempt_disable(); +				page_ins = be32_to_cpu(get_lppaca()->page_ins); +				page_ins += 1 << PAGE_FACTOR; +				get_lppaca()->page_ins = cpu_to_be32(page_ins); +				preempt_enable(); +			} +#endif /* CONFIG_PPC_SMLPAR */ +		} else { +			current->min_flt++; +			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, +				      regs, address); +		} +		if (fault & VM_FAULT_RETRY) { +			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk +			 * of starvation. */ +			flags &= ~FAULT_FLAG_ALLOW_RETRY; +			flags |= FAULT_FLAG_TRIED; +			goto retry;  		} -#endif -	} else { -		current->min_flt++; -		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, -				     regs, address);  	} +  	up_read(&mm->mmap_sem); -	return 0; +	goto bail;  bad_area:  	up_read(&mm->mmap_sem); @@ -343,39 +484,20 @@ bad_area_nosemaphore:  	/* User mode accesses cause a SIGSEGV */  	if (user_mode(regs)) {  		_exception(SIGSEGV, regs, code, address); -		return 0; +		goto bail;  	} -	if (is_exec && (error_code & DSISR_PROTFAULT) -	    && printk_ratelimit()) -		printk(KERN_CRIT "kernel tried to execute NX-protected" -		       " page (%lx) - exploit attempt? (uid: %d)\n", -		       address, current_uid()); +	if (is_exec && (error_code & DSISR_PROTFAULT)) +		printk_ratelimited(KERN_CRIT "kernel tried to execute NX-protected" +				   " page (%lx) - exploit attempt? (uid: %d)\n", +				   address, from_kuid(&init_user_ns, current_uid())); -	return SIGSEGV; +	rc = SIGSEGV; -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. 
- */ -out_of_memory: -	up_read(&mm->mmap_sem); -	if (!user_mode(regs)) -		return SIGKILL; -	pagefault_out_of_memory(); -	return 0; +bail: +	exception_exit(prev_state); +	return rc; -do_sigbus: -	up_read(&mm->mmap_sem); -	if (user_mode(regs)) { -		info.si_signo = SIGBUS; -		info.si_errno = 0; -		info.si_code = BUS_ADRERR; -		info.si_addr = (void __user *)address; -		force_sig_info(SIGBUS, &info, current); -		return 0; -	} -	return SIGBUS;  }  /*  | 
