| author | Steve French <sfrench@us.ibm.com> | 2005-10-31 08:36:11 -0800 |
|---|---|---|
| committer | Steve French <sfrench@us.ibm.com> | 2005-10-31 08:36:11 -0800 |
| commit | 53b2ec5518aa2623e8c0cb36f1c304a797988a46 | (patch) |
| tree | 465d8631ade6c2fcbd7576ff9813d00116c6a1e8 | /arch/powerpc/mm/fault.c |
| parent | 0753ca7bc2b876dd136e9db11a20f85cbe4e08b1 | (diff) |
| parent | 581c1b14394aee60aff46ea67d05483261ed6527 | (diff) |
Merge with /pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Diffstat (limited to 'arch/powerpc/mm/fault.c')
| -rw-r--r-- | arch/powerpc/mm/fault.c | 393 |
|---|---|---|

1 file changed, 393 insertions, 0 deletions
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
new file mode 100644
index 00000000000..841d8b6323a
--- /dev/null
+++ b/arch/powerpc/mm/fault.c
@@ -0,0 +1,393 @@
+/*
+ *  arch/ppc/mm/fault.c
+ *
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Derived from "arch/i386/mm/fault.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Modified by Cort Dougan and Paul Mackerras.
+ *
+ *  Modified for PPC64 by Dave Engebretsen (engebret@ibm.com)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/kprobes.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/tlbflush.h>
+#include <asm/kdebug.h>
+#include <asm/siginfo.h>
+
+/*
+ * Check whether the instruction at regs->nip is a store using
+ * an update addressing form which will update r1.
+ */
+static int store_updates_sp(struct pt_regs *regs)
+{
+	unsigned int inst;
+
+	if (get_user(inst, (unsigned int __user *)regs->nip))
+		return 0;
+	/* check for 1 in the rA field */
+	if (((inst >> 16) & 0x1f) != 1)
+		return 0;
+	/* check major opcode */
+	switch (inst >> 26) {
+	case 37:	/* stwu */
+	case 39:	/* stbu */
+	case 45:	/* sthu */
+	case 53:	/* stfsu */
+	case 55:	/* stfdu */
+		return 1;
+	case 62:	/* std or stdu */
+		return (inst & 3) == 1;
+	case 31:
+		/* check minor opcode */
+		switch ((inst >> 1) & 0x3ff) {
+		case 181:	/* stdux */
+		case 183:	/* stwux */
+		case 247:	/* stbux */
+		case 439:	/* sthux */
+		case 695:	/* stfsux */
+		case 759:	/* stfdux */
+			return 1;
+		}
+	}
+	return 0;
+}
+
+#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+static void do_dabr(struct pt_regs *regs, unsigned long error_code)
+{
+	siginfo_t info;
+
+	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
+			11, SIGSEGV) == NOTIFY_STOP)
+		return;
+
+	if (debugger_dabr_match(regs))
+		return;
+
+	/* Clear the DABR */
+	set_dabr(0);
+
+	/* Deliver the signal to userspace */
+	info.si_signo = SIGTRAP;
+	info.si_errno = 0;
+	info.si_code = TRAP_HWBKPT;
+	info.si_addr = (void __user *)regs->nip;
+	force_sig_info(SIGTRAP, &info, current);
+}
+#endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
+
+/*
+ * For 600- and 800-family processors, the error_code parameter is DSISR
+ * for a data fault, SRR1 for an instruction fault. For 400-family processors
+ * the error_code parameter is ESR for a data fault, 0 for an instruction
+ * fault.
+ * For 64-bit processors, the error_code parameter is
+ *  - DSISR for a non-SLB data access fault,
+ *  - SRR1 & 0x08000000 for a non-SLB instruction access fault
+ *  - 0 for any SLB fault.
+ *
+ * The return value is 0 if the fault was handled, or the signal
+ * number if this is a kernel fault that can't be handled here.
+ */
+int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
+			    unsigned long error_code)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	siginfo_t info;
+	int code = SEGV_MAPERR;
+	int is_write = 0;
+	int trap = TRAP(regs);
+	int is_exec = trap == 0x400;
+
+#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+	/*
+	 * Fortunately the bit assignments in SRR1 for an instruction
+	 * fault and DSISR for a data fault are mostly the same for the
+	 * bits we are interested in.  But there are some bits which
+	 * indicate errors in DSISR but can validly be set in SRR1.
+	 */
+	if (trap == 0x400)
+		error_code &= 0x48200000;
+	else
+		is_write = error_code & DSISR_ISSTORE;
+#else
+	is_write = error_code & ESR_DST;
+#endif /* CONFIG_4xx || CONFIG_BOOKE */
+
+	if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code,
+				11, SIGSEGV) == NOTIFY_STOP)
+		return 0;
+
+	if (trap == 0x300) {
+		if (debugger_fault_handler(regs))
+			return 0;
+	}
+
+	/* On a kernel SLB miss we can only check for a valid exception entry */
+	if (!user_mode(regs) && (address >= TASK_SIZE))
+		return SIGSEGV;
+
+#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+	if (error_code & DSISR_DABRMATCH) {
+		/* DABR match */
+		do_dabr(regs, error_code);
+		return 0;
+	}
+#endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
+
+	if (in_atomic() || mm == NULL) {
+		if (!user_mode(regs))
+			return SIGSEGV;
+		/* in_atomic() in user mode is really bad,
+		   as is current->mm == NULL. */
+		printk(KERN_EMERG "Page fault in user mode with "
+		       "in_atomic() = %d mm = %p\n", in_atomic(), mm);
+		printk(KERN_EMERG "NIP = %lx  MSR = %lx\n",
+		       regs->nip, regs->msr);
+		die("Weird page fault", regs, SIGSEGV);
+	}
+
+	/* When running in the kernel we expect faults to occur only to
+	 * addresses in user space.  All other faults represent errors in the
+	 * kernel and should generate an OOPS.  Unfortunately, in the case of an
+	 * erroneous fault occurring in a code path which already holds mmap_sem
+	 * we will deadlock attempting to validate the fault against the
+	 * address space.  Luckily the kernel only validly references user
+	 * space from well defined areas of code, which are listed in the
+	 * exceptions table.
+	 *
+	 * As the vast majority of faults will be valid we will only perform
+	 * the source reference check when there is a possibility of a deadlock.
+	 * Attempt to lock the address space, if we cannot we then validate the
+	 * source.  If this is invalid we can skip the address space check,
+	 * thus avoiding the deadlock.
+	 */
+	if (!down_read_trylock(&mm->mmap_sem)) {
+		if (!user_mode(regs) && !search_exception_tables(regs->nip))
+			goto bad_area_nosemaphore;
+
+		down_read(&mm->mmap_sem);
+	}
+
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= address)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+
+	/*
+	 * N.B. The POWER/Open ABI allows programs to access up to
+	 * 288 bytes below the stack pointer.
+	 * The kernel signal delivery code writes up to about 1.5kB
+	 * below the stack pointer (r1) before decrementing it.
+	 * The exec code can write slightly over 640kB to the stack
+	 * before setting the user r1.  Thus we allow the stack to
+	 * expand to 1MB without further checks.
+	 */
+	if (address + 0x100000 < vma->vm_end) {
+		/* get user regs even if this fault is in kernel mode */
+		struct pt_regs *uregs = current->thread.regs;
+		if (uregs == NULL)
+			goto bad_area;
+
+		/*
+		 * A user-mode access to an address a long way below
+		 * the stack pointer is only valid if the instruction
+		 * is one which would update the stack pointer to the
+		 * address accessed if the instruction completed,
+		 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
+		 * (or the byte, halfword, float or double forms).
+		 *
+		 * If we don't check this then any write to the area
+		 * between the last mapped region and the stack will
+		 * expand the stack rather than segfaulting.
+		 */
+		if (address + 2048 < uregs->gpr[1]
+		    && (!user_mode(regs) || !store_updates_sp(regs)))
+			goto bad_area;
+	}
+	if (expand_stack(vma, address))
+		goto bad_area;
+
+good_area:
+	code = SEGV_ACCERR;
+#if defined(CONFIG_6xx)
+	if (error_code & 0x95700000)
+		/* an error such as lwarx to I/O controller space,
+		   address matching DABR, eciwx, etc. */
+		goto bad_area;
+#endif /* CONFIG_6xx */
+#if defined(CONFIG_8xx)
+	/* The MPC8xx seems to always set 0x80000000, which is
+	 * "undefined".  Of those that can be set, this is the only
+	 * one which seems bad.
+	 */
+	if (error_code & 0x10000000)
+		/* Guarded storage error. */
+		goto bad_area;
+#endif /* CONFIG_8xx */
+
+	if (is_exec) {
+#ifdef CONFIG_PPC64
+		/* protection fault */
+		if (error_code & DSISR_PROTFAULT)
+			goto bad_area;
+		if (!(vma->vm_flags & VM_EXEC))
+			goto bad_area;
+#endif
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+		pte_t *ptep;
+
+		/* Since 4xx/Book-E supports per-page execute permission,
+		 * we lazily flush dcache to icache. */
+		ptep = NULL;
+		if (get_pteptr(mm, address, &ptep) && pte_present(*ptep)) {
+			struct page *page = pte_page(*ptep);
+
+			if (!test_bit(PG_arch_1, &page->flags)) {
+				flush_dcache_icache_page(page);
+				set_bit(PG_arch_1, &page->flags);
+			}
+			pte_update(ptep, 0, _PAGE_HWEXEC);
+			_tlbie(address);
+			pte_unmap(ptep);
+			up_read(&mm->mmap_sem);
+			return 0;
+		}
+		if (ptep != NULL)
+			pte_unmap(ptep);
+#endif
+	/* a write */
+	} else if (is_write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+	/* a read */
+	} else {
+		/* protection fault */
+		if (error_code & 0x08000000)
+			goto bad_area;
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto bad_area;
+	}
+
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+ survive:
+	switch (handle_mm_fault(mm, vma, address, is_write)) {
+
+	case VM_FAULT_MINOR:
+		current->min_flt++;
+		break;
+	case VM_FAULT_MAJOR:
+		current->maj_flt++;
+		break;
+	case VM_FAULT_SIGBUS:
+		goto do_sigbus;
+	case VM_FAULT_OOM:
+		goto out_of_memory;
+	default:
+		BUG();
+	}
+
+	up_read(&mm->mmap_sem);
+	return 0;
+
+bad_area:
+	up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+	/* User mode accesses cause a SIGSEGV */
+	if (user_mode(regs)) {
+		_exception(SIGSEGV, regs, code, address);
+		return 0;
+	}
+
+	if (is_exec && (error_code & DSISR_PROTFAULT)
+	    && printk_ratelimit())
+		printk(KERN_CRIT "kernel tried to execute NX-protected"
+		       " page (%lx) - exploit attempt? (uid: %d)\n",
+		       address, current->uid);
+
+	return SIGSEGV;
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+	up_read(&mm->mmap_sem);
+	if (current->pid == 1) {
+		yield();
+		down_read(&mm->mmap_sem);
+		goto survive;
+	}
+	printk("VM: killing process %s\n", current->comm);
+	if (user_mode(regs))
+		do_exit(SIGKILL);
+	return SIGKILL;
+
+do_sigbus:
+	up_read(&mm->mmap_sem);
+	if (user_mode(regs)) {
+		info.si_signo = SIGBUS;
+		info.si_errno = 0;
+		info.si_code = BUS_ADRERR;
+		info.si_addr = (void __user *)address;
+		force_sig_info(SIGBUS, &info, current);
+		return 0;
+	}
+	return SIGBUS;
+}
+
+/*
+ * bad_page_fault is called when we have a bad access from the kernel.
+ * It is called from the DSI and ISI handlers in head.S and from some
+ * of the procedures in traps.c.
+ */
+void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
+{
+	const struct exception_table_entry *entry;
+
+	/* Are we prepared to handle this fault?  */
+	if ((entry = search_exception_tables(regs->nip)) != NULL) {
+		regs->nip = entry->fixup;
+		return;
+	}
+
+	/* kernel has accessed a bad area */
+	die("Kernel access of bad area", regs, sig);
+}
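
The heart of the stack-growth check is store_updates_sp(), which decodes the faulting instruction word: the primary opcode lives in the top six bits, the rA field in bits 16-20, and X-form stores carry an extended opcode in bits 1-10. The sketch below re-implements that decode in user space so it can be checked against known encodings; the helper name and test values are ours, not part of the patch (0x9421fff0 is the standard `stwu r1,-16(r1)` prologue encoding, 0x90610000 is a plain `stw r3,0(r1)`).

```c
#include <stdio.h>

/* Hypothetical user-space mirror of store_updates_sp()'s decode logic. */
static int is_store_update_to_r1(unsigned int inst)
{
	if (((inst >> 16) & 0x1f) != 1)		/* rA field must be r1 */
		return 0;
	switch (inst >> 26) {			/* primary opcode */
	case 37: case 39: case 45: case 53: case 55:
		return 1;			/* stwu/stbu/sthu/stfsu/stfdu */
	case 62:
		return (inst & 3) == 1;		/* stdu (std has low bits 00) */
	case 31:
		switch ((inst >> 1) & 0x3ff) {	/* extended opcode */
		case 181: case 183: case 247:
		case 439: case 695: case 759:
			return 1;		/* X-form update stores */
		}
	}
	return 0;
}

int main(void)
{
	printf("%d\n", is_store_update_to_r1(0x9421fff0));	/* stwu r1,-16(r1) -> 1 */
	printf("%d\n", is_store_update_to_r1(0x90610000));	/* stw r3,0(r1)    -> 0 */
	return 0;
}
```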
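The 1MB/2048-byte heuristic in do_page_fault() means a plain load far below r1 is refused unless the faulting instruction is a store-with-update that would move r1 down to the accessed address. A hypothetical user-space probe of that behaviour follows; it is not from the patch, and the exact outcome depends on the stack rlimit and what happens to be mapped below the stack.

```c
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>

static sigjmp_buf env;

static void on_segv(int sig)
{
	(void)sig;
	siglongjmp(env, 1);
}

int main(void)
{
	struct sigaction sa = { .sa_handler = on_segv };
	char anchor;
	/* Aim well below the stack pointer, past the 2048-byte slack the
	 * handler allows for the red zone and signal frames. */
	volatile char *probe = &anchor - 256 * 1024;

	sigemptyset(&sa.sa_mask);
	sigaction(SIGSEGV, &sa, NULL);

	if (sigsetjmp(env, 1) == 0)
		printf("read got %d: something was mapped there\n", *probe);
	else
		printf("SIGSEGV: plain access did not grow the stack\n");
	return 0;
}
```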
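The VM_FAULT_MINOR and VM_FAULT_MAJOR cases in the handle_mm_fault() switch are what feed the per-task min_flt/maj_flt counters, and those are observable from user space through getrusage(). A small demo (ours, not part of the patch) that provokes one minor fault per freshly touched anonymous page:

```c
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>

int main(void)
{
	struct rusage before, after;
	size_t len = 8 * 1024 * 1024;
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;

	getrusage(RUSAGE_SELF, &before);
	memset(p, 0xaa, len);		/* first touch: one fault per page */
	getrusage(RUSAGE_SELF, &after);

	printf("minor faults: %ld, major faults: %ld\n",
	       after.ru_minflt - before.ru_minflt,
	       after.ru_majflt - before.ru_majflt);
	return 0;
}
```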
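bad_page_fault() only recovers when the faulting kernel instruction has an entry in the exception table; everything else ends in die(). For context, this is roughly how such an entry gets there on 32-bit PowerPC of this era: a user accessor tags the potentially faulting instruction (label 1:) and a fixup stub (label 3:), and records the pair in the __ex_table section that search_exception_tables() walks. A simplified sketch modelled on the kernel's __get_user_asm pattern, not code from this file:

```c
/* Simplified sketch: if the lwz at label 1 faults, bad_page_fault()
 * finds the (1b, 3b) pair in __ex_table and resets regs->nip to the
 * fixup at label 3, which reports -EFAULT instead of oopsing. */
#define get_user_word_sketch(x, ptr, err)			\
	__asm__ __volatile__(					\
		"1:	lwz	%1,0(%2)\n"			\
		"2:\n"						\
		".section .fixup,\"ax\"\n"			\
		"3:	li	%0,-14\n"	/* -EFAULT */	\
		"	li	%1,0\n"				\
		"	b	2b\n"				\
		".previous\n"					\
		".section __ex_table,\"a\"\n"			\
		"	.align	2\n"				\
		"	.long	1b,3b\n"			\
		".previous"					\
		: "=r" (err), "=r" (x)				\
		: "b" (ptr), "0" (err))
```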
