diff options
Diffstat (limited to 'arch/sh/mm/fault.c')
| -rw-r--r-- | arch/sh/mm/fault.c | 662 |
1 files changed, 438 insertions, 224 deletions
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 60d74f793a1..541dc610150 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -2,7 +2,7 @@ * Page fault handler for SH with an MMU. * * Copyright (C) 1999 Niibe Yutaka - * Copyright (C) 2003 - 2007 Paul Mundt + * Copyright (C) 2003 - 2012 Paul Mundt * * Based on linux/arch/i386/mm/fault.c: * Copyright (C) 1995 Linus Torvalds @@ -15,289 +15,503 @@ #include <linux/mm.h> #include <linux/hardirq.h> #include <linux/kprobes.h> -#include <asm/system.h> +#include <linux/perf_event.h> +#include <linux/kdebug.h> +#include <asm/io_trapped.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> -#include <asm/kgdb.h> +#include <asm/traps.h> + +static inline int notify_page_fault(struct pt_regs *regs, int trap) +{ + int ret = 0; + + if (kprobes_built_in() && !user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, trap)) + ret = 1; + preempt_enable(); + } + + return ret; +} + +static void +force_sig_info_fault(int si_signo, int si_code, unsigned long address, + struct task_struct *tsk) +{ + siginfo_t info; + + info.si_signo = si_signo; + info.si_errno = 0; + info.si_code = si_code; + info.si_addr = (void __user *)address; + + force_sig_info(si_signo, &info, tsk); +} /* - * This routine handles page faults. It determines the address, - * and the problem, and then passes it off to one of the appropriate - * routines. + * This is useful to dump out the page tables associated with + * 'addr' in mm 'mm'. */ -asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, - unsigned long writeaccess, - unsigned long address) +static void show_pte(struct mm_struct *mm, unsigned long addr) { - struct task_struct *tsk; - struct mm_struct *mm; - struct vm_area_struct * vma; - int si_code; - int fault; - siginfo_t info; + pgd_t *pgd; - trace_hardirqs_on(); - local_irq_enable(); + if (mm) { + pgd = mm->pgd; + } else { + pgd = get_TTB(); -#ifdef CONFIG_SH_KGDB - if (kgdb_nofault && kgdb_bus_err_hook) - kgdb_bus_err_hook(); -#endif + if (unlikely(!pgd)) + pgd = swapper_pg_dir; + } - tsk = current; - mm = tsk->mm; - si_code = SEGV_MAPERR; + printk(KERN_ALERT "pgd = %p\n", pgd); + pgd += pgd_index(addr); + printk(KERN_ALERT "[%08lx] *pgd=%0*Lx", addr, + (u32)(sizeof(*pgd) * 2), (u64)pgd_val(*pgd)); - if (unlikely(address >= TASK_SIZE)) { - /* - * Synchronize this task's top level page-table - * with the 'reference' page table. - * - * Do _not_ use "tsk" here. We might be inside - * an interrupt in the middle of a task switch.. - */ - int offset = pgd_index(address); - pgd_t *pgd, *pgd_k; - pud_t *pud, *pud_k; - pmd_t *pmd, *pmd_k; - - pgd = get_TTB() + offset; - pgd_k = swapper_pg_dir + offset; - - /* This will never happen with the folded page table. */ - if (!pgd_present(*pgd)) { - if (!pgd_present(*pgd_k)) - goto bad_area_nosemaphore; - set_pgd(pgd, *pgd_k); - return; + do { + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if (pgd_none(*pgd)) + break; + + if (pgd_bad(*pgd)) { + printk("(bad)"); + break; } - pud = pud_offset(pgd, address); - pud_k = pud_offset(pgd_k, address); - if (pud_present(*pud) || !pud_present(*pud_k)) - goto bad_area_nosemaphore; - set_pud(pud, *pud_k); + pud = pud_offset(pgd, addr); + if (PTRS_PER_PUD != 1) + printk(", *pud=%0*Lx", (u32)(sizeof(*pud) * 2), + (u64)pud_val(*pud)); - pmd = pmd_offset(pud, address); - pmd_k = pmd_offset(pud_k, address); - if (pmd_present(*pmd) || !pmd_present(*pmd_k)) - goto bad_area_nosemaphore; - set_pmd(pmd, *pmd_k); + if (pud_none(*pud)) + break; - return; + if (pud_bad(*pud)) { + printk("(bad)"); + break; + } + + pmd = pmd_offset(pud, addr); + if (PTRS_PER_PMD != 1) + printk(", *pmd=%0*Lx", (u32)(sizeof(*pmd) * 2), + (u64)pmd_val(*pmd)); + + if (pmd_none(*pmd)) + break; + + if (pmd_bad(*pmd)) { + printk("(bad)"); + break; + } + + /* We must not map this if we have highmem enabled */ + if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT))) + break; + + pte = pte_offset_kernel(pmd, addr); + printk(", *pte=%0*Lx", (u32)(sizeof(*pte) * 2), + (u64)pte_val(*pte)); + } while (0); + + printk("\n"); +} + +static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) +{ + unsigned index = pgd_index(address); + pgd_t *pgd_k; + pud_t *pud, *pud_k; + pmd_t *pmd, *pmd_k; + + pgd += index; + pgd_k = init_mm.pgd + index; + + if (!pgd_present(*pgd_k)) + return NULL; + + pud = pud_offset(pgd, address); + pud_k = pud_offset(pgd_k, address); + if (!pud_present(*pud_k)) + return NULL; + + if (!pud_present(*pud)) + set_pud(pud, *pud_k); + + pmd = pmd_offset(pud, address); + pmd_k = pmd_offset(pud_k, address); + if (!pmd_present(*pmd_k)) + return NULL; + + if (!pmd_present(*pmd)) + set_pmd(pmd, *pmd_k); + else { + /* + * The page tables are fully synchronised so there must + * be another reason for the fault. Return NULL here to + * signal that we have not taken care of the fault. + */ + BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); + return NULL; } - /* - * If we're in an interrupt or have no user - * context, we must not take the fault.. - */ - if (in_atomic() || !mm) - goto no_context; + return pmd_k; +} - down_read(&mm->mmap_sem); +#ifdef CONFIG_SH_STORE_QUEUES +#define __FAULT_ADDR_LIMIT P3_ADDR_MAX +#else +#define __FAULT_ADDR_LIMIT VMALLOC_END +#endif - vma = find_vma(mm, address); - if (!vma) - goto bad_area; - if (vma->vm_start <= address) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - if (expand_stack(vma, address)) - goto bad_area; /* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. + * Handle a fault on the vmalloc or module mapping area */ -good_area: - si_code = SEGV_ACCERR; - if (writeaccess) { - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - } else { - if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) - goto bad_area; - } +static noinline int vmalloc_fault(unsigned long address) +{ + pgd_t *pgd_k; + pmd_t *pmd_k; + pte_t *pte_k; + + /* Make sure we are in vmalloc/module/P3 area: */ + if (!(address >= VMALLOC_START && address < __FAULT_ADDR_LIMIT)) + return -1; /* - * If for any reason at all we couldn't handle the fault, - * make sure we exit gracefully rather than endlessly redo - * the fault. + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "current" here. We might be inside + * an interrupt in the middle of a task switch.. */ -survive: - fault = handle_mm_fault(mm, vma, address, writeaccess); - if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGBUS) - goto do_sigbus; - BUG(); - } - if (fault & VM_FAULT_MAJOR) - tsk->maj_flt++; - else - tsk->min_flt++; + pgd_k = get_TTB(); + pmd_k = vmalloc_sync_one(pgd_k, address); + if (!pmd_k) + return -1; - up_read(&mm->mmap_sem); - return; + pte_k = pte_offset_kernel(pmd_k, address); + if (!pte_present(*pte_k)) + return -1; -/* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. - */ -bad_area: - up_read(&mm->mmap_sem); + return 0; +} -bad_area_nosemaphore: - if (user_mode(regs)) { - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = (void *) address; - force_sig_info(SIGSEGV, &info, tsk); +static void +show_fault_oops(struct pt_regs *regs, unsigned long address) +{ + if (!oops_may_print()) return; - } -no_context: + printk(KERN_ALERT "BUG: unable to handle kernel "); + if (address < PAGE_SIZE) + printk(KERN_CONT "NULL pointer dereference"); + else + printk(KERN_CONT "paging request"); + + printk(KERN_CONT " at %08lx\n", address); + printk(KERN_ALERT "PC:"); + printk_address(regs->pc, 1); + + show_pte(NULL, address); +} + +static noinline void +no_context(struct pt_regs *regs, unsigned long error_code, + unsigned long address) +{ /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs)) return; -/* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - * - */ + if (handle_trapped_io(regs, address)) + return; + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ bust_spinlocks(1); - if (oops_may_print()) { - __typeof__(pte_val(__pte(0))) page; + show_fault_oops(regs, address); - if (address < PAGE_SIZE) - printk(KERN_ALERT "Unable to handle kernel NULL " - "pointer dereference"); - else - printk(KERN_ALERT "Unable to handle kernel paging " - "request"); - printk(" at virtual address %08lx\n", address); - printk(KERN_ALERT "pc = %08lx\n", regs->pc); - page = (unsigned long)get_TTB(); - if (page) { - page = ((__typeof__(page) *)page)[address >> PGDIR_SHIFT]; - printk(KERN_ALERT "*pde = %08lx\n", page); - if (page & _PAGE_PRESENT) { - page &= PAGE_MASK; - address &= 0x003ff000; - page = ((__typeof__(page) *) - __va(page))[address >> - PAGE_SHIFT]; - printk(KERN_ALERT "*pte = %08lx\n", page); - } - } - } - - die("Oops", regs, writeaccess); + die("Oops", regs, error_code); bust_spinlocks(0); do_exit(SIGKILL); +} -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ -out_of_memory: - up_read(&mm->mmap_sem); - if (is_global_init(current)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; +static void +__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, + unsigned long address, int si_code) +{ + struct task_struct *tsk = current; + + /* User mode accesses just cause a SIGSEGV */ + if (user_mode(regs)) { + /* + * It's possible to have interrupts off here: + */ + local_irq_enable(); + + force_sig_info_fault(SIGSEGV, si_code, address, tsk); + + return; } - printk("VM: killing process %s\n", tsk->comm); - if (user_mode(regs)) - do_group_exit(SIGKILL); - goto no_context; -do_sigbus: - up_read(&mm->mmap_sem); + no_context(regs, error_code, address); +} + +static noinline void +bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, + unsigned long address) +{ + __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR); +} + +static void +__bad_area(struct pt_regs *regs, unsigned long error_code, + unsigned long address, int si_code) +{ + struct mm_struct *mm = current->mm; /* - * Send a sigbus, regardless of whether we were in kernel - * or user mode. + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. */ - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void *)address; - force_sig_info(SIGBUS, &info, tsk); + up_read(&mm->mmap_sem); - /* Kernel mode? Handle exceptions or die */ - if (!user_mode(regs)) - goto no_context; + __bad_area_nosemaphore(regs, error_code, address, si_code); } -#ifdef CONFIG_SH_STORE_QUEUES -/* - * This is a special case for the SH-4 store queues, as pages for this - * space still need to be faulted in before it's possible to flush the - * store queue cache for writeout to the remapped region. - */ -#define P3_ADDR_MAX (P4SEG_STORE_QUE + 0x04000000) -#else -#define P3_ADDR_MAX P4SEG -#endif +static noinline void +bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address) +{ + __bad_area(regs, error_code, address, SEGV_MAPERR); +} -/* - * Called with interrupts disabled. - */ -asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs, - unsigned long writeaccess, - unsigned long address) +static noinline void +bad_area_access_error(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - pte_t entry; - -#ifdef CONFIG_SH_KGDB - if (kgdb_nofault && kgdb_bus_err_hook) - kgdb_bus_err_hook(); -#endif + __bad_area(regs, error_code, address, SEGV_ACCERR); +} + +static void +do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + + up_read(&mm->mmap_sem); + + /* Kernel mode? Handle exceptions or die: */ + if (!user_mode(regs)) + no_context(regs, error_code, address); + force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); +} + +static noinline int +mm_fault_error(struct pt_regs *regs, unsigned long error_code, + unsigned long address, unsigned int fault) +{ /* - * We don't take page faults for P1, P2, and parts of P4, these - * are always mapped, whether it be due to legacy behaviour in - * 29-bit mode, or due to PMB configuration in 32-bit mode. + * Pagefault was interrupted by SIGKILL. We have no reason to + * continue pagefault. */ - if (address >= P3SEG && address < P3_ADDR_MAX) { - pgd = pgd_offset_k(address); - } else { - if (unlikely(address >= TASK_SIZE || !current->mm)) + if (fatal_signal_pending(current)) { + if (!(fault & VM_FAULT_RETRY)) + up_read(¤t->mm->mmap_sem); + if (!user_mode(regs)) + no_context(regs, error_code, address); + return 1; + } + + if (!(fault & VM_FAULT_ERROR)) + return 0; + + if (fault & VM_FAULT_OOM) { + /* Kernel mode? Handle exceptions or die: */ + if (!user_mode(regs)) { + up_read(¤t->mm->mmap_sem); + no_context(regs, error_code, address); return 1; + } + up_read(¤t->mm->mmap_sem); - pgd = pgd_offset(current->mm, address); + /* + * We ran out of memory, call the OOM killer, and return the + * userspace (which will retry the fault, or kill us if we got + * oom-killed): + */ + pagefault_out_of_memory(); + } else { + if (fault & VM_FAULT_SIGBUS) + do_sigbus(regs, error_code, address); + else + BUG(); } - pud = pud_offset(pgd, address); - if (pud_none_or_clear_bad(pud)) - return 1; - pmd = pmd_offset(pud, address); - if (pmd_none_or_clear_bad(pmd)) - return 1; + return 1; +} + +static inline int access_error(int error_code, struct vm_area_struct *vma) +{ + if (error_code & FAULT_CODE_WRITE) { + /* write, present and write, not present: */ + if (unlikely(!(vma->vm_flags & VM_WRITE))) + return 1; + return 0; + } - pte = pte_offset_kernel(pmd, address); - entry = *pte; - if (unlikely(pte_none(entry) || pte_not_present(entry))) + /* ITLB miss on NX page */ + if (unlikely((error_code & FAULT_CODE_ITLB) && + !(vma->vm_flags & VM_EXEC))) return 1; - if (unlikely(writeaccess && !pte_write(entry))) + + /* read, not present: */ + if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))) return 1; - if (writeaccess) - entry = pte_mkdirty(entry); - entry = pte_mkyoung(entry); + return 0; +} - set_pte(pte, entry); - update_mmu_cache(NULL, address, entry); +static int fault_in_kernel_space(unsigned long address) +{ + return address >= TASK_SIZE; +} - return 0; +/* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to one of the appropriate + * routines. + */ +asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long error_code, + unsigned long address) +{ + unsigned long vec; + struct task_struct *tsk; + struct mm_struct *mm; + struct vm_area_struct * vma; + int fault; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + tsk = current; + mm = tsk->mm; + vec = lookup_exception_vector(); + + /* + * We fault-in kernel-space virtual memory on-demand. The + * 'reference' page table is init_mm.pgd. + * + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. + */ + if (unlikely(fault_in_kernel_space(address))) { + if (vmalloc_fault(address) >= 0) + return; + if (notify_page_fault(regs, vec)) + return; + + bad_area_nosemaphore(regs, error_code, address); + return; + } + + if (unlikely(notify_page_fault(regs, vec))) + return; + + /* Only enable interrupts if they were on before the fault */ + if ((regs->sr & SR_IMASK) != SR_IMASK) + local_irq_enable(); + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + + /* + * If we're in an interrupt, have no user context or are running + * in an atomic region then we must not take the fault: + */ + if (unlikely(in_atomic() || !mm)) { + bad_area_nosemaphore(regs, error_code, address); + return; + } + +retry: + down_read(&mm->mmap_sem); + + vma = find_vma(mm, address); + if (unlikely(!vma)) { + bad_area(regs, error_code, address); + return; + } + if (likely(vma->vm_start <= address)) + goto good_area; + if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { + bad_area(regs, error_code, address); + return; + } + if (unlikely(expand_stack(vma, address))) { + bad_area(regs, error_code, address); + return; + } + + /* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ +good_area: + if (unlikely(access_error(error_code, vma))) { + bad_area_access_error(regs, error_code, address); + return; + } + + set_thread_fault_code(error_code); + + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (error_code & FAULT_CODE_WRITE) + flags |= FAULT_FLAG_WRITE; + + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + fault = handle_mm_fault(mm, vma, address, flags); + + if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR))) + if (mm_fault_error(regs, error_code, address, fault)) + return; + + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + tsk->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + regs, address); + } else { + tsk->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + regs, address); + } + if (fault & VM_FAULT_RETRY) { + flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; + + /* + * No need to up_read(&mm->mmap_sem) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + goto retry; + } + } + + up_read(&mm->mmap_sem); } |
