diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/sparc64/mm |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/sparc64/mm')
-rw-r--r-- | arch/sparc64/mm/Makefile | 10 | ||||
-rw-r--r-- | arch/sparc64/mm/extable.c | 80 | ||||
-rw-r--r-- | arch/sparc64/mm/fault.c | 527 | ||||
-rw-r--r-- | arch/sparc64/mm/generic.c | 182 | ||||
-rw-r--r-- | arch/sparc64/mm/hugetlbpage.c | 310 | ||||
-rw-r--r-- | arch/sparc64/mm/init.c | 1769 | ||||
-rw-r--r-- | arch/sparc64/mm/tlb.c | 151 | ||||
-rw-r--r-- | arch/sparc64/mm/ultra.S | 583 |
8 files changed, 3612 insertions, 0 deletions
diff --git a/arch/sparc64/mm/Makefile b/arch/sparc64/mm/Makefile new file mode 100644 index 00000000000..cda87333a77 --- /dev/null +++ b/arch/sparc64/mm/Makefile @@ -0,0 +1,10 @@ +# $Id: Makefile,v 1.8 2000/12/14 22:57:25 davem Exp $ +# Makefile for the linux Sparc64-specific parts of the memory manager. +# + +EXTRA_AFLAGS := -ansi +EXTRA_CFLAGS := -Werror + +obj-y := ultra.o tlb.o fault.o init.o generic.o extable.o + +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/sparc64/mm/extable.c b/arch/sparc64/mm/extable.c new file mode 100644 index 00000000000..ec334297ff4 --- /dev/null +++ b/arch/sparc64/mm/extable.c @@ -0,0 +1,80 @@ +/* + * linux/arch/sparc64/mm/extable.c + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <asm/uaccess.h> + +extern const struct exception_table_entry __start___ex_table[]; +extern const struct exception_table_entry __stop___ex_table[]; + +void sort_extable(struct exception_table_entry *start, + struct exception_table_entry *finish) +{ +} + +/* Caller knows they are in a range if ret->fixup == 0 */ +const struct exception_table_entry * +search_extable(const struct exception_table_entry *start, + const struct exception_table_entry *last, + unsigned long value) +{ + const struct exception_table_entry *walk; + + /* Single insn entries are encoded as: + * word 1: insn address + * word 2: fixup code address + * + * Range entries are encoded as: + * word 1: first insn address + * word 2: 0 + * word 3: last insn address + 4 bytes + * word 4: fixup code address + * + * See asm/uaccess.h for more details. + */ + + /* 1. Try to find an exact match. */ + for (walk = start; walk <= last; walk++) { + if (walk->fixup == 0) { + /* A range entry, skip both parts. */ + walk++; + continue; + } + + if (walk->insn == value) + return walk; + } + + /* 2. Try to find a range match. */ + for (walk = start; walk <= (last - 1); walk++) { + if (walk->fixup) + continue; + + if (walk[0].insn <= value && walk[1].insn > value) + return walk; + + walk++; + } + + return NULL; +} + +/* Special extable search, which handles ranges. Returns fixup */ +unsigned long search_extables_range(unsigned long addr, unsigned long *g2) +{ + const struct exception_table_entry *entry; + + entry = search_exception_tables(addr); + if (!entry) + return 0; + + /* Inside range? Fix g2 and return correct fixup */ + if (!entry->fixup) { + *g2 = (addr - entry->insn) / 4; + return (entry + 1)->fixup; + } + + return entry->fixup; +} diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c new file mode 100644 index 00000000000..3ffee7b51ae --- /dev/null +++ b/arch/sparc64/mm/fault.c @@ -0,0 +1,527 @@ +/* $Id: fault.c,v 1.59 2002/02/09 19:49:31 davem Exp $ + * arch/sparc64/mm/fault.c: Page fault handlers for the 64-bit Sparc. + * + * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + */ + +#include <asm/head.h> + +#include <linux/string.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/signal.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/smp_lock.h> +#include <linux/init.h> +#include <linux/interrupt.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/openprom.h> +#include <asm/oplib.h> +#include <asm/uaccess.h> +#include <asm/asi.h> +#include <asm/lsu.h> +#include <asm/sections.h> +#include <asm/kdebug.h> + +#define ELEMENTS(arr) (sizeof (arr)/sizeof (arr[0])) + +extern struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS]; + +/* + * To debug kernel during syscall entry. + */ +void syscall_trace_entry(struct pt_regs *regs) +{ + printk("scall entry: %s[%d]/cpu%d: %d\n", current->comm, current->pid, smp_processor_id(), (int) regs->u_regs[UREG_G1]); +} + +/* + * To debug kernel during syscall exit. + */ +void syscall_trace_exit(struct pt_regs *regs) +{ + printk("scall exit: %s[%d]/cpu%d: %d\n", current->comm, current->pid, smp_processor_id(), (int) regs->u_regs[UREG_G1]); +} + +/* + * To debug kernel to catch accesses to certain virtual/physical addresses. + * Mode = 0 selects physical watchpoints, mode = 1 selects virtual watchpoints. + * flags = VM_READ watches memread accesses, flags = VM_WRITE watches memwrite accesses. + * Caller passes in a 64bit aligned addr, with mask set to the bytes that need to be + * watched. This is only useful on a single cpu machine for now. After the watchpoint + * is detected, the process causing it will be killed, thus preventing an infinite loop. + */ +void set_brkpt(unsigned long addr, unsigned char mask, int flags, int mode) +{ + unsigned long lsubits; + + __asm__ __volatile__("ldxa [%%g0] %1, %0" + : "=r" (lsubits) + : "i" (ASI_LSU_CONTROL)); + lsubits &= ~(LSU_CONTROL_PM | LSU_CONTROL_VM | + LSU_CONTROL_PR | LSU_CONTROL_VR | + LSU_CONTROL_PW | LSU_CONTROL_VW); + + __asm__ __volatile__("stxa %0, [%1] %2\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (addr), "r" (mode ? VIRT_WATCHPOINT : PHYS_WATCHPOINT), + "i" (ASI_DMMU)); + + lsubits |= ((unsigned long)mask << (mode ? 25 : 33)); + if (flags & VM_READ) + lsubits |= (mode ? LSU_CONTROL_VR : LSU_CONTROL_PR); + if (flags & VM_WRITE) + lsubits |= (mode ? LSU_CONTROL_VW : LSU_CONTROL_PW); + __asm__ __volatile__("stxa %0, [%%g0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (lsubits), "i" (ASI_LSU_CONTROL) + : "memory"); +} + +/* Nice, simple, prom library does all the sweating for us. ;) */ +unsigned long __init prom_probe_memory (void) +{ + register struct linux_mlist_p1275 *mlist; + register unsigned long bytes, base_paddr, tally; + register int i; + + i = 0; + mlist = *prom_meminfo()->p1275_available; + bytes = tally = mlist->num_bytes; + base_paddr = mlist->start_adr; + + sp_banks[0].base_addr = base_paddr; + sp_banks[0].num_bytes = bytes; + + while (mlist->theres_more != (void *) 0) { + i++; + mlist = mlist->theres_more; + bytes = mlist->num_bytes; + tally += bytes; + if (i >= SPARC_PHYS_BANKS-1) { + printk ("The machine has more banks than " + "this kernel can support\n" + "Increase the SPARC_PHYS_BANKS " + "setting (currently %d)\n", + SPARC_PHYS_BANKS); + i = SPARC_PHYS_BANKS-1; + break; + } + + sp_banks[i].base_addr = mlist->start_adr; + sp_banks[i].num_bytes = mlist->num_bytes; + } + + i++; + sp_banks[i].base_addr = 0xdeadbeefbeefdeadUL; + sp_banks[i].num_bytes = 0; + + /* Now mask all bank sizes on a page boundary, it is all we can + * use anyways. + */ + for (i = 0; sp_banks[i].num_bytes != 0; i++) + sp_banks[i].num_bytes &= PAGE_MASK; + + return tally; +} + +static void unhandled_fault(unsigned long address, struct task_struct *tsk, + struct pt_regs *regs) +{ + if ((unsigned long) address < PAGE_SIZE) { + printk(KERN_ALERT "Unable to handle kernel NULL " + "pointer dereference\n"); + } else { + printk(KERN_ALERT "Unable to handle kernel paging request " + "at virtual address %016lx\n", (unsigned long)address); + } + printk(KERN_ALERT "tsk->{mm,active_mm}->context = %016lx\n", + (tsk->mm ? + CTX_HWBITS(tsk->mm->context) : + CTX_HWBITS(tsk->active_mm->context))); + printk(KERN_ALERT "tsk->{mm,active_mm}->pgd = %016lx\n", + (tsk->mm ? (unsigned long) tsk->mm->pgd : + (unsigned long) tsk->active_mm->pgd)); + if (notify_die(DIE_GPF, "general protection fault", regs, + 0, 0, SIGSEGV) == NOTIFY_STOP) + return; + die_if_kernel("Oops", regs); +} + +static void bad_kernel_pc(struct pt_regs *regs) +{ + unsigned long *ksp; + + printk(KERN_CRIT "OOPS: Bogus kernel PC [%016lx] in fault handler\n", + regs->tpc); + __asm__("mov %%sp, %0" : "=r" (ksp)); + show_stack(current, ksp); + unhandled_fault(regs->tpc, current, regs); +} + +/* + * We now make sure that mmap_sem is held in all paths that call + * this. Additionally, to prevent kswapd from ripping ptes from + * under us, raise interrupts around the time that we look at the + * pte, kswapd will have to wait to get his smp ipi response from + * us. This saves us having to get page_table_lock. + */ +static unsigned int get_user_insn(unsigned long tpc) +{ + pgd_t *pgdp = pgd_offset(current->mm, tpc); + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep, pte; + unsigned long pa; + u32 insn = 0; + unsigned long pstate; + + if (pgd_none(*pgdp)) + goto outret; + pudp = pud_offset(pgdp, tpc); + if (pud_none(*pudp)) + goto outret; + pmdp = pmd_offset(pudp, tpc); + if (pmd_none(*pmdp)) + goto outret; + + /* This disables preemption for us as well. */ + __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); + __asm__ __volatile__("wrpr %0, %1, %%pstate" + : : "r" (pstate), "i" (PSTATE_IE)); + ptep = pte_offset_map(pmdp, tpc); + pte = *ptep; + if (!pte_present(pte)) + goto out; + + pa = (pte_val(pte) & _PAGE_PADDR); + pa += (tpc & ~PAGE_MASK); + + /* Use phys bypass so we don't pollute dtlb/dcache. */ + __asm__ __volatile__("lduwa [%1] %2, %0" + : "=r" (insn) + : "r" (pa), "i" (ASI_PHYS_USE_EC)); + +out: + pte_unmap(ptep); + __asm__ __volatile__("wrpr %0, 0x0, %%pstate" : : "r" (pstate)); +outret: + return insn; +} + +extern unsigned long compute_effective_address(struct pt_regs *, unsigned int, unsigned int); + +static void do_fault_siginfo(int code, int sig, struct pt_regs *regs, + unsigned int insn, int fault_code) +{ + siginfo_t info; + + info.si_code = code; + info.si_signo = sig; + info.si_errno = 0; + if (fault_code & FAULT_CODE_ITLB) + info.si_addr = (void __user *) regs->tpc; + else + info.si_addr = (void __user *) + compute_effective_address(regs, insn, 0); + info.si_trapno = 0; + force_sig_info(sig, &info, current); +} + +extern int handle_ldf_stq(u32, struct pt_regs *); +extern int handle_ld_nf(u32, struct pt_regs *); + +static unsigned int get_fault_insn(struct pt_regs *regs, unsigned int insn) +{ + if (!insn) { + if (!regs->tpc || (regs->tpc & 0x3)) + return 0; + if (regs->tstate & TSTATE_PRIV) { + insn = *(unsigned int *) regs->tpc; + } else { + insn = get_user_insn(regs->tpc); + } + } + return insn; +} + +static void do_kernel_fault(struct pt_regs *regs, int si_code, int fault_code, + unsigned int insn, unsigned long address) +{ + unsigned long g2; + unsigned char asi = ASI_P; + + if ((!insn) && (regs->tstate & TSTATE_PRIV)) + goto cannot_handle; + + /* If user insn could be read (thus insn is zero), that + * is fine. We will just gun down the process with a signal + * in that case. + */ + + if (!(fault_code & (FAULT_CODE_WRITE|FAULT_CODE_ITLB)) && + (insn & 0xc0800000) == 0xc0800000) { + if (insn & 0x2000) + asi = (regs->tstate >> 24); + else + asi = (insn >> 5); + if ((asi & 0xf2) == 0x82) { + if (insn & 0x1000000) { + handle_ldf_stq(insn, regs); + } else { + /* This was a non-faulting load. Just clear the + * destination register(s) and continue with the next + * instruction. -jj + */ + handle_ld_nf(insn, regs); + } + return; + } + } + + g2 = regs->u_regs[UREG_G2]; + + /* Is this in ex_table? */ + if (regs->tstate & TSTATE_PRIV) { + unsigned long fixup; + + if (asi == ASI_P && (insn & 0xc0800000) == 0xc0800000) { + if (insn & 0x2000) + asi = (regs->tstate >> 24); + else + asi = (insn >> 5); + } + + /* Look in asi.h: All _S asis have LS bit set */ + if ((asi & 0x1) && + (fixup = search_extables_range(regs->tpc, &g2))) { + regs->tpc = fixup; + regs->tnpc = regs->tpc + 4; + regs->u_regs[UREG_G2] = g2; + return; + } + } else { + /* The si_code was set to make clear whether + * this was a SEGV_MAPERR or SEGV_ACCERR fault. + */ + do_fault_siginfo(si_code, SIGSEGV, regs, insn, fault_code); + return; + } + +cannot_handle: + unhandled_fault (address, current, regs); +} + +asmlinkage void do_sparc64_fault(struct pt_regs *regs) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned int insn = 0; + int si_code, fault_code; + unsigned long address; + + fault_code = get_thread_fault_code(); + + if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, + fault_code, 0, SIGSEGV) == NOTIFY_STOP) + return; + + si_code = SEGV_MAPERR; + address = current_thread_info()->fault_address; + + if ((fault_code & FAULT_CODE_ITLB) && + (fault_code & FAULT_CODE_DTLB)) + BUG(); + + if (regs->tstate & TSTATE_PRIV) { + unsigned long tpc = regs->tpc; + + /* Sanity check the PC. */ + if ((tpc >= KERNBASE && tpc < (unsigned long) _etext) || + (tpc >= MODULES_VADDR && tpc < MODULES_END)) { + /* Valid, no problems... */ + } else { + bad_kernel_pc(regs); + return; + } + } + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault.. + */ + if (in_atomic() || !mm) + goto intr_or_no_mm; + + if (test_thread_flag(TIF_32BIT)) { + if (!(regs->tstate & TSTATE_PRIV)) + regs->tpc &= 0xffffffff; + address &= 0xffffffff; + } + + if (!down_read_trylock(&mm->mmap_sem)) { + if ((regs->tstate & TSTATE_PRIV) && + !search_exception_tables(regs->tpc)) { + insn = get_fault_insn(regs, insn); + goto handle_kernel_fault; + } + down_read(&mm->mmap_sem); + } + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + + /* Pure DTLB misses do not tell us whether the fault causing + * load/store/atomic was a write or not, it only says that there + * was no match. So in such a case we (carefully) read the + * instruction to try and figure this out. It's an optimization + * so it's ok if we can't do this. + * + * Special hack, window spill/fill knows the exact fault type. + */ + if (((fault_code & + (FAULT_CODE_DTLB | FAULT_CODE_WRITE | FAULT_CODE_WINFIXUP)) == FAULT_CODE_DTLB) && + (vma->vm_flags & VM_WRITE) != 0) { + insn = get_fault_insn(regs, 0); + if (!insn) + goto continue_fault; + if ((insn & 0xc0200000) == 0xc0200000 && + (insn & 0x1780000) != 0x1680000) { + /* Don't bother updating thread struct value, + * because update_mmu_cache only cares which tlb + * the access came from. + */ + fault_code |= FAULT_CODE_WRITE; + } + } +continue_fault: + + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (!(fault_code & FAULT_CODE_WRITE)) { + /* Non-faulting loads shouldn't expand stack. */ + insn = get_fault_insn(regs, insn); + if ((insn & 0xc0800000) == 0xc0800000) { + unsigned char asi; + + if (insn & 0x2000) + asi = (regs->tstate >> 24); + else + asi = (insn >> 5); + if ((asi & 0xf2) == 0x82) + goto bad_area; + } + } + if (expand_stack(vma, address)) + goto bad_area; + /* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ +good_area: + si_code = SEGV_ACCERR; + + /* If we took a ITLB miss on a non-executable page, catch + * that here. + */ + if ((fault_code & FAULT_CODE_ITLB) && !(vma->vm_flags & VM_EXEC)) { + BUG_ON(address != regs->tpc); + BUG_ON(regs->tstate & TSTATE_PRIV); + goto bad_area; + } + + if (fault_code & FAULT_CODE_WRITE) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + + /* Spitfire has an icache which does not snoop + * processor stores. Later processors do... + */ + if (tlb_type == spitfire && + (vma->vm_flags & VM_EXEC) != 0 && + vma->vm_file != NULL) + set_thread_fault_code(fault_code | + FAULT_CODE_BLKCOMMIT); + } else { + /* Allow reads even for write-only mappings */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + + switch (handle_mm_fault(mm, vma, address, (fault_code & FAULT_CODE_WRITE))) { + case VM_FAULT_MINOR: + current->min_flt++; + break; + case VM_FAULT_MAJOR: + current->maj_flt++; + break; + case VM_FAULT_SIGBUS: + goto do_sigbus; + case VM_FAULT_OOM: + goto out_of_memory; + default: + BUG(); + } + + up_read(&mm->mmap_sem); + goto fault_done; + + /* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ +bad_area: + insn = get_fault_insn(regs, insn); + up_read(&mm->mmap_sem); + +handle_kernel_fault: + do_kernel_fault(regs, si_code, fault_code, insn, address); + + goto fault_done; + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +out_of_memory: + insn = get_fault_insn(regs, insn); + up_read(&mm->mmap_sem); + printk("VM: killing process %s\n", current->comm); + if (!(regs->tstate & TSTATE_PRIV)) + do_exit(SIGKILL); + goto handle_kernel_fault; + +intr_or_no_mm: + insn = get_fault_insn(regs, 0); + goto handle_kernel_fault; + +do_sigbus: + insn = get_fault_insn(regs, insn); + up_read(&mm->mmap_sem); + + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode. + */ + do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, insn, fault_code); + + /* Kernel mode? Handle exceptions or die */ + if (regs->tstate & TSTATE_PRIV) + goto handle_kernel_fault; + +fault_done: + /* These values are no longer needed, clear them. */ + set_thread_fault_code(0); + current_thread_info()->fault_address = 0; +} diff --git a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c new file mode 100644 index 00000000000..6b31f6117a9 --- /dev/null +++ b/arch/sparc64/mm/generic.c @@ -0,0 +1,182 @@ +/* $Id: generic.c,v 1.18 2001/12/21 04:56:15 davem Exp $ + * generic.c: Generic Sparc mm routines that are not dependent upon + * MMU type but are Sparc specific. + * + * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/pagemap.h> + +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/tlbflush.h> + +/* Remap IO memory, the same way as remap_pfn_range(), but use + * the obio memory space. + * + * They use a pgprot that sets PAGE_IO and does not check the + * mem_map table as this is independent of normal memory. + */ +static inline void io_remap_pte_range(struct mm_struct *mm, pte_t * pte, + unsigned long address, + unsigned long size, + unsigned long offset, pgprot_t prot, + int space) +{ + unsigned long end; + + /* clear hack bit that was used as a write_combine side-effect flag */ + offset &= ~0x1UL; + address &= ~PMD_MASK; + end = address + size; + if (end > PMD_SIZE) + end = PMD_SIZE; + do { + pte_t entry; + unsigned long curend = address + PAGE_SIZE; + + entry = mk_pte_io(offset, prot, space); + if (!(address & 0xffff)) { + if (!(address & 0x3fffff) && !(offset & 0x3ffffe) && end >= address + 0x400000) { + entry = mk_pte_io(offset, + __pgprot(pgprot_val (prot) | _PAGE_SZ4MB), + space); + curend = address + 0x400000; + offset += 0x400000; + } else if (!(address & 0x7ffff) && !(offset & 0x7fffe) && end >= address + 0x80000) { + entry = mk_pte_io(offset, + __pgprot(pgprot_val (prot) | _PAGE_SZ512K), + space); + curend = address + 0x80000; + offset += 0x80000; + } else if (!(offset & 0xfffe) && end >= address + 0x10000) { + entry = mk_pte_io(offset, + __pgprot(pgprot_val (prot) | _PAGE_SZ64K), + space); + curend = address + 0x10000; + offset += 0x10000; + } else + offset += PAGE_SIZE; + } else + offset += PAGE_SIZE; + + do { + BUG_ON(!pte_none(*pte)); + set_pte_at(mm, address, pte, entry); + address += PAGE_SIZE; + pte++; + } while (address < curend); + } while (address < end); +} + +static inline int io_remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size, + unsigned long offset, pgprot_t prot, int space) +{ + unsigned long end; + + address &= ~PGDIR_MASK; + end = address + size; + if (end > PGDIR_SIZE) + end = PGDIR_SIZE; + offset -= address; + do { + pte_t * pte = pte_alloc_map(mm, pmd, address); + if (!pte) + return -ENOMEM; + io_remap_pte_range(mm, pte, address, end - address, address + offset, prot, space); + pte_unmap(pte); + address = (address + PMD_SIZE) & PMD_MASK; + pmd++; + } while (address < end); + return 0; +} + +static inline int io_remap_pud_range(struct mm_struct *mm, pud_t * pud, unsigned long address, unsigned long size, + unsigned long offset, pgprot_t prot, int space) +{ + unsigned long end; + + address &= ~PUD_MASK; + end = address + size; + if (end > PUD_SIZE) + end = PUD_SIZE; + offset -= address; + do { + pmd_t *pmd = pmd_alloc(mm, pud, address); + if (!pud) + return -ENOMEM; + io_remap_pmd_range(mm, pmd, address, end - address, address + offset, prot, space); + address = (address + PUD_SIZE) & PUD_MASK; + pud++; + } while (address < end); + return 0; +} + +int io_remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long offset, unsigned long size, pgprot_t prot, int space) +{ + int error = 0; + pgd_t * dir; + unsigned long beg = from; + unsigned long end = from + size; + struct mm_struct *mm = vma->vm_mm; + + prot = __pgprot(pg_iobits); + offset -= from; + dir = pgd_offset(mm, from); + flush_cache_range(vma, beg, end); + + spin_lock(&mm->page_table_lock); + while (from < end) { + pud_t *pud = pud_alloc(mm, dir, from); + error = -ENOMEM; + if (!pud) + break; + error = io_remap_pud_range(mm, pud, from, end - from, offset + from, prot, space); + if (error) + break; + from = (from + PGDIR_SIZE) & PGDIR_MASK; + dir++; + } + flush_tlb_range(vma, beg, end); + spin_unlock(&mm->page_table_lock); + + return error; +} + +int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from, + unsigned long pfn, unsigned long size, pgprot_t prot) +{ + int error = 0; + pgd_t * dir; + unsigned long beg = from; + unsigned long end = from + size; + struct mm_struct *mm = vma->vm_mm; + int space = GET_IOSPACE(pfn); + unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT; + + prot = __pgprot(pg_iobits); + offset -= from; + dir = pgd_offset(mm, from); + flush_cache_range(vma, beg, end); + + spin_lock(&mm->page_table_lock); + while (from < end) { + pud_t *pud = pud_alloc(current->mm, dir, from); + error = -ENOMEM; + if (!pud) + break; + error = io_remap_pud_range(mm, pud, from, end - from, offset + from, prot, space); + if (error) + break; + from = (from + PGDIR_SIZE) & PGDIR_MASK; + dir++; + } + flush_tlb_range(vma, beg, end); + spin_unlock(&mm->page_table_lock); + + return error; +} diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c new file mode 100644 index 00000000000..5a1f831b2de --- /dev/null +++ b/arch/sparc64/mm/hugetlbpage.c @@ -0,0 +1,310 @@ +/* + * SPARC64 Huge TLB page support. + * + * Copyright (C) 2002, 2003 David S. Miller (davem@redhat.com) + */ + +#include <linux/config.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/smp_lock.h> +#include <linux/slab.h> +#include <linux/sysctl.h> + +#include <asm/mman.h> +#include <asm/pgalloc.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/cacheflush.h> +#include <asm/mmu_context.h> + +static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + if (pgd) { + pud = pud_offset(pgd, addr); + if (pud) { + pmd = pmd_alloc(mm, pud, addr); + if (pmd) + pte = pte_alloc_map(mm, pmd, addr); + } + } + return pte; +} + +static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + if (pgd) { + pud = pud_offset(pgd, addr); + if (pud) { + pmd = pmd_offset(pud, addr); + if (pmd) + pte = pte_offset_map(pmd, addr); + } + } + return pte; +} + +#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0) + +static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, + struct page *page, pte_t * page_table, int write_access) +{ + unsigned long i; + pte_t entry; + + add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); + + if (write_access) + entry = pte_mkwrite(pte_mkdirty(mk_pte(page, + vma->vm_page_prot))); + else + entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); + entry = pte_mkyoung(entry); + mk_pte_huge(entry); + + for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { + set_pte_at(mm, addr, page_table, entry); + page_table++; + addr += PAGE_SIZE; + + pte_val(entry) += PAGE_SIZE; + } +} + +/* + * This function checks for proper alignment of input addr and len parameters. + */ +int is_aligned_hugepage_range(unsigned long addr, unsigned long len) +{ + if (len & ~HPAGE_MASK) + return -EINVAL; + if (addr & ~HPAGE_MASK) + return -EINVAL; + return 0; +} + +int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, + struct vm_area_struct *vma) +{ + pte_t *src_pte, *dst_pte, entry; + struct page *ptepage; + unsigned long addr = vma->vm_start; + unsigned long end = vma->vm_end; + int i; + + while (addr < end) { + dst_pte = huge_pte_alloc(dst, addr); + if (!dst_pte) + goto nomem; + src_pte = huge_pte_offset(src, addr); + BUG_ON(!src_pte || pte_none(*src_pte)); + entry = *src_pte; + ptepage = pte_page(entry); + get_page(ptepage); + for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { + set_pte_at(dst, addr, dst_pte, entry); + pte_val(entry) += PAGE_SIZE; + dst_pte++; + addr += PAGE_SIZE; + } + add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); + } + return 0; + +nomem: + return -ENOMEM; +} + +int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, + struct page **pages, struct vm_area_struct **vmas, + unsigned long *position, int *length, int i) +{ + unsigned long vaddr = *position; + int remainder = *length; + + WARN_ON(!is_vm_hugetlb_page(vma)); + + while (vaddr < vma->vm_end && remainder) { + if (pages) { + pte_t *pte; + struct page *page; + + pte = huge_pte_offset(mm, vaddr); + + /* hugetlb should be locked, and hence, prefaulted */ + BUG_ON(!pte || pte_none(*pte)); + + page = pte_page(*pte); + + WARN_ON(!PageCompound(page)); + + get_page(page); + pages[i] = page; + } + + if (vmas) + vmas[i] = vma; + + vaddr += PAGE_SIZE; + --remainder; + ++i; + } + + *length = remainder; + *position = vaddr; + + return i; +} + +struct page *follow_huge_addr(struct mm_struct *mm, + unsigned long address, int write) +{ + return ERR_PTR(-EINVAL); +} + +int pmd_huge(pmd_t pmd) +{ + return 0; +} + +struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + return NULL; +} + +void unmap_hugepage_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long address; + pte_t *pte; + struct page *page; + int i; + + BUG_ON(start & (HPAGE_SIZE - 1)); + BUG_ON(end & (HPAGE_SIZE - 1)); + + for (address = start; address < end; address += HPAGE_SIZE) { + pte = huge_pte_offset(mm, address); + BUG_ON(!pte); + if (pte_none(*pte)) + continue; + page = pte_page(*pte); + put_page(page); + for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { + pte_clear(mm, address+(i*PAGE_SIZE), pte); + pte++; + } + } + add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT)); + flush_tlb_range(vma, start, end); +} + +static void context_reload(void *__data) +{ + struct mm_struct *mm = __data; + + if (mm == current->mm) + load_secondary_context(mm); +} + +int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) +{ + struct mm_struct *mm = current->mm; + unsigned long addr; + int ret = 0; + + /* On UltraSPARC-III+ and later, configure the second half of + * the Data-TLB for huge pages. + */ + if (tlb_type == cheetah_plus) { + unsigned long ctx; + + spin_lock(&ctx_alloc_lock); + ctx = mm->context.sparc64_ctx_val; + ctx &= ~CTX_PGSZ_MASK; + ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT; + ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT; + + if (ctx != mm->context.sparc64_ctx_val) { + /* When changing the page size fields, we + * must perform a context flush so that no + * stale entries match. This flush must + * occur with the original context register + * settings. + */ + do_flush_tlb_mm(mm); + + /* Reload the context register of all processors + * also executing in this address space. + */ + mm->context.sparc64_ctx_val = ctx; + on_each_cpu(context_reload, mm, 0, 0); + } + spin_unlock(&ctx_alloc_lock); + } + + BUG_ON(vma->vm_start & ~HPAGE_MASK); + BUG_ON(vma->vm_end & ~HPAGE_MASK); + + spin_lock(&mm->page_table_lock); + for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { + unsigned long idx; + pte_t *pte = huge_pte_alloc(mm, addr); + struct page *page; + + if (!pte) { + ret = -ENOMEM; + goto out; + } + if (!pte_none(*pte)) + continue; + + idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) + + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); + page = find_get_page(mapping, idx); + if (!page) { + /* charge the fs quota first */ + if (hugetlb_get_quota(mapping)) { + ret = -ENOMEM; + goto out; + } + page = alloc_huge_page(); + if (!page) { + hugetlb_put_quota(mapping); + ret = -ENOMEM; + goto out; + } + ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC); + if (! ret) { + unlock_page(page); + } else { + hugetlb_put_quota(mapping); + free_huge_page(page); + goto out; + } |