diff options
author | Sam Ravnborg <sam@ravnborg.org> | 2008-11-16 20:08:45 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-12-04 09:16:59 -0800 |
commit | 27137e5285a3388e8f86d7bc5fe0ed8b92bd4624 (patch) | |
tree | 70cd698fb5561743913b5f7615f61df6e8883537 /arch/sparc/mm | |
parent | c37ddd936d96b46cf2bb17e7b1a18b2bd24ec1fb (diff) |
sparc,sparc64: unify mm/
- move all sparc64/mm/ files to arch/sparc/mm/
- commonly named files are named _64.c
- add files to sparc/mm/Makefile preserving link order
- delete now unused sparc64/mm/Makefile
- sparc64 now finds mm/ in sparc
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc/mm')
-rw-r--r-- | arch/sparc/mm/Makefile | 6 | ||||
-rw-r--r-- | arch/sparc/mm/fault_64.c | 440 | ||||
-rw-r--r-- | arch/sparc/mm/generic_64.c | 163 | ||||
-rw-r--r-- | arch/sparc/mm/hugetlbpage.c | 357 | ||||
-rw-r--r-- | arch/sparc/mm/init_64.c | 2360 | ||||
-rw-r--r-- | arch/sparc/mm/init_64.h | 49 | ||||
-rw-r--r-- | arch/sparc/mm/tlb.c | 97 | ||||
-rw-r--r-- | arch/sparc/mm/tsb.c | 496 | ||||
-rw-r--r-- | arch/sparc/mm/ultra.S | 767 |
9 files changed, 4734 insertions, 1 deletions
diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile index 3ad1b1f9953..681abe0a459 100644 --- a/arch/sparc/mm/Makefile +++ b/arch/sparc/mm/Makefile @@ -4,13 +4,17 @@ asflags-y := -ansi ccflags-y := -Werror -obj-y := fault_$(BITS).o +obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o +obj-y += fault_$(BITS).o obj-y += init_$(BITS).o obj-$(CONFIG_SPARC32) += loadmmu.o obj-y += generic_$(BITS).o obj-$(CONFIG_SPARC32) += extable.o btfixup.o srmmu.o iommu.o io-unit.o obj-$(CONFIG_SPARC32) += hypersparc.o viking.o tsunami.o swift.o +# Only used by sparc64 +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o + # Only used by sparc32 obj-$(CONFIG_HIGHMEM) += highmem.o diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c new file mode 100644 index 00000000000..a9e474bf638 --- /dev/null +++ b/arch/sparc/mm/fault_64.c @@ -0,0 +1,440 @@ +/* + * arch/sparc64/mm/fault.c: Page fault handlers for the 64-bit Sparc. + * + * Copyright (C) 1996, 2008 David S. Miller (davem@davemloft.net) + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + */ + +#include <asm/head.h> + +#include <linux/string.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/signal.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/kprobes.h> +#include <linux/kdebug.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/openprom.h> +#include <asm/oplib.h> +#include <asm/uaccess.h> +#include <asm/asi.h> +#include <asm/lsu.h> +#include <asm/sections.h> +#include <asm/mmu_context.h> + +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs) +{ + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 0)) + ret = 1; + preempt_enable(); + } + return ret; +} +#else +static inline int notify_page_fault(struct pt_regs *regs) +{ + return 0; +} +#endif + +static void __kprobes unhandled_fault(unsigned long address, + struct task_struct *tsk, + struct pt_regs *regs) +{ + if ((unsigned long) address < PAGE_SIZE) { + printk(KERN_ALERT "Unable to handle kernel NULL " + "pointer dereference\n"); + } else { + printk(KERN_ALERT "Unable to handle kernel paging request " + "at virtual address %016lx\n", (unsigned long)address); + } + printk(KERN_ALERT "tsk->{mm,active_mm}->context = %016lx\n", + (tsk->mm ? + CTX_HWBITS(tsk->mm->context) : + CTX_HWBITS(tsk->active_mm->context))); + printk(KERN_ALERT "tsk->{mm,active_mm}->pgd = %016lx\n", + (tsk->mm ? (unsigned long) tsk->mm->pgd : + (unsigned long) tsk->active_mm->pgd)); + die_if_kernel("Oops", regs); +} + +static void bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr) +{ + printk(KERN_CRIT "OOPS: Bogus kernel PC [%016lx] in fault handler\n", + regs->tpc); + printk(KERN_CRIT "OOPS: RPC [%016lx]\n", regs->u_regs[15]); + printk("OOPS: RPC <%pS>\n", (void *) regs->u_regs[15]); + printk(KERN_CRIT "OOPS: Fault was to vaddr[%lx]\n", vaddr); + dump_stack(); + unhandled_fault(regs->tpc, current, regs); +} + +/* + * We now make sure that mmap_sem is held in all paths that call + * this. Additionally, to prevent kswapd from ripping ptes from + * under us, raise interrupts around the time that we look at the + * pte, kswapd will have to wait to get his smp ipi response from + * us. vmtruncate likewise. This saves us having to get pte lock. + */ +static unsigned int get_user_insn(unsigned long tpc) +{ + pgd_t *pgdp = pgd_offset(current->mm, tpc); + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep, pte; + unsigned long pa; + u32 insn = 0; + unsigned long pstate; + + if (pgd_none(*pgdp)) + goto outret; + pudp = pud_offset(pgdp, tpc); + if (pud_none(*pudp)) + goto outret; + pmdp = pmd_offset(pudp, tpc); + if (pmd_none(*pmdp)) + goto outret; + + /* This disables preemption for us as well. */ + __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); + __asm__ __volatile__("wrpr %0, %1, %%pstate" + : : "r" (pstate), "i" (PSTATE_IE)); + ptep = pte_offset_map(pmdp, tpc); + pte = *ptep; + if (!pte_present(pte)) + goto out; + + pa = (pte_pfn(pte) << PAGE_SHIFT); + pa += (tpc & ~PAGE_MASK); + + /* Use phys bypass so we don't pollute dtlb/dcache. */ + __asm__ __volatile__("lduwa [%1] %2, %0" + : "=r" (insn) + : "r" (pa), "i" (ASI_PHYS_USE_EC)); + +out: + pte_unmap(ptep); + __asm__ __volatile__("wrpr %0, 0x0, %%pstate" : : "r" (pstate)); +outret: + return insn; +} + +extern unsigned long compute_effective_address(struct pt_regs *, unsigned int, unsigned int); + +static void do_fault_siginfo(int code, int sig, struct pt_regs *regs, + unsigned int insn, int fault_code) +{ + siginfo_t info; + + info.si_code = code; + info.si_signo = sig; + info.si_errno = 0; + if (fault_code & FAULT_CODE_ITLB) + info.si_addr = (void __user *) regs->tpc; + else + info.si_addr = (void __user *) + compute_effective_address(regs, insn, 0); + info.si_trapno = 0; + force_sig_info(sig, &info, current); +} + +extern int handle_ldf_stq(u32, struct pt_regs *); +extern int handle_ld_nf(u32, struct pt_regs *); + +static unsigned int get_fault_insn(struct pt_regs *regs, unsigned int insn) +{ + if (!insn) { + if (!regs->tpc || (regs->tpc & 0x3)) + return 0; + if (regs->tstate & TSTATE_PRIV) { + insn = *(unsigned int *) regs->tpc; + } else { + insn = get_user_insn(regs->tpc); + } + } + return insn; +} + +static void do_kernel_fault(struct pt_regs *regs, int si_code, int fault_code, + unsigned int insn, unsigned long address) +{ + unsigned char asi = ASI_P; + + if ((!insn) && (regs->tstate & TSTATE_PRIV)) + goto cannot_handle; + + /* If user insn could be read (thus insn is zero), that + * is fine. We will just gun down the process with a signal + * in that case. + */ + + if (!(fault_code & (FAULT_CODE_WRITE|FAULT_CODE_ITLB)) && + (insn & 0xc0800000) == 0xc0800000) { + if (insn & 0x2000) + asi = (regs->tstate >> 24); + else + asi = (insn >> 5); + if ((asi & 0xf2) == 0x82) { + if (insn & 0x1000000) { + handle_ldf_stq(insn, regs); + } else { + /* This was a non-faulting load. Just clear the + * destination register(s) and continue with the next + * instruction. -jj + */ + handle_ld_nf(insn, regs); + } + return; + } + } + + /* Is this in ex_table? */ + if (regs->tstate & TSTATE_PRIV) { + const struct exception_table_entry *entry; + + entry = search_exception_tables(regs->tpc); + if (entry) { + regs->tpc = entry->fixup; + regs->tnpc = regs->tpc + 4; + return; + } + } else { + /* The si_code was set to make clear whether + * this was a SEGV_MAPERR or SEGV_ACCERR fault. + */ + do_fault_siginfo(si_code, SIGSEGV, regs, insn, fault_code); + return; + } + +cannot_handle: + unhandled_fault (address, current, regs); +} + +asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned int insn = 0; + int si_code, fault_code, fault; + unsigned long address, mm_rss; + + fault_code = get_thread_fault_code(); + + if (notify_page_fault(regs)) + return; + + si_code = SEGV_MAPERR; + address = current_thread_info()->fault_address; + + if ((fault_code & FAULT_CODE_ITLB) && + (fault_code & FAULT_CODE_DTLB)) + BUG(); + + if (regs->tstate & TSTATE_PRIV) { + unsigned long tpc = regs->tpc; + + /* Sanity check the PC. */ + if ((tpc >= KERNBASE && tpc < (unsigned long) __init_end) || + (tpc >= MODULES_VADDR && tpc < MODULES_END)) { + /* Valid, no problems... */ + } else { + bad_kernel_pc(regs, address); + return; + } + } + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault.. + */ + if (in_atomic() || !mm) + goto intr_or_no_mm; + + if (test_thread_flag(TIF_32BIT)) { + if (!(regs->tstate & TSTATE_PRIV)) + regs->tpc &= 0xffffffff; + address &= 0xffffffff; + } + + if (!down_read_trylock(&mm->mmap_sem)) { + if ((regs->tstate & TSTATE_PRIV) && + !search_exception_tables(regs->tpc)) { + insn = get_fault_insn(regs, insn); + goto handle_kernel_fault; + } + down_read(&mm->mmap_sem); + } + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + + /* Pure DTLB misses do not tell us whether the fault causing + * load/store/atomic was a write or not, it only says that there + * was no match. So in such a case we (carefully) read the + * instruction to try and figure this out. It's an optimization + * so it's ok if we can't do this. + * + * Special hack, window spill/fill knows the exact fault type. + */ + if (((fault_code & + (FAULT_CODE_DTLB | FAULT_CODE_WRITE | FAULT_CODE_WINFIXUP)) == FAULT_CODE_DTLB) && + (vma->vm_flags & VM_WRITE) != 0) { + insn = get_fault_insn(regs, 0); + if (!insn) + goto continue_fault; + /* All loads, stores and atomics have bits 30 and 31 both set + * in the instruction. Bit 21 is set in all stores, but we + * have to avoid prefetches which also have bit 21 set. + */ + if ((insn & 0xc0200000) == 0xc0200000 && + (insn & 0x01780000) != 0x01680000) { + /* Don't bother updating thread struct value, + * because update_mmu_cache only cares which tlb + * the access came from. + */ + fault_code |= FAULT_CODE_WRITE; + } + } +continue_fault: + + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (!(fault_code & FAULT_CODE_WRITE)) { + /* Non-faulting loads shouldn't expand stack. */ + insn = get_fault_insn(regs, insn); + if ((insn & 0xc0800000) == 0xc0800000) { + unsigned char asi; + + if (insn & 0x2000) + asi = (regs->tstate >> 24); + else + asi = (insn >> 5); + if ((asi & 0xf2) == 0x82) + goto bad_area; + } + } + if (expand_stack(vma, address)) + goto bad_area; + /* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ +good_area: + si_code = SEGV_ACCERR; + + /* If we took a ITLB miss on a non-executable page, catch + * that here. + */ + if ((fault_code & FAULT_CODE_ITLB) && !(vma->vm_flags & VM_EXEC)) { + BUG_ON(address != regs->tpc); + BUG_ON(regs->tstate & TSTATE_PRIV); + goto bad_area; + } + + if (fault_code & FAULT_CODE_WRITE) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + + /* Spitfire has an icache which does not snoop + * processor stores. Later processors do... + */ + if (tlb_type == spitfire && + (vma->vm_flags & VM_EXEC) != 0 && + vma->vm_file != NULL) + set_thread_fault_code(fault_code | + FAULT_CODE_BLKCOMMIT); + } else { + /* Allow reads even for write-only mappings */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + + fault = handle_mm_fault(mm, vma, address, (fault_code & FAULT_CODE_WRITE)); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); + } + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; + + up_read(&mm->mmap_sem); + + mm_rss = get_mm_rss(mm); +#ifdef CONFIG_HUGETLB_PAGE + mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE)); +#endif + if (unlikely(mm_rss > + mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit)) + tsb_grow(mm, MM_TSB_BASE, mm_rss); +#ifdef CONFIG_HUGETLB_PAGE + mm_rss = mm->context.huge_pte_count; + if (unlikely(mm_rss > + mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) + tsb_grow(mm, MM_TSB_HUGE, mm_rss); +#endif + return; + + /* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ +bad_area: + insn = get_fault_insn(regs, insn); + up_read(&mm->mmap_sem); + +handle_kernel_fault: + do_kernel_fault(regs, si_code, fault_code, insn, address); + return; + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +out_of_memory: + insn = get_fault_insn(regs, insn); + up_read(&mm->mmap_sem); + printk("VM: killing process %s\n", current->comm); + if (!(regs->tstate & TSTATE_PRIV)) + do_group_exit(SIGKILL); + goto handle_kernel_fault; + +intr_or_no_mm: + insn = get_fault_insn(regs, 0); + goto handle_kernel_fault; + +do_sigbus: + insn = get_fault_insn(regs, insn); + up_read(&mm->mmap_sem); + + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode. + */ + do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, insn, fault_code); + + /* Kernel mode? Handle exceptions or die */ + if (regs->tstate & TSTATE_PRIV) + goto handle_kernel_fault; +} diff --git a/arch/sparc/mm/generic_64.c b/arch/sparc/mm/generic_64.c new file mode 100644 index 00000000000..f362c203701 --- /dev/null +++ b/arch/sparc/mm/generic_64.c @@ -0,0 +1,163 @@ +/* + * generic.c: Generic Sparc mm routines that are not dependent upon + * MMU type but are Sparc specific. + * + * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/pagemap.h> + +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/tlbflush.h> + +/* Remap IO memory, the same way as remap_pfn_range(), but use + * the obio memory space. + * + * They use a pgprot that sets PAGE_IO and does not check the + * mem_map table as this is independent of normal memory. + */ +static inline void io_remap_pte_range(struct mm_struct *mm, pte_t * pte, + unsigned long address, + unsigned long size, + unsigned long offset, pgprot_t prot, + int space) +{ + unsigned long end; + + /* clear hack bit that was used as a write_combine side-effect flag */ + offset &= ~0x1UL; + address &= ~PMD_MASK; + end = address + size; + if (end > PMD_SIZE) + end = PMD_SIZE; + do { + pte_t entry; + unsigned long curend = address + PAGE_SIZE; + + entry = mk_pte_io(offset, prot, space, PAGE_SIZE); + if (!(address & 0xffff)) { + if (PAGE_SIZE < (4 * 1024 * 1024) && + !(address & 0x3fffff) && + !(offset & 0x3ffffe) && + end >= address + 0x400000) { + entry = mk_pte_io(offset, prot, space, + 4 * 1024 * 1024); + curend = address + 0x400000; + offset += 0x400000; + } else if (PAGE_SIZE < (512 * 1024) && + !(address & 0x7ffff) && + !(offset & 0x7fffe) && + end >= address + 0x80000) { + entry = mk_pte_io(offset, prot, space, + 512 * 1024 * 1024); + curend = address + 0x80000; + offset += 0x80000; + } else if (PAGE_SIZE < (64 * 1024) && + !(offset & 0xfffe) && + end >= address + 0x10000) { + entry = mk_pte_io(offset, prot, space, + 64 * 1024); + curend = address + 0x10000; + offset += 0x10000; + } else + offset += PAGE_SIZE; + } else + offset += PAGE_SIZE; + + if (pte_write(entry)) + entry = pte_mkdirty(entry); + do { + BUG_ON(!pte_none(*pte)); + set_pte_at(mm, address, pte, entry); + address += PAGE_SIZE; + pte_val(entry) += PAGE_SIZE; + pte++; + } while (address < curend); + } while (address < end); +} + +static inline int io_remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size, + unsigned long offset, pgprot_t prot, int space) +{ + unsigned long end; + + address &= ~PGDIR_MASK; + end = address + size; + if (end > PGDIR_SIZE) + end = PGDIR_SIZE; + offset -= address; + do { + pte_t * pte = pte_alloc_map(mm, pmd, address); + if (!pte) + return -ENOMEM; + io_remap_pte_range(mm, pte, address, end - address, address + offset, prot, space); + pte_unmap(pte); + address = (address + PMD_SIZE) & PMD_MASK; + pmd++; + } while (address < end); + return 0; +} + +static inline int io_remap_pud_range(struct mm_struct *mm, pud_t * pud, unsigned long address, unsigned long size, + unsigned long offset, pgprot_t prot, int space) +{ + unsigned long end; + + address &= ~PUD_MASK; + end = address + size; + if (end > PUD_SIZE) + end = PUD_SIZE; + offset -= address; + do { + pmd_t *pmd = pmd_alloc(mm, pud, address); + if (!pud) + return -ENOMEM; + io_remap_pmd_range(mm, pmd, address, end - address, address + offset, prot, space); + address = (address + PUD_SIZE) & PUD_MASK; + pud++; + } while (address < end); + return 0; +} + +int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from, + unsigned long pfn, unsigned long size, pgprot_t prot) +{ + int error = 0; + pgd_t * dir; + unsigned long beg = from; + unsigned long end = from + size; + struct mm_struct *mm = vma->vm_mm; + int space = GET_IOSPACE(pfn); + unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT; + unsigned long phys_base; + + phys_base = offset | (((unsigned long) space) << 32UL); + + /* See comment in mm/memory.c remap_pfn_range */ + vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + vma->vm_pgoff = phys_base >> PAGE_SHIFT; + + offset -= from; + dir = pgd_offset(mm, from); + flush_cache_range(vma, beg, end); + + while (from < end) { + pud_t *pud = pud_alloc(mm, dir, from); + error = -ENOMEM; + if (!pud) + break; + error = io_remap_pud_range(mm, pud, from, end - from, offset + from, prot, space); + if (error) + break; + from = (from + PGDIR_SIZE) & PGDIR_MASK; + dir++; + } + + flush_tlb_range(vma, beg, end); + return error; +} diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c new file mode 100644 index 00000000000..f27d10369e0 --- /dev/null +++ b/arch/sparc/mm/hugetlbpage.c @@ -0,0 +1,357 @@ +/* + * SPARC64 Huge TLB page support. + * + * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net) + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/slab.h> +#include <linux/sysctl.h> + +#include <asm/mman.h> +#include <asm/pgalloc.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/cacheflush.h> +#include <asm/mmu_context.h> + +/* Slightly simplified from the non-hugepage variant because by + * definition we don't have to worry about any page coloring stuff + */ +#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL)) +#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL)) + +static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct * vma; + unsigned long task_size = TASK_SIZE; + unsigned long start_addr; + + if (test_thread_flag(TIF_32BIT)) + task_size = STACK_TOP32; + if (unlikely(len >= VA_EXCLUDE_START)) + return -ENOMEM; + + if (len > mm->cached_hole_size) { + start_addr = addr = mm->free_area_cache; + } else { + start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + } + + task_size -= len; + +full_search: + addr = ALIGN(addr, HPAGE_SIZE); + + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (addr < VA_EXCLUDE_START && + (addr + len) >= VA_EXCLUDE_START) { + addr = VA_EXCLUDE_END; + vma = find_vma(mm, VA_EXCLUDE_END); + } + if (unlikely(task_size < addr)) { + if (start_addr != TASK_UNMAPPED_BASE) { + start_addr = addr = TASK_UNMAPPED_BASE; + mm->cached_hole_size = 0; + goto full_search; + } + return -ENOMEM; + } + if (likely(!vma || addr + len <= vma->vm_start)) { + /* + * Remember the place where we stopped the search: + */ + mm->free_area_cache = addr + len; + return addr; + } + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + addr = ALIGN(vma->vm_end, HPAGE_SIZE); + } +} + +static unsigned long +hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, + const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long addr = addr0; + + /* This should only ever run for 32-bit processes. */ + BUG_ON(!test_thread_flag(TIF_32BIT)); + + /* check if free_area_cache is useful for us */ + if (len <= mm->cached_hole_size) { + mm->cached_hole_size = 0; + mm->free_area_cache = mm->mmap_base; + } + + /* either no address requested or can't fit in requested address hole */ + addr = mm->free_area_cache & HPAGE_MASK; + + /* make sure it can fit in the remaining address space */ + if (likely(addr > len)) { + vma = find_vma(mm, addr-len); + if (!vma || addr <= vma->vm_start) { + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr-len); + } + } + + if (unlikely(mm->mmap_base < len)) + goto bottomup; + + addr = (mm->mmap_base-len) & HPAGE_MASK; + + do { + /* + * Lookup failure means no vma is above this address, + * else if new region fits below vma->vm_start, + * return with success: + */ + vma = find_vma(mm, addr); + if (likely(!vma || addr+len <= vma->vm_start)) { + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr); + } + + /* remember the largest hole we saw so far */ + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = (vma->vm_start-len) & HPAGE_MASK; + } while (likely(len < vma->vm_start)); + +bottomup: + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + mm->cached_hole_size = ~0UL; + mm->free_area_cache = TASK_UNMAPPED_BASE; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + + return addr; +} + +unsigned long +hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long task_size = TASK_SIZE; + + if (test_thread_flag(TIF_32BIT)) + task_size = STACK_TOP32; + + if (len & ~HPAGE_MASK) + return -EINVAL; + if (len > task_size) + return -ENOMEM; + + if (flags & MAP_FIXED) { + if (prepare_hugepage_range(file, addr, len)) + return -EINVAL; + return addr; + } + + if (addr) { + addr = ALIGN(addr, HPAGE_SIZE); + vma = find_vma(mm, addr); + if (task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + if (mm->get_unmapped_area == arch_get_unmapped_area) + return hugetlb_get_unmapped_area_bottomup(file, addr, len, + pgoff, flags); + else + return hugetlb_get_unmapped_area_topdown(file, addr, len, + pgoff, flags); +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + + /* We must align the address, because our caller will run + * set_huge_pte_at() on whatever we return, which writes out + * all of the sub-ptes for the hugepage range. So we have + * to give it the first such sub-pte. + */ + addr &= HPAGE_MASK; + + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (pud) { + pmd = pmd_alloc(mm, pud, addr); + if (pmd) + pte = pte_alloc_map(mm, pmd, addr); + } + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + + addr &= HPAGE_MASK; + + pgd = pgd_offset(mm, addr); + if (!pgd_none(*pgd)) { + pud = pud_offset(pgd, addr); + if (!pud_none(*pud)) { + pmd = pmd_offset(pud, addr); + if (!pmd_none(*pmd)) + pte = pte_offset_map(pmd, addr); + } + } + return pte; +} + +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + int i; + + if (!pte_present(*ptep) && pte_present(entry)) + mm->context.huge_pte_count++; + + addr &= HPAGE_MASK; + for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { + set_pte_at(mm, addr, ptep, entry); + ptep++; + addr += PAGE_SIZE; + pte_val(entry) += PAGE_SIZE; + } +} + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pte_t entry; + int i; + + entry = *ptep; + if (pte_present(entry)) + mm->context.huge_pte_count--; + + addr &= HPAGE_MASK; + + for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { + pte_clear(mm, addr, ptep); + addr += PAGE_SIZE; + ptep++; + } + + return entry; +} + +struct page *follow_huge_addr(struct mm_struct *mm, + unsigned long address, int write) +{ + return ERR_PTR(-EINVAL); +} + +int pmd_huge(pmd_t pmd) +{ + return 0; +} + +int pud_huge(pud_t pud) +{ + return 0; +} + +struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + return NULL; +} + +static void context_reload(void *__data) +{ + struct mm_struct *mm = __data; + + if (mm == current->mm) + load_secondary_context(mm); +} + +void hugetlb_prefault_arch_hook(struct mm_struct *mm) +{ + struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE]; + + if (likely(tp->tsb != NULL)) + return; + + tsb_grow(mm, MM_TSB_HUGE, 0); + tsb_context_switch(mm); + smp_tsb_sync(mm); + + /* On UltraSPARC-III+ and later, configure the second half of + * the Data-TLB for huge pages. + */ + if (tlb_type == cheetah_plus) { + unsigned long ctx; + + spin_lock(&ctx_alloc_lock); + ctx = mm->context.sparc64_ctx_val; + ctx &= ~CTX_PGSZ_MASK; + ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT; + ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT; + + if (ctx != mm->context.sparc64_ctx_val) { + /* When changing the page size fields, we + * must perform a context flush so that no + * stale entries match. This flush must + * occur with the original context register + * settings. + */ + do_flush_tlb_mm(mm); + + /* Reload the context register of all processors + * also executing in this address space. + */ + mm->context.sparc64_ctx_val = ctx; + on_each_cpu(context_reload, mm, 0); + } + spin_unlock(&ctx_alloc_lock); + } +} diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c new file mode 100644 index 00000000000..6ea73da2931 --- /dev/null +++ b/arch/sparc/mm/init_64.c @@ -0,0 +1,2360 @@ +/* + * arch/sparc64/mm/init.c + * + * Copyright (C) 1996-1999 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1997-1999 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/slab.h> +#include <linux/initrd.h> +#include <linux/swap.h> +#include <linux/pagemap.h> +#include <linux/poison.h> +#include <linux/fs.h> +#include <linux/seq_file.h> +#include <linux/kprobes.h> +#include <linux/cache.h> +#include <linux/sort.h> +#include <linux/percpu.h> +#include <linux/lmb.h> +#include <linux/mmzone.h> + +#include <asm/head.h> +#include <asm/system.h> +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/oplib.h> +#include <asm/iommu.h> +#include <asm/io.h> +#include <asm/uaccess.h> +#include <asm/mmu_context.h> +#include <asm/tlbflush.h> +#include <asm/dma.h> +#include <asm/starfire.h> +#include <asm/tlb.h> +#include <asm/spitfire.h> +#include <asm/sections.h> +#include <asm/tsb.h> +#include <asm/hypervisor.h> +#include <asm/prom.h> +#include <asm/mdesc.h> +#include <asm/cpudata.h> +#include <asm/irq.h> + +#include "init_64.h" + +unsigned long kern_linear_pte_xor[2] __read_mostly; + +/* A bitmap, one bit for every 256MB of physical memory. If the bit + * is clear, we should use a 4MB page (via kern_linear_pte_xor[0]) else + * if set we should use a 256MB page (via kern_linear_pte_xor[1]). + */ +unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; + +#ifndef CONFIG_DEBUG_PAGEALLOC +/* A special kernel TSB for 4MB and 256MB linear mappings. + * Space is allocated for this right after the trap table + * in arch/sparc64/kernel/head.S + */ +extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES]; +#endif + +#define MAX_BANKS 32 + +static struct linux_prom64_registers pavail[MAX_BANKS] __initdata; +static int pavail_ents __initdata; + +static int cmp_p64(const void *a, const void *b) +{ + const struct linux_prom64_registers *x = a, *y = b; + + if (x->phys_addr > y->phys_addr) + return 1; + if (x->phys_addr < y->phys_addr) + return -1; + return 0; +} + +static void __init read_obp_memory(const char *property, + struct linux_prom64_registers *regs, + int *num_ents) +{ + int node = prom_finddevice("/memory"); + int prop_size = prom_getproplen(node, property); |