diff options
Diffstat (limited to 'arch/sparc/mm')
| -rw-r--r-- | arch/sparc/mm/fault_32.c | 40 | ||||
| -rw-r--r-- | arch/sparc/mm/fault_64.c | 142 | ||||
| -rw-r--r-- | arch/sparc/mm/gup.c | 58 | ||||
| -rw-r--r-- | arch/sparc/mm/highmem.c | 42 | ||||
| -rw-r--r-- | arch/sparc/mm/hugetlbpage.c | 177 | ||||
| -rw-r--r-- | arch/sparc/mm/hypersparc.S | 8 | ||||
| -rw-r--r-- | arch/sparc/mm/init_32.c | 147 | ||||
| -rw-r--r-- | arch/sparc/mm/init_64.c | 712 | ||||
| -rw-r--r-- | arch/sparc/mm/init_64.h | 12 | ||||
| -rw-r--r-- | arch/sparc/mm/io-unit.c | 21 | ||||
| -rw-r--r-- | arch/sparc/mm/iommu.c | 31 | ||||
| -rw-r--r-- | arch/sparc/mm/leon_mm.c | 4 | ||||
| -rw-r--r-- | arch/sparc/mm/mm_32.h | 24 | ||||
| -rw-r--r-- | arch/sparc/mm/srmmu.c | 368 | ||||
| -rw-r--r-- | arch/sparc/mm/srmmu.h | 4 | ||||
| -rw-r--r-- | arch/sparc/mm/swift.S | 8 | ||||
| -rw-r--r-- | arch/sparc/mm/tlb.c | 183 | ||||
| -rw-r--r-- | arch/sparc/mm/tsb.c | 107 | ||||
| -rw-r--r-- | arch/sparc/mm/tsunami.S | 6 | ||||
| -rw-r--r-- | arch/sparc/mm/ultra.S | 195 | ||||
| -rw-r--r-- | arch/sparc/mm/viking.S | 10 |
21 files changed, 1449 insertions, 850 deletions
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index f46cf6be337..908e8c17c90 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -26,32 +26,14 @@ #include <asm/pgtable.h> #include <asm/openprom.h> #include <asm/oplib.h> +#include <asm/setup.h> #include <asm/smp.h> #include <asm/traps.h> #include <asm/uaccess.h> -int show_unhandled_signals = 1; - -/* At boot time we determine these two values necessary for setting - * up the segment maps and page table entries (pte's). - */ - -int num_contexts; - -/* Return how much physical memory we have. */ -unsigned long probe_memory(void) -{ - unsigned long total = 0; - int i; +#include "mm_32.h" - for (i = 0; sp_banks[i].num_bytes; i++) - total += sp_banks[i].num_bytes; - - return total; -} - -static void unhandled_fault(unsigned long, struct task_struct *, - struct pt_regs *) __attribute__ ((noreturn)); +int show_unhandled_signals = 1; static void __noreturn unhandled_fault(unsigned long address, struct task_struct *tsk, @@ -159,9 +141,6 @@ static void __do_fault_siginfo(int code, int sig, struct pt_regs *regs, force_sig_info (sig, &info, current); } -extern unsigned long safe_compute_effective_address(struct pt_regs *, - unsigned int); - static unsigned long compute_si_addr(struct pt_regs *regs, int text_fault) { unsigned int insn; @@ -195,8 +174,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, unsigned long g2; int from_user = !(regs->psr & PSR_PS); int fault, code; - unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0)); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (text_fault) address = regs->pc; @@ -253,6 +231,11 @@ good_area: goto bad_area; } + if (from_user) + flags |= FAULT_FLAG_USER; + if (write) + flags |= FAULT_FLAG_WRITE; + /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo @@ -283,6 +266,7 @@ good_area: } if (fault & VM_FAULT_RETRY) { flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; /* No need to up_read(&mm->mmap_sem) as we would * have already released it in __lock_page_or_retry @@ -400,6 +384,7 @@ static void force_user_fault(unsigned long address, int write) struct vm_area_struct *vma; struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; + unsigned int flags = FAULT_FLAG_USER; int code; code = SEGV_MAPERR; @@ -419,11 +404,12 @@ good_area: if (write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } - switch (handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0)) { + switch (handle_mm_fault(mm, vma, address, flags)) { case VM_FAULT_SIGBUS: case VM_FAULT_OOM: goto do_sigbus; diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 1fe0429b631..587cd056512 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -21,6 +21,7 @@ #include <linux/kprobes.h> #include <linux/kdebug.h> #include <linux/percpu.h> +#include <linux/context_tracking.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -31,6 +32,7 @@ #include <asm/lsu.h> #include <asm/sections.h> #include <asm/mmu_context.h> +#include <asm/setup.h> int show_unhandled_signals = 1; @@ -95,38 +97,51 @@ static unsigned int get_user_insn(unsigned long tpc) pte_t *ptep, pte; unsigned long pa; u32 insn = 0; - unsigned long pstate; - if (pgd_none(*pgdp)) - goto outret; + if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp))) + goto out; pudp = pud_offset(pgdp, tpc); - if (pud_none(*pudp)) - goto outret; - pmdp = pmd_offset(pudp, tpc); - if (pmd_none(*pmdp)) - goto outret; - - /* This disables preemption for us as well. */ - __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); - __asm__ __volatile__("wrpr %0, %1, %%pstate" - : : "r" (pstate), "i" (PSTATE_IE)); - ptep = pte_offset_map(pmdp, tpc); - pte = *ptep; - if (!pte_present(pte)) + if (pud_none(*pudp) || unlikely(pud_bad(*pudp))) goto out; - pa = (pte_pfn(pte) << PAGE_SHIFT); - pa += (tpc & ~PAGE_MASK); - - /* Use phys bypass so we don't pollute dtlb/dcache. */ - __asm__ __volatile__("lduwa [%1] %2, %0" - : "=r" (insn) - : "r" (pa), "i" (ASI_PHYS_USE_EC)); + /* This disables preemption for us as well. */ + local_irq_disable(); + pmdp = pmd_offset(pudp, tpc); + if (pmd_none(*pmdp) || unlikely(pmd_bad(*pmdp))) + goto out_irq_enable; + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (pmd_trans_huge(*pmdp)) { + if (pmd_trans_splitting(*pmdp)) + goto out_irq_enable; + + pa = pmd_pfn(*pmdp) << PAGE_SHIFT; + pa += tpc & ~HPAGE_MASK; + + /* Use phys bypass so we don't pollute dtlb/dcache. */ + __asm__ __volatile__("lduwa [%1] %2, %0" + : "=r" (insn) + : "r" (pa), "i" (ASI_PHYS_USE_EC)); + } else +#endif + { + ptep = pte_offset_map(pmdp, tpc); + pte = *ptep; + if (pte_present(pte)) { + pa = (pte_pfn(pte) << PAGE_SHIFT); + pa += (tpc & ~PAGE_MASK); + + /* Use phys bypass so we don't pollute dtlb/dcache. */ + __asm__ __volatile__("lduwa [%1] %2, %0" + : "=r" (insn) + : "r" (pa), "i" (ASI_PHYS_USE_EC)); + } + pte_unmap(ptep); + } +out_irq_enable: + local_irq_enable(); out: - pte_unmap(ptep); - __asm__ __volatile__("wrpr %0, 0x0, %%pstate" : : "r" (pstate)); -outret: return insn; } @@ -151,10 +166,9 @@ show_signal_msg(struct pt_regs *regs, int sig, int code, printk(KERN_CONT "\n"); } -extern unsigned long compute_effective_address(struct pt_regs *, unsigned int, unsigned int); - static void do_fault_siginfo(int code, int sig, struct pt_regs *regs, - unsigned int insn, int fault_code) + unsigned long fault_addr, unsigned int insn, + int fault_code) { unsigned long addr; siginfo_t info; @@ -162,10 +176,18 @@ static void do_fault_siginfo(int code, int sig, struct pt_regs *regs, info.si_code = code; info.si_signo = sig; info.si_errno = 0; - if (fault_code & FAULT_CODE_ITLB) + if (fault_code & FAULT_CODE_ITLB) { addr = regs->tpc; - else - addr = compute_effective_address(regs, insn, 0); + } else { + /* If we were able to probe the faulting instruction, use it + * to compute a precise fault address. Otherwise use the fault + * time provided address which may only have page granularity. + */ + if (insn) + addr = compute_effective_address(regs, insn, 0); + else + addr = fault_addr; + } info.si_addr = (void __user *) addr; info.si_trapno = 0; @@ -175,9 +197,6 @@ static void do_fault_siginfo(int code, int sig, struct pt_regs *regs, force_sig_info(sig, &info, current); } -extern int handle_ldf_stq(u32, struct pt_regs *); -extern int handle_ld_nf(u32, struct pt_regs *); - static unsigned int get_fault_insn(struct pt_regs *regs, unsigned int insn) { if (!insn) { @@ -240,7 +259,7 @@ static void __kprobes do_kernel_fault(struct pt_regs *regs, int si_code, /* The si_code was set to make clear whether * this was a SEGV_MAPERR or SEGV_ACCERR fault. */ - do_fault_siginfo(si_code, SIGSEGV, regs, insn, fault_code); + do_fault_siginfo(si_code, SIGSEGV, regs, address, insn, fault_code); return; } @@ -260,20 +279,9 @@ static void noinline __kprobes bogus_32bit_fault_tpc(struct pt_regs *regs) show_regs(regs); } -static void noinline __kprobes bogus_32bit_fault_address(struct pt_regs *regs, - unsigned long addr) -{ - static int times; - - if (times++ < 10) - printk(KERN_ERR "FAULT[%s:%d]: 32-bit process " - "reports 64-bit fault address [%lx]\n", - current->comm, current->pid, addr); - show_regs(regs); -} - asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) { + enum ctx_state prev_state = exception_enter(); struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned int insn = 0; @@ -284,7 +292,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) fault_code = get_thread_fault_code(); if (notify_page_fault(regs)) - return; + goto exit_exception; si_code = SEGV_MAPERR; address = current_thread_info()->fault_address; @@ -300,10 +308,8 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) goto intr_or_no_mm; } } - if (unlikely((address >> 32) != 0)) { - bogus_32bit_fault_address(regs, address); + if (unlikely((address >> 32) != 0)) goto intr_or_no_mm; - } } if (regs->tstate & TSTATE_PRIV) { @@ -315,9 +321,10 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) /* Valid, no problems... */ } else { bad_kernel_pc(regs, address); - return; + goto exit_exception; } - } + } else + flags |= FAULT_FLAG_USER; /* * If we're in an interrupt or have no user @@ -420,17 +427,18 @@ good_area: vma->vm_file != NULL) set_thread_fault_code(fault_code | FAULT_CODE_BLKCOMMIT); + + flags |= FAULT_FLAG_WRITE; } else { /* Allow reads even for write-only mappings */ if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } - flags |= ((fault_code & FAULT_CODE_WRITE) ? FAULT_FLAG_WRITE : 0); fault = handle_mm_fault(mm, vma, address, flags); if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) - return; + goto exit_exception; if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) @@ -452,6 +460,7 @@ good_area: } if (fault & VM_FAULT_RETRY) { flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; /* No need to up_read(&mm->mmap_sem) as we would * have already released it in __lock_page_or_retry @@ -464,18 +473,25 @@ good_area: up_read(&mm->mmap_sem); mm_rss = get_mm_rss(mm); -#ifdef CONFIG_HUGETLB_PAGE +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE)); #endif if (unlikely(mm_rss > mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit)) tsb_grow(mm, MM_TSB_BASE, mm_rss); -#ifdef CONFIG_HUGETLB_PAGE +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) mm_rss = mm->context.huge_pte_count; if (unlikely(mm_rss > - mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) - tsb_grow(mm, MM_TSB_HUGE, mm_rss); + mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) { + if (mm->context.tsb_block[MM_TSB_HUGE].tsb) + tsb_grow(mm, MM_TSB_HUGE, mm_rss); + else + hugetlb_setup(regs); + + } #endif +exit_exception: + exception_exit(prev_state); return; /* @@ -488,7 +504,7 @@ bad_area: handle_kernel_fault: do_kernel_fault(regs, si_code, fault_code, insn, address); - return; + goto exit_exception; /* * We ran out of memory, or some other thing happened to us that made @@ -499,7 +515,7 @@ out_of_memory: up_read(&mm->mmap_sem); if (!(regs->tstate & TSTATE_PRIV)) { pagefault_out_of_memory(); - return; + goto exit_exception; } goto handle_kernel_fault; @@ -515,7 +531,7 @@ do_sigbus: * Send a sigbus, regardless of whether we were in kernel * or user mode. */ - do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, insn, fault_code); + do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, address, insn, fault_code); /* Kernel mode? Handle exceptions or die */ if (regs->tstate & TSTATE_PRIV) diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 42c55df3aec..1aed0432c64 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -66,6 +66,55 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, return 1; } +static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, + int *nr) +{ + struct page *head, *page, *tail; + int refs; + + if (!(pmd_val(pmd) & _PAGE_VALID)) + return 0; + + if (write && !pmd_write(pmd)) + return 0; + + refs = 0; + head = pmd_page(pmd); + page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + tail = page; + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + + if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) { + *nr -= refs; + while (refs--) + put_page(head); + return 0; + } + + /* Any tail page need their mapcount reference taken before we + * return. + */ + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; + } + + return 1; +} + static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { @@ -77,9 +126,14 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, pmd_t pmd = *pmdp; next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) + if (pmd_none(pmd) || pmd_trans_splitting(pmd)) return 0; - if (!gup_pte_range(pmd, addr, next, write, pages, nr)) + if (unlikely(pmd_large(pmd))) { + if (!gup_huge_pmd(pmdp, pmd, addr, next, + write, pages, nr)) + return 0; + } else if (!gup_pte_range(pmd, addr, next, write, + pages, nr)) return 0; } while (pmdp++, addr = next, addr != end); diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c index 055c66cf1bf..449f864f0ce 100644 --- a/arch/sparc/mm/highmem.c +++ b/arch/sparc/mm/highmem.c @@ -22,13 +22,31 @@ * shared by CPUs, and so precious, and establishing them requires IPI. * Atomic kmaps are lightweight and we may have NCPUS more of them. */ -#include <linux/mm.h> #include <linux/highmem.h> #include <linux/export.h> -#include <asm/pgalloc.h> +#include <linux/mm.h> + #include <asm/cacheflush.h> #include <asm/tlbflush.h> -#include <asm/fixmap.h> +#include <asm/pgalloc.h> +#include <asm/vaddrs.h> + +pgprot_t kmap_prot; + +static pte_t *kmap_pte; + +void __init kmap_init(void) +{ + unsigned long address; + pmd_t *dir; + + address = __fix_to_virt(FIX_KMAP_BEGIN); + dir = pmd_offset(pgd_offset_k(address), address); + + /* cache the first kmap pte */ + kmap_pte = pte_offset_kernel(dir, address); + kmap_prot = __pgprot(SRMMU_ET_PTE | SRMMU_PRIV | SRMMU_CACHE); +} void *kmap_atomic(struct page *page) { @@ -110,21 +128,3 @@ void __kunmap_atomic(void *kvaddr) pagefault_enable(); } EXPORT_SYMBOL(__kunmap_atomic); - -/* We may be fed a pagetable here by ptep_to_xxx and others. */ -struct page *kmap_atomic_to_page(void *ptr) -{ - unsigned long idx, vaddr = (unsigned long)ptr; - pte_t *pte; - - if (vaddr < SRMMU_NOCACHE_VADDR) - return virt_to_page(ptr); - if (vaddr < PKMAP_BASE) - return pfn_to_page(__nocache_pa(vaddr) >> PAGE_SHIFT); - BUG_ON(vaddr < FIXADDR_START); - BUG_ON(vaddr > FIXADDR_TOP); - - idx = virt_to_fix(vaddr); - pte = kmap_pte - (idx - FIX_KMAP_BEGIN); - return pte_page(*pte); -} diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 07e14535375..d329537739c 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -4,7 +4,6 @@ * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net) */ -#include <linux/init.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/hugetlb.h> @@ -21,8 +20,6 @@ /* Slightly simplified from the non-hugepage variant because by * definition we don't have to worry about any page coloring stuff */ -#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL)) -#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL)) static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, unsigned long addr, @@ -30,55 +27,28 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, unsigned long pgoff, unsigned long flags) { - struct mm_struct *mm = current->mm; - struct vm_area_struct * vma; unsigned long task_size = TASK_SIZE; - unsigned long start_addr; + struct vm_unmapped_area_info info; if (test_thread_flag(TIF_32BIT)) task_size = STACK_TOP32; - if (unlikely(len >= VA_EXCLUDE_START)) - return -ENOMEM; - if (len > mm->cached_hole_size) { - start_addr = addr = mm->free_area_cache; - } else { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; + info.flags = 0; + info.length = len; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = min(task_size, VA_EXCLUDE_START); + info.align_mask = PAGE_MASK & ~HPAGE_MASK; + info.align_offset = 0; + addr = vm_unmapped_area(&info); + + if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) { + VM_BUG_ON(addr != -ENOMEM); + info.low_limit = VA_EXCLUDE_END; + info.high_limit = task_size; + addr = vm_unmapped_area(&info); } - task_size -= len; - -full_search: - addr = ALIGN(addr, HPAGE_SIZE); - - for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (addr < VA_EXCLUDE_START && - (addr + len) >= VA_EXCLUDE_START) { - addr = VA_EXCLUDE_END; - vma = find_vma(mm, VA_EXCLUDE_END); - } - if (unlikely(task_size < addr)) { - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - goto full_search; - } - return -ENOMEM; - } - if (likely(!vma || addr + len <= vma->vm_start)) { - /* - * Remember the place where we stopped the search: - */ - mm->free_area_cache = addr + len; - return addr; - } - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - - addr = ALIGN(vma->vm_end, HPAGE_SIZE); - } + return addr; } static unsigned long @@ -87,71 +57,34 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, const unsigned long pgoff, const unsigned long flags) { - struct vm_area_struct *vma; struct mm_struct *mm = current->mm; unsigned long addr = addr0; + struct vm_unmapped_area_info info; /* This should only ever run for 32-bit processes. */ BUG_ON(!test_thread_flag(TIF_32BIT)); - /* check if free_area_cache is useful for us */ - if (len <= mm->cached_hole_size) { - mm->cached_hole_size = 0; - mm->free_area_cache = mm->mmap_base; - } - - /* either no address requested or can't fit in requested address hole */ - addr = mm->free_area_cache & HPAGE_MASK; - - /* make sure it can fit in the remaining address space */ - if (likely(addr > len)) { - vma = find_vma(mm, addr-len); - if (!vma || addr <= vma->vm_start) { - /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr-len); - } - } - - if (unlikely(mm->mmap_base < len)) - goto bottomup; - - addr = (mm->mmap_base-len) & HPAGE_MASK; - - do { - /* - * Lookup failure means no vma is above this address, - * else if new region fits below vma->vm_start, - * return with success: - */ - vma = find_vma(mm, addr); - if (likely(!vma || addr+len <= vma->vm_start)) { - /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr); - } + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = PAGE_SIZE; + info.high_limit = mm->mmap_base; + info.align_mask = PAGE_MASK & ~HPAGE_MASK; + info.align_offset = 0; + addr = vm_unmapped_area(&info); - /* remember the largest hole we saw so far */ - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - - /* try just below the current vma->vm_start */ - addr = (vma->vm_start-len) & HPAGE_MASK; - } while (likely(len < vma->vm_start)); - -bottomup: /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. This scenario * can happen with large stack limits and large mmap() * allocations. */ - mm->cached_hole_size = ~0UL; - mm->free_area_cache = TASK_UNMAPPED_BASE; - addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); - /* - * Restore the topdown base: - */ - mm->free_area_cache = mm->mmap_base; - mm->cached_hole_size = ~0UL; + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = STACK_TOP32; + addr = vm_unmapped_area(&info); + } return addr; } @@ -303,53 +236,3 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, { return NULL; } - -static void context_reload(void *__data) -{ - struct mm_struct *mm = __data; - - if (mm == current->mm) - load_secondary_context(mm); -} - -void hugetlb_prefault_arch_hook(struct mm_struct *mm) -{ - struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE]; - - if (likely(tp->tsb != NULL)) - return; - - tsb_grow(mm, MM_TSB_HUGE, 0); - tsb_context_switch(mm); - smp_tsb_sync(mm); - - /* On UltraSPARC-III+ and later, configure the second half of - * the Data-TLB for huge pages. - */ - if (tlb_type == cheetah_plus) { - unsigned long ctx; - - spin_lock(&ctx_alloc_lock); - ctx = mm->context.sparc64_ctx_val; - ctx &= ~CTX_PGSZ_MASK; - ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT; - ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT; - - if (ctx != mm->context.sparc64_ctx_val) { - /* When changing the page size fields, we - * must perform a context flush so that no - * stale entries match. This flush must - * occur with the original context register - * settings. - */ - do_flush_tlb_mm(mm); - - /* Reload the context register of all processors - * also executing in this address space. - */ - mm->context.sparc64_ctx_val = ctx; - on_each_cpu(context_reload, mm, 0); - } - spin_unlock(&ctx_alloc_lock); - } -} diff --git a/arch/sparc/mm/hypersparc.S b/arch/sparc/mm/hypersparc.S index 44aad32eeb4..969f96450f6 100644 --- a/arch/sparc/mm/hypersparc.S +++ b/arch/sparc/mm/hypersparc.S @@ -74,7 +74,7 @@ hypersparc_flush_cache_mm_out: /* The things we do for performance... */ hypersparc_flush_cache_range: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 #ifndef CONFIG_SMP ld [%o0 + AOFF_mm_context], %g1 cmp %g1, -1 @@ -163,7 +163,7 @@ hypersparc_flush_cache_range_out: */ /* Verified, my ass... */ hypersparc_flush_cache_page: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 ld [%o0 + AOFF_mm_context], %g2 #ifndef CONFIG_SMP cmp %g2, -1 @@ -284,7 +284,7 @@ hypersparc_flush_tlb_mm_out: sta %g5, [%g1] ASI_M_MMUREGS hypersparc_flush_tlb_range: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 @@ -307,7 +307,7 @@ hypersparc_flush_tlb_range_out: sta %g5, [%g1] ASI_M_MMUREGS hypersparc_flush_tlb_page: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 andn %o1, (PAGE_SIZE - 1), %o1 diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index ef5c779ec85..eb828715527 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -31,10 +31,13 @@ #include <asm/pgtable.h> #include <asm/vaddrs.h> #include <asm/pgalloc.h> /* bug in asm-generic/tlb.h: check_pgt_cache */ +#include <asm/setup.h> #include <asm/tlb.h> #include <asm/prom.h> #include <asm/leon.h> +#include "mm_32.h" + unsigned long *sparc_valid_addr_bitmap; EXPORT_SYMBOL(sparc_valid_addr_bitmap); @@ -45,9 +48,6 @@ unsigned long pfn_base; EXPORT_SYMBOL(pfn_base); struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS+1]; -unsigned long sparc_unmapped_base; - -struct pgtable_cache_struct pgt_quicklists; /* Initial ramdisk setup */ extern unsigned int sparc_ramdisk_image; @@ -55,56 +55,17 @@ extern unsigned int sparc_ramdisk_size; unsigned long highstart_pfn, highend_pfn; -pte_t *kmap_pte; -pgprot_t kmap_prot; - -#define kmap_get_fixmap_pte(vaddr) \ - pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) - -void __init kmap_init(void) -{ - /* cache the first kmap pte */ - kmap_pte = kmap_get_fixmap_pte(__fix_to_virt(FIX_KMAP_BEGIN)); - kmap_prot = __pgprot(SRMMU_ET_PTE | SRMMU_PRIV | SRMMU_CACHE); -} - void show_mem(unsigned int filter) { printk("Mem-info:\n"); show_free_areas(filter); printk("Free swap: %6ldkB\n", - nr_swap_pages << (PAGE_SHIFT-10)); + get_nr_swap_pages() << (PAGE_SHIFT-10)); printk("%ld pages of RAM\n", totalram_pages); printk("%ld free pages\n", nr_free_pages()); -#if 0 /* undefined pgtable_cache_size, pgd_cache_size */ - printk("%ld pages in page table cache\n",pgtable_cache_size); -#ifndef CONFIG_SMP - if (sparc_cpu_model == sun4m || sparc_cpu_model == sun4d) - printk("%ld entries in page dir cache\n",pgd_cache_size); -#endif -#endif } -void __init sparc_context_init(int numctx) -{ - int ctx; - - ctx_list_pool = __alloc_bootmem(numctx * sizeof(struct ctx_list), SMP_CACHE_BYTES, 0UL); - - for(ctx = 0; ctx < numctx; ctx++) { - struct ctx_list *clist; - clist = (ctx_list_pool + ctx); - clist->ctx_number = ctx; - clist->ctx_mm = NULL; - } - ctx_free.next = ctx_free.prev = &ctx_free; - ctx_used.next = ctx_used.prev = &ctx_used; - for(ctx = 0; ctx < numctx; ctx++) - add_to_free_ctxlist(ctx_list_pool + ctx); -} - -extern unsigned long cmdline_memory_size; unsigned long last_valid_pfn; unsigned long calc_highpages(void) @@ -287,27 +248,9 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) * init routine based upon the Sun model type on the Sparc. * */ -extern void srmmu_paging_init(void); -extern void device_scan(void); - void __init paging_init(void) { - switch(sparc_cpu_model) { - case sparc_leon: - leon_init(); - /* fall through */ - case sun4m: - case sun4d: - srmmu_paging_init(); - sparc_unmapped_base = 0x50000000; - break; - default: - prom_printf("paging_init: Cannot init paging on this Sparc\n"); - prom_printf("paging_init: sparc_cpu_model = %d\n", sparc_cpu_model); - prom_printf("paging_init: Halting...\n"); - prom_halt(); - } - + srmmu_paging_init(); prom_build_devicetree(); of_fill_in_cpu_data(); device_scan(); @@ -338,22 +281,12 @@ static void map_high_region(unsigned long start_pfn, unsigned long end_pfn) printk("mapping high region %08lx - %08lx\n", start_pfn, end_pfn); #endif - for (tmp = start_pfn; tmp < end_pfn; tmp++) { - struct page *page = pfn_to_page(tmp); - - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalhigh_pages++; - } + for (tmp = start_pfn; tmp < end_pfn; tmp++) + free_highmem_page(pfn_to_page(tmp)); } void __init mem_init(void) { - int codepages = 0; - int datapages = 0; - int initpages = 0; - int reservedpages = 0; int i; if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { @@ -385,15 +318,12 @@ void __init mem_init(void) max_mapnr = last_valid_pfn - pfn_base; high_memory = __va(max_low_pfn << PAGE_SHIFT); - - totalram_pages = free_all_bootmem(); + free_all_bootmem(); for (i = 0; sp_banks[i].num_bytes != 0; i++) { unsigned long start_pfn = sp_banks[i].base_addr >> PAGE_SHIFT; unsigned long end_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT; - num_physpages += sp_banks[i].num_bytes >> PAGE_SHIFT; - if (end_pfn <= highstart_pfn) continue; @@ -403,72 +333,19 @@ void __init mem_init(void) map_high_region(start_pfn, end_pfn); } - totalram_pages += totalhigh_pages; - - codepages = (((unsigned long) &_etext) - ((unsigned long)&_start)); - codepages = PAGE_ALIGN(codepages) >> PAGE_SHIFT; - datapages = (((unsigned long) &_edata) - ((unsigned long)&_etext)); - datapages = PAGE_ALIGN(datapages) >> PAGE_SHIFT; - initpages = (((unsigned long) &__init_end) - ((unsigned long) &__init_begin)); - initpages = PAGE_ALIGN(initpages) >> PAGE_SHIFT; - - /* Ignore memory holes for the purpose of counting reserved pages */ - for (i=0; i < max_low_pfn; i++) - if (test_bit(i >> (20 - PAGE_SHIFT), sparc_valid_addr_bitmap) - && PageReserved(pfn_to_page(i))) - reservedpages++; - - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", - nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT - 10), - codepages << (PAGE_SHIFT-10), - reservedpages << (PAGE_SHIFT - 10), - datapages << (PAGE_SHIFT-10), - initpages << (PAGE_SHIFT-10), - totalhigh_pages << (PAGE_SHIFT-10)); + mem_init_print_info(NULL); } void free_initmem (void) { - unsigned long addr; - unsigned long freed; - - addr = (unsigned long)(&__init_begin); - freed = (unsigned long)(&__init_end) - addr; - for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { - struct page *p; - - memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - p = virt_to_page(addr); - - ClearPageReserved(p); - init_page_count(p); - __free_page(p); - totalram_pages++; - num_physpages++; - } - printk(KERN_INFO "Freeing unused kernel memory: %ldk freed\n", - freed >> 10); + free_initmem_default(POISON_FREE_INITMEM); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - if (start < end) - printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", - (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - struct page *p; - - memset((void *)start, POISON_FREE_INITMEM, PAGE_SIZE); - p = virt_to_page(start); - - ClearPageReserved(p); - init_page_count(p); - __free_page(p); - totalram_pages++; - num_physpages++; - } + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 6026fdd1b2e..16b58ff11e6 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -47,30 +47,49 @@ #include <asm/prom.h> #include <asm/mdesc.h> #include <asm/cpudata.h> +#include <asm/setup.h> #include <asm/irq.h> #include "init_64.h" -unsigned long kern_linear_pte_xor[2] __read_mostly; +unsigned long kern_linear_pte_xor[4] __read_mostly; -/* A bitmap, one bit for every 256MB of physical memory. If the bit - * is clear, we should use a 4MB page (via kern_linear_pte_xor[0]) else - * if set we should use a 256MB page (via kern_linear_pte_xor[1]). +/* A bitmap, two bits for every 256MB of physical memory. These two + * bits determine what page size we use for kernel linear + * translations. They form an index into kern_linear_pte_xor[]. The + * value in the indexed slot is XOR'd with the TLB miss virtual + * address to form the resulting TTE. The mapping is: + * + * 0 ==> 4MB + * 1 ==> 256MB + * 2 ==> 2GB + * 3 ==> 16GB + * + * All sun4v chips support 256MB pages. Only SPARC-T4 and later + * support 2GB pages, and hopefully future cpus will support the 16GB + * pages as well. For slots 2 and 3, we encode a 256MB TTE xor there + * if these larger page sizes are not supported by the cpu. + * + * It would be nice to determine this from the machine description + * 'cpu' properties, but we need to have this table setup before the + * MDESC is initialized. */ unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; #ifndef CONFIG_DEBUG_PAGEALLOC -/* A special kernel TSB for 4MB and 256MB linear mappings. - * Space is allocated for this right after the trap table - * in arch/sparc64/kernel/head.S +/* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings. + * Space is allocated for this right after the trap table in + * arch/sparc64/kernel/head.S */ extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES]; #endif +static unsigned long cpu_pgsz_mask; + #define MAX_BANKS 32 -static struct linux_prom64_registers pavail[MAX_BANKS] __devinitdata; -static int pavail_ents __devinitdata; +static struct linux_prom64_registers pavail[MAX_BANKS]; +static int pavail_ents; static int cmp_p64(const void *a, const void *b) { @@ -101,7 +120,8 @@ static void __init read_obp_memory(const char *property, ret = prom_getproperty(node, property, (char *) regs, prop_size); if (ret == -1) { - prom_printf("Couldn't get %s property from /memory.\n"); + prom_printf("Couldn't get %s property from /memory.\n", + property); prom_halt(); } @@ -257,7 +277,6 @@ static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long } unsigned long _PAGE_ALL_SZ_BITS __read_mostly; -unsigned long _PAGE_SZBITS __read_mostly; static void flush_dcache(unsigned long pfn) { @@ -288,12 +307,39 @@ static void flush_dcache(unsigned long pfn) } } +/* mm->context.lock must be held */ +static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_index, + unsigned long tsb_hash_shift, unsigned long address, + unsigned long tte) +{ + struct tsb *tsb = mm->context.tsb_block[tsb_index].tsb; + unsigned long tag; + + if (unlikely(!tsb)) + return; + + tsb += ((address >> tsb_hash_shift) & + (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL)); + tag = (address >> 22UL); + tsb_insert(tsb, tag, tte); +} + +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +static inline bool is_hugetlb_pte(pte_t pte) +{ + if ((tlb_type == hypervisor && + (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) || + (tlb_type != hypervisor && + (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U)) + return true; + return false; +} +#endif + void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { struct mm_struct *mm; - struct tsb *tsb; - unsigned long tag, flags; - unsigned long tsb_index, tsb_hash_shift; + unsigned long flags; pte_t pte = *ptep; if (tlb_type != hypervisor) { @@ -305,28 +351,16 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * mm = vma->vm_mm; - tsb_index = MM_TSB_BASE; - tsb_hash_shift = PAGE_SHIFT; - spin_lock_irqsave(&mm->context.lock, flags); -#ifdef CONFIG_HUGETLB_PAGE - if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) { - if ((tlb_type == hypervisor && - (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) || - (tlb_type != hypervisor && - (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U)) { - tsb_index = MM_TSB_HUGE; - tsb_hash_shift = HPAGE_SHIFT; - } - } +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + if (mm->context.huge_pte_count && is_hugetlb_pte(pte)) + __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, + address, pte_val(pte)); + else #endif - - tsb = mm->context.tsb_block[tsb_index].tsb; - tsb += ((address >> tsb_hash_shift) & - (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL)); - tag = (address >> 22UL); - tsb_insert(tsb, tag, pte_val(pte)); + __update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT, + address, pte_val(pte)); spin_unlock_irqrestore(&mm->context.lock, flags); } @@ -403,6 +437,12 @@ EXPORT_SYMBOL(flush_icache_range); void mmu_info(struct seq_file *m) { + static const char *pgsz_strings[] = { + "8K", "64K", "512K", "4MB", "32MB", + "256MB", "2GB", "16GB", + }; + int i, printed; + if (tlb_type == cheetah) seq_printf(m, "MMU Type\t: Cheetah\n"); else if (tlb_type == cheetah_plus) @@ -414,6 +454,17 @@ void mmu_info(struct seq_file *m) else seq_printf(m, "MMU Type\t: ???\n"); + seq_printf(m, "MMU PGSZs\t: "); + printed = 0; + for (i = 0; i < ARRAY_SIZE(pgsz_strings); i++) { + if (cpu_pgsz_mask & (1UL << i)) { + seq_printf(m, "%s%s", + printed ? "," : "", pgsz_strings[i]); + printed++; + } + } + seq_putc(m, '\n'); + #ifdef CONFIG_DEBUG_DCFLUSH seq_printf(m, "DCPageFlushes\t: %d\n", atomic_read(&dcpage_flushes)); @@ -462,7 +513,7 @@ static void __init read_obp_translations(void) prom_halt(); } if (unlikely(n > sizeof(prom_trans))) { - prom_printf("prom_mappings: Size %Zd is too big.\n", n); + prom_printf("prom_mappings: Size %d is too big.\n", n); prom_halt(); } @@ -524,7 +575,7 @@ static void __init hypervisor_tlb_lock(unsigned long vaddr, unsigned long ret = sun4v_mmu_map_perm_addr(vaddr, 0, pte, mmu); if (ret != 0) { - prom_printf("hypervisor_tlb_lock[%lx:%lx:%lx:%lx]: " + prom_printf("hypervisor_tlb_lock[%lx:%x:%lx:%lx]: " "errors with %lx\n", vaddr, 0, pte, mmu, ret); prom_halt(); } @@ -538,7 +589,7 @@ static void __init remap_kernel(void) int i, tlb_ent = sparc64_highest_locked_tlbent(); tte_vaddr = (unsigned long) KERNBASE; - phys_page = (prom_boot_mapping_phys_low >> 22UL) << 22UL; + phys_page = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB; tte_data = kern_large_tte(phys_page); kern_locked_tte_data = tte_data; @@ -580,7 +631,7 @@ static void __init inherit_prom_mappings(void) void prom_world(int enter) { if (!enter) - set_fs((mm_segment_t) { get_thread_current_ds() }); + set_fs(get_fs()); __asm__ __volatile__("flushw"); } @@ -631,10 +682,9 @@ void get_new_mmu_context(struct mm_struct *mm) { unsigned long ctx, new_ctx; unsigned long orig_pgsz_bits; - unsigned long flags; int new_version; - spin_lock_irqsave(&ctx_alloc_lock, flags); + spin_lock(&ctx_alloc_lock); orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK); ctx = (tlb_context_cache + 1) & CTX_NR_MASK; new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx); @@ -670,7 +720,7 @@ void get_new_mmu_context(struct mm_struct *mm) out: tlb_context_cache = new_ctx; mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits; - spin_unlock_irqrestore(&ctx_alloc_lock, flags); + spin_unlock(&ctx_alloc_lock); if (unlikely(new_version)) smp_new_mmu_context_version(); @@ -745,11 +795,11 @@ struct node_mem_mask { static struct node_mem_mask node_masks[MAX_NUMNODES]; static int num_node_masks; +#ifdef CONFIG_NEED_MULTIPLE_NODES + int numa_cpu_lookup_table[NR_CPUS]; cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES]; -#ifdef CONFIG_NEED_MULTIPLE_NODES - struct mdesc_mblock { u64 base; u64 size; @@ -838,17 +888,21 @@ static void __init allocate_node_data(int nid) static void init_node_masks_nonnuma(void) { +#ifdef CONFIG_NEED_MULTIPLE_NODES int i; +#endif numadbg("Initializing tables for non-numa.\n"); node_masks[0].mask = node_masks[0].val = 0; num_node_masks = 1; +#ifdef CONFIG_NEED_MULTIPLE_NODES for (i = 0; i < NR_CPUS; i++) numa_cpu_lookup_table[i] = 0; cpumask_setall(&numa_cpumask_lookup_table[0]); +#endif } #ifdef CONFIG_NEED_MULTIPLE_NODES @@ -972,7 +1026,8 @@ static void __init add_node_ranges(void) "start[%lx] end[%lx]\n", nid, start, this_end); - memblock_set_node(start, this_end - start, nid); + memblock_set_node(start, this_end - start, + &memblock.memory, nid); start = this_end; } } @@ -1049,7 +1104,14 @@ static int __init grab_mblocks(struct mdesc_handle *md) m->size = *val; val = mdesc_get_property(md, node, "address-congruence-offset", NULL); - m->offset = *val; + + /* The address-congruence-offset property is optional. + * Explicity zero it be identifty this. + */ + if (val) + m->offset = *val; + else + m->offset = 0UL; numadbg("MBLOCK[%d]: base[%llx] size[%llx] offset[%llx]\n", count - 1, m->base, m->size, m->offset); @@ -1269,7 +1331,7 @@ static void __init bootmem_init_nonnuma(void) (top_of_ram - total_ram) >> 20); init_node_masks_nonnuma(); - memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); + memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); allocate_node_data(0); node_set_online(0); } @@ -1358,32 +1420,75 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, extern unsigned int kvmap_linear_patch[1]; #endif /* CONFIG_DEBUG_PAGEALLOC */ -static void __init mark_kpte_bitmap(unsigned long start, unsigned long end) +static void __init kpte_set_val(unsigned long index, unsigned long val) { - const unsigned long shift_256MB = 28; - const unsigned long mask_256MB = ((1UL << shift_256MB) - 1UL); - const unsigned long size_256MB = (1UL << shift_256MB); + unsigned long *ptr = kpte_linear_bitmap; - while (start < end) { - long remains; + val <<= ((index % (BITS_PER_LONG / 2)) * 2); + ptr += (index / (BITS_PER_LONG / 2)); - remains = end - start; - if (remains < size_256MB) - break; + *ptr |= val; +} - if (start & mask_256MB) { - start = (start + size_256MB) & ~mask_256MB; - continue; - } +static const unsigned long kpte_shift_min = 28; /* 256MB */ +static const unsigned long kpte_shift_max = 34; /* 16GB */ +static const unsigned long kpte_shift_incr = 3; + +static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end, + unsigned long shift) +{ + unsigned long size = (1UL << shift); + unsigned long mask = (size - 1UL); + unsigned long remains = end - start; + unsigned long val; + + if (remains < size || (start & mask)) + return start; + + /* VAL maps: + * + * shift 28 --> kern_linear_pte_xor index 1 + * shift 31 --> kern_linear_pte_xor index 2 + * shift 34 --> kern_linear_pte_xor index 3 + */ + val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1; + + remains &= ~mask; + if (shift != kpte_shift_max) + remains = size; + + while (remains) { + unsigned long index = start >> kpte_shift_min; + + kpte_set_val(index, val); + + start += 1UL << kpte_shift_min; + remains -= 1UL << kpte_shift_min; + } + + return start; +} + +static void __init mark_kpte_bitmap(unsigned long start, unsigned long end) +{ + unsigned long smallest_size, smallest_mask; + unsigned long s; - while (remains >= size_256MB) { - unsigned long index = start >> shift_256MB; + smallest_size = (1UL << kpte_shift_min); + smallest_mask = (smallest_size - 1UL); - __set_bit(index, kpte_linear_bitmap); + while (start < end) { + unsigned long orig_start = start; + + for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) { + start = kpte_mark_using_shift(start, end, s); - start += size_256MB; - remains -= size_256MB; + if (start != orig_start) + break; } + + if (start == orig_start) + start = (start + smallest_size) & ~smallest_mask; } } @@ -1458,6 +1563,96 @@ unsigned long __init find_ecache_flush_span(unsigned long size) return ~0UL; } +unsigned long PAGE_OFFSET; +EXPORT_SYMBOL(PAGE_OFFSET); + +static void __init page_offset_shift_patch_one(unsigned int *insn, unsigned long phys_bits) +{ + unsigned long final_shift; + unsigned int val = *insn; + unsigned int cnt; + + /* We are patching in ilog2(max_supported_phys_address), and + * we are doing so in a manner similar to a relocation addend. + * That is, we are adding the shift value to whatever value + * is in the shift instruction count field already. + */ + cnt = (val & 0x3f); + val &= ~0x3f; + + /* If we are trying to shift >= 64 bits, clear the destination + * register. This can happen when phys_bits ends up being equal + * to MAX_PHYS_ADDRESS_BITS. + */ + final_shift = (cnt + (64 - phys_bits)); + if (final_shift >= 64) { + unsigned int rd = (val >> 25) & 0x1f; + + val = 0x80100000 | (rd << 25); + } else { + val |= final_shift; + } + *insn = val; + + __asm__ __volatile__("flush %0" + : /* no outputs */ + : "r" (insn)); +} + +static void __init page_offset_shift_patch(unsigned long phys_bits) +{ + extern unsigned int __page_offset_shift_patch; + extern unsigned int __page_offset_shift_patch_end; + unsigned int *p; + + p = &__page_offset_shift_patch; + while (p < &__page_offset_shift_patch_end) { + unsigned int *insn = (unsigned int *)(unsigned long)*p; + + page_offset_shift_patch_one(insn, phys_bits); + + p++; + } +} + +static void __init setup_page_offset(void) +{ + unsigned long max_phys_bits = 40; + + if (tlb_type == cheetah || tlb_type == cheetah_plus) { + max_phys_bits = 42; + } else if (tlb_type == hypervisor) { + switch (sun4v_chip_type) { + case SUN4V_CHIP_NIAGARA1: + case SUN4V_CHIP_NIAGARA2: + max_phys_bits = 39; + break; + case SUN4V_CHIP_NIAGARA3: + max_phys_bits = 43; + break; + case SUN4V_CHIP_NIAGARA4: + case SUN4V_CHIP_NIAGARA5: + case SUN4V_CHIP_SPARC64X: + default: + max_phys_bits = 47; + break; + } + } + + if (max_phys_bits > MAX_PHYS_ADDRESS_BITS) { + prom_printf("MAX_PHYS_ADDRESS_BITS is too small, need %lu\n", + max_phys_bits); + prom_halt(); + } + + PAGE_OFFSET = PAGE_OFFSET_BY_BITS(max_phys_bits); + + pr_info("PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n", + PAGE_OFFSET, max_phys_bits); + + page_offset_shift_patch(max_phys_bits); +} + static void __init tsb_phys_patch(void) { struct tsb_ldquad_phys_patch_entry *pquad; @@ -1577,13 +1772,16 @@ static void __init sun4v_ktsb_init(void) ktsb_descr[0].resv = 0; #ifndef CONFIG_DEBUG_PAGEALLOC - /* Second KTSB for 4MB/256MB mappings. */ + /* Second KTSB for 4MB/256MB/2GB/16GB mappings. */ ktsb_pa = (kern_base + ((unsigned long)&swapper_4m_tsb[0] - KERNBASE)); ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB; - ktsb_descr[1].pgsz_mask = (HV_PGSZ_MASK_4MB | - HV_PGSZ_MASK_256MB); + ktsb_descr[1].pgsz_mask = ((HV_PGSZ_MASK_4MB | + HV_PGSZ_MASK_256MB | + HV_PGSZ_MASK_2GB | + HV_PGSZ_MASK_16GB) & + cpu_pgsz_mask); ktsb_descr[1].assoc = 1; ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES; ktsb_descr[1].ctx_idx = 0; @@ -1592,7 +1790,7 @@ static void __init sun4v_ktsb_init(void) #endif } -void __cpuinit sun4v_ktsb_register(void) +void sun4v_ktsb_register(void) { unsigned long pa, ret; @@ -1606,10 +1804,51 @@ void __cpuinit sun4v_ktsb_register(void) } } +static void __init sun4u_linear_pte_xor_finalize(void) +{ +#ifndef CONFIG_DEBUG_PAGEALLOC + /* This is where we would add Panther support for + * 32MB and 256MB pages. + */ +#endif +} + +static void __init sun4v_linear_pte_xor_finalize(void) +{ +#ifndef CONFIG_DEBUG_PAGEALLOC + if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) { + kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ + PAGE_OFFSET; + kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | + _PAGE_P_4V | _PAGE_W_4V); + } else { + kern_linear_pte_xor[1] = kern_linear_pte_xor[0]; + } + + if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) { + kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^ + PAGE_OFFSET; + kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V | + _PAGE_P_4V | _PAGE_W_4V); + } else { + kern_linear_pte_xor[2] = kern_linear_pte_xor[1]; + } + + if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) { + kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^ + PAGE_OFFSET; + kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V | + _PAGE_P_4V | _PAGE_W_4V); + } else { + kern_linear_pte_xor[3] = kern_linear_pte_xor[2]; + } +#endif +} + /* paging_init() sets up the page tables */ static unsigned long last_valid_pfn; -pgd_t swapper_pg_dir[2048]; +pgd_t swapper_pg_dir[PTRS_PER_PGD]; static void sun4u_pgprot_init(void); static void sun4v_pgprot_init(void); @@ -1620,6 +1859,8 @@ void __init paging_init(void) unsigned long real_end, i; int node; + setup_page_offset(); + /* These build time checkes make sure that the dcache_dirty_cpu() * page->flags usage will work. * @@ -1645,7 +1886,7 @@ void __init paging_init(void) BUILD_BUG_ON(NR_CPUS > 4096); - kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL; + kern_base = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB; kern_size = (unsigned long)&_end - (unsigned long)KERNBASE; /* Invalidate both kernel TSBs. */ @@ -1665,10 +1906,8 @@ void __init paging_init(void) ktsb_phys_patch(); } - if (tlb_type == hypervisor) { + if (tlb_type == hypervisor) sun4v_patch_tlb_handlers(); - sun4v_ktsb_init(); - } /* Find available physical memory... * @@ -1703,7 +1942,7 @@ void __init paging_init(void) shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE); real_end = (unsigned long)_end; - num_kernel_image_mappings = DIV_ROUND_UP(real_end - KERNBASE, 1 << 22); + num_kernel_image_mappings = DIV_ROUND_UP(real_end - KERNBASE, 1 << ILOG2_4MB); printk("Kernel: Using %d locked TLB entries for main kernel image.\n", num_kernel_image_mappings); @@ -1727,9 +1966,6 @@ void __init paging_init(void) __flush_tlb_all(); - if (tlb_type == hypervisor) - sun4v_ktsb_register(); - prom_build_devicetree(); of_populate_present_mask(); #ifndef CONFIG_SMP @@ -1742,8 +1978,36 @@ void __init paging_init(void) #ifndef CONFIG_SMP mdesc_fill_in_cpu_data(cpu_all_mask); #endif + mdesc_get_page_sizes(cpu_all_mask, &cpu_pgsz_mask); + + sun4v_linear_pte_xor_finalize(); + + sun4v_ktsb_init(); + sun4v_ktsb_register(); + } else { + unsigned long impl, ver; + + cpu_pgsz_mask = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K | + HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB); + + __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver)); + impl = ((ver >> 32) & 0xffff); + if (impl == PANTHER_IMPL) + cpu_pgsz_mask |= (HV_PGSZ_MASK_32MB | + HV_PGSZ_MASK_256MB); + + sun4u_linear_pte_xor_finalize(); } + /* Flush the TLBs and the 4M TSB so that the updated linear + * pte XOR settings are realized for all mappings. + */ + __flush_tlb_all(); +#ifndef CONFIG_DEBUG_PAGEALLOC + memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb)); +#endif + __flush_tlb_all(); + /* Setup bootmem... */ last_valid_pfn = end_pfn = bootmem_init(phys_base); @@ -1777,7 +2041,7 @@ void __init paging_init(void) printk("Booting Linux...\n"); } -int __devinit page_in_phys_avail(unsigned long paddr) +int page_in_phys_avail(unsigned long paddr) { int i; @@ -1835,7 +2099,7 @@ static void __init setup_valid_addr_bitmap_from_pavail(unsigned long *bitmap) if (new_start <= old_start && new_end >= (old_start + PAGE_SIZE)) { - set_bit(old_start >> 22, bitmap); + set_bit(old_start >> ILOG2_4MB, bitmap); goto do_next_page; } } @@ -1867,15 +2131,24 @@ static void __init patch_tlb_miss_handler_bitmap(void) flushi(&valid_addr_bitmap_insn[0]); } +static void __init register_page_bootmem_info(void) +{ +#ifdef CONFIG_NEED_MULTIPLE_NODES + int i; + + for_each_online_node(i) + if (NODE_DATA(i)->node_spanned_pages) + register_page_bootmem_info_node(NODE_DATA(i)); +#endif +} void __init mem_init(void) { - unsigned long codepages, datapages, initpages; unsigned long addr, last; addr = PAGE_OFFSET + kern_base; last = PAGE_ALIGN(kern_size) + addr; while (addr < last) { - set_bit(__pa(addr) >> 22, sparc64_valid_addr_bitmap); + set_bit(__pa(addr) >> ILOG2_4MB, sparc64_valid_addr_bitmap); addr += PAGE_SIZE; } @@ -1884,26 +2157,8 @@ void __init mem_init(void) high_memory = __va(last_valid_pfn << PAGE_SHIFT); -#ifdef CONFIG_NEED_MULTIPLE_NODES - { - int i; - for_each_online_node(i) { - if (NODE_DATA(i)->node_spanned_pages != 0) { - totalram_pages += - free_all_bootmem_node(NODE_DATA(i)); - } - } - totalram_pages += free_low_memory_core_early(MAX_NUMNODES); - } -#else - totalram_pages = free_all_bootmem(); -#endif - - /* We subtract one to account for the mem_map_zero page - * allocated below. - */ - totalram_pages -= 1; - num_physpages = totalram_pages; + register_page_bootmem_info(); + free_all_bootmem(); /* * Set up the zero page, mark it reserved, so that page count @@ -1914,21 +2169,9 @@ void __init mem_init(void) prom_printf("paging_init: Cannot alloc zero page.\n"); prom_halt(); } - SetPageReserved(mem_map_zero); + mark_page_reserved(mem_map_zero); - codepages = (((unsigned long) _etext) - ((unsigned long) _start)); - codepages = PAGE_ALIGN(codepages) >> PAGE_SHIFT; - datapages = (((unsigned long) _edata) - ((unsigned long) _etext)); - datapages = PAGE_ALIGN(datapages) >> PAGE_SHIFT; - initpages = (((unsigned long) __init_end) - ((unsigned long) __init_begin)); - initpages = PAGE_ALIGN(initpages) >> PAGE_SHIFT; - - printk("Memory: %luk available (%ldk kernel code, %ldk data, %ldk init) [%016lx,%016lx]\n", - nr_free_pages() << (PAGE_SHIFT-10), - codepages << (PAGE_SHIFT-10), - datapages << (PAGE_SHIFT-10), - initpages << (PAGE_SHIFT-10), - PAGE_OFFSET, (last_valid_pfn << PAGE_SHIFT)); + mem_init_print_info(NULL); if (tlb_type == cheetah || tlb_type == cheetah_plus) cheetah_ecache_flush_init(); @@ -1954,39 +2197,22 @@ void free_initmem(void) initend = (unsigned long)(__init_end) & PAGE_MASK; for (; addr < initend; addr += PAGE_SIZE) { unsigned long page; - struct page *p; page = (addr + ((unsigned long) __va(kern_base)) - ((unsigned long) KERNBASE)); memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - if (do_free) { - p = virt_to_page(page); - - ClearPageReserved(p); - init_page_count(p); - __free_page(p); - num_physpages++; - totalram_pages++; - } + if (do_free) + free_reserved_page(virt_to_page(page)); } } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - if (start < end) - printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - struct page *p = virt_to_page(start); - - ClearPageReserved(p); - init_page_count(p); - __free_page(p); - num_physpages++; - totalram_pages++; - } + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif @@ -2020,10 +2246,12 @@ EXPORT_SYMBOL(_PAGE_CACHE); #ifdef CONFIG_SPARSEMEM_VMEMMAP unsigned long vmemmap_table[VMEMMAP_SIZE]; -int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) +static long __meminitdata addr_start, addr_end; +static int __meminitdata node_start; + +int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, + int node) { - unsigned long vstart = (unsigned long) start; - unsigned long vend = (unsigned long) (start + nr); unsigned long phys_start = (vstart - VMEMMAP_BASE); unsigned long phys_end = (vend - VMEMMAP_BASE); unsigned long addr = phys_start & VMEMMAP_CHUNK_MASK; @@ -2044,21 +2272,41 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) void *block; if (!(*vmem_pp & _PAGE_VALID)) { - block = vmemmap_alloc_block(1UL << 22, node); + block = vmemmap_alloc_block(1UL << ILOG2_4MB, node); if (!block) return -ENOMEM; *vmem_pp = pte_base | __pa(block); - printk(KERN_INFO "[%p-%p] page_structs=%lu " - "node=%d entry=%lu/%lu\n", start, block, nr, - node, - addr >> VMEMMAP_CHUNK_SHIFT, - VMEMMAP_SIZE); + /* check to see if we have contiguous blocks */ + if (addr_end != addr || node_start != node) { + if (addr_start) + printk(KERN_DEBUG " [%lx-%lx] on node %d\n", + addr_start, addr_end-1, node_start); + addr_start = addr; + node_start = node; + } + addr_end = addr + VMEMMAP_CHUNK; } } return 0; } + +void __meminit vmemmap_populate_print_last(void) +{ + if (addr_start) { + printk(KERN_DEBUG " [%lx-%lx] on node %d\n", + addr_start, addr_end-1, node_start); + addr_start = 0; + addr_end = 0; + node_start = 0; + } +} + +void vmemmap_free(unsigned long start, unsigned long end) +{ +} + #endif /* CONFIG_SPARSEMEM_VMEMMAP */ static void prot_init_common(unsigned long page_none, @@ -2092,6 +2340,7 @@ static void __init sun4u_pgprot_init(void) { unsigned long page_none, page_shared, page_copy, page_readonly; unsigned long page_exec_bit; + int i; PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID | _PAGE_CACHE_4U | _PAGE_P_4U | @@ -2110,19 +2359,17 @@ static void __init sun4u_pgprot_init(void) __ACCESS_BITS_4U | _PAGE_E_4U); #ifdef CONFIG_DEBUG_PAGEALLOC - kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZBITS_4U) ^ - 0xfffff80000000000UL; + kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET; #else kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^ - 0xfffff80000000000UL; + PAGE_OFFSET; #endif kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U | _PAGE_P_4U | _PAGE_W_4U); - /* XXX Should use 256MB on Panther. XXX */ - kern_linear_pte_xor[1] = kern_linear_pte_xor[0]; + for (i = 1; i < 4; i++) + kern_linear_pte_xor[i] = kern_linear_pte_xor[0]; - _PAGE_SZBITS = _PAGE_SZBITS_4U; _PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U | _PAGE_SZ64K_4U | _PAGE_SZ8K_4U | _PAGE_SZ32MB_4U | _PAGE_SZ256MB_4U); @@ -2146,6 +2393,7 @@ static void __init sun4v_pgprot_init(void) { unsigned long page_none, page_shared, page_copy, page_readonly; unsigned long page_exec_bit; + int i; PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID | _PAGE_CACHE_4V | _PAGE_P_4V | @@ -2158,29 +2406,20 @@ static void __init sun4v_pgprot_init(void) _PAGE_CACHE = _PAGE_CACHE_4V; #ifdef CONFIG_DEBUG_PAGEALLOC - kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZBITS_4V) ^ - 0xfffff80000000000UL; + kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET; #else kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^ - 0xfffff80000000000UL; + PAGE_OFFSET; #endif kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V); -#ifdef CONFIG_DEBUG_PAGEALLOC - kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZBITS_4V) ^ - 0xfffff80000000000UL; -#else - kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ - 0xfffff80000000000UL; -#endif - kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | - _PAGE_P_4V | _PAGE_W_4V); + for (i = 1; i < 4; i++) + kern_linear_pte_xor[i] = kern_linear_pte_xor[0]; pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V | __ACCESS_BITS_4V | _PAGE_E_4V); - _PAGE_SZBITS = _PAGE_SZBITS_4V; _PAGE_ALL_SZ_BITS = (_PAGE_SZ16GB_4V | _PAGE_SZ2GB_4V | _PAGE_SZ256MB_4V | _PAGE_SZ32MB_4V | _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V | @@ -2313,3 +2552,150 @@ void __flush_tlb_all(void) __asm__ __volatile__("wrpr %0, 0, %%pstate" : : "r" (pstate)); } + +pte_t *pte_alloc_one_kernel(struct mm_struct *mm, + unsigned long address) +{ + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | + __GFP_REPEAT | __GFP_ZERO); + pte_t *pte = NULL; + + if (page) + pte = (pte_t *) page_address(page); + + return pte; +} + +pgtable_t pte_alloc_one(struct mm_struct *mm, + unsigned long address) +{ + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | + __GFP_REPEAT | __GFP_ZERO); + if (!page) + return NULL; + if (!pgtable_page_ctor(page)) { + free_hot_cold_page(page, 0); + return NULL; + } + return (pte_t *) page_address(page); +} + +void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +{ + free_page((unsigned long)pte); +} + +static void __pte_free(pgtable_t pte) +{ + struct page *page = virt_to_page(pte); + + pgtable_page_dtor(page); + __free_page(page); +} + +void pte_free(struct mm_struct *mm, pgtable_t pte) +{ + __pte_free(pte); +} + +void pgtable_free(void *table, bool is_page) +{ + if (is_page) + __pte_free(table); + else + kmem_cache_free(pgtable_cache, table); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd) +{ + unsigned long pte, flags; + struct mm_struct *mm; + pmd_t entry = *pmd; + + if (!pmd_large(entry) || !pmd_young(entry)) + return; + + pte = pmd_val(entry); + + /* We are fabricating 8MB pages using 4MB real hw pages. */ + pte |= (addr & (1UL << REAL_HPAGE_SHIFT)); + + mm = vma->vm_mm; + + spin_lock_irqsave(&mm->context.lock, flags); + + if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) + __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, + addr, pte); + + spin_unlock_irqrestore(&mm->context.lock, flags); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +static void context_reload(void *__data) +{ + struct mm_struct *mm = __data; + + if (mm == current->mm) + load_secondary_context(mm); +} + +void hugetlb_setup(struct pt_regs *regs) +{ + struct mm_struct *mm = current->mm; + struct tsb_config *tp; + + if (in_atomic() || !mm) { + const struct exception_table_entry *entry; + + entry = search_exception_tables(regs->tpc); + if (entry) { + regs->tpc = entry->fixup; + regs->tnpc = regs->tpc + 4; + return; + } + pr_alert("Unexpected HugeTLB setup in atomic context.\n"); + die_if_kernel("HugeTSB in atomic", regs); + } + + tp = &mm->context.tsb_block[MM_TSB_HUGE]; + if (likely(tp->tsb == NULL)) + tsb_grow(mm, MM_TSB_HUGE, 0); + + tsb_context_switch(mm); + smp_tsb_sync(mm); + + /* On UltraSPARC-III+ and later, configure the second half of + * the Data-TLB for huge pages. + */ + if (tlb_type == cheetah_plus) { + unsigned long ctx; + + spin_lock(&ctx_alloc_lock); + ctx = mm->context.sparc64_ctx_val; + ctx &= ~CTX_PGSZ_MASK; + ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT; + ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT; + + if (ctx != mm->context.sparc64_ctx_val) { + /* When changing the page size fields, we + * must perform a context flush so that no + * stale entries match. This flush must + * occur with the original context register + * settings. + */ + do_flush_tlb_mm(mm); + + /* Reload the context register of all processors + * also executing in this address space. + */ + mm->context.sparc64_ctx_val = ctx; + on_each_cpu(context_reload, mm, 0); + } + spin_unlock(&ctx_alloc_lock); + } +} +#endif diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h index 3e1ac8b96ca..0668b364f44 100644 --- a/arch/sparc/mm/init_64.h +++ b/arch/sparc/mm/init_64.h @@ -1,25 +1,27 @@ #ifndef _SPARC64_MM_INIT_H #define _SPARC64_MM_INIT_H +#include <asm/page.h> + /* Most of the symbols in this file are defined in init.c and * marked non-static so that assembler code can get at them. */ -#define MAX_PHYS_ADDRESS (1UL << 41UL) +#define MAX_PHYS_ADDRESS (1UL << MAX_PHYS_ADDRESS_BITS) #define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL) #define KPTE_BITMAP_BYTES \ - ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 8) + ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4) #define VALID_ADDR_BITMAP_CHUNK_SZ (4UL * 1024UL * 1024UL) #define VALID_ADDR_BITMAP_BYTES \ ((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8) -extern unsigned long kern_linear_pte_xor[2]; +extern unsigned long kern_linear_pte_xor[4]; extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; extern unsigned int sparc64_highest_unlocked_tlb_ent; extern unsigned long sparc64_kern_pri_context; extern unsigned long sparc64_kern_pri_nuc_bits; extern unsigned long sparc64_kern_sec_context; -extern void mmu_info(struct seq_file *m); +void mmu_info(struct seq_file *m); struct linux_prom_translation { unsigned long virt; @@ -34,7 +36,7 @@ extern unsigned int prom_trans_ents; /* Exported for SMP bootup purposes. */ extern unsigned long kern_locked_tte_data; -extern void prom_world(int enter); +void prom_world(int enter); #ifdef CONFIG_SPARSEMEM_VMEMMAP #define VMEMMAP_CHUNK_SHIFT 22 diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c index eb99862e965..f311bf21901 100644 --- a/arch/sparc/mm/io-unit.c +++ b/arch/sparc/mm/io-unit.c @@ -25,6 +25,8 @@ #include <asm/dma.h> #include <asm/oplib.h> +#include "mm_32.h" + /* #define IOUNIT_DEBUG */ #ifdef IOUNIT_DEBUG #define IOD(x) printk(x) @@ -38,7 +40,8 @@ static void __init iounit_iommu_init(struct platform_device *op) { struct iounit_struct *iounit; - iopte_t *xpt, *xptend; + iopte_t __iomem *xpt; + iopte_t __iomem *xptend; iounit = kzalloc(sizeof(struct iounit_struct), GFP_ATOMIC); if (!iounit) { @@ -62,10 +65,10 @@ static void __init iounit_iommu_init(struct platform_device *op) op->dev.archdata.iommu = iounit; iounit->page_table = xpt; spin_lock_init(&iounit->lock); - - for (xptend = iounit->page_table + (16 * PAGE_SIZE) / sizeof(iopte_t); - xpt < xptend;) - iopte_val(*xpt++) = 0; + + xptend = iounit->page_table + (16 * PAGE_SIZE) / sizeof(iopte_t); + for (; xpt < xptend; xpt++) + sbus_writel(0, xpt); } static int __init iounit_init(void) @@ -130,7 +133,7 @@ nexti: scan = find_next_zero_bit(iounit->bmap, limit, scan); vaddr = IOUNIT_DMA_BASE + (scan << PAGE_SHIFT) + (vaddr & ~PAGE_MASK); for (k = 0; k < npages; k++, iopte = __iopte(iopte_val(iopte) + 0x100), scan++) { set_bit(scan, iounit->bmap); - iounit->page_table[scan] = iopte; + sbus_writel(iopte, &iounit->page_table[scan]); } IOD(("%08lx\n", vaddr)); return vaddr; @@ -202,7 +205,7 @@ static int iounit_map_dma_area(struct device *dev, dma_addr_t *pba, unsigned lon struct iounit_struct *iounit = dev->archdata.iommu; unsigned long page, end; pgprot_t dvma_prot; - iopte_t *iopte; + iopte_t __iomem *iopte; *pba = addr; @@ -224,8 +227,8 @@ static int iounit_map_dma_area(struct device *dev, dma_addr_t *pba, unsigned lon i = ((addr - IOUNIT_DMA_BASE) >> PAGE_SHIFT); - iopte = (iopte_t *)(iounit->page_table + i); - *iopte = MKIOPTE(__pa(page)); + iopte = iounit->page_table + i; + sbus_writel(MKIOPTE(__pa(page)), iopte); } addr += PAGE_SIZE; va += PAGE_SIZE; diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index a8a58cad9d2..491511d37e3 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -27,6 +27,8 @@ #include <asm/iommu.h> #include <asm/dma.h> +#include "mm_32.h" + /* * This can be sized dynamically, but we will do this * only when we have a guidance about actual I/O pressures. @@ -34,12 +36,9 @@ #define IOMMU_RNGE IOMMU_RNGE_256MB #define IOMMU_START 0xF0000000 #define IOMMU_WINSIZE (256*1024*1024U) -#define IOMMU_NPTES (IOMMU_WINSIZE/PAGE_SIZE) /* 64K PTEs, 265KB */ +#define IOMMU_NPTES (IOMMU_WINSIZE/PAGE_SIZE) /* 64K PTEs, 256KB */ #define IOMMU_ORDER 6 /* 4096 * (1<<6) */ -/* srmmu.c */ -extern int viking_mxcc_present; -extern int flush_page_for_dma_global; static int viking_flush; /* viking.S */ extern void viking_flush_page(unsigned long page); @@ -59,6 +58,8 @@ static void __init sbus_iommu_init(struct platform_device *op) struct iommu_struct *iommu; unsigned int impl, vers; unsigned long *bitmap; + unsigned long control; + unsigned long base; unsigned long tmp; iommu = kmalloc(sizeof(struct iommu_struct), GFP_KERNEL); @@ -73,12 +74,14 @@ static void __init sbus_iommu_init(struct platform_device *op) prom_printf("Cannot map IOMMU registers\n"); prom_halt(); } - impl = (iommu->regs->control & IOMMU_CTRL_IMPL) >> 28; - vers = (iommu->regs->control & IOMMU_CTRL_VERS) >> 24; - tmp = iommu->regs->control; - tmp &= ~(IOMMU_CTRL_RNGE); - tmp |= (IOMMU_RNGE_256MB | IOMMU_CTRL_ENAB); - iommu->regs->control = tmp; + + control = sbus_readl(&iommu->regs->control); + impl = (control & IOMMU_CTRL_IMPL) >> 28; + vers = (control & IOMMU_CTRL_VERS) >> 24; + control &= ~(IOMMU_CTRL_RNGE); + control |= (IOMMU_RNGE_256MB | IOMMU_CTRL_ENAB); + sbus_writel(control, &iommu->regs->control); + iommu_invalidate(iommu->regs); iommu->start = IOMMU_START; iommu->end = 0xffffffff; @@ -90,8 +93,8 @@ static void __init sbus_iommu_init(struct platform_device *op) it to us. */ tmp = __get_free_pages(GFP_KERNEL, IOMMU_ORDER); if (!tmp) { - prom_printf("Unable to allocate iommu table [0x%08x]\n", - IOMMU_NPTES*sizeof(iopte_t)); + prom_printf("Unable to allocate iommu table [0x%lx]\n", + IOMMU_NPTES * sizeof(iopte_t)); prom_halt(); } iommu->page_table = (iopte_t *)tmp; @@ -100,7 +103,9 @@ static void __init sbus_iommu_init(struct platform_device *op) memset(iommu->page_table, 0, IOMMU_NPTES*sizeof(iopte_t)); flush_cache_all(); flush_tlb_all(); - iommu->regs->base = __pa((unsigned long) iommu->page_table) >> 4; + + base = __pa((unsigned long)iommu->page_table) >> 4; + sbus_writel(base, &iommu->regs->base); iommu_invalidate(iommu->regs); bitmap = kmalloc(IOMMU_NPTES>>3, GFP_KERNEL); diff --git a/arch/sparc/mm/leon_mm.c b/arch/sparc/mm/leon_mm.c index 5bed085a2c1..3b17b6f7895 100644 --- a/arch/sparc/mm/leon_mm.c +++ b/arch/sparc/mm/leon_mm.c @@ -15,10 +15,10 @@ #include <asm/leon.h> #include <asm/tlbflush.h> -#include "srmmu.h" +#include "mm_32.h" int leon_flush_during_switch = 1; -int srmmu_swprobe_trace; +static int srmmu_swprobe_trace; static inline unsigned long leon_get_ctable_ptr(void) { diff --git a/arch/sparc/mm/mm_32.h b/arch/sparc/mm/mm_32.h new file mode 100644 index 00000000000..a6c27ca9a72 --- /dev/null +++ b/arch/sparc/mm/mm_32.h @@ -0,0 +1,24 @@ +/* fault_32.c - visible as they are called from assembler */ +asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc, + unsigned long address); +asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, + unsigned long address); + +void window_overflow_fault(void); +void window_underflow_fault(unsigned long sp); +void window_ret_fault(struct pt_regs *regs); + +/* srmmu.c */ +extern char *srmmu_name; +extern int viking_mxcc_present; +extern int flush_page_for_dma_global; + +extern void (*poke_srmmu)(void); + +void __init srmmu_paging_init(void); + +/* iommu.c */ +void ld_mmu_iommu(void); + +/* io-unit.c */ +void ld_mmu_iounit(void); diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index 62e3f577330..be65f035d18 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -8,57 +8,54 @@ * Copyright (C) 1999,2000 Anton Blanchard (anton@samba.org) */ -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/vmalloc.h> -#include <linux/pagemap.h> -#include <linux/init.h> +#include <linux/seq_file.h> #include <linux/spinlock.h> #include <linux/bootmem.h> -#include <linux/fs.h> -#include <linux/seq_file.h> +#include <linux/pagemap.h> +#include <linux/vmalloc.h> #include <linux/kdebug.h> +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/init.h> #include <linux/log2.h> #include <linux/gfp.h> +#include <linux/fs.h> +#include <linux/mm.h> -#include <asm/bitext.h> -#include <asm/page.h> +#include <asm/mmu_context.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include <asm/io-unit.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> -#include <asm/io.h> +#include <asm/bitext.h> #include <asm/vaddrs.h> -#include <asm/traps.h> -#include <asm/smp.h> -#include <asm/mbus.h> #include <asm/cache.h> +#include <asm/traps.h> #include <asm/oplib.h> +#include <asm/mbus.h> +#include <asm/page.h> #include <asm/asi.h> #include <asm/msi.h> -#include <asm/mmu_context.h> -#include <asm/io-unit.h> -#include <asm/cacheflush.h> -#include <asm/tlbflush.h> +#include <asm/smp.h> +#include <asm/io.h> /* Now the cpu specific definitions. */ -#include <asm/viking.h> -#include <asm/mxcc.h> -#include <asm/ross.h> +#include <asm/turbosparc.h> #include <asm/tsunami.h> +#include <asm/viking.h> #include <asm/swift.h> -#include <asm/turbosparc.h> #include <asm/leon.h> +#include <asm/mxcc.h> +#include <asm/ross.h> -#include "srmmu.h" +#include "mm_32.h" enum mbus_module srmmu_modtype; static unsigned int hwbug_bitmask; int vac_cache_size; int vac_line_size; -struct ctx_list *ctx_list_pool; -struct ctx_list ctx_free; -struct ctx_list ctx_used; - extern struct resource sparc_iomap; extern unsigned long last_valid_pfn; @@ -66,6 +63,7 @@ extern unsigned long last_valid_pfn; static pgd_t *srmmu_swapper_pg_dir; const struct sparc32_cachetlb_ops *sparc32_cachetlb_ops; +EXPORT_SYMBOL(sparc32_cachetlb_ops); #ifdef CONFIG_SMP const struct sparc32_cachetlb_ops *local_ops; @@ -102,7 +100,6 @@ static unsigned long srmmu_nocache_end; #define SRMMU_NOCACHE_ALIGN_MAX (sizeof(ctxd_t)*SRMMU_MAX_CONTEXTS) void *srmmu_nocache_pool; -void *srmmu_nocache_bitmap; static struct bit_map srmmu_nocache_map; static inline int srmmu_pmd_none(pmd_t pmd) @@ -136,8 +133,8 @@ void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep) } } -/* Find an entry in the third-level page table.. */ -pte_t *pte_offset_kernel(pmd_t * dir, unsigned long address) +/* Find an entry in the third-level page table.. */ +pte_t *pte_offset_kernel(pmd_t *dir, unsigned long address) { void *pte; @@ -151,55 +148,61 @@ pte_t *pte_offset_kernel(pmd_t * dir, unsigned long address) * align: bytes, number to align at. * Returns the virtual address of the allocated area. */ -static unsigned long __srmmu_get_nocache(int size, int align) +static void *__srmmu_get_nocache(int size, int align) { int offset; + unsigned long addr; if (size < SRMMU_NOCACHE_BITMAP_SHIFT) { - printk("Size 0x%x too small for nocache request\n", size); + printk(KERN_ERR "Size 0x%x too small for nocache request\n", + size); size = SRMMU_NOCACHE_BITMAP_SHIFT; } - if (size & (SRMMU_NOCACHE_BITMAP_SHIFT-1)) { - printk("Size 0x%x unaligned int nocache request\n", size); - size += SRMMU_NOCACHE_BITMAP_SHIFT-1; + if (size & (SRMMU_NOCACHE_BITMAP_SHIFT - 1)) { + printk(KERN_ERR "Size 0x%x unaligned int nocache request\n", + size); + size += SRMMU_NOCACHE_BITMAP_SHIFT - 1; } BUG_ON(align > SRMMU_NOCACHE_ALIGN_MAX); offset = bit_map_string_get(&srmmu_nocache_map, - size >> SRMMU_NOCACHE_BITMAP_SHIFT, - align >> SRMMU_NOCACHE_BITMAP_SHIFT); + size >> SRMMU_NOCACHE_BITMAP_SHIFT, + align >> SRMMU_NOCACHE_BITMAP_SHIFT); if (offset == -1) { - printk("srmmu: out of nocache %d: %d/%d\n", - size, (int) srmmu_nocache_size, - srmmu_nocache_map.used << SRMMU_NOCACHE_BITMAP_SHIFT); - return 0; + printk(KERN_ERR "srmmu: out of nocache %d: %d/%d\n", + size, (int) srmmu_nocache_size, + srmmu_nocache_map.used << SRMMU_NOCACHE_BITMAP_SHIFT); + return NULL; } - return (SRMMU_NOCACHE_VADDR + (offset << SRMMU_NOCACHE_BITMAP_SHIFT)); + addr = SRMMU_NOCACHE_VADDR + (offset << SRMMU_NOCACHE_BITMAP_SHIFT); + return (void *)addr; } -unsigned long srmmu_get_nocache(int size, int align) +void *srmmu_get_nocache(int size, int align) { - unsigned long tmp; + void *tmp; tmp = __srmmu_get_nocache(size, align); if (tmp) - memset((void *)tmp, 0, size); + memset(tmp, 0, size); return tmp; } -void srmmu_free_nocache(unsigned long vaddr, int size) +void srmmu_free_nocache(void *addr, int size) { + unsigned long vaddr; int offset; + vaddr = (unsigned long)addr; if (vaddr < SRMMU_NOCACHE_VADDR) { printk("Vaddr %lx is smaller than nocache base 0x%lx\n", vaddr, (unsigned long)SRMMU_NOCACHE_VADDR); BUG(); } - if (vaddr+size > srmmu_nocache_end) { + if (vaddr + size > srmmu_nocache_end) { printk("Vaddr %lx is bigger than nocache end 0x%lx\n", vaddr, srmmu_nocache_end); BUG(); @@ -212,7 +215,7 @@ void srmmu_free_nocache(unsigned long vaddr, int size) printk("Size 0x%x is too small\n", size); BUG(); } - if (vaddr & (size-1)) { + if (vaddr & (size - 1)) { printk("Vaddr %lx is not aligned to size 0x%x\n", vaddr, size); BUG(); } @@ -226,13 +229,23 @@ void srmmu_free_nocache(unsigned long vaddr, int size) static void srmmu_early_allocate_ptable_skeleton(unsigned long start, unsigned long end); -extern unsigned long probe_memory(void); /* in fault.c */ +/* Return how much physical memory we have. */ +static unsigned long __init probe_memory(void) +{ + unsigned long total = 0; + int i; + + for (i = 0; sp_banks[i].num_bytes; i++) + total += sp_banks[i].num_bytes; + + return total; +} /* * Reserve nocache dynamically proportionally to the amount of * system RAM. -- Tomas Szepe <szepe@pinerecords.com>, June 2002 */ -static void srmmu_nocache_calcsize(void) +static void __init srmmu_nocache_calcsize(void) { unsigned long sysmemavail = probe_memory() / 1024; int srmmu_nocache_npages; @@ -255,6 +268,7 @@ static void srmmu_nocache_calcsize(void) static void __init srmmu_nocache_init(void) { + void *srmmu_nocache_bitmap; unsigned int bitmap_bits; pgd_t *pgd; pmd_t *pmd; @@ -268,10 +282,12 @@ static void __init srmmu_nocache_init(void) SRMMU_NOCACHE_ALIGN_MAX, 0UL); memset(srmmu_nocache_pool, 0, srmmu_nocache_size); - srmmu_nocache_bitmap = __alloc_bootmem(bitmap_bits >> 3, SMP_CACHE_BYTES, 0UL); + srmmu_nocache_bitmap = + __alloc_bootmem(BITS_TO_LONGS(bitmap_bits) * sizeof(long), + SMP_CACHE_BYTES, 0UL); bit_map_init(&srmmu_nocache_map, srmmu_nocache_bitmap, bitmap_bits); - srmmu_swapper_pg_dir = (pgd_t *)__srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE); + srmmu_swapper_pg_dir = __srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE); memset(__nocache_fix(srmmu_swapper_pg_dir), 0, SRMMU_PGD_TABLE_SIZE); init_mm.pgd = srmmu_swapper_pg_dir; @@ -304,7 +320,7 @@ pgd_t *get_pgd_fast(void) { pgd_t *pgd = NULL; - pgd = (pgd_t *)__srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE); + pgd = __srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE); if (pgd) { pgd_t *init = pgd_offset_k(0); memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); @@ -330,8 +346,11 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0) return NULL; - page = pfn_to_page( __nocache_pa(pte) >> PAGE_SHIFT ); - pgtable_page_ctor(page); + page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT); + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } return page; } @@ -344,18 +363,50 @@ void pte_free(struct mm_struct *mm, pgtable_t pte) if (p == 0) BUG(); p = page_to_pfn(pte) << PAGE_SHIFT; /* Physical address */ - p = (unsigned long) __nocache_va(p); /* Nocached virtual */ - srmmu_free_nocache(p, PTE_SIZE); + + /* free non cached virtual address*/ + srmmu_free_nocache(__nocache_va(p), PTE_SIZE); } -/* - */ +/* context handling - a dynamically sized pool is used */ +#define NO_CONTEXT -1 + +struct ctx_list { + struct ctx_list *next; + struct ctx_list *prev; + unsigned int ctx_number; + struct mm_struct *ctx_mm; +}; + +static struct ctx_list *ctx_list_pool; +static struct ctx_list ctx_free; +static struct ctx_list ctx_used; + +/* At boot time we determine the number of contexts */ +static int num_contexts; + +static inline void remove_from_ctx_list(struct ctx_list *entry) +{ + entry->next->prev = entry->prev; + entry->prev->next = entry->next; +} + +static inline void add_to_ctx_list(struct ctx_list *head, struct ctx_list *entry) +{ + entry->next = head; + (entry->prev = head->prev)->next = entry; + head->prev = entry; +} +#define add_to_free_ctxlist(entry) add_to_ctx_list(&ctx_free, entry) +#define add_to_used_ctxlist(entry) add_to_ctx_list(&ctx_used, entry) + + static inline void alloc_context(struct mm_struct *old_mm, struct mm_struct *mm) { struct ctx_list *ctxp; ctxp = ctx_free.next; - if(ctxp != &ctx_free) { + if (ctxp != &ctx_free) { remove_from_ctx_list(ctxp); add_to_used_ctxlist(ctxp); mm->context = ctxp->ctx_number; @@ -363,9 +414,9 @@ static inline void alloc_context(struct mm_struct *old_mm, struct mm_struct *mm) return; } ctxp = ctx_used.next; - if(ctxp->ctx_mm == old_mm) + if (ctxp->ctx_mm == old_mm) ctxp = ctxp->next; - if(ctxp == &ctx_used) + if (ctxp == &ctx_used) panic("out of mmu contexts"); flush_cache_mm(ctxp->ctx_mm); flush_tlb_mm(ctxp->ctx_mm); @@ -385,11 +436,31 @@ static inline void free_context(int context) add_to_free_ctxlist(ctx_old); } +static void __init sparc_context_init(int numctx) +{ + int ctx; + unsigned long size; + + size = numctx * sizeof(struct ctx_list); + ctx_list_pool = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL); + + for (ctx = 0; ctx < numctx; ctx++) { + struct ctx_list *clist; + + clist = (ctx_list_pool + ctx); + clist->ctx_number = ctx; + clist->ctx_mm = NULL; + } + ctx_free.next = ctx_free.prev = &ctx_free; + ctx_used.next = ctx_used.prev = &ctx_used; + for (ctx = 0; ctx < numctx; ctx++) + add_to_free_ctxlist(ctx_list_pool + ctx); +} void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk) { - if(mm->context == NO_CONTEXT) { + if (mm->context == NO_CONTEXT) { spin_lock(&srmmu_context_spinlock); alloc_context(old_mm, mm); spin_unlock(&srmmu_context_spinlock); @@ -407,7 +478,7 @@ void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, /* Low level IO area allocation on the SRMMU. */ static inline void srmmu_mapioaddr(unsigned long physaddr, - unsigned long virt_addr, int bus_type) + unsigned long virt_addr, int bus_type) { pgd_t *pgdp; pmd_t *pmdp; @@ -420,8 +491,7 @@ static inline void srmmu_mapioaddr(unsigned long physaddr, ptep = pte_offset_kernel(pmdp, virt_addr); tmp = (physaddr >> 4) | SRMMU_ET_PTE; - /* - * I need to test whether this is consistent over all + /* I need to test whether this is consistent over all * sun4m's. The bus_type represents the upper 4 bits of * 36-bit physical address on the I/O space lines... */ @@ -591,10 +661,10 @@ static void __init srmmu_early_allocate_ptable_skeleton(unsigned long start, pmd_t *pmdp; pte_t *ptep; - while(start < end) { + while (start < end) { pgdp = pgd_offset_k(start); if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) { - pmdp = (pmd_t *) __srmmu_get_nocache( + pmdp = __srmmu_get_nocache( SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE); if (pmdp == NULL) early_pgtable_allocfail("pmd"); @@ -602,8 +672,8 @@ static void __init srmmu_early_allocate_ptable_skeleton(unsigned long start, pgd_set(__nocache_fix(pgdp), pmdp); } pmdp = pmd_offset(__nocache_fix(pgdp), start); - if(srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) { - ptep = (pte_t *)__srmmu_get_nocache(PTE_SIZE, PTE_SIZE); + if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) { + ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE); if (ptep == NULL) early_pgtable_allocfail("pte"); memset(__nocache_fix(ptep), 0, PTE_SIZE); @@ -622,18 +692,18 @@ static void __init srmmu_allocate_ptable_skeleton(unsigned long start, pmd_t *pmdp; pte_t *ptep; - while(start < end) { + while (start < end) { pgdp = pgd_offset_k(start); if (pgd_none(*pgdp)) { - pmdp = (pmd_t *)__srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE); + pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE); if (pmdp == NULL) early_pgtable_allocfail("pmd"); memset(pmdp, 0, SRMMU_PMD_TABLE_SIZE); pgd_set(pgdp, pmdp); } pmdp = pmd_offset(pgdp, start); - if(srmmu_pmd_none(*pmdp)) { - ptep = (pte_t *) __srmmu_get_nocache(PTE_SIZE, + if (srmmu_pmd_none(*pmdp)) { + ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE); if (ptep == NULL) early_pgtable_allocfail("pte"); @@ -658,7 +728,7 @@ static inline unsigned long srmmu_probe(unsigned long vaddr) "=r" (retval) : "r" (vaddr | 0x400), "i" (ASI_M_FLUSH_PROBE)); } else { - retval = leon_swprobe(vaddr, 0); + retval = leon_swprobe(vaddr, NULL); } return retval; } @@ -671,72 +741,76 @@ static inline unsigned long srmmu_probe(unsigned long vaddr) static void __init srmmu_inherit_prom_mappings(unsigned long start, unsigned long end) { + unsigned long probed; + unsigned long addr; pgd_t *pgdp; pmd_t *pmdp; pte_t *ptep; - int what = 0; /* 0 = normal-pte, 1 = pmd-level pte, 2 = pgd-level pte */ - unsigned long prompte; + int what; /* 0 = normal-pte, 1 = pmd-level pte, 2 = pgd-level pte */ - while(start <= end) { + while (start <= end) { if (start == 0) break; /* probably wrap around */ - if(start == 0xfef00000) + if (start == 0xfef00000) start = KADB_DEBUGGER_BEGVM; - if(!(prompte = srmmu_probe(start))) { + probed = srmmu_probe(start); + if (!probed) { + /* continue probing until we find an entry */ start += PAGE_SIZE; continue; } - + /* A red snapper, see what it really is. */ what = 0; - - if(!(start & ~(SRMMU_REAL_PMD_MASK))) { - if(srmmu_probe((start-PAGE_SIZE) + SRMMU_REAL_PMD_SIZE) == prompte) + addr = start - PAGE_SIZE; + + if (!(start & ~(SRMMU_REAL_PMD_MASK))) { + if (srmmu_probe(addr + SRMMU_REAL_PMD_SIZE) == probed) what = 1; } - - if(!(start & ~(SRMMU_PGDIR_MASK))) { - if(srmmu_probe((start-PAGE_SIZE) + SRMMU_PGDIR_SIZE) == - prompte) + + if (!(start & ~(SRMMU_PGDIR_MASK))) { + if (srmmu_probe(addr + SRMMU_PGDIR_SIZE) == probed) what = 2; } - + pgdp = pgd_offset_k(start); - if(what == 2) { - *(pgd_t *)__nocache_fix(pgdp) = __pgd(prompte); + if (what == 2) { + *(pgd_t *)__nocache_fix(pgdp) = __pgd(probed); start += SRMMU_PGDIR_SIZE; continue; } if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) { - pmdp = (pmd_t *)__srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE); + pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, + SRMMU_PMD_TABLE_SIZE); if (pmdp == NULL) early_pgtable_allocfail("pmd"); memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE); pgd_set(__nocache_fix(pgdp), pmdp); } pmdp = pmd_offset(__nocache_fix(pgdp), start); - if(srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) { - ptep = (pte_t *) __srmmu_get_nocache(PTE_SIZE, - PTE_SIZE); + if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) { + ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE); if (ptep == NULL) early_pgtable_allocfail("pte"); memset(__nocache_fix(ptep), 0, PTE_SIZE); pmd_set(__nocache_fix(pmdp), ptep); } - if(what == 1) { - /* - * We bend the rule where all 16 PTPs in a pmd_t point + if (what == 1) { + /* We bend the rule where all 16 PTPs in a pmd_t point * inside the same PTE page, and we leak a perfectly * good hardware PTE piece. Alternatives seem worse. */ unsigned int x; /* Index of HW PMD in soft cluster */ + unsigned long *val; x = (start >> PMD_SHIFT) & 15; - *(unsigned long *)__nocache_fix(&pmdp->pmdv[x]) = prompte; + val = &pmdp->pmdv[x]; + *(unsigned long *)__nocache_fix(val) = probed; start += SRMMU_REAL_PMD_SIZE; continue; } ptep = pte_offset_kernel(__nocache_fix(pmdp), start); - *(pte_t *)__nocache_fix(ptep) = __pte(prompte); + *(pte_t *)__nocache_fix(ptep) = __pte(probed); start += PAGE_SIZE; } } @@ -765,18 +839,18 @@ static unsigned long __init map_spbank(unsigned long vbase, int sp_entry) if (vstart < min_vaddr || vstart >= max_vaddr) return vstart; - + if (vend > max_vaddr || vend < min_vaddr) vend = max_vaddr; - while(vstart < vend) { + while (vstart < vend) { do_large_mapping(vstart, pstart); vstart += SRMMU_PGDIR_SIZE; pstart += SRMMU_PGDIR_SIZE; } return vstart; } -static inline void map_kernel(void) +static void __init map_kernel(void) { int i; @@ -789,12 +863,7 @@ static inline void map_kernel(void) } } -/* Paging initialization on the Sparc Reference MMU. */ -extern void sparc_context_init(int); - -void (*poke_srmmu)(void) __cpuinitdata = NULL; - -extern unsigned long bootmem_init(unsigned long *pages_avail); +void (*poke_srmmu)(void) = NULL; void __init srmmu_paging_init(void) { @@ -806,6 +875,7 @@ void __init srmmu_paging_init(void) pte_t *pte; unsigned long pages_avail; + init_mm.context = (unsigned long) NO_CONTEXT; sparc_iomap.start = SUN4M_IOBASE_VADDR; /* 16MB of IOSPACE on all sun4m's. */ if (sparc_cpu_model == sun4d) @@ -814,9 +884,9 @@ void __init srmmu_paging_init(void) /* Find the number of contexts on the srmmu. */ cpunode = prom_getchild(prom_root_node); num_contexts = 0; - while(cpunode != 0) { + while (cpunode != 0) { prom_getstring(cpunode, "device_type", node_str, sizeof(node_str)); - if(!strcmp(node_str, "cpu")) { + if (!strcmp(node_str, "cpu")) { num_contexts = prom_getintdefault(cpunode, "mmu-nctx", 0x8); break; } @@ -824,7 +894,7 @@ void __init srmmu_paging_init(void) } } - if(!num_contexts) { + if (!num_contexts) { prom_printf("Something wrong, can't find cpu node in paging_init.\n"); prom_halt(); } @@ -834,14 +904,14 @@ void __init srmmu_paging_init(void) srmmu_nocache_calcsize(); srmmu_nocache_init(); - srmmu_inherit_prom_mappings(0xfe400000,(LINUX_OPPROM_ENDVM-PAGE_SIZE)); + srmmu_inherit_prom_mappings(0xfe400000, (LINUX_OPPROM_ENDVM - PAGE_SIZE)); map_kernel(); /* ctx table has to be physically aligned to its size */ - srmmu_context_table = (ctxd_t *)__srmmu_get_nocache(num_contexts*sizeof(ctxd_t), num_contexts*sizeof(ctxd_t)); + srmmu_context_table = __srmmu_get_nocache(num_contexts * sizeof(ctxd_t), num_contexts * sizeof(ctxd_t)); srmmu_ctx_table_phys = (ctxd_t *)__nocache_pa((unsigned long)srmmu_context_table); - for(i = 0; i < num_contexts; i++) + for (i = 0; i < num_contexts; i++) srmmu_ctxd_set((ctxd_t *)__nocache_fix(&srmmu_context_table[i]), srmmu_swapper_pg_dir); flush_cache_all(); @@ -897,7 +967,7 @@ void __init srmmu_paging_init(void) void mmu_info(struct seq_file *m) { - seq_printf(m, + seq_printf(m, "MMU type\t: %s\n" "contexts\t: %d\n" "nocache total\t: %ld\n" @@ -908,10 +978,16 @@ void mmu_info(struct seq_file *m) srmmu_nocache_map.used << SRMMU_NOCACHE_BITMAP_SHIFT); } +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + mm->context = NO_CONTEXT; + return 0; +} + void destroy_context(struct mm_struct *mm) { - if(mm->context != NO_CONTEXT) { + if (mm->context != NO_CONTEXT) { flush_cache_mm(mm); srmmu_ctxd_set(&srmmu_context_table[mm->context], srmmu_swapper_pg_dir); flush_tlb_mm(mm); @@ -941,13 +1017,12 @@ static void __init init_vac_layout(void) #endif nd = prom_getchild(prom_root_node); - while((nd = prom_getsibling(nd)) != 0) { + while ((nd = prom_getsibling(nd)) != 0) { prom_getstring(nd, "device_type", node_str, sizeof(node_str)); - if(!strcmp(node_str, "cpu")) { + if (!strcmp(node_str, "cpu")) { vac_line_size = prom_getint(nd, "cache-line-size"); if (vac_line_size == -1) { - prom_printf("can't determine cache-line-size, " - "halting.\n"); + prom_printf("can't determine cache-line-size, halting.\n"); prom_halt(); } cache_lines = prom_getint(nd, "cache-nlines"); @@ -958,9 +1033,9 @@ static void __init init_vac_layout(void) vac_cache_size = cache_lines * vac_line_size; #ifdef CONFIG_SMP - if(vac_cache_size > max_size) + if (vac_cache_size > max_size) max_size = vac_cache_size; - if(vac_line_size < min_line_size) + if (vac_line_size < min_line_size) min_line_size = vac_line_size; //FIXME: cpus not contiguous!! cpu++; @@ -971,7 +1046,7 @@ static void __init init_vac_layout(void) #endif } } - if(nd == 0) { + if (nd == 0) { prom_printf("No CPU nodes found, halting.\n"); prom_halt(); } @@ -983,7 +1058,7 @@ static void __init init_vac_layout(void) (int)vac_cache_size, (int)vac_line_size); } -static void __cpuinit poke_hypersparc(void) +static void poke_hypersparc(void) { volatile unsigned long clear; unsigned long mreg = srmmu_get_mmureg(); @@ -1035,7 +1110,7 @@ static void __init init_hypersparc(void) hypersparc_setup_blockops(); } -static void __cpuinit poke_swift(void) +static void poke_swift(void) { unsigned long mreg; @@ -1082,7 +1157,7 @@ static void __init init_swift(void) "=r" (swift_rev) : "r" (SWIFT_MASKID_ADDR), "i" (ASI_M_BYPASS)); srmmu_name = "Fujitsu Swift"; - switch(swift_rev) { + switch (swift_rev) { case 0x11: case 0x20: case 0x23: @@ -1215,17 +1290,18 @@ static void turbosparc_flush_tlb_page(struct vm_area_struct *vma, unsigned long } -static void __cpuinit poke_turbosparc(void) +static void poke_turbosparc(void) { unsigned long mreg = srmmu_get_mmureg(); unsigned long ccreg; /* Clear any crap from the cache or else... */ turbosparc_flush_cache_all(); - mreg &= ~(TURBOSPARC_ICENABLE | TURBOSPARC_DCENABLE); /* Temporarily disable I & D caches */ + /* Temporarily disable I & D caches */ + mreg &= ~(TURBOSPARC_ICENABLE | TURBOSPARC_DCENABLE); mreg &= ~(TURBOSPARC_PCENABLE); /* Don't check parity */ srmmu_set_mmureg(mreg); - + ccreg = turbosparc_get_ccreg(); #ifdef TURBOSPARC_WRITEBACK @@ -1248,7 +1324,7 @@ static void __cpuinit poke_turbosparc(void) default: ccreg |= (TURBOSPARC_SCENABLE); } - turbosparc_set_ccreg (ccreg); + turbosparc_set_ccreg(ccreg); mreg |= (TURBOSPARC_ICENABLE | TURBOSPARC_DCENABLE); /* I & D caches on */ mreg |= (TURBOSPARC_ICSNOOP); /* Icache snooping on */ @@ -1277,7 +1353,7 @@ static void __init init_turbosparc(void) poke_srmmu = poke_turbosparc; } -static void __cpuinit poke_tsunami(void) +static void poke_tsunami(void) { unsigned long mreg = srmmu_get_mmureg(); @@ -1318,7 +1394,7 @@ static void __init init_tsunami(void) tsunami_setup_blockops(); } -static void __cpuinit poke_viking(void) +static void poke_viking(void) { unsigned long mreg = srmmu_get_mmureg(); static int smp_catch; @@ -1342,7 +1418,7 @@ static void __cpuinit poke_viking(void) unsigned long bpreg; mreg &= ~(VIKING_TCENABLE); - if(smp_catch++) { + if (smp_catch++) { /* Must disable mixed-cmd mode here for other cpu's. */ bpreg = viking_get_bpreg(); bpreg &= ~(VIKING_ACTION_MIX); @@ -1411,7 +1487,7 @@ static void __init init_viking(void) unsigned long mreg = srmmu_get_mmureg(); /* Ahhh, the viking. SRMMU VLSI abortion number two... */ - if(mreg & VIKING_MMODE) { + if (mreg & VIKING_MMODE) { srmmu_name = "TI Viking"; viking_mxcc_present = 0; msi_set_sync(); @@ -1467,8 +1543,8 @@ static void __init get_srmmu_type(void) } /* Second, check for HyperSparc or Cypress. */ - if(mod_typ == 1) { - switch(mod_rev) { + if (mod_typ == 1) { + switch (mod_rev) { case 7: /* UP or MP Hypersparc */ init_hypersparc(); @@ -1488,9 +1564,8 @@ static void __init get_srmmu_type(void) } return; } - - /* - * Now Fujitsu TurboSparc. It might happen that it is + + /* Now Fujitsu TurboSparc. It might happen that it is * in Swift emulation mode, so we will check later... */ if (psr_typ == 0 && psr_vers == 5) { @@ -1499,15 +1574,15 @@ static void __init get_srmmu_type(void) } /* Next check for Fujitsu Swift. */ - if(psr_typ == 0 && psr_vers == 4) { + if (psr_typ == 0 && psr_vers == 4) { phandle cpunode; char node_str[128]; /* Look if it is not a TurboSparc emulating Swift... */ cpunode = prom_getchild(prom_root_node); - while((cpunode = prom_getsibling(cpunode)) != 0) { + while ((cpunode = prom_getsibling(cpunode)) != 0) { prom_getstring(cpunode, "device_type", node_str, sizeof(node_str)); - if(!strcmp(node_str, "cpu")) { + if (!strcmp(node_str, "cpu")) { if (!prom_getintdefault(cpunode, "psr-implementation", 1) && prom_getintdefault(cpunode, "psr-version", 1) == 5) { init_turbosparc(); @@ -1516,13 +1591,13 @@ static void __init get_srmmu_type(void) break; } } - + init_swift(); return; } /* Now the Viking family of srmmu. */ - if(psr_typ == 4 && + if (psr_typ == 4 && ((psr_vers == 0) || ((psr_vers == 1) && (mod_typ == 0) && (mod_rev == 0)))) { init_viking(); @@ -1530,7 +1605,7 @@ static void __init get_srmmu_type(void) } /* Finally the Tsunami. */ - if(psr_typ == 4 && psr_vers == 1 && (mod_typ || mod_rev)) { + if (psr_typ == 4 && psr_vers == 1 && (mod_typ || mod_rev)) { init_tsunami(); return; } @@ -1694,9 +1769,6 @@ static struct sparc32_cachetlb_ops smp_cachetlb_ops = { /* Load up routines and constants for sun4m and sun4d mmu */ void __init load_mmu(void) { - extern void ld_mmu_iommu(void); - extern void ld_mmu_iounit(void); - /* Functions */ get_srmmu_type(); diff --git a/arch/sparc/mm/srmmu.h b/arch/sparc/mm/srmmu.h deleted file mode 100644 index 5703274ccf8..00000000000 --- a/arch/sparc/mm/srmmu.h +++ /dev/null @@ -1,4 +0,0 @@ -/* srmmu.c */ -extern char *srmmu_name; - -extern void (*poke_srmmu)(void); diff --git a/arch/sparc/mm/swift.S b/arch/sparc/mm/swift.S index c801c3953a0..5d2b88d3942 100644 --- a/arch/sparc/mm/swift.S +++ b/arch/sparc/mm/swift.S @@ -105,7 +105,7 @@ swift_flush_cache_mm_out: .globl swift_flush_cache_range swift_flush_cache_range: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 sub %o2, %o1, %o2 sethi %hi(4096), %o3 cmp %o2, %o3 @@ -116,7 +116,7 @@ swift_flush_cache_range: .globl swift_flush_cache_page swift_flush_cache_page: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 70: ld [%o0 + AOFF_mm_context], %g2 cmp %g2, -1 @@ -219,7 +219,7 @@ swift_flush_sig_insns: .globl swift_flush_tlb_range .globl swift_flush_tlb_all swift_flush_tlb_range: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 swift_flush_tlb_mm: ld [%o0 + AOFF_mm_context], %g2 cmp %g2, -1 @@ -233,7 +233,7 @@ swift_flush_tlb_all_out: .globl swift_flush_tlb_page swift_flush_tlb_page: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 andn %o1, (PAGE_SIZE - 1), %o1 diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index b1f279cd00b..b89aba217e3 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -4,7 +4,6 @@ */ #include <linux/kernel.h> -#include <linux/init.h> #include <linux/percpu.h> #include <linux/mm.h> #include <linux/swap.h> @@ -24,11 +23,17 @@ static DEFINE_PER_CPU(struct tlb_batch, tlb_batch); void flush_tlb_pending(void) { struct tlb_batch *tb = &get_cpu_var(tlb_batch); + struct mm_struct *mm = tb->mm; - if (tb->tlb_nr) { - flush_tsb_user(tb); + if (!tb->tlb_nr) + goto out; - if (CTX_VALID(tb->mm->context)) { + flush_tsb_user(tb); + + if (CTX_VALID(mm->context)) { + if (tb->tlb_nr == 1) { + global_flush_tlb_page(mm, tb->vaddrs[0]); + } else { #ifdef CONFIG_SMP smp_flush_tlb_pending(tb->mm, tb->tlb_nr, &tb->vaddrs[0]); @@ -37,22 +42,68 @@ void flush_tlb_pending(void) tb->tlb_nr, &tb->vaddrs[0]); #endif } - tb->tlb_nr = 0; } + tb->tlb_nr = 0; + +out: put_cpu_var(tlb_batch); } -void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, - pte_t *ptep, pte_t orig, int fullmm) +void arch_enter_lazy_mmu_mode(void) +{ + struct tlb_batch *tb = &__get_cpu_var(tlb_batch); + + tb->active = 1; +} + +void arch_leave_lazy_mmu_mode(void) +{ + struct tlb_batch *tb = &__get_cpu_var(tlb_batch); + + if (tb->tlb_nr) + flush_tlb_pending(); + tb->active = 0; +} + +static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, + bool exec) { struct tlb_batch *tb = &get_cpu_var(tlb_batch); unsigned long nr; vaddr &= PAGE_MASK; - if (pte_exec(orig)) + if (exec) vaddr |= 0x1UL; + nr = tb->tlb_nr; + + if (unlikely(nr != 0 && mm != tb->mm)) { + flush_tlb_pending(); + nr = 0; + } + + if (!tb->active) { + flush_tsb_user_page(mm, vaddr); + global_flush_tlb_page(mm, vaddr); + goto out; + } + + if (nr == 0) + tb->mm = mm; + + tb->vaddrs[nr] = vaddr; + tb->tlb_nr = ++nr; + if (nr >= TLB_BATCH_NR) + flush_tlb_pending(); + +out: + put_cpu_var(tlb_batch); +} + +void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, + pte_t *ptep, pte_t orig, int fullmm) +{ if (tlb_type != hypervisor && pte_dirty(orig)) { unsigned long paddr, pfn = pte_pfn(orig); @@ -77,26 +128,116 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, } no_cache_flush: + if (!fullmm) + tlb_batch_add_one(mm, vaddr, pte_exec(orig)); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr, + pmd_t pmd) +{ + unsigned long end; + pte_t *pte; + + pte = pte_offset_map(&pmd, vaddr); + end = vaddr + HPAGE_SIZE; + while (vaddr < end) { + if (pte_val(*pte) & _PAGE_VALID) { + bool exec = pte_exec(*pte); + + tlb_batch_add_one(mm, vaddr, exec); + } + pte++; + vaddr += PAGE_SIZE; + } + pte_unmap(pte); +} + +void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + pmd_t orig = *pmdp; + + *pmdp = pmd; - if (fullmm) { - put_cpu_var(tlb_batch); + if (mm == &init_mm) return; + + if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) { + if (pmd_val(pmd) & _PAGE_PMD_HUGE) + mm->context.huge_pte_count++; + else + mm->context.huge_pte_count--; + + /* Do not try to allocate the TSB hash table if we + * don't have one already. We have various locks held + * and thus we'll end up doing a GFP_KERNEL allocation + * in an atomic context. + * + * Instead, we let the first TLB miss on a hugepage + * take care of this. + */ } - nr = tb->tlb_nr; + if (!pmd_none(orig)) { + addr &= HPAGE_MASK; + if (pmd_trans_huge(orig)) { + pte_t orig_pte = __pte(pmd_val(orig)); + bool exec = pte_exec(orig_pte); - if (unlikely(nr != 0 && mm != tb->mm)) { - flush_tlb_pending(); - nr = 0; + tlb_batch_add_one(mm, addr, exec); + tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec); + } else { + tlb_batch_pmd_scan(mm, addr, orig); + } } +} - if (nr == 0) - tb->mm = mm; +void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) +{ + pmd_t entry = *pmdp; - tb->vaddrs[nr] = vaddr; - tb->tlb_nr = ++nr; - if (nr >= TLB_BATCH_NR) - flush_tlb_pending(); + pmd_val(entry) &= ~_PAGE_VALID; - put_cpu_var(tlb_batch); + set_pmd_at(vma->vm_mm, address, pmdp, entry); + flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); +} + +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) +{ + struct list_head *lh = (struct list_head *) pgtable; + + assert_spin_locked(&mm->page_table_lock); + + /* FIFO */ + if (!pmd_huge_pte(mm, pmdp)) + INIT_LIST_HEAD(lh); + else + list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); + pmd_huge_pte(mm, pmdp) = pgtable; +} + +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) +{ + struct list_head *lh; + pgtable_t pgtable; + + assert_spin_locked(&mm->page_table_lock); + + /* FIFO */ + pgtable = pmd_huge_pte(mm, pmdp); + lh = (struct list_head *) pgtable; + if (list_empty(lh)) + pmd_huge_pte(mm, pmdp) = NULL; + else { + pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; + list_del(lh); + } + pte_val(pgtable[0]) = 0; + pte_val(pgtable[1]) = 0; + + return pgtable; } +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index c52add79b83..a06576683c3 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -7,11 +7,11 @@ #include <linux/preempt.h> #include <linux/slab.h> #include <asm/page.h> -#include <asm/tlbflush.h> -#include <asm/tlb.h> -#include <asm/mmu_context.h> #include <asm/pgtable.h> +#include <asm/mmu_context.h> +#include <asm/setup.h> #include <asm/tsb.h> +#include <asm/tlb.h> #include <asm/oplib.h> extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; @@ -46,28 +46,56 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end) } } +static void __flush_tsb_one_entry(unsigned long tsb, unsigned long v, + unsigned long hash_shift, + unsigned long nentries) +{ + unsigned long tag, ent, hash; + + v &= ~0x1UL; + hash = tsb_hash(v, hash_shift, nentries); + ent = tsb + (hash * sizeof(struct tsb)); + tag = (v >> 22UL); + + tsb_flush(ent, tag); +} + static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift, unsigned long tsb, unsigned long nentries) { unsigned long i; - for (i = 0; i < tb->tlb_nr; i++) { - unsigned long v = tb->vaddrs[i]; - unsigned long tag, ent, hash; + for (i = 0; i < tb->tlb_nr; i++) + __flush_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, nentries); +} - v &= ~0x1UL; +void flush_tsb_user(struct tlb_batch *tb) +{ + struct mm_struct *mm = tb->mm; + unsigned long nentries, base, flags; - hash = tsb_hash(v, hash_shift, nentries); - ent = tsb + (hash * sizeof(struct tsb)); - tag = (v >> 22UL); + spin_lock_irqsave(&mm->context.lock, flags); - tsb_flush(ent, tag); + base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; + nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(base); + __flush_tsb_one(tb, PAGE_SHIFT, base, nentries); + +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + if (mm->context.tsb_block[MM_TSB_HUGE].tsb) { + base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb; + nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(base); + __flush_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries); } +#endif + spin_unlock_irqrestore(&mm->context.lock, flags); } -void flush_tsb_user(struct tlb_batch *tb) +void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr) { - struct mm_struct *mm = tb->mm; unsigned long nentries, base, flags; spin_lock_irqsave(&mm->context.lock, flags); @@ -76,43 +104,26 @@ void flush_tsb_user(struct tlb_batch *tb) nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; if (tlb_type == cheetah_plus || tlb_type == hypervisor) base = __pa(base); - __flush_tsb_one(tb, PAGE_SHIFT, base, nentries); + __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries); -#ifdef CONFIG_HUGETLB_PAGE +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) if (mm->context.tsb_block[MM_TSB_HUGE].tsb) { base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb; nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; if (tlb_type == cheetah_plus || tlb_type == hypervisor) base = __pa(base); - __flush_tsb_one(tb, HPAGE_SHIFT, base, nentries); + __flush_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT, nentries); } #endif spin_unlock_irqrestore(&mm->context.lock, flags); } -#if defined(CONFIG_SPARC64_PAGE_SIZE_8KB) #define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K #define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_8K -#elif defined(CONFIG_SPARC64_PAGE_SIZE_64KB) -#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_64K -#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_64K -#else -#error Broken base page size setting... -#endif -#ifdef CONFIG_HUGETLB_PAGE -#if defined(CONFIG_HUGETLB_PAGE_SIZE_64K) -#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_64K -#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_64K -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) -#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_512K -#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_512K -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4MB) +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) #define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_4MB #define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_4MB -#else -#error Broken huge page size setting... -#endif #endif static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes) @@ -123,7 +134,19 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign mm->context.tsb_block[tsb_idx].tsb_nentries = tsb_bytes / sizeof(struct tsb); - base = TSBMAP_BASE; + switch (tsb_idx) { + case MM_TSB_BASE: + base = TSBMAP_8K_BASE; + break; +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + case MM_TSB_HUGE: + base = TSBMAP_4M_BASE; + break; +#endif + default: + BUG(); + } + tte = pgprot_val(PAGE_KERNEL_LOCKED); tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb); BUG_ON(tsb_paddr & (tsb_bytes - 1UL)); @@ -207,7 +230,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign case MM_TSB_BASE: hp->pgsz_idx = HV_PGSZ_IDX_BASE; break; -#ifdef CONFIG_HUGETLB_PAGE +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) case MM_TSB_HUGE: hp->pgsz_idx = HV_PGSZ_IDX_HUGE; break; @@ -222,7 +245,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign case MM_TSB_BASE: hp->pgsz_mask = HV_PGSZ_MASK_BASE; break; -#ifdef CONFIG_HUGETLB_PAGE +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) case MM_TSB_HUGE: hp->pgsz_mask = HV_PGSZ_MASK_HUGE; break; @@ -263,7 +286,7 @@ void __init pgtable_cache_init(void) prom_halt(); } - for (i = 0; i < 8; i++) { + for (i = 0; i < ARRAY_SIZE(tsb_cache_names); i++) { unsigned long size = 8192 << i; const char *name = tsb_cache_names[i]; @@ -331,7 +354,7 @@ void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss) retry_tsb_alloc: gfp_flags = GFP_KERNEL; if (new_size > (PAGE_SIZE * 2)) - gfp_flags = __GFP_NOWARN | __GFP_NORETRY; + gfp_flags |= __GFP_NOWARN | __GFP_NORETRY; new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index], gfp_flags, numa_node_id()); @@ -444,7 +467,7 @@ retry_tsb_alloc: int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { -#ifdef CONFIG_HUGETLB_PAGE +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) unsigned long huge_pte_count; #endif unsigned int i; @@ -453,7 +476,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) mm->context.sparc64_ctx_val = 0UL; -#ifdef CONFIG_HUGETLB_PAGE +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) /* We reset it to zero because the fork() page copying * will re-increment the counters as the parent PTEs are * copied into the child address space. @@ -474,7 +497,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) */ tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm)); -#ifdef CONFIG_HUGETLB_PAGE +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) if (unlikely(huge_pte_count)) tsb_grow(mm, MM_TSB_HUGE, huge_pte_count); #endif diff --git a/arch/sparc/mm/tsunami.S b/arch/sparc/mm/tsunami.S index 4e55e8f7664..bf10a345fa8 100644 --- a/arch/sparc/mm/tsunami.S +++ b/arch/sparc/mm/tsunami.S @@ -24,7 +24,7 @@ /* Sliiick... */ tsunami_flush_cache_page: tsunami_flush_cache_range: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 tsunami_flush_cache_mm: ld [%o0 + AOFF_mm_context], %g2 cmp %g2, -1 @@ -46,7 +46,7 @@ tsunami_flush_sig_insns: /* More slick stuff... */ tsunami_flush_tlb_range: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 tsunami_flush_tlb_mm: ld [%o0 + AOFF_mm_context], %g2 cmp %g2, -1 @@ -65,7 +65,7 @@ tsunami_flush_tlb_out: /* This one can be done in a fine grained manner... */ tsunami_flush_tlb_page: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 andn %o1, (PAGE_SIZE - 1), %o1 diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index 874162a11ce..b4f4733abc6 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -53,6 +53,33 @@ __flush_tlb_mm: /* 18 insns */ nop .align 32 + .globl __flush_tlb_page +__flush_tlb_page: /* 22 insns */ + /* %o0 = context, %o1 = vaddr */ + rdpr %pstate, %g7 + andn %g7, PSTATE_IE, %g2 + wrpr %g2, %pstate + mov SECONDARY_CONTEXT, %o4 + ldxa [%o4] ASI_DMMU, %g2 + stxa %o0, [%o4] ASI_DMMU + andcc %o1, 1, %g0 + andn %o1, 1, %o3 + be,pn %icc, 1f + or %o3, 0x10, %o3 + stxa %g0, [%o3] ASI_IMMU_DEMAP +1: stxa %g0, [%o3] ASI_DMMU_DEMAP + membar #Sync + stxa %g2, [%o4] ASI_DMMU + sethi %hi(KERNBASE), %o4 + flush %o4 + retl + wrpr %g7, 0x0, %pstate + nop + nop + nop + nop + + .align 32 .globl __flush_tlb_pending __flush_tlb_pending: /* 26 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ @@ -126,10 +153,10 @@ __spitfire_flush_tlb_mm_slow: .globl __flush_icache_page __flush_icache_page: /* %o0 = phys_page */ srlx %o0, PAGE_SHIFT, %o0 - sethi %uhi(PAGE_OFFSET), %g1 + sethi %hi(PAGE_OFFSET), %g1 sllx %o0, PAGE_SHIFT, %o0 sethi %hi(PAGE_SIZE), %g2 - sllx %g1, 32, %g1 + ldx [%g1 + %lo(PAGE_OFFSET)], %g1 add %o0, %g1, %o0 1: subcc %g2, 32, %g2 bne,pt %icc, 1b @@ -151,8 +178,8 @@ __flush_icache_page: /* %o0 = phys_page */ .align 64 .globl __flush_dcache_page __flush_dcache_page: /* %o0=kaddr, %o1=flush_icache */ - sethi %uhi(PAGE_OFFSET), %g1 - sllx %g1, 32, %g1 + sethi %hi(PAGE_OFFSET), %g1 + ldx [%g1 + %lo(PAGE_OFFSET)], %g1 sub %o0, %g1, %o0 ! physical address srlx %o0, 11, %o0 ! make D-cache TAG sethi %hi(1 << 14), %o2 ! D-cache size @@ -203,6 +230,31 @@ __cheetah_flush_tlb_mm: /* 19 insns */ retl wrpr %g7, 0x0, %pstate +__cheetah_flush_tlb_page: /* 22 insns */ + /* %o0 = context, %o1 = vaddr */ + rdpr %pstate, %g7 + andn %g7, PSTATE_IE, %g2 + wrpr %g2, 0x0, %pstate + wrpr %g0, 1, %tl + mov PRIMARY_CONTEXT, %o4 + ldxa [%o4] ASI_DMMU, %g2 + srlx %g2, CTX_PGSZ1_NUC_SHIFT, %o3 + sllx %o3, CTX_PGSZ1_NUC_SHIFT, %o3 + or %o0, %o3, %o0 /* Preserve nucleus page size fields */ + stxa %o0, [%o4] ASI_DMMU + andcc %o1, 1, %g0 + be,pn %icc, 1f + andn %o1, 1, %o3 + stxa %g0, [%o3] ASI_IMMU_DEMAP +1: stxa %g0, [%o3] ASI_DMMU_DEMAP + membar #Sync + stxa %g2, [%o4] ASI_DMMU + sethi %hi(KERNBASE), %o4 + flush %o4 + wrpr %g0, 0, %tl + retl + wrpr %g7, 0x0, %pstate + __cheetah_flush_tlb_pending: /* 27 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ rdpr %pstate, %g7 @@ -235,8 +287,8 @@ __cheetah_flush_tlb_pending: /* 27 insns */ #ifdef DCACHE_ALIASING_POSSIBLE __cheetah_flush_dcache_page: /* 11 insns */ - sethi %uhi(PAGE_OFFSET), %g1 - sllx %g1, 32, %g1 + sethi %hi(PAGE_OFFSET), %g1 + ldx [%g1 + %lo(PAGE_OFFSET)], %g1 sub %o0, %g1, %o0 sethi %hi(PAGE_SIZE), %o4 1: subcc %o4, (1 << 5), %o4 @@ -269,6 +321,20 @@ __hypervisor_flush_tlb_mm: /* 10 insns */ retl nop +__hypervisor_flush_tlb_page: /* 11 insns */ + /* %o0 = context, %o1 = vaddr */ + mov %o0, %g2 + mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */ + mov %g2, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + srlx %o0, PAGE_SHIFT, %o0 + sllx %o0, PAGE_SHIFT, %o0 + ta HV_MMU_UNMAP_ADDR_TRAP + brnz,pn %o0, __hypervisor_tlb_tl0_error + mov HV_MMU_UNMAP_ADDR_TRAP, %o1 + retl + nop + __hypervisor_flush_tlb_pending: /* 16 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ sllx %o1, 3, %g1 @@ -339,6 +405,13 @@ cheetah_patch_cachetlbops: call tlb_patch_one mov 19, %o2 + sethi %hi(__flush_tlb_page), %o0 + or %o0, %lo(__flush_tlb_page), %o0 + sethi %hi(__cheetah_flush_tlb_page), %o1 + or %o1, %lo(__cheetah_flush_tlb_page), %o1 + call tlb_patch_one + mov 22, %o2 + sethi %hi(__flush_tlb_pending), %o0 or %o0, %lo(__flush_tlb_pending), %o0 sethi %hi(__cheetah_flush_tlb_pending), %o1 @@ -397,10 +470,9 @@ xcall_flush_tlb_mm: /* 21 insns */ nop nop - .globl xcall_flush_tlb_pending -xcall_flush_tlb_pending: /* 21 insns */ - /* %g5=context, %g1=nr, %g7=vaddrs[] */ - sllx %g1, 3, %g1 + .globl xcall_flush_tlb_page +xcall_flush_tlb_page: /* 17 insns */ + /* %g5=context, %g1=vaddr */ mov PRIMARY_CONTEXT, %g4 ldxa [%g4] ASI_DMMU, %g2 srlx %g2, CTX_PGSZ1_NUC_SHIFT, %g4 @@ -408,20 +480,16 @@ xcall_flush_tlb_pending: /* 21 insns */ or %g5, %g4, %g5 mov PRIMARY_CONTEXT, %g4 stxa %g5, [%g4] ASI_DMMU -1: sub %g1, (1 << 3), %g1 - ldx [%g7 + %g1], %g5 - andcc %g5, 0x1, %g0 + andcc %g1, 0x1, %g0 be,pn %icc, 2f - - andn %g5, 0x1, %g5 + andn %g1, 0x1, %g5 stxa %g0, [%g5] ASI_IMMU_DEMAP 2: stxa %g0, [%g5] ASI_DMMU_DEMAP membar #Sync - brnz,pt %g1, 1b - nop stxa %g2, [%g4] ASI_DMMU retry nop + nop .globl xcall_flush_tlb_kernel_range xcall_flush_tlb_kernel_range: /* 25 insns */ @@ -481,8 +549,8 @@ xcall_sync_tick: .globl xcall_fetch_glob_regs xcall_fetch_glob_regs: - sethi %hi(global_reg_snapshot), %g1 - or %g1, %lo(global_reg_snapshot), %g1 + sethi %hi(global_cpu_snapshot), %g1 + or %g1, %lo(global_cpu_snapshot), %g1 __GET_CPUID(%g2) sllx %g2, 6, %g3 add %g1, %g3, %g1 @@ -509,6 +577,66 @@ xcall_fetch_glob_regs: stx %g3, [%g1 + GR_SNAP_THREAD] retry + .globl xcall_fetch_glob_pmu +xcall_fetch_glob_pmu: + sethi %hi(global_cpu_snapshot), %g1 + or %g1, %lo(global_cpu_snapshot), %g1 + __GET_CPUID(%g2) + sllx %g2, 6, %g3 + add %g1, %g3, %g1 + rd %pic, %g7 + stx %g7, [%g1 + (4 * 8)] + rd %pcr, %g7 + stx %g7, [%g1 + (0 * 8)] + retry + + .globl xcall_fetch_glob_pmu_n4 +xcall_fetch_glob_pmu_n4: + sethi %hi(global_cpu_snapshot), %g1 + or %g1, %lo(global_cpu_snapshot), %g1 + __GET_CPUID(%g2) + sllx %g2, 6, %g3 + add %g1, %g3, %g1 + + ldxa [%g0] ASI_PIC, %g7 + stx %g7, [%g1 + (4 * 8)] + mov 0x08, %g3 + ldxa [%g3] ASI_PIC, %g7 + stx %g7, [%g1 + (5 * 8)] + mov 0x10, %g3 + ldxa [%g3] ASI_PIC, %g7 + stx %g7, [%g1 + (6 * 8)] + mov 0x18, %g3 + ldxa [%g3] ASI_PIC, %g7 + stx %g7, [%g1 + (7 * 8)] + + mov %o0, %g2 + mov %o1, %g3 + mov %o5, %g7 + + mov HV_FAST_VT_GET_PERFREG, %o5 + mov 3, %o0 + ta HV_FAST_TRAP + stx %o1, [%g1 + (3 * 8)] + mov HV_FAST_VT_GET_PERFREG, %o5 + mov 2, %o0 + ta HV_FAST_TRAP + stx %o1, [%g1 + (2 * 8)] + mov HV_FAST_VT_GET_PERFREG, %o5 + mov 1, %o0 + ta HV_FAST_TRAP + stx %o1, [%g1 + (1 * 8)] + mov HV_FAST_VT_GET_PERFREG, %o5 + mov 0, %o0 + ta HV_FAST_TRAP + stx %o1, [%g1 + (0 * 8)] + + mov %g2, %o0 + mov %g3, %o1 + mov %g7, %o5 + + retry + #ifdef DCACHE_ALIASING_POSSIBLE .align 32 .globl xcall_flush_dcache_page_cheetah @@ -596,15 +724,13 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ membar #Sync retry - .globl __hypervisor_xcall_flush_tlb_pending -__hypervisor_xcall_flush_tlb_pending: /* 21 insns */ - /* %g5=ctx, %g1=nr, %g7=vaddrs[], %g2,%g3,%g4,g6=scratch */ - sllx %g1, 3, %g1 + .globl __hypervisor_xcall_flush_tlb_page +__hypervisor_xcall_flush_tlb_page: /* 17 insns */ + /* %g5=ctx, %g1=vaddr */ mov %o0, %g2 mov %o1, %g3 mov %o2, %g4 -1: sub %g1, (1 << 3), %g1 - ldx [%g7 + %g1], %o0 /* ARG0: virtual address */ + mov %g1, %o0 /* ARG0: virtual address */ mov %g5, %o1 /* ARG1: mmu context */ mov HV_MMU_ALL, %o2 /* ARG2: flags */ srlx %o0, PAGE_SHIFT, %o0 @@ -613,8 +739,6 @@ __hypervisor_xcall_flush_tlb_pending: /* 21 insns */ mov HV_MMU_UNMAP_ADDR_TRAP, %g6 brnz,a,pn %o0, __hypervisor_tlb_xcall_error mov %o0, %g5 - brnz,pt %g1, 1b - nop mov %g2, %o0 mov %g3, %o1 mov %g4, %o2 @@ -697,6 +821,13 @@ hypervisor_patch_cachetlbops: call tlb_patch_one mov 10, %o2 + sethi %hi(__flush_tlb_page), %o0 + or %o0, %lo(__flush_tlb_page), %o0 + sethi %hi(__hypervisor_flush_tlb_page), %o1 + or %o1, %lo(__hypervisor_flush_tlb_page), %o1 + call tlb_patch_one + mov 11, %o2 + sethi %hi(__flush_tlb_pending), %o0 or %o0, %lo(__flush_tlb_pending), %o0 sethi %hi(__hypervisor_flush_tlb_pending), %o1 @@ -728,12 +859,12 @@ hypervisor_patch_cachetlbops: call tlb_patch_one mov 21, %o2 - sethi %hi(xcall_flush_tlb_pending), %o0 - or %o0, %lo(xcall_flush_tlb_pending), %o0 - sethi %hi(__hypervisor_xcall_flush_tlb_pending), %o1 - or %o1, %lo(__hypervisor_xcall_flush_tlb_pending), %o1 + sethi %hi(xcall_flush_tlb_page), %o0 + or %o0, %lo(xcall_flush_tlb_page), %o0 + sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1 + or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1 call tlb_patch_one - mov 21, %o2 + mov 17, %o2 sethi %hi(xcall_flush_tlb_kernel_range), %o0 or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 diff --git a/arch/sparc/mm/viking.S b/arch/sparc/mm/viking.S index bf8ee0613ae..852257fcc82 100644 --- a/arch/sparc/mm/viking.S +++ b/arch/sparc/mm/viking.S @@ -108,7 +108,7 @@ viking_mxcc_flush_page: viking_flush_cache_page: viking_flush_cache_range: #ifndef CONFIG_SMP - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 #endif viking_flush_cache_mm: #ifndef CONFIG_SMP @@ -148,7 +148,7 @@ viking_flush_tlb_mm: #endif viking_flush_tlb_range: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 @@ -173,7 +173,7 @@ viking_flush_tlb_range: #endif viking_flush_tlb_page: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 @@ -239,7 +239,7 @@ sun4dsmp_flush_tlb_range: tst %g5 bne 3f mov SRMMU_CTX_REG, %g1 - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 sethi %hi(~((1 << SRMMU_PGDIR_SHIFT) - 1)), %o4 @@ -265,7 +265,7 @@ sun4dsmp_flush_tlb_page: tst %g5 bne 2f mov SRMMU_CTX_REG, %g1 - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 and %o1, PAGE_MASK, %o1 |
