diff options
Diffstat (limited to 'arch/sparc/mm')
30 files changed, 7236 insertions, 5051 deletions
diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile index 16eeba4b991..30c3eccfdf5 100644 --- a/arch/sparc/mm/Makefile +++ b/arch/sparc/mm/Makefile @@ -1,23 +1,19 @@ -# $Id: Makefile,v 1.38 2000/12/15 00:41:22 davem Exp $ # Makefile for the linux Sparc-specific parts of the memory manager. # -EXTRA_AFLAGS := -ansi +asflags-y := -ansi +ccflags-y := -Werror -obj-y := fault.o init.o loadmmu.o generic.o extable.o btfixup.o +obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o gup.o +obj-y += fault_$(BITS).o +obj-y += init_$(BITS).o +obj-$(CONFIG_SPARC32) += extable.o srmmu.o iommu.o io-unit.o +obj-$(CONFIG_SPARC32) += srmmu_access.o +obj-$(CONFIG_SPARC32) += hypersparc.o viking.o tsunami.o swift.o +obj-$(CONFIG_SPARC32) += leon_mm.o -ifeq ($(CONFIG_SUN4),y) -obj-y += nosrmmu.o -else -obj-y += srmmu.o iommu.o io-unit.o hypersparc.o viking.o tsunami.o swift.o -endif +# Only used by sparc64 +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -ifdef CONFIG_HIGHMEM -obj-y += highmem.o -endif - -ifdef CONFIG_SMP -obj-y += nosun4c.o -else -obj-y += sun4c.o -endif +# Only used by sparc32 +obj-$(CONFIG_HIGHMEM) += highmem.o diff --git a/arch/sparc/mm/btfixup.c b/arch/sparc/mm/btfixup.c deleted file mode 100644 index a312d127d47..00000000000 --- a/arch/sparc/mm/btfixup.c +++ /dev/null @@ -1,334 +0,0 @@ -/* btfixup.c: Boot time code fixup and relocator, so that - * we can get rid of most indirect calls to achieve single - * image sun4c and srmmu kernel. - * - * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <asm/btfixup.h> -#include <asm/page.h> -#include <asm/pgalloc.h> -#include <asm/pgtable.h> -#include <asm/oplib.h> -#include <asm/system.h> -#include <asm/cacheflush.h> - -#define BTFIXUP_OPTIMIZE_NOP -#define BTFIXUP_OPTIMIZE_OTHER - -extern char *srmmu_name; -static char version[] __initdata = "Boot time fixup v1.6. 4/Mar/98 Jakub Jelinek (jj@ultra.linux.cz). 
Patching kernel for "; -#ifdef CONFIG_SUN4 -static char str_sun4c[] __initdata = "sun4\n"; -#else -static char str_sun4c[] __initdata = "sun4c\n"; -#endif -static char str_srmmu[] __initdata = "srmmu[%s]/"; -static char str_iommu[] __initdata = "iommu\n"; -static char str_iounit[] __initdata = "io-unit\n"; - -static int visited __initdata = 0; -extern unsigned int ___btfixup_start[], ___btfixup_end[], __init_begin[], __init_end[], __init_text_end[]; -extern unsigned int _stext[], _end[], __start___ksymtab[], __stop___ksymtab[]; -static char wrong_f[] __initdata = "Trying to set f fixup %p to invalid function %08x\n"; -static char wrong_b[] __initdata = "Trying to set b fixup %p to invalid function %08x\n"; -static char wrong_s[] __initdata = "Trying to set s fixup %p to invalid value %08x\n"; -static char wrong_h[] __initdata = "Trying to set h fixup %p to invalid value %08x\n"; -static char wrong_a[] __initdata = "Trying to set a fixup %p to invalid value %08x\n"; -static char wrong[] __initdata = "Wrong address for %c fixup %p\n"; -static char insn_f[] __initdata = "Fixup f %p refers to weird instructions at %p[%08x,%08x]\n"; -static char insn_b[] __initdata = "Fixup b %p doesn't refer to a SETHI at %p[%08x]\n"; -static char insn_s[] __initdata = "Fixup s %p doesn't refer to an OR at %p[%08x]\n"; -static char insn_h[] __initdata = "Fixup h %p doesn't refer to a SETHI at %p[%08x]\n"; -static char insn_a[] __initdata = "Fixup a %p doesn't refer to a SETHI nor OR at %p[%08x]\n"; -static char insn_i[] __initdata = "Fixup i %p doesn't refer to a valid instruction at %p[%08x]\n"; -static char fca_und[] __initdata = "flush_cache_all undefined in btfixup()\n"; -static char wrong_setaddr[] __initdata = "Garbled CALL/INT patch at %p[%08x,%08x,%08x]=%08x\n"; - -#ifdef BTFIXUP_OPTIMIZE_OTHER -static void __init set_addr(unsigned int *addr, unsigned int q1, int fmangled, unsigned int value) -{ - if (!fmangled) - *addr = value; - else { - unsigned int *q = (unsigned int *)q1; 
- if (*addr == 0x01000000) { - /* Noped */ - *q = value; - } else if (addr[-1] == *q) { - /* Moved */ - addr[-1] = value; - *q = value; - } else { - prom_printf(wrong_setaddr, addr-1, addr[-1], *addr, *q, value); - prom_halt(); - } - } -} -#else -static inline void set_addr(unsigned int *addr, unsigned int q1, int fmangled, unsigned int value) -{ - *addr = value; -} -#endif - -void __init btfixup(void) -{ - unsigned int *p, *q; - int type, count; - unsigned insn; - unsigned *addr; - int fmangled = 0; - void (*flush_cacheall)(void); - - if (!visited) { - visited++; - printk(version); - if (ARCH_SUN4C_SUN4) - printk(str_sun4c); - else { - printk(str_srmmu, srmmu_name); - if (sparc_cpu_model == sun4d) - printk(str_iounit); - else - printk(str_iommu); - } - } - for (p = ___btfixup_start; p < ___btfixup_end; ) { - count = p[2]; - q = p + 3; - switch (type = *(unsigned char *)p) { - case 'f': - count = p[3]; - q = p + 4; - if (((p[0] & 1) || p[1]) - && ((p[1] & 3) || (unsigned *)(p[1]) < _stext || (unsigned *)(p[1]) >= _end)) { - prom_printf(wrong_f, p, p[1]); - prom_halt(); - } - break; - case 'b': - if (p[1] < (unsigned long)__init_begin || p[1] >= (unsigned long)__init_text_end || (p[1] & 3)) { - prom_printf(wrong_b, p, p[1]); - prom_halt(); - } - break; - case 's': - if (p[1] + 0x1000 >= 0x2000) { - prom_printf(wrong_s, p, p[1]); - prom_halt(); - } - break; - case 'h': - if (p[1] & 0x3ff) { - prom_printf(wrong_h, p, p[1]); - prom_halt(); - } - break; - case 'a': - if (p[1] + 0x1000 >= 0x2000 && (p[1] & 0x3ff)) { - prom_printf(wrong_a, p, p[1]); - prom_halt(); - } - break; - } - if (p[0] & 1) { - p[0] &= ~1; - while (count) { - fmangled = 0; - addr = (unsigned *)*q; - if (addr < _stext || addr >= _end) { - prom_printf(wrong, type, p); - prom_halt(); - } - insn = *addr; -#ifdef BTFIXUP_OPTIMIZE_OTHER - if (type != 'f' && q[1]) { - insn = *(unsigned int *)q[1]; - if (!insn || insn == 1) - insn = *addr; - else - fmangled = 1; - } -#endif - switch (type) { - case 'f': /* 
CALL */ - if (addr >= __start___ksymtab && addr < __stop___ksymtab) { - *addr = p[1]; - break; - } else if (!q[1]) { - if ((insn & 0xc1c00000) == 0x01000000) { /* SETHI */ - *addr = (insn & 0xffc00000) | (p[1] >> 10); break; - } else if ((insn & 0xc1f82000) == 0x80102000) { /* OR X, %LO(i), Y */ - *addr = (insn & 0xffffe000) | (p[1] & 0x3ff); break; - } else if ((insn & 0xc0000000) != 0x40000000) { /* !CALL */ - bad_f: - prom_printf(insn_f, p, addr, insn, addr[1]); - prom_halt(); - } - } else if (q[1] != 1) - addr[1] = q[1]; - if (p[2] == BTFIXUPCALL_NORM) { - norm_f: - *addr = 0x40000000 | ((p[1] - (unsigned)addr) >> 2); - q[1] = 0; - break; - } -#ifndef BTFIXUP_OPTIMIZE_NOP - goto norm_f; -#else - if (!(addr[1] & 0x80000000)) { - if ((addr[1] & 0xc1c00000) != 0x01000000) /* !SETHI */ - goto bad_f; /* CALL, Bicc, FBfcc, CBccc are weird in delay slot, aren't they? */ - } else { - if ((addr[1] & 0x01800000) == 0x01800000) { - if ((addr[1] & 0x01f80000) == 0x01e80000) { - /* RESTORE */ - goto norm_f; /* It is dangerous to patch that */ - } - goto bad_f; - } - if ((addr[1] & 0xffffe003) == 0x9e03e000) { - /* ADD %O7, XX, %o7 */ - int displac = (addr[1] << 19); - - displac = (displac >> 21) + 2; - *addr = (0x10800000) + (displac & 0x3fffff); - q[1] = addr[1]; - addr[1] = p[2]; - break; - } - if ((addr[1] & 0x201f) == 0x200f || (addr[1] & 0x7c000) == 0x3c000) - goto norm_f; /* Someone is playing bad tricks with us: rs1 or rs2 is o7 */ - if ((addr[1] & 0x3e000000) == 0x1e000000) - goto norm_f; /* rd is %o7. We'd better take care. 
*/ - } - if (p[2] == BTFIXUPCALL_NOP) { - *addr = 0x01000000; - q[1] = 1; - break; - } -#ifndef BTFIXUP_OPTIMIZE_OTHER - goto norm_f; -#else - if (addr[1] == 0x01000000) { /* NOP in the delay slot */ - q[1] = addr[1]; - *addr = p[2]; - break; - } - if ((addr[1] & 0xc0000000) != 0xc0000000) { - /* Not a memory operation */ - if ((addr[1] & 0x30000000) == 0x10000000) { - /* Ok, non-memory op with rd %oX */ - if ((addr[1] & 0x3e000000) == 0x1c000000) - goto bad_f; /* Aiee. Someone is playing strange %sp tricks */ - if ((addr[1] & 0x3e000000) > 0x12000000 || - ((addr[1] & 0x3e000000) == 0x12000000 && - p[2] != BTFIXUPCALL_STO1O0 && p[2] != BTFIXUPCALL_SWAPO0O1) || - ((p[2] & 0xffffe000) == BTFIXUPCALL_RETINT(0))) { - /* Nobody uses the result. We can nop it out. */ - *addr = p[2]; - q[1] = addr[1]; - addr[1] = 0x01000000; - break; - } - if ((addr[1] & 0xf1ffffe0) == 0x90100000) { - /* MOV %reg, %Ox */ - if ((addr[1] & 0x3e000000) == 0x10000000 && - (p[2] & 0x7c000) == 0x20000) { - /* Ok, it is call xx; mov reg, %o0 and call optimizes - to doing something on %o0. Patch the patch. 
*/ - *addr = (p[2] & ~0x7c000) | ((addr[1] & 0x1f) << 14); - q[1] = addr[1]; - addr[1] = 0x01000000; - break; - } - if ((addr[1] & 0x3e000000) == 0x12000000 && - p[2] == BTFIXUPCALL_STO1O0) { - *addr = (p[2] & ~0x3e000000) | ((addr[1] & 0x1f) << 25); - q[1] = addr[1]; - addr[1] = 0x01000000; - break; - } - } - } - } - *addr = addr[1]; - q[1] = addr[1]; - addr[1] = p[2]; - break; -#endif /* BTFIXUP_OPTIMIZE_OTHER */ -#endif /* BTFIXUP_OPTIMIZE_NOP */ - case 'b': /* BLACKBOX */ - /* Has to be sethi i, xx */ - if ((insn & 0xc1c00000) != 0x01000000) { - prom_printf(insn_b, p, addr, insn); - prom_halt(); - } else { - void (*do_fixup)(unsigned *); - - do_fixup = (void (*)(unsigned *))p[1]; - do_fixup(addr); - } - break; - case 's': /* SIMM13 */ - /* Has to be or %g0, i, xx */ - if ((insn & 0xc1ffe000) != 0x80102000) { - prom_printf(insn_s, p, addr, insn); - prom_halt(); - } - set_addr(addr, q[1], fmangled, (insn & 0xffffe000) | (p[1] & 0x1fff)); - break; - case 'h': /* SETHI */ - /* Has to be sethi i, xx */ - if ((insn & 0xc1c00000) != 0x01000000) { - prom_printf(insn_h, p, addr, insn); - prom_halt(); - } - set_addr(addr, q[1], fmangled, (insn & 0xffc00000) | (p[1] >> 10)); - break; - case 'a': /* HALF */ - /* Has to be sethi i, xx or or %g0, i, xx */ - if ((insn & 0xc1c00000) != 0x01000000 && - (insn & 0xc1ffe000) != 0x80102000) { - prom_printf(insn_a, p, addr, insn); - prom_halt(); - } - if (p[1] & 0x3ff) - set_addr(addr, q[1], fmangled, - (insn & 0x3e000000) | 0x80102000 | (p[1] & 0x1fff)); - else - set_addr(addr, q[1], fmangled, - (insn & 0x3e000000) | 0x01000000 | (p[1] >> 10)); - break; - case 'i': /* INT */ - if ((insn & 0xc1c00000) == 0x01000000) /* %HI */ - set_addr(addr, q[1], fmangled, (insn & 0xffc00000) | (p[1] >> 10)); - else if ((insn & 0x80002000) == 0x80002000 && - (insn & 0x01800000) != 0x01800000) /* %LO */ - set_addr(addr, q[1], fmangled, (insn & 0xffffe000) | (p[1] & 0x3ff)); - else { - prom_printf(insn_i, p, addr, insn); - prom_halt(); - } - break; 
- } - count -= 2; - q += 2; - } - } else - p = q + count; - } -#ifdef CONFIG_SMP - flush_cacheall = (void (*)(void))BTFIXUPVAL_CALL(local_flush_cache_all); -#else - flush_cacheall = (void (*)(void))BTFIXUPVAL_CALL(flush_cache_all); -#endif - if (!flush_cacheall) { - prom_printf(fca_und); - prom_halt(); - } - (*flush_cacheall)(); -} diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c index 16cc28935e3..a61c349448e 100644 --- a/arch/sparc/mm/extable.c +++ b/arch/sparc/mm/extable.c @@ -28,6 +28,10 @@ search_extable(const struct exception_table_entry *start, * word 3: last insn address + 4 bytes * word 4: fixup code address * + * Deleted entries are encoded as: + * word 1: unused + * word 2: -1 + * * See asm/uaccess.h for more details. */ @@ -39,6 +43,10 @@ search_extable(const struct exception_table_entry *start, continue; } + /* A deleted entry; see trim_init_extable */ + if (walk->fixup == -1) + continue; + if (walk->insn == value) return walk; } @@ -57,6 +65,27 @@ search_extable(const struct exception_table_entry *start, return NULL; } +#ifdef CONFIG_MODULES +/* We could memmove them around; easier to mark the trimmed ones. */ +void trim_init_extable(struct module *m) +{ + unsigned int i; + bool range; + + for (i = 0; i < m->num_exentries; i += range ? 2 : 1) { + range = m->extable[i].fixup == 0; + + if (within_module_init(m->extable[i].insn, m)) { + m->extable[i].fixup = -1; + if (range) + m->extable[i+1].fixup = -1; + } + if (range) + i++; + } +} +#endif /* CONFIG_MODULES */ + /* Special extable search, which handles ranges. Returns fixup */ unsigned long search_extables_range(unsigned long addr, unsigned long *g2) { diff --git a/arch/sparc/mm/fault.c b/arch/sparc/mm/fault.c deleted file mode 100644 index e4d9c8e19df..00000000000 --- a/arch/sparc/mm/fault.c +++ /dev/null @@ -1,592 +0,0 @@ -/* $Id: fault.c,v 1.122 2001/11/17 07:19:26 davem Exp $ - * fault.c: Page fault handlers for the Sparc. - * - * Copyright (C) 1995 David S. 
Miller (davem@caip.rutgers.edu) - * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) - * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - */ - -#include <asm/head.h> - -#include <linux/string.h> -#include <linux/types.h> -#include <linux/sched.h> -#include <linux/ptrace.h> -#include <linux/mman.h> -#include <linux/threads.h> -#include <linux/kernel.h> -#include <linux/signal.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/interrupt.h> -#include <linux/module.h> -#include <linux/kdebug.h> - -#include <asm/system.h> -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/memreg.h> -#include <asm/openprom.h> -#include <asm/oplib.h> -#include <asm/smp.h> -#include <asm/traps.h> -#include <asm/uaccess.h> - -extern int prom_node_root; - -/* At boot time we determine these two values necessary for setting - * up the segment maps and page table entries (pte's). - */ - -int num_segmaps, num_contexts; -int invalid_segment; - -/* various Virtual Address Cache parameters we find at boot time... */ - -int vac_size, vac_linesize, vac_do_hw_vac_flushes; -int vac_entries_per_context, vac_entries_per_segment; -int vac_entries_per_page; - -/* Nice, simple, prom library does all the sweating for us. 
;) */ -int prom_probe_memory (void) -{ - register struct linux_mlist_v0 *mlist; - register unsigned long bytes, base_paddr, tally; - register int i; - - i = 0; - mlist= *prom_meminfo()->v0_available; - bytes = tally = mlist->num_bytes; - base_paddr = (unsigned long) mlist->start_adr; - - sp_banks[0].base_addr = base_paddr; - sp_banks[0].num_bytes = bytes; - - while (mlist->theres_more != (void *) 0){ - i++; - mlist = mlist->theres_more; - bytes = mlist->num_bytes; - tally += bytes; - if (i > SPARC_PHYS_BANKS-1) { - printk ("The machine has more banks than " - "this kernel can support\n" - "Increase the SPARC_PHYS_BANKS " - "setting (currently %d)\n", - SPARC_PHYS_BANKS); - i = SPARC_PHYS_BANKS-1; - break; - } - - sp_banks[i].base_addr = (unsigned long) mlist->start_adr; - sp_banks[i].num_bytes = mlist->num_bytes; - } - - i++; - sp_banks[i].base_addr = 0xdeadbeef; - sp_banks[i].num_bytes = 0; - - /* Now mask all bank sizes on a page boundary, it is all we can - * use anyways. - */ - for(i=0; sp_banks[i].num_bytes != 0; i++) - sp_banks[i].num_bytes &= PAGE_MASK; - - return tally; -} - -/* Traverse the memory lists in the prom to see how much physical we - * have. - */ -unsigned long -probe_memory(void) -{ - int total; - - total = prom_probe_memory(); - - /* Oh man, much nicer, keep the dirt in promlib. */ - return total; -} - -extern void sun4c_complete_all_stores(void); - -/* Whee, a level 15 NMI interrupt memory error. Let's have fun... 
*/ -asmlinkage void sparc_lvl15_nmi(struct pt_regs *regs, unsigned long serr, - unsigned long svaddr, unsigned long aerr, - unsigned long avaddr) -{ - sun4c_complete_all_stores(); - printk("FAULT: NMI received\n"); - printk("SREGS: Synchronous Error %08lx\n", serr); - printk(" Synchronous Vaddr %08lx\n", svaddr); - printk(" Asynchronous Error %08lx\n", aerr); - printk(" Asynchronous Vaddr %08lx\n", avaddr); - if (sun4c_memerr_reg) - printk(" Memory Parity Error %08lx\n", *sun4c_memerr_reg); - printk("REGISTER DUMP:\n"); - show_regs(regs); - prom_halt(); -} - -static void unhandled_fault(unsigned long, struct task_struct *, - struct pt_regs *) __attribute__ ((noreturn)); - -static void unhandled_fault(unsigned long address, struct task_struct *tsk, - struct pt_regs *regs) -{ - if((unsigned long) address < PAGE_SIZE) { - printk(KERN_ALERT - "Unable to handle kernel NULL pointer dereference\n"); - } else { - printk(KERN_ALERT "Unable to handle kernel paging request " - "at virtual address %08lx\n", address); - } - printk(KERN_ALERT "tsk->{mm,active_mm}->context = %08lx\n", - (tsk->mm ? tsk->mm->context : tsk->active_mm->context)); - printk(KERN_ALERT "tsk->{mm,active_mm}->pgd = %08lx\n", - (tsk->mm ? 
(unsigned long) tsk->mm->pgd : - (unsigned long) tsk->active_mm->pgd)); - die_if_kernel("Oops", regs); -} - -asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc, - unsigned long address) -{ - struct pt_regs regs; - unsigned long g2; - unsigned int insn; - int i; - - i = search_extables_range(ret_pc, &g2); - switch (i) { - case 3: - /* load & store will be handled by fixup */ - return 3; - - case 1: - /* store will be handled by fixup, load will bump out */ - /* for _to_ macros */ - insn = *((unsigned int *) pc); - if ((insn >> 21) & 1) - return 1; - break; - - case 2: - /* load will be handled by fixup, store will bump out */ - /* for _from_ macros */ - insn = *((unsigned int *) pc); - if (!((insn >> 21) & 1) || ((insn>>19)&0x3f) == 15) - return 2; - break; - - default: - break; - }; - - memset(&regs, 0, sizeof (regs)); - regs.pc = pc; - regs.npc = pc + 4; - __asm__ __volatile__( - "rd %%psr, %0\n\t" - "nop\n\t" - "nop\n\t" - "nop\n" : "=r" (regs.psr)); - unhandled_fault(address, current, &regs); - - /* Not reached */ - return 0; -} - -extern unsigned long safe_compute_effective_address(struct pt_regs *, - unsigned int); - -static unsigned long compute_si_addr(struct pt_regs *regs, int text_fault) -{ - unsigned int insn; - - if (text_fault) - return regs->pc; - - if (regs->psr & PSR_PS) { - insn = *(unsigned int *) regs->pc; - } else { - __get_user(insn, (unsigned int *) regs->pc); - } - - return safe_compute_effective_address(regs, insn); -} - -asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, - unsigned long address) -{ - struct vm_area_struct *vma; - struct task_struct *tsk = current; - struct mm_struct *mm = tsk->mm; - unsigned int fixup; - unsigned long g2; - siginfo_t info; - int from_user = !(regs->psr & PSR_PS); - int fault; - - if(text_fault) - address = regs->pc; - - /* - * We fault-in kernel-space virtual memory on-demand. The - * 'reference' page table is init_mm.pgd. - * - * NOTE!
We MUST NOT take any locks for this case. We may - * be in an interrupt or a critical region, and should - * only copy the information from the master page table, - * nothing more. - */ - if (!ARCH_SUN4C_SUN4 && address >= TASK_SIZE) - goto vmalloc_fault; - - info.si_code = SEGV_MAPERR; - - /* - * If we're in an interrupt or have no user - * context, we must not take the fault.. - */ - if (in_atomic() || !mm) - goto no_context; - - down_read(&mm->mmap_sem); - - /* - * The kernel referencing a bad kernel pointer can lock up - * a sun4c machine completely, so we must attempt recovery. - */ - if(!from_user && address >= PAGE_OFFSET) - goto bad_area; - - vma = find_vma(mm, address); - if(!vma) - goto bad_area; - if(vma->vm_start <= address) - goto good_area; - if(!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - if(expand_stack(vma, address)) - goto bad_area; - /* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. - */ -good_area: - info.si_code = SEGV_ACCERR; - if(write) { - if(!(vma->vm_flags & VM_WRITE)) - goto bad_area; - } else { - /* Allow reads even for write-only mappings */ - if(!(vma->vm_flags & (VM_READ | VM_EXEC))) - goto bad_area; - } - - /* - * If for any reason at all we couldn't handle the fault, - * make sure we exit gracefully rather than endlessly redo - * the fault. - */ - fault = handle_mm_fault(mm, vma, address, write); - if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGBUS) - goto do_sigbus; - BUG(); - } - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; - up_read(&mm->mmap_sem); - return; - - /* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. 
- */ -bad_area: - up_read(&mm->mmap_sem); - -bad_area_nosemaphore: - /* User mode accesses just cause a SIGSEGV */ - if(from_user) { -#if 0 - printk("Fault whee %s [%d]: segfaults at %08lx pc=%08lx\n", - tsk->comm, tsk->pid, address, regs->pc); -#endif - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* info.si_code set above to make clear whether - this was a SEGV_MAPERR or SEGV_ACCERR fault. */ - info.si_addr = (void __user *)compute_si_addr(regs, text_fault); - info.si_trapno = 0; - force_sig_info (SIGSEGV, &info, tsk); - return; - } - - /* Is this in ex_table? */ -no_context: - g2 = regs->u_regs[UREG_G2]; - if (!from_user && (fixup = search_extables_range(regs->pc, &g2))) { - if (fixup > 10) { /* Values below are reserved for other things */ - extern const unsigned __memset_start[]; - extern const unsigned __memset_end[]; - extern const unsigned __csum_partial_copy_start[]; - extern const unsigned __csum_partial_copy_end[]; - -#ifdef DEBUG_EXCEPTIONS - printk("Exception: PC<%08lx> faddr<%08lx>\n", regs->pc, address); - printk("EX_TABLE: insn<%08lx> fixup<%08x> g2<%08lx>\n", - regs->pc, fixup, g2); -#endif - if ((regs->pc >= (unsigned long)__memset_start && - regs->pc < (unsigned long)__memset_end) || - (regs->pc >= (unsigned long)__csum_partial_copy_start && - regs->pc < (unsigned long)__csum_partial_copy_end)) { - regs->u_regs[UREG_I4] = address; - regs->u_regs[UREG_I5] = regs->pc; - } - regs->u_regs[UREG_G2] = g2; - regs->pc = fixup; - regs->npc = regs->pc + 4; - return; - } - } - - unhandled_fault (address, tsk, regs); - do_exit(SIGKILL); - -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. 
- */ -out_of_memory: - up_read(&mm->mmap_sem); - printk("VM: killing process %s\n", tsk->comm); - if (from_user) - do_group_exit(SIGKILL); - goto no_context; - -do_sigbus: - up_read(&mm->mmap_sem); - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *) compute_si_addr(regs, text_fault); - info.si_trapno = 0; - force_sig_info (SIGBUS, &info, tsk); - if (!from_user) - goto no_context; - -vmalloc_fault: - { - /* - * Synchronize this task's top level page-table - * with the 'reference' page table. - */ - int offset = pgd_index(address); - pgd_t *pgd, *pgd_k; - pmd_t *pmd, *pmd_k; - - pgd = tsk->active_mm->pgd + offset; - pgd_k = init_mm.pgd + offset; - - if (!pgd_present(*pgd)) { - if (!pgd_present(*pgd_k)) - goto bad_area_nosemaphore; - pgd_val(*pgd) = pgd_val(*pgd_k); - return; - } - - pmd = pmd_offset(pgd, address); - pmd_k = pmd_offset(pgd_k, address); - - if (pmd_present(*pmd) || !pmd_present(*pmd_k)) - goto bad_area_nosemaphore; - *pmd = *pmd_k; - return; - } -} - -asmlinkage void do_sun4c_fault(struct pt_regs *regs, int text_fault, int write, - unsigned long address) -{ - extern void sun4c_update_mmu_cache(struct vm_area_struct *, - unsigned long,pte_t); - extern pte_t *sun4c_pte_offset_kernel(pmd_t *,unsigned long); - struct task_struct *tsk = current; - struct mm_struct *mm = tsk->mm; - pgd_t *pgdp; - pte_t *ptep; - - if (text_fault) { - address = regs->pc; - } else if (!write && - !(regs->psr & PSR_PS)) { - unsigned int insn, __user *ip; - - ip = (unsigned int __user *)regs->pc; - if (!get_user(insn, ip)) { - if ((insn & 0xc1680000) == 0xc0680000) - write = 1; - } - } - - if (!mm) { - /* We are oopsing. 
*/ - do_sparc_fault(regs, text_fault, write, address); - BUG(); /* P3 Oops already, you bitch */ - } - - pgdp = pgd_offset(mm, address); - ptep = sun4c_pte_offset_kernel((pmd_t *) pgdp, address); - - if (pgd_val(*pgdp)) { - if (write) { - if ((pte_val(*ptep) & (_SUN4C_PAGE_WRITE|_SUN4C_PAGE_PRESENT)) - == (_SUN4C_PAGE_WRITE|_SUN4C_PAGE_PRESENT)) { - unsigned long flags; - - *ptep = __pte(pte_val(*ptep) | _SUN4C_PAGE_ACCESSED | - _SUN4C_PAGE_MODIFIED | - _SUN4C_PAGE_VALID | - _SUN4C_PAGE_DIRTY); - - local_irq_save(flags); - if (sun4c_get_segmap(address) != invalid_segment) { - sun4c_put_pte(address, pte_val(*ptep)); - local_irq_restore(flags); - return; - } - local_irq_restore(flags); - } - } else { - if ((pte_val(*ptep) & (_SUN4C_PAGE_READ|_SUN4C_PAGE_PRESENT)) - == (_SUN4C_PAGE_READ|_SUN4C_PAGE_PRESENT)) { - unsigned long flags; - - *ptep = __pte(pte_val(*ptep) | _SUN4C_PAGE_ACCESSED | - _SUN4C_PAGE_VALID); - - local_irq_save(flags); - if (sun4c_get_segmap(address) != invalid_segment) { - sun4c_put_pte(address, pte_val(*ptep)); - local_irq_restore(flags); - return; - } - local_irq_restore(flags); - } - } - } - - /* This conditional is 'interesting'. */ - if (pgd_val(*pgdp) && !(write && !(pte_val(*ptep) & _SUN4C_PAGE_WRITE)) - && (pte_val(*ptep) & _SUN4C_PAGE_VALID)) - /* Note: It is safe to not grab the MMAP semaphore here because - * we know that update_mmu_cache() will not sleep for - * any reason (at least not in the current implementation) - * and therefore there is no danger of another thread getting - * on the CPU and doing a shrink_mmap() on this vma. - */ - sun4c_update_mmu_cache (find_vma(current->mm, address), address, - *ptep); - else - do_sparc_fault(regs, text_fault, write, address); -} - -/* This always deals with user addresses. 
*/ -inline void force_user_fault(unsigned long address, int write) -{ - struct vm_area_struct *vma; - struct task_struct *tsk = current; - struct mm_struct *mm = tsk->mm; - siginfo_t info; - - info.si_code = SEGV_MAPERR; - -#if 0 - printk("wf<pid=%d,wr=%d,addr=%08lx>\n", - tsk->pid, write, address); -#endif - down_read(&mm->mmap_sem); - vma = find_vma(mm, address); - if(!vma) - goto bad_area; - if(vma->vm_start <= address) - goto good_area; - if(!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - if(expand_stack(vma, address)) - goto bad_area; -good_area: - info.si_code = SEGV_ACCERR; - if(write) { - if(!(vma->vm_flags & VM_WRITE)) - goto bad_area; - } else { - if(!(vma->vm_flags & (VM_READ | VM_EXEC))) - goto bad_area; - } - switch (handle_mm_fault(mm, vma, address, write)) { - case VM_FAULT_SIGBUS: - case VM_FAULT_OOM: - goto do_sigbus; - } - up_read(&mm->mmap_sem); - return; -bad_area: - up_read(&mm->mmap_sem); -#if 0 - printk("Window whee %s [%d]: segfaults at %08lx\n", - tsk->comm, tsk->pid, address); -#endif - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* info.si_code set above to make clear whether - this was a SEGV_MAPERR or SEGV_ACCERR fault. 
*/ - info.si_addr = (void __user *) address; - info.si_trapno = 0; - force_sig_info (SIGSEGV, &info, tsk); - return; - -do_sigbus: - up_read(&mm->mmap_sem); - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *) address; - info.si_trapno = 0; - force_sig_info (SIGBUS, &info, tsk); -} - -void window_overflow_fault(void) -{ - unsigned long sp; - - sp = current_thread_info()->rwbuf_stkptrs[0]; - if(((sp + 0x38) & PAGE_MASK) != (sp & PAGE_MASK)) - force_user_fault(sp + 0x38, 1); - force_user_fault(sp, 1); -} - -void window_underflow_fault(unsigned long sp) -{ - if(((sp + 0x38) & PAGE_MASK) != (sp & PAGE_MASK)) - force_user_fault(sp + 0x38, 0); - force_user_fault(sp, 0); -} - -void window_ret_fault(struct pt_regs *regs) -{ - unsigned long sp; - - sp = regs->u_regs[UREG_FP]; - if(((sp + 0x38) & PAGE_MASK) != (sp & PAGE_MASK)) - force_user_fault(sp + 0x38, 0); - force_user_fault(sp, 0); -} diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c new file mode 100644 index 00000000000..908e8c17c90 --- /dev/null +++ b/arch/sparc/mm/fault_32.c @@ -0,0 +1,466 @@ +/* + * fault.c: Page fault handlers for the Sparc. + * + * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1996 Eddie C. 
Dost (ecd@skynet.be) + * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + */ + +#include <asm/head.h> + +#include <linux/string.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/threads.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/perf_event.h> +#include <linux/interrupt.h> +#include <linux/kdebug.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/openprom.h> +#include <asm/oplib.h> +#include <asm/setup.h> +#include <asm/smp.h> +#include <asm/traps.h> +#include <asm/uaccess.h> + +#include "mm_32.h" + +int show_unhandled_signals = 1; + +static void __noreturn unhandled_fault(unsigned long address, + struct task_struct *tsk, + struct pt_regs *regs) +{ + if ((unsigned long) address < PAGE_SIZE) { + printk(KERN_ALERT + "Unable to handle kernel NULL pointer dereference\n"); + } else { + printk(KERN_ALERT "Unable to handle kernel paging request at virtual address %08lx\n", + address); + } + printk(KERN_ALERT "tsk->{mm,active_mm}->context = %08lx\n", + (tsk->mm ? tsk->mm->context : tsk->active_mm->context)); + printk(KERN_ALERT "tsk->{mm,active_mm}->pgd = %08lx\n", + (tsk->mm ? 
(unsigned long) tsk->mm->pgd : + (unsigned long) tsk->active_mm->pgd)); + die_if_kernel("Oops", regs); +} + +asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc, + unsigned long address) +{ + struct pt_regs regs; + unsigned long g2; + unsigned int insn; + int i; + + i = search_extables_range(ret_pc, &g2); + switch (i) { + case 3: + /* load & store will be handled by fixup */ + return 3; + + case 1: + /* store will be handled by fixup, load will bump out */ + /* for _to_ macros */ + insn = *((unsigned int *) pc); + if ((insn >> 21) & 1) + return 1; + break; + + case 2: + /* load will be handled by fixup, store will bump out */ + /* for _from_ macros */ + insn = *((unsigned int *) pc); + if (!((insn >> 21) & 1) || ((insn>>19)&0x3f) == 15) + return 2; + break; + + default: + break; + } + + memset(&regs, 0, sizeof(regs)); + regs.pc = pc; + regs.npc = pc + 4; + __asm__ __volatile__( + "rd %%psr, %0\n\t" + "nop\n\t" + "nop\n\t" + "nop\n" : "=r" (regs.psr)); + unhandled_fault(address, current, &regs); + + /* Not reached */ + return 0; +} + +static inline void +show_signal_msg(struct pt_regs *regs, int sig, int code, + unsigned long address, struct task_struct *tsk) +{ + if (!unhandled_signal(tsk, sig)) + return; + + if (!printk_ratelimit()) + return; + + printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x", + task_pid_nr(tsk) > 1 ?
KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), address, + (void *)regs->pc, (void *)regs->u_regs[UREG_I7], + (void *)regs->u_regs[UREG_FP], code); + + print_vma_addr(KERN_CONT " in ", regs->pc); + + printk(KERN_CONT "\n"); +} + +static void __do_fault_siginfo(int code, int sig, struct pt_regs *regs, + unsigned long addr) +{ + siginfo_t info; + + info.si_signo = sig; + info.si_code = code; + info.si_errno = 0; + info.si_addr = (void __user *) addr; + info.si_trapno = 0; + + if (unlikely(show_unhandled_signals)) + show_signal_msg(regs, sig, info.si_code, + addr, current); + + force_sig_info (sig, &info, current); +} + +static unsigned long compute_si_addr(struct pt_regs *regs, int text_fault) +{ + unsigned int insn; + + if (text_fault) + return regs->pc; + + if (regs->psr & PSR_PS) + insn = *(unsigned int *) regs->pc; + else + __get_user(insn, (unsigned int *) regs->pc); + + return safe_compute_effective_address(regs, insn); +} + +static noinline void do_fault_siginfo(int code, int sig, struct pt_regs *regs, + int text_fault) +{ + unsigned long addr = compute_si_addr(regs, text_fault); + + __do_fault_siginfo(code, sig, regs, addr); +} + +asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, + unsigned long address) +{ + struct vm_area_struct *vma; + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + unsigned int fixup; + unsigned long g2; + int from_user = !(regs->psr & PSR_PS); + int fault, code; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + if (text_fault) + address = regs->pc; + + /* + * We fault-in kernel-space virtual memory on-demand. The + * 'reference' page table is init_mm.pgd. + * + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. 
+ */ + code = SEGV_MAPERR; + if (address >= TASK_SIZE) + goto vmalloc_fault; + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault.. + */ + if (in_atomic() || !mm) + goto no_context; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + +retry: + down_read(&mm->mmap_sem); + + if (!from_user && address >= PAGE_OFFSET) + goto bad_area; + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (expand_stack(vma, address)) + goto bad_area; + /* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ +good_area: + code = SEGV_ACCERR; + if (write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + } else { + /* Allow reads even for write-only mappings */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + + if (from_user) + flags |= FAULT_FLAG_USER; + if (write) + flags |= FAULT_FLAG_WRITE; + + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + fault = handle_mm_fault(mm, vma, address, flags); + + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + return; + + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); + } + + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + current->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, + 1, regs, address); + } else { + current->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, + 1, regs, address); + } + if (fault & VM_FAULT_RETRY) { + flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; + + /* No need to up_read(&mm->mmap_sem) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. 
+ */ + + goto retry; + } + } + + up_read(&mm->mmap_sem); + return; + + /* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ +bad_area: + up_read(&mm->mmap_sem); + +bad_area_nosemaphore: + /* User mode accesses just cause a SIGSEGV */ + if (from_user) { + do_fault_siginfo(code, SIGSEGV, regs, text_fault); + return; + } + + /* Is this in ex_table? */ +no_context: + g2 = regs->u_regs[UREG_G2]; + if (!from_user) { + fixup = search_extables_range(regs->pc, &g2); + /* Values below 10 are reserved for other things */ + if (fixup > 10) { + extern const unsigned __memset_start[]; + extern const unsigned __memset_end[]; + extern const unsigned __csum_partial_copy_start[]; + extern const unsigned __csum_partial_copy_end[]; + +#ifdef DEBUG_EXCEPTIONS + printk("Exception: PC<%08lx> faddr<%08lx>\n", + regs->pc, address); + printk("EX_TABLE: insn<%08lx> fixup<%08x> g2<%08lx>\n", + regs->pc, fixup, g2); +#endif + if ((regs->pc >= (unsigned long)__memset_start && + regs->pc < (unsigned long)__memset_end) || + (regs->pc >= (unsigned long)__csum_partial_copy_start && + regs->pc < (unsigned long)__csum_partial_copy_end)) { + regs->u_regs[UREG_I4] = address; + regs->u_regs[UREG_I5] = regs->pc; + } + regs->u_regs[UREG_G2] = g2; + regs->pc = fixup; + regs->npc = regs->pc + 4; + return; + } + } + + unhandled_fault(address, tsk, regs); + do_exit(SIGKILL); + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +out_of_memory: + up_read(&mm->mmap_sem); + if (from_user) { + pagefault_out_of_memory(); + return; + } + goto no_context; + +do_sigbus: + up_read(&mm->mmap_sem); + do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, text_fault); + if (!from_user) + goto no_context; + +vmalloc_fault: + { + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. 
+ */ + int offset = pgd_index(address); + pgd_t *pgd, *pgd_k; + pmd_t *pmd, *pmd_k; + + pgd = tsk->active_mm->pgd + offset; + pgd_k = init_mm.pgd + offset; + + if (!pgd_present(*pgd)) { + if (!pgd_present(*pgd_k)) + goto bad_area_nosemaphore; + pgd_val(*pgd) = pgd_val(*pgd_k); + return; + } + + pmd = pmd_offset(pgd, address); + pmd_k = pmd_offset(pgd_k, address); + + if (pmd_present(*pmd) || !pmd_present(*pmd_k)) + goto bad_area_nosemaphore; + + *pmd = *pmd_k; + return; + } +} + +/* This always deals with user addresses. */ +static void force_user_fault(unsigned long address, int write) +{ + struct vm_area_struct *vma; + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + unsigned int flags = FAULT_FLAG_USER; + int code; + + code = SEGV_MAPERR; + + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (expand_stack(vma, address)) + goto bad_area; +good_area: + code = SEGV_ACCERR; + if (write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + flags |= FAULT_FLAG_WRITE; + } else { + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + switch (handle_mm_fault(mm, vma, address, flags)) { + case VM_FAULT_SIGBUS: + case VM_FAULT_OOM: + goto do_sigbus; + } + up_read(&mm->mmap_sem); + return; +bad_area: + up_read(&mm->mmap_sem); + __do_fault_siginfo(code, SIGSEGV, tsk->thread.kregs, address); + return; + +do_sigbus: + up_read(&mm->mmap_sem); + __do_fault_siginfo(BUS_ADRERR, SIGBUS, tsk->thread.kregs, address); +} + +static void check_stack_aligned(unsigned long sp) +{ + if (sp & 0x7UL) + force_sig(SIGILL, current); +} + +void window_overflow_fault(void) +{ + unsigned long sp; + + sp = current_thread_info()->rwbuf_stkptrs[0]; + if (((sp + 0x38) & PAGE_MASK) != (sp & PAGE_MASK)) + force_user_fault(sp + 0x38, 1); + force_user_fault(sp, 1); + + check_stack_aligned(sp); +} + +void 
window_underflow_fault(unsigned long sp) +{ + if (((sp + 0x38) & PAGE_MASK) != (sp & PAGE_MASK)) + force_user_fault(sp + 0x38, 0); + force_user_fault(sp, 0); + + check_stack_aligned(sp); +} + +void window_ret_fault(struct pt_regs *regs) +{ + unsigned long sp; + + sp = regs->u_regs[UREG_FP]; + if (((sp + 0x38) & PAGE_MASK) != (sp & PAGE_MASK)) + force_user_fault(sp + 0x38, 0); + force_user_fault(sp, 0); + + check_stack_aligned(sp); +} diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c new file mode 100644 index 00000000000..587cd056512 --- /dev/null +++ b/arch/sparc/mm/fault_64.c @@ -0,0 +1,539 @@ +/* + * arch/sparc64/mm/fault.c: Page fault handlers for the 64-bit Sparc. + * + * Copyright (C) 1996, 2008 David S. Miller (davem@davemloft.net) + * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz) + */ + +#include <asm/head.h> + +#include <linux/string.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/signal.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/perf_event.h> +#include <linux/interrupt.h> +#include <linux/kprobes.h> +#include <linux/kdebug.h> +#include <linux/percpu.h> +#include <linux/context_tracking.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/openprom.h> +#include <asm/oplib.h> +#include <asm/uaccess.h> +#include <asm/asi.h> +#include <asm/lsu.h> +#include <asm/sections.h> +#include <asm/mmu_context.h> +#include <asm/setup.h> + +int show_unhandled_signals = 1; + +static inline __kprobes int notify_page_fault(struct pt_regs *regs) +{ + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ + if (kprobes_built_in() && !user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 0)) + ret = 1; + preempt_enable(); + } + return ret; +} + +static void __kprobes unhandled_fault(unsigned long address, + struct task_struct *tsk, + struct pt_regs *regs) +{ + 
if ((unsigned long) address < PAGE_SIZE) { + printk(KERN_ALERT "Unable to handle kernel NULL " + "pointer dereference\n"); + } else { + printk(KERN_ALERT "Unable to handle kernel paging request " + "at virtual address %016lx\n", (unsigned long)address); + } + printk(KERN_ALERT "tsk->{mm,active_mm}->context = %016lx\n", + (tsk->mm ? + CTX_HWBITS(tsk->mm->context) : + CTX_HWBITS(tsk->active_mm->context))); + printk(KERN_ALERT "tsk->{mm,active_mm}->pgd = %016lx\n", + (tsk->mm ? (unsigned long) tsk->mm->pgd : + (unsigned long) tsk->active_mm->pgd)); + die_if_kernel("Oops", regs); +} + +static void __kprobes bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr) +{ + printk(KERN_CRIT "OOPS: Bogus kernel PC [%016lx] in fault handler\n", + regs->tpc); + printk(KERN_CRIT "OOPS: RPC [%016lx]\n", regs->u_regs[15]); + printk("OOPS: RPC <%pS>\n", (void *) regs->u_regs[15]); + printk(KERN_CRIT "OOPS: Fault was to vaddr[%lx]\n", vaddr); + dump_stack(); + unhandled_fault(regs->tpc, current, regs); +} + +/* + * We now make sure that mmap_sem is held in all paths that call + * this. Additionally, to prevent kswapd from ripping ptes from + * under us, raise interrupts around the time that we look at the + * pte, kswapd will have to wait to get his smp ipi response from + * us. vmtruncate likewise. This saves us having to get pte lock. + */ +static unsigned int get_user_insn(unsigned long tpc) +{ + pgd_t *pgdp = pgd_offset(current->mm, tpc); + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep, pte; + unsigned long pa; + u32 insn = 0; + + if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp))) + goto out; + pudp = pud_offset(pgdp, tpc); + if (pud_none(*pudp) || unlikely(pud_bad(*pudp))) + goto out; + + /* This disables preemption for us as well. 
*/ + local_irq_disable(); + + pmdp = pmd_offset(pudp, tpc); + if (pmd_none(*pmdp) || unlikely(pmd_bad(*pmdp))) + goto out_irq_enable; + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (pmd_trans_huge(*pmdp)) { + if (pmd_trans_splitting(*pmdp)) + goto out_irq_enable; + + pa = pmd_pfn(*pmdp) << PAGE_SHIFT; + pa += tpc & ~HPAGE_MASK; + + /* Use phys bypass so we don't pollute dtlb/dcache. */ + __asm__ __volatile__("lduwa [%1] %2, %0" + : "=r" (insn) + : "r" (pa), "i" (ASI_PHYS_USE_EC)); + } else +#endif + { + ptep = pte_offset_map(pmdp, tpc); + pte = *ptep; + if (pte_present(pte)) { + pa = (pte_pfn(pte) << PAGE_SHIFT); + pa += (tpc & ~PAGE_MASK); + + /* Use phys bypass so we don't pollute dtlb/dcache. */ + __asm__ __volatile__("lduwa [%1] %2, %0" + : "=r" (insn) + : "r" (pa), "i" (ASI_PHYS_USE_EC)); + } + pte_unmap(ptep); + } +out_irq_enable: + local_irq_enable(); +out: + return insn; +} + +static inline void +show_signal_msg(struct pt_regs *regs, int sig, int code, + unsigned long address, struct task_struct *tsk) +{ + if (!unhandled_signal(tsk, sig)) + return; + + if (!printk_ratelimit()) + return; + + printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x", + task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), address, + (void *)regs->tpc, (void *)regs->u_regs[UREG_I7], + (void *)regs->u_regs[UREG_FP], code); + + print_vma_addr(KERN_CONT " in ", regs->tpc); + + printk(KERN_CONT "\n"); +} + +static void do_fault_siginfo(int code, int sig, struct pt_regs *regs, + unsigned long fault_addr, unsigned int insn, + int fault_code) +{ + unsigned long addr; + siginfo_t info; + + info.si_code = code; + info.si_signo = sig; + info.si_errno = 0; + if (fault_code & FAULT_CODE_ITLB) { + addr = regs->tpc; + } else { + /* If we were able to probe the faulting instruction, use it + * to compute a precise fault address. Otherwise use the fault + * time provided address which may only have page granularity. 
+ */ + if (insn) + addr = compute_effective_address(regs, insn, 0); + else + addr = fault_addr; + } + info.si_addr = (void __user *) addr; + info.si_trapno = 0; + + if (unlikely(show_unhandled_signals)) + show_signal_msg(regs, sig, code, addr, current); + + force_sig_info(sig, &info, current); +} + +static unsigned int get_fault_insn(struct pt_regs *regs, unsigned int insn) +{ + if (!insn) { + if (!regs->tpc || (regs->tpc & 0x3)) + return 0; + if (regs->tstate & TSTATE_PRIV) { + insn = *(unsigned int *) regs->tpc; + } else { + insn = get_user_insn(regs->tpc); + } + } + return insn; +} + +static void __kprobes do_kernel_fault(struct pt_regs *regs, int si_code, + int fault_code, unsigned int insn, + unsigned long address) +{ + unsigned char asi = ASI_P; + + if ((!insn) && (regs->tstate & TSTATE_PRIV)) + goto cannot_handle; + + /* If user insn could not be read (thus insn is zero), that + * is fine. We will just gun down the process with a signal + * in that case. + */ + + if (!(fault_code & (FAULT_CODE_WRITE|FAULT_CODE_ITLB)) && + (insn & 0xc0800000) == 0xc0800000) { + if (insn & 0x2000) + asi = (regs->tstate >> 24); + else + asi = (insn >> 5); + if ((asi & 0xf2) == 0x82) { + if (insn & 0x1000000) { + handle_ldf_stq(insn, regs); + } else { + /* This was a non-faulting load. Just clear the + * destination register(s) and continue with the next + * instruction. -jj + */ + handle_ld_nf(insn, regs); + } + return; + } + } + + /* Is this in ex_table? */ + if (regs->tstate & TSTATE_PRIV) { + const struct exception_table_entry *entry; + + entry = search_exception_tables(regs->tpc); + if (entry) { + regs->tpc = entry->fixup; + regs->tnpc = regs->tpc + 4; + return; + } + } else { + /* The si_code was set to make clear whether + * this was a SEGV_MAPERR or SEGV_ACCERR fault. 
+ */ + do_fault_siginfo(si_code, SIGSEGV, regs, address, insn, fault_code); + return; + } + +cannot_handle: + unhandled_fault (address, current, regs); +} + +static void noinline __kprobes bogus_32bit_fault_tpc(struct pt_regs *regs) +{ + static int times; + + if (times++ < 10) + printk(KERN_ERR "FAULT[%s:%d]: 32-bit process reports " + "64-bit TPC [%lx]\n", + current->comm, current->pid, + regs->tpc); + show_regs(regs); +} + +asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) +{ + enum ctx_state prev_state = exception_enter(); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned int insn = 0; + int si_code, fault_code, fault; + unsigned long address, mm_rss; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + fault_code = get_thread_fault_code(); + + if (notify_page_fault(regs)) + goto exit_exception; + + si_code = SEGV_MAPERR; + address = current_thread_info()->fault_address; + + if ((fault_code & FAULT_CODE_ITLB) && + (fault_code & FAULT_CODE_DTLB)) + BUG(); + + if (test_thread_flag(TIF_32BIT)) { + if (!(regs->tstate & TSTATE_PRIV)) { + if (unlikely((regs->tpc >> 32) != 0)) { + bogus_32bit_fault_tpc(regs); + goto intr_or_no_mm; + } + } + if (unlikely((address >> 32) != 0)) + goto intr_or_no_mm; + } + + if (regs->tstate & TSTATE_PRIV) { + unsigned long tpc = regs->tpc; + + /* Sanity check the PC. */ + if ((tpc >= KERNBASE && tpc < (unsigned long) __init_end) || + (tpc >= MODULES_VADDR && tpc < MODULES_END)) { + /* Valid, no problems... */ + } else { + bad_kernel_pc(regs, address); + goto exit_exception; + } + } else + flags |= FAULT_FLAG_USER; + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault.. 
+ */ + if (in_atomic() || !mm) + goto intr_or_no_mm; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + + if (!down_read_trylock(&mm->mmap_sem)) { + if ((regs->tstate & TSTATE_PRIV) && + !search_exception_tables(regs->tpc)) { + insn = get_fault_insn(regs, insn); + goto handle_kernel_fault; + } + +retry: + down_read(&mm->mmap_sem); + } + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + + /* Pure DTLB misses do not tell us whether the fault causing + * load/store/atomic was a write or not, it only says that there + * was no match. So in such a case we (carefully) read the + * instruction to try and figure this out. It's an optimization + * so it's ok if we can't do this. + * + * Special hack, window spill/fill knows the exact fault type. + */ + if (((fault_code & + (FAULT_CODE_DTLB | FAULT_CODE_WRITE | FAULT_CODE_WINFIXUP)) == FAULT_CODE_DTLB) && + (vma->vm_flags & VM_WRITE) != 0) { + insn = get_fault_insn(regs, 0); + if (!insn) + goto continue_fault; + /* All loads, stores and atomics have bits 30 and 31 both set + * in the instruction. Bit 21 is set in all stores, but we + * have to avoid prefetches which also have bit 21 set. + */ + if ((insn & 0xc0200000) == 0xc0200000 && + (insn & 0x01780000) != 0x01680000) { + /* Don't bother updating thread struct value, + * because update_mmu_cache only cares which tlb + * the access came from. + */ + fault_code |= FAULT_CODE_WRITE; + } + } +continue_fault: + + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (!(fault_code & FAULT_CODE_WRITE)) { + /* Non-faulting loads shouldn't expand stack. 
*/ + insn = get_fault_insn(regs, insn); + if ((insn & 0xc0800000) == 0xc0800000) { + unsigned char asi; + + if (insn & 0x2000) + asi = (regs->tstate >> 24); + else + asi = (insn >> 5); + if ((asi & 0xf2) == 0x82) + goto bad_area; + } + } + if (expand_stack(vma, address)) + goto bad_area; + /* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ +good_area: + si_code = SEGV_ACCERR; + + /* If we took a ITLB miss on a non-executable page, catch + * that here. + */ + if ((fault_code & FAULT_CODE_ITLB) && !(vma->vm_flags & VM_EXEC)) { + BUG_ON(address != regs->tpc); + BUG_ON(regs->tstate & TSTATE_PRIV); + goto bad_area; + } + + if (fault_code & FAULT_CODE_WRITE) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + + /* Spitfire has an icache which does not snoop + * processor stores. Later processors do... + */ + if (tlb_type == spitfire && + (vma->vm_flags & VM_EXEC) != 0 && + vma->vm_file != NULL) + set_thread_fault_code(fault_code | + FAULT_CODE_BLKCOMMIT); + + flags |= FAULT_FLAG_WRITE; + } else { + /* Allow reads even for write-only mappings */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + + fault = handle_mm_fault(mm, vma, address, flags); + + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + goto exit_exception; + + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); + } + + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + current->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, + 1, regs, address); + } else { + current->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, + 1, regs, address); + } + if (fault & VM_FAULT_RETRY) { + flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; + + /* No need to up_read(&mm->mmap_sem) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. 
+ */ + + goto retry; + } + } + up_read(&mm->mmap_sem); + + mm_rss = get_mm_rss(mm); +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE)); +#endif + if (unlikely(mm_rss > + mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit)) + tsb_grow(mm, MM_TSB_BASE, mm_rss); +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + mm_rss = mm->context.huge_pte_count; + if (unlikely(mm_rss > + mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) { + if (mm->context.tsb_block[MM_TSB_HUGE].tsb) + tsb_grow(mm, MM_TSB_HUGE, mm_rss); + else + hugetlb_setup(regs); + + } +#endif +exit_exception: + exception_exit(prev_state); + return; + + /* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ +bad_area: + insn = get_fault_insn(regs, insn); + up_read(&mm->mmap_sem); + +handle_kernel_fault: + do_kernel_fault(regs, si_code, fault_code, insn, address); + goto exit_exception; + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +out_of_memory: + insn = get_fault_insn(regs, insn); + up_read(&mm->mmap_sem); + if (!(regs->tstate & TSTATE_PRIV)) { + pagefault_out_of_memory(); + goto exit_exception; + } + goto handle_kernel_fault; + +intr_or_no_mm: + insn = get_fault_insn(regs, 0); + goto handle_kernel_fault; + +do_sigbus: + insn = get_fault_insn(regs, insn); + up_read(&mm->mmap_sem); + + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode. + */ + do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, address, insn, fault_code); + + /* Kernel mode? 
Handle exceptions or die */ + if (regs->tstate & TSTATE_PRIV) + goto handle_kernel_fault; +} diff --git a/arch/sparc/mm/generic.c b/arch/sparc/mm/generic.c deleted file mode 100644 index 1ef7fa03fef..00000000000 --- a/arch/sparc/mm/generic.c +++ /dev/null @@ -1,97 +0,0 @@ -/* $Id: generic.c,v 1.14 2001/12/21 04:56:15 davem Exp $ - * generic.c: Generic Sparc mm routines that are not dependent upon - * MMU type but are Sparc specific. - * - * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) - */ - -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/swap.h> -#include <linux/pagemap.h> - -#include <asm/pgalloc.h> -#include <asm/pgtable.h> -#include <asm/page.h> -#include <asm/cacheflush.h> -#include <asm/tlbflush.h> - -/* Remap IO memory, the same way as remap_pfn_range(), but use - * the obio memory space. - * - * They use a pgprot that sets PAGE_IO and does not check the - * mem_map table as this is independent of normal memory. - */ -static inline void io_remap_pte_range(struct mm_struct *mm, pte_t * pte, unsigned long address, unsigned long size, - unsigned long offset, pgprot_t prot, int space) -{ - unsigned long end; - - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - do { - set_pte_at(mm, address, pte, mk_pte_io(offset, prot, space)); - address += PAGE_SIZE; - offset += PAGE_SIZE; - pte++; - } while (address < end); -} - -static inline int io_remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size, - unsigned long offset, pgprot_t prot, int space) -{ - unsigned long end; - - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - offset -= address; - do { - pte_t * pte = pte_alloc_map(mm, pmd, address); - if (!pte) - return -ENOMEM; - io_remap_pte_range(mm, pte, address, end - address, address + offset, prot, space); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address < end); - return 0; -} - -int 
io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from, - unsigned long pfn, unsigned long size, pgprot_t prot) -{ - int error = 0; - pgd_t * dir; - unsigned long beg = from; - unsigned long end = from + size; - struct mm_struct *mm = vma->vm_mm; - int space = GET_IOSPACE(pfn); - unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT; - - /* See comment in mm/memory.c remap_pfn_range */ - vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; - vma->vm_pgoff = (offset >> PAGE_SHIFT) | - ((unsigned long)space << 28UL); - - offset -= from; - dir = pgd_offset(mm, from); - flush_cache_range(vma, beg, end); - - while (from < end) { - pmd_t *pmd = pmd_alloc(mm, dir, from); - error = -ENOMEM; - if (!pmd) - break; - error = io_remap_pmd_range(mm, pmd, from, end - from, offset + from, prot, space); - if (error) - break; - from = (from + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } - - flush_tlb_range(vma, beg, end); - return error; -} diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c new file mode 100644 index 00000000000..1aed0432c64 --- /dev/null +++ b/arch/sparc/mm/gup.c @@ -0,0 +1,237 @@ +/* + * Lockless get_user_pages_fast for sparc, cribbed from powerpc + * + * Copyright (C) 2008 Nick Piggin + * Copyright (C) 2008 Novell Inc. + */ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/vmstat.h> +#include <linux/pagemap.h> +#include <linux/rwsem.h> +#include <asm/pgtable.h> + +/* + * The performance critical leaf functions are made noinline otherwise gcc + * inlines everything into a single function which results in too much + * register pressure. 
+ */ +static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask, result; + pte_t *ptep; + + if (tlb_type == hypervisor) { + result = _PAGE_PRESENT_4V|_PAGE_P_4V; + if (write) + result |= _PAGE_WRITE_4V; + } else { + result = _PAGE_PRESENT_4U|_PAGE_P_4U; + if (write) + result |= _PAGE_WRITE_4U; + } + mask = result | _PAGE_SPECIAL; + + ptep = pte_offset_kernel(&pmd, addr); + do { + struct page *page, *head; + pte_t pte = *ptep; + + if ((pte_val(pte) & mask) != result) + return 0; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + + /* The hugepage case is simplified on sparc64 because + * we encode the sub-page pfn offsets into the + * hugepage PTEs. We could optimize this in the future + * use page_cache_add_speculative() for the hugepage case. + */ + page = pte_page(pte); + head = compound_head(page); + if (!page_cache_get_speculative(head)) + return 0; + if (unlikely(pte_val(pte) != pte_val(*ptep))) { + put_page(head); + return 0; + } + if (head != page) + get_huge_page_tail(page); + + pages[*nr] = page; + (*nr)++; + } while (ptep++, addr += PAGE_SIZE, addr != end); + + return 1; +} + +static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, + int *nr) +{ + struct page *head, *page, *tail; + int refs; + + if (!(pmd_val(pmd) & _PAGE_VALID)) + return 0; + + if (write && !pmd_write(pmd)) + return 0; + + refs = 0; + head = pmd_page(pmd); + page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + tail = page; + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + + if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) { + *nr -= refs; + while (refs--) + put_page(head); + return 0; + } + + /* Any tail page need their mapcount reference taken before we + * return. 
+ */ + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; + } + + return 1; +} + +static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pmd_t *pmdp; + + pmdp = pmd_offset(&pud, addr); + do { + pmd_t pmd = *pmdp; + + next = pmd_addr_end(addr, end); + if (pmd_none(pmd) || pmd_trans_splitting(pmd)) + return 0; + if (unlikely(pmd_large(pmd))) { + if (!gup_huge_pmd(pmdp, pmd, addr, next, + write, pages, nr)) + return 0; + } else if (!gup_pte_range(pmd, addr, next, write, + pages, nr)) + return 0; + } while (pmdp++, addr = next, addr != end); + + return 1; +} + +static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pud_t *pudp; + + pudp = pud_offset(&pgd, addr); + do { + pud_t pud = *pudp; + + next = pud_addr_end(addr, end); + if (pud_none(pud)) + return 0; + if (!gup_pmd_range(pud, addr, next, write, pages, nr)) + return 0; + } while (pudp++, addr = next, addr != end); + + return 1; +} + +int get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + pgd_t *pgdp; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + + /* + * XXX: batch / limit 'nr', to avoid large irq off latency + * needs some instrumenting to determine the common sizes used by + * important workloads (eg. DB2), and whether limiting the batch size + * will decrease performance. + * + * It seems like we're in the clear for the moment. Direct-IO is + * the main guy that batches up lots of get_user_pages, and even + * they are limited to 64-at-a-time which is not so many. + */ + /* + * This doesn't prevent pagetable teardown, but does prevent + * the pagetables from being freed on sparc. 
+ * + * So long as we atomically load page table pointers versus teardown, + * we can follow the address down to the page and take a ref on it. + */ + local_irq_disable(); + + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + goto slow; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + goto slow; + } while (pgdp++, addr = next, addr != end); + + local_irq_enable(); + + VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); + return nr; + + { + int ret; + +slow: + local_irq_enable(); + + /* Try to get the remaining pages with get_user_pages */ + start += nr << PAGE_SHIFT; + pages += nr; + + down_read(&mm->mmap_sem); + ret = get_user_pages(current, mm, start, + (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); + up_read(&mm->mmap_sem); + + /* Have to be a bit careful with return values */ + if (nr > 0) { + if (ret < 0) + ret = nr; + else + ret += nr; + } + + return ret; + } +} diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c index 01fc6c25429..449f864f0ce 100644 --- a/arch/sparc/mm/highmem.c +++ b/arch/sparc/mm/highmem.c @@ -22,23 +22,43 @@ * shared by CPUs, and so precious, and establishing them requires IPI. * Atomic kmaps are lightweight and we may have NCPUS more of them. 
*/ -#include <linux/mm.h> #include <linux/highmem.h> -#include <asm/pgalloc.h> +#include <linux/export.h> +#include <linux/mm.h> + #include <asm/cacheflush.h> #include <asm/tlbflush.h> -#include <asm/fixmap.h> +#include <asm/pgalloc.h> +#include <asm/vaddrs.h> + +pgprot_t kmap_prot; + +static pte_t *kmap_pte; -void *kmap_atomic(struct page *page, enum km_type type) +void __init kmap_init(void) +{ + unsigned long address; + pmd_t *dir; + + address = __fix_to_virt(FIX_KMAP_BEGIN); + dir = pmd_offset(pgd_offset_k(address), address); + + /* cache the first kmap pte */ + kmap_pte = pte_offset_kernel(dir, address); + kmap_prot = __pgprot(SRMMU_ET_PTE | SRMMU_PRIV | SRMMU_CACHE); +} + +void *kmap_atomic(struct page *page) { - unsigned long idx; unsigned long vaddr; + long idx, type; /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ pagefault_disable(); if (!PageHighMem(page)) return page_address(page); + type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); @@ -62,57 +82,49 @@ void *kmap_atomic(struct page *page, enum km_type type) return (void*) vaddr; } +EXPORT_SYMBOL(kmap_atomic); -void kunmap_atomic(void *kvaddr, enum km_type type) +void __kunmap_atomic(void *kvaddr) { -#ifdef CONFIG_DEBUG_HIGHMEM unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - unsigned long idx = type + KM_TYPE_NR*smp_processor_id(); + int type; if (vaddr < FIXADDR_START) { // FIXME pagefault_enable(); return; } - BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)); + type = kmap_atomic_idx(); -/* XXX Fix - Anton */ +#ifdef CONFIG_DEBUG_HIGHMEM + { + unsigned long idx; + + idx = type + KM_TYPE_NR * smp_processor_id(); + BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)); + + /* XXX Fix - Anton */ #if 0 - __flush_cache_one(vaddr); + __flush_cache_one(vaddr); #else - flush_cache_all(); + flush_cache_all(); #endif - /* - * force other mappings to Oops if they'll try to access - * this pte without first remap 
it - */ - pte_clear(&init_mm, vaddr, kmap_pte-idx); -/* XXX Fix - Anton */ + /* + * force other mappings to Oops if they'll try to access + * this pte without first remap it + */ + pte_clear(&init_mm, vaddr, kmap_pte-idx); + /* XXX Fix - Anton */ #if 0 - __flush_tlb_one(vaddr); + __flush_tlb_one(vaddr); #else - flush_tlb_all(); + flush_tlb_all(); #endif + } #endif + kmap_atomic_idx_pop(); pagefault_enable(); } - -/* We may be fed a pagetable here by ptep_to_xxx and others. */ -struct page *kmap_atomic_to_page(void *ptr) -{ - unsigned long idx, vaddr = (unsigned long)ptr; - pte_t *pte; - - if (vaddr < SRMMU_NOCACHE_VADDR) - return virt_to_page(ptr); - if (vaddr < PKMAP_BASE) - return pfn_to_page(__nocache_pa(vaddr) >> PAGE_SHIFT); - BUG_ON(vaddr < FIXADDR_START); - BUG_ON(vaddr > FIXADDR_TOP); - - idx = virt_to_fix(vaddr); - pte = kmap_pte - (idx - FIX_KMAP_BEGIN); - return pte_page(*pte); -} +EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c new file mode 100644 index 00000000000..d329537739c --- /dev/null +++ b/arch/sparc/mm/hugetlbpage.c @@ -0,0 +1,238 @@ +/* + * SPARC64 Huge TLB page support. + * + * Copyright (C) 2002, 2003, 2006 David S. 
Miller (davem@davemloft.net) + */ + +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/sysctl.h> + +#include <asm/mman.h> +#include <asm/pgalloc.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> +#include <asm/cacheflush.h> +#include <asm/mmu_context.h> + +/* Slightly simplified from the non-hugepage variant because by + * definition we don't have to worry about any page coloring stuff + */ + +static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags) +{ + unsigned long task_size = TASK_SIZE; + struct vm_unmapped_area_info info; + + if (test_thread_flag(TIF_32BIT)) + task_size = STACK_TOP32; + + info.flags = 0; + info.length = len; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = min(task_size, VA_EXCLUDE_START); + info.align_mask = PAGE_MASK & ~HPAGE_MASK; + info.align_offset = 0; + addr = vm_unmapped_area(&info); + + if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) { + VM_BUG_ON(addr != -ENOMEM); + info.low_limit = VA_EXCLUDE_END; + info.high_limit = task_size; + addr = vm_unmapped_area(&info); + } + + return addr; +} + +static unsigned long +hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, + const unsigned long pgoff, + const unsigned long flags) +{ + struct mm_struct *mm = current->mm; + unsigned long addr = addr0; + struct vm_unmapped_area_info info; + + /* This should only ever run for 32-bit processes. */ + BUG_ON(!test_thread_flag(TIF_32BIT)); + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = PAGE_SIZE; + info.high_limit = mm->mmap_base; + info.align_mask = PAGE_MASK & ~HPAGE_MASK; + info.align_offset = 0; + addr = vm_unmapped_area(&info); + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. 
This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = STACK_TOP32; + addr = vm_unmapped_area(&info); + } + + return addr; +} + +unsigned long +hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long task_size = TASK_SIZE; + + if (test_thread_flag(TIF_32BIT)) + task_size = STACK_TOP32; + + if (len & ~HPAGE_MASK) + return -EINVAL; + if (len > task_size) + return -ENOMEM; + + if (flags & MAP_FIXED) { + if (prepare_hugepage_range(file, addr, len)) + return -EINVAL; + return addr; + } + + if (addr) { + addr = ALIGN(addr, HPAGE_SIZE); + vma = find_vma(mm, addr); + if (task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + if (mm->get_unmapped_area == arch_get_unmapped_area) + return hugetlb_get_unmapped_area_bottomup(file, addr, len, + pgoff, flags); + else + return hugetlb_get_unmapped_area_topdown(file, addr, len, + pgoff, flags); +} + +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + + /* We must align the address, because our caller will run + * set_huge_pte_at() on whatever we return, which writes out + * all of the sub-ptes for the hugepage range. So we have + * to give it the first such sub-pte. 
+ */ + addr &= HPAGE_MASK; + + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (pud) { + pmd = pmd_alloc(mm, pud, addr); + if (pmd) + pte = pte_alloc_map(mm, NULL, pmd, addr); + } + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + + addr &= HPAGE_MASK; + + pgd = pgd_offset(mm, addr); + if (!pgd_none(*pgd)) { + pud = pud_offset(pgd, addr); + if (!pud_none(*pud)) { + pmd = pmd_offset(pud, addr); + if (!pmd_none(*pmd)) + pte = pte_offset_map(pmd, addr); + } + } + return pte; +} + +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + int i; + + if (!pte_present(*ptep) && pte_present(entry)) + mm->context.huge_pte_count++; + + addr &= HPAGE_MASK; + for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { + set_pte_at(mm, addr, ptep, entry); + ptep++; + addr += PAGE_SIZE; + pte_val(entry) += PAGE_SIZE; + } +} + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pte_t entry; + int i; + + entry = *ptep; + if (pte_present(entry)) + mm->context.huge_pte_count--; + + addr &= HPAGE_MASK; + + for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) { + pte_clear(mm, addr, ptep); + addr += PAGE_SIZE; + ptep++; + } + + return entry; +} + +struct page *follow_huge_addr(struct mm_struct *mm, + unsigned long address, int write) +{ + return ERR_PTR(-EINVAL); +} + +int pmd_huge(pmd_t pmd) +{ + return 0; +} + +int pud_huge(pud_t pud) +{ + return 0; +} + +struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + return NULL; +} diff --git a/arch/sparc/mm/hypersparc.S b/arch/sparc/mm/hypersparc.S index d29cc24c5bb..969f96450f6 100644 --- a/arch/sparc/mm/hypersparc.S +++ b/arch/sparc/mm/hypersparc.S @@ -1,4 +1,4 @@ -/* $Id: hypersparc.S,v 1.18 2001/12/21 
04:56:15 davem Exp $ +/* * hypersparc.S: High speed Hypersparc mmu/cache operations. * * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) @@ -74,7 +74,7 @@ hypersparc_flush_cache_mm_out: /* The things we do for performance... */ hypersparc_flush_cache_range: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 #ifndef CONFIG_SMP ld [%o0 + AOFF_mm_context], %g1 cmp %g1, -1 @@ -163,7 +163,7 @@ hypersparc_flush_cache_range_out: */ /* Verified, my ass... */ hypersparc_flush_cache_page: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 ld [%o0 + AOFF_mm_context], %g2 #ifndef CONFIG_SMP cmp %g2, -1 @@ -284,7 +284,7 @@ hypersparc_flush_tlb_mm_out: sta %g5, [%g1] ASI_M_MMUREGS hypersparc_flush_tlb_range: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 @@ -307,7 +307,7 @@ hypersparc_flush_tlb_range_out: sta %g5, [%g1] ASI_M_MMUREGS hypersparc_flush_tlb_page: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 andn %o1, (PAGE_SIZE - 1), %o1 diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init_32.c index b89837accc8..eb828715527 100644 --- a/arch/sparc/mm/init.c +++ b/arch/sparc/mm/init_32.c @@ -1,4 +1,4 @@ -/* $Id: init.c,v 1.103 2001/11/19 19:03:08 davem Exp $ +/* * linux/arch/sparc/mm/init.c * * Copyright (C) 1995 David S. 
Miller (davem@caip.rutgers.edu) @@ -22,32 +22,32 @@ #include <linux/init.h> #include <linux/highmem.h> #include <linux/bootmem.h> +#include <linux/pagemap.h> +#include <linux/poison.h> +#include <linux/gfp.h> -#include <asm/system.h> -#include <asm/vac-ops.h> +#include <asm/sections.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/vaddrs.h> #include <asm/pgalloc.h> /* bug in asm-generic/tlb.h: check_pgt_cache */ +#include <asm/setup.h> #include <asm/tlb.h> #include <asm/prom.h> +#include <asm/leon.h> -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +#include "mm_32.h" unsigned long *sparc_valid_addr_bitmap; +EXPORT_SYMBOL(sparc_valid_addr_bitmap); unsigned long phys_base; -unsigned long pfn_base; +EXPORT_SYMBOL(phys_base); -unsigned long page_kernel; +unsigned long pfn_base; +EXPORT_SYMBOL(pfn_base); struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS+1]; -unsigned long sparc_unmapped_base; - -struct pgtable_cache_struct pgt_quicklists; - -/* References to section boundaries */ -extern char __init_begin, __init_end, _start, _end, etext , edata; /* Initial ramdisk setup */ extern unsigned int sparc_ramdisk_image; @@ -55,56 +55,17 @@ extern unsigned int sparc_ramdisk_size; unsigned long highstart_pfn, highend_pfn; -pte_t *kmap_pte; -pgprot_t kmap_prot; - -#define kmap_get_fixmap_pte(vaddr) \ - pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) - -void __init kmap_init(void) -{ - /* cache the first kmap pte */ - kmap_pte = kmap_get_fixmap_pte(__fix_to_virt(FIX_KMAP_BEGIN)); - kmap_prot = __pgprot(SRMMU_ET_PTE | SRMMU_PRIV | SRMMU_CACHE); -} - -void show_mem(void) +void show_mem(unsigned int filter) { printk("Mem-info:\n"); - show_free_areas(); + show_free_areas(filter); printk("Free swap: %6ldkB\n", - nr_swap_pages << (PAGE_SHIFT-10)); + get_nr_swap_pages() << (PAGE_SHIFT-10)); printk("%ld pages of RAM\n", totalram_pages); printk("%ld free pages\n", nr_free_pages()); -#if 0 /* undefined pgtable_cache_size, pgd_cache_size */ - printk("%ld 
pages in page table cache\n",pgtable_cache_size); -#ifndef CONFIG_SMP - if (sparc_cpu_model == sun4m || sparc_cpu_model == sun4d) - printk("%ld entries in page dir cache\n",pgd_cache_size); -#endif -#endif } -void __init sparc_context_init(int numctx) -{ - int ctx; - - ctx_list_pool = __alloc_bootmem(numctx * sizeof(struct ctx_list), SMP_CACHE_BYTES, 0UL); - - for(ctx = 0; ctx < numctx; ctx++) { - struct ctx_list *clist; - clist = (ctx_list_pool + ctx); - clist->ctx_number = ctx; - clist->ctx_mm = NULL; - } - ctx_free.next = ctx_free.prev = &ctx_free; - ctx_used.next = ctx_used.prev = &ctx_used; - for(ctx = 0; ctx < numctx; ctx++) - add_to_free_ctxlist(ctx_list_pool + ctx); -} - -extern unsigned long cmdline_memory_size; unsigned long last_valid_pfn; unsigned long calc_highpages(void) @@ -128,7 +89,7 @@ unsigned long calc_highpages(void) return nr; } -unsigned long calc_max_low_pfn(void) +static unsigned long calc_max_low_pfn(void) { int i; unsigned long tmp = pfn_base + (SRMMU_MAXMEM >> PAGE_SHIFT); @@ -283,81 +244,18 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) } /* - * check_pgt_cache - * - * This is called at the end of unmapping of VMA (zap_page_range), - * to rescan the page cache for architecture specific things, - * presumably something like sun4/sun4c PMEGs. Most architectures - * define check_pgt_cache empty. - * - * We simply copy the 2.4 implementation for now. - */ -int pgt_cache_water[2] = { 25, 50 }; - -void check_pgt_cache(void) -{ - do_check_pgt_cache(pgt_cache_water[0], pgt_cache_water[1]); -} - -/* * paging_init() sets up the page tables: We call the MMU specific * init routine based upon the Sun model type on the Sparc. 
* */ -extern void sun4c_paging_init(void); -extern void srmmu_paging_init(void); -extern void device_scan(void); - -pgprot_t PAGE_SHARED __read_mostly; -EXPORT_SYMBOL(PAGE_SHARED); - void __init paging_init(void) { - switch(sparc_cpu_model) { - case sun4c: - case sun4e: - case sun4: - sun4c_paging_init(); - sparc_unmapped_base = 0xe0000000; - BTFIXUPSET_SETHI(sparc_unmapped_base, 0xe0000000); - break; - case sun4m: - case sun4d: - srmmu_paging_init(); - sparc_unmapped_base = 0x50000000; - BTFIXUPSET_SETHI(sparc_unmapped_base, 0x50000000); - break; - default: - prom_printf("paging_init: Cannot init paging on this Sparc\n"); - prom_printf("paging_init: sparc_cpu_model = %d\n", sparc_cpu_model); - prom_printf("paging_init: Halting...\n"); - prom_halt(); - }; - - /* Initialize the protection map with non-constant, MMU dependent values. */ - protection_map[0] = PAGE_NONE; - protection_map[1] = PAGE_READONLY; - protection_map[2] = PAGE_COPY; - protection_map[3] = PAGE_COPY; - protection_map[4] = PAGE_READONLY; - protection_map[5] = PAGE_READONLY; - protection_map[6] = PAGE_COPY; - protection_map[7] = PAGE_COPY; - protection_map[8] = PAGE_NONE; - protection_map[9] = PAGE_READONLY; - protection_map[10] = PAGE_SHARED; - protection_map[11] = PAGE_SHARED; - protection_map[12] = PAGE_READONLY; - protection_map[13] = PAGE_READONLY; - protection_map[14] = PAGE_SHARED; - protection_map[15] = PAGE_SHARED; - btfixup(); + srmmu_paging_init(); prom_build_devicetree(); + of_fill_in_cpu_data(); device_scan(); } -struct cache_palias *sparc_aliases; - static void __init taint_real_pages(void) { int i; @@ -375,7 +273,7 @@ static void __init taint_real_pages(void) } } -void map_high_region(unsigned long start_pfn, unsigned long end_pfn) +static void map_high_region(unsigned long start_pfn, unsigned long end_pfn) { unsigned long tmp; @@ -383,22 +281,12 @@ void map_high_region(unsigned long start_pfn, unsigned long end_pfn) printk("mapping high region %08lx - %08lx\n", start_pfn, end_pfn); 
#endif - for (tmp = start_pfn; tmp < end_pfn; tmp++) { - struct page *page = pfn_to_page(tmp); - - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - totalhigh_pages++; - } + for (tmp = start_pfn; tmp < end_pfn; tmp++) + free_highmem_page(pfn_to_page(tmp)); } void __init mem_init(void) { - int codepages = 0; - int datapages = 0; - int initpages = 0; - int reservedpages = 0; int i; if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { @@ -430,15 +318,12 @@ void __init mem_init(void) max_mapnr = last_valid_pfn - pfn_base; high_memory = __va(max_low_pfn << PAGE_SHIFT); - - totalram_pages = free_all_bootmem(); + free_all_bootmem(); for (i = 0; sp_banks[i].num_bytes != 0; i++) { unsigned long start_pfn = sp_banks[i].base_addr >> PAGE_SHIFT; unsigned long end_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT; - num_physpages += sp_banks[i].num_bytes >> PAGE_SHIFT; - if (end_pfn <= highstart_pfn) continue; @@ -448,63 +333,19 @@ void __init mem_init(void) map_high_region(start_pfn, end_pfn); } - totalram_pages += totalhigh_pages; - - codepages = (((unsigned long) &etext) - ((unsigned long)&_start)); - codepages = PAGE_ALIGN(codepages) >> PAGE_SHIFT; - datapages = (((unsigned long) &edata) - ((unsigned long)&etext)); - datapages = PAGE_ALIGN(datapages) >> PAGE_SHIFT; - initpages = (((unsigned long) &__init_end) - ((unsigned long) &__init_begin)); - initpages = PAGE_ALIGN(initpages) >> PAGE_SHIFT; - - /* Ignore memory holes for the purpose of counting reserved pages */ - for (i=0; i < max_low_pfn; i++) - if (test_bit(i >> (20 - PAGE_SHIFT), sparc_valid_addr_bitmap) - && PageReserved(pfn_to_page(i))) - reservedpages++; - - printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT - 10), - codepages << (PAGE_SHIFT-10), - reservedpages << (PAGE_SHIFT - 10), - datapages << (PAGE_SHIFT-10), - 
initpages << (PAGE_SHIFT-10), - totalhigh_pages << (PAGE_SHIFT-10)); + mem_init_print_info(NULL); } void free_initmem (void) { - unsigned long addr; - - addr = (unsigned long)(&__init_begin); - for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { - struct page *p; - - p = virt_to_page(addr); - - ClearPageReserved(p); - init_page_count(p); - __free_page(p); - totalram_pages++; - num_physpages++; - } - printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10); + free_initmem_default(POISON_FREE_INITMEM); } #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - if (start < end) - printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { - struct page *p = virt_to_page(start); - - ClearPageReserved(p); - init_page_count(p); - __free_page(p); - num_physpages++; - } + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif @@ -515,3 +356,4 @@ void sparc_flush_page_to_ram(struct page *page) if (vaddr) __flush_page_to_ram(vaddr); } +EXPORT_SYMBOL(sparc_flush_page_to_ram); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c new file mode 100644 index 00000000000..16b58ff11e6 --- /dev/null +++ b/arch/sparc/mm/init_64.c @@ -0,0 +1,2701 @@ +/* + * arch/sparc64/mm/init.c + * + * Copyright (C) 1996-1999 David S. 
Miller (davem@caip.rutgers.edu) + * Copyright (C) 1997-1999 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/initrd.h> +#include <linux/swap.h> +#include <linux/pagemap.h> +#include <linux/poison.h> +#include <linux/fs.h> +#include <linux/seq_file.h> +#include <linux/kprobes.h> +#include <linux/cache.h> +#include <linux/sort.h> +#include <linux/percpu.h> +#include <linux/memblock.h> +#include <linux/mmzone.h> +#include <linux/gfp.h> + +#include <asm/head.h> +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/oplib.h> +#include <asm/iommu.h> +#include <asm/io.h> +#include <asm/uaccess.h> +#include <asm/mmu_context.h> +#include <asm/tlbflush.h> +#include <asm/dma.h> +#include <asm/starfire.h> +#include <asm/tlb.h> +#include <asm/spitfire.h> +#include <asm/sections.h> +#include <asm/tsb.h> +#include <asm/hypervisor.h> +#include <asm/prom.h> +#include <asm/mdesc.h> +#include <asm/cpudata.h> +#include <asm/setup.h> +#include <asm/irq.h> + +#include "init_64.h" + +unsigned long kern_linear_pte_xor[4] __read_mostly; + +/* A bitmap, two bits for every 256MB of physical memory. These two + * bits determine what page size we use for kernel linear + * translations. They form an index into kern_linear_pte_xor[]. The + * value in the indexed slot is XOR'd with the TLB miss virtual + * address to form the resulting TTE. The mapping is: + * + * 0 ==> 4MB + * 1 ==> 256MB + * 2 ==> 2GB + * 3 ==> 16GB + * + * All sun4v chips support 256MB pages. Only SPARC-T4 and later + * support 2GB pages, and hopefully future cpus will support the 16GB + * pages as well. For slots 2 and 3, we encode a 256MB TTE xor there + * if these larger page sizes are not supported by the cpu. 
+ * + * It would be nice to determine this from the machine description + * 'cpu' properties, but we need to have this table setup before the + * MDESC is initialized. + */ +unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; + +#ifndef CONFIG_DEBUG_PAGEALLOC +/* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings. + * Space is allocated for this right after the trap table in + * arch/sparc64/kernel/head.S + */ +extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES]; +#endif + +static unsigned long cpu_pgsz_mask; + +#define MAX_BANKS 32 + +static struct linux_prom64_registers pavail[MAX_BANKS]; +static int pavail_ents; + +static int cmp_p64(const void *a, const void *b) +{ + const struct linux_prom64_registers *x = a, *y = b; + + if (x->phys_addr > y->phys_addr) + return 1; + if (x->phys_addr < y->phys_addr) + return -1; + return 0; +} + +static void __init read_obp_memory(const char *property, + struct linux_prom64_registers *regs, + int *num_ents) +{ + phandle node = prom_finddevice("/memory"); + int prop_size = prom_getproplen(node, property); + int ents, ret, i; + + ents = prop_size / sizeof(struct linux_prom64_registers); + if (ents > MAX_BANKS) { + prom_printf("The machine has more %s property entries than " + "this kernel can support (%d).\n", + property, MAX_BANKS); + prom_halt(); + } + + ret = prom_getproperty(node, property, (char *) regs, prop_size); + if (ret == -1) { + prom_printf("Couldn't get %s property from /memory.\n", + property); + prom_halt(); + } + + /* Sanitize what we got from the firmware, by page aligning + * everything. + */ + for (i = 0; i < ents; i++) { + unsigned long base, size; + + base = regs[i].phys_addr; + size = regs[i].reg_size; + + size &= PAGE_MASK; + if (base & ~PAGE_MASK) { + unsigned long new_base = PAGE_ALIGN(base); + + size -= new_base - base; + if ((long) size < 0L) + size = 0UL; + base = new_base; + } + if (size == 0UL) { + /* If it is empty, simply get rid of it. 
+ * This simplifies the logic of the other + * functions that process these arrays. + */ + memmove(&regs[i], &regs[i + 1], + (ents - i - 1) * sizeof(regs[0])); + i--; + ents--; + continue; + } + regs[i].phys_addr = base; + regs[i].reg_size = size; + } + + *num_ents = ents; + + sort(regs, ents, sizeof(struct linux_prom64_registers), + cmp_p64, NULL); +} + +unsigned long sparc64_valid_addr_bitmap[VALID_ADDR_BITMAP_BYTES / + sizeof(unsigned long)]; +EXPORT_SYMBOL(sparc64_valid_addr_bitmap); + +/* Kernel physical address base and size in bytes. */ +unsigned long kern_base __read_mostly; +unsigned long kern_size __read_mostly; + +/* Initial ramdisk setup */ +extern unsigned long sparc_ramdisk_image64; +extern unsigned int sparc_ramdisk_image; +extern unsigned int sparc_ramdisk_size; + +struct page *mem_map_zero __read_mostly; +EXPORT_SYMBOL(mem_map_zero); + +unsigned int sparc64_highest_unlocked_tlb_ent __read_mostly; + +unsigned long sparc64_kern_pri_context __read_mostly; +unsigned long sparc64_kern_pri_nuc_bits __read_mostly; +unsigned long sparc64_kern_sec_context __read_mostly; + +int num_kernel_image_mappings; + +#ifdef CONFIG_DEBUG_DCFLUSH +atomic_t dcpage_flushes = ATOMIC_INIT(0); +#ifdef CONFIG_SMP +atomic_t dcpage_flushes_xcall = ATOMIC_INIT(0); +#endif +#endif + +inline void flush_dcache_page_impl(struct page *page) +{ + BUG_ON(tlb_type == hypervisor); +#ifdef CONFIG_DEBUG_DCFLUSH + atomic_inc(&dcpage_flushes); +#endif + +#ifdef DCACHE_ALIASING_POSSIBLE + __flush_dcache_page(page_address(page), + ((tlb_type == spitfire) && + page_mapping(page) != NULL)); +#else + if (page_mapping(page) != NULL && + tlb_type == spitfire) + __flush_icache_page(__pa(page_address(page))); +#endif +} + +#define PG_dcache_dirty PG_arch_1 +#define PG_dcache_cpu_shift 32UL +#define PG_dcache_cpu_mask \ + ((1UL<<ilog2(roundup_pow_of_two(NR_CPUS)))-1UL) + +#define dcache_dirty_cpu(page) \ + (((page)->flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask) + +static inline void 
set_dcache_dirty(struct page *page, int this_cpu) +{ + unsigned long mask = this_cpu; + unsigned long non_cpu_bits; + + non_cpu_bits = ~(PG_dcache_cpu_mask << PG_dcache_cpu_shift); + mask = (mask << PG_dcache_cpu_shift) | (1UL << PG_dcache_dirty); + + __asm__ __volatile__("1:\n\t" + "ldx [%2], %%g7\n\t" + "and %%g7, %1, %%g1\n\t" + "or %%g1, %0, %%g1\n\t" + "casx [%2], %%g7, %%g1\n\t" + "cmp %%g7, %%g1\n\t" + "bne,pn %%xcc, 1b\n\t" + " nop" + : /* no outputs */ + : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags) + : "g1", "g7"); +} + +static inline void clear_dcache_dirty_cpu(struct page *page, unsigned long cpu) +{ + unsigned long mask = (1UL << PG_dcache_dirty); + + __asm__ __volatile__("! test_and_clear_dcache_dirty\n" + "1:\n\t" + "ldx [%2], %%g7\n\t" + "srlx %%g7, %4, %%g1\n\t" + "and %%g1, %3, %%g1\n\t" + "cmp %%g1, %0\n\t" + "bne,pn %%icc, 2f\n\t" + " andn %%g7, %1, %%g1\n\t" + "casx [%2], %%g7, %%g1\n\t" + "cmp %%g7, %%g1\n\t" + "bne,pn %%xcc, 1b\n\t" + " nop\n" + "2:" + : /* no outputs */ + : "r" (cpu), "r" (mask), "r" (&page->flags), + "i" (PG_dcache_cpu_mask), + "i" (PG_dcache_cpu_shift) + : "g1", "g7"); +} + +static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte) +{ + unsigned long tsb_addr = (unsigned long) ent; + + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + tsb_addr = __pa(tsb_addr); + + __tsb_insert(tsb_addr, tag, pte); +} + +unsigned long _PAGE_ALL_SZ_BITS __read_mostly; + +static void flush_dcache(unsigned long pfn) +{ + struct page *page; + + page = pfn_to_page(pfn); + if (page) { + unsigned long pg_flags; + + pg_flags = page->flags; + if (pg_flags & (1UL << PG_dcache_dirty)) { + int cpu = ((pg_flags >> PG_dcache_cpu_shift) & + PG_dcache_cpu_mask); + int this_cpu = get_cpu(); + + /* This is just to optimize away some function calls + * in the SMP case. 
+ */ + if (cpu == this_cpu) + flush_dcache_page_impl(page); + else + smp_flush_dcache_page_impl(page, cpu); + + clear_dcache_dirty_cpu(page, cpu); + + put_cpu(); + } + } +} + +/* mm->context.lock must be held */ +static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_index, + unsigned long tsb_hash_shift, unsigned long address, + unsigned long tte) +{ + struct tsb *tsb = mm->context.tsb_block[tsb_index].tsb; + unsigned long tag; + + if (unlikely(!tsb)) + return; + + tsb += ((address >> tsb_hash_shift) & + (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL)); + tag = (address >> 22UL); + tsb_insert(tsb, tag, tte); +} + +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +static inline bool is_hugetlb_pte(pte_t pte) +{ + if ((tlb_type == hypervisor && + (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) || + (tlb_type != hypervisor && + (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U)) + return true; + return false; +} +#endif + +void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) +{ + struct mm_struct *mm; + unsigned long flags; + pte_t pte = *ptep; + + if (tlb_type != hypervisor) { + unsigned long pfn = pte_pfn(pte); + + if (pfn_valid(pfn)) + flush_dcache(pfn); + } + + mm = vma->vm_mm; + + spin_lock_irqsave(&mm->context.lock, flags); + +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + if (mm->context.huge_pte_count && is_hugetlb_pte(pte)) + __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, + address, pte_val(pte)); + else +#endif + __update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT, + address, pte_val(pte)); + + spin_unlock_irqrestore(&mm->context.lock, flags); +} + +void flush_dcache_page(struct page *page) +{ + struct address_space *mapping; + int this_cpu; + + if (tlb_type == hypervisor) + return; + + /* Do not bother with the expensive D-cache flush if it + * is merely the zero page. 
The 'bigcore' testcase in GDB + * causes this case to run millions of times. + */ + if (page == ZERO_PAGE(0)) + return; + + this_cpu = get_cpu(); + + mapping = page_mapping(page); + if (mapping && !mapping_mapped(mapping)) { + int dirty = test_bit(PG_dcache_dirty, &page->flags); + if (dirty) { + int dirty_cpu = dcache_dirty_cpu(page); + + if (dirty_cpu == this_cpu) + goto out; + smp_flush_dcache_page_impl(page, dirty_cpu); + } + set_dcache_dirty(page, this_cpu); + } else { + /* We could delay the flush for the !page_mapping + * case too. But that case is for exec env/arg + * pages and those are %99 certainly going to get + * faulted into the tlb (and thus flushed) anyways. + */ + flush_dcache_page_impl(page); + } + +out: + put_cpu(); +} +EXPORT_SYMBOL(flush_dcache_page); + +void __kprobes flush_icache_range(unsigned long start, unsigned long end) +{ + /* Cheetah and Hypervisor platform cpus have coherent I-cache. */ + if (tlb_type == spitfire) { + unsigned long kaddr; + + /* This code only runs on Spitfire cpus so this is + * why we can assume _PAGE_PADDR_4U. 
+ */ + for (kaddr = start; kaddr < end; kaddr += PAGE_SIZE) { + unsigned long paddr, mask = _PAGE_PADDR_4U; + + if (kaddr >= PAGE_OFFSET) + paddr = kaddr & mask; + else { + pgd_t *pgdp = pgd_offset_k(kaddr); + pud_t *pudp = pud_offset(pgdp, kaddr); + pmd_t *pmdp = pmd_offset(pudp, kaddr); + pte_t *ptep = pte_offset_kernel(pmdp, kaddr); + + paddr = pte_val(*ptep) & mask; + } + __flush_icache_page(paddr); + } + } +} +EXPORT_SYMBOL(flush_icache_range); + +void mmu_info(struct seq_file *m) +{ + static const char *pgsz_strings[] = { + "8K", "64K", "512K", "4MB", "32MB", + "256MB", "2GB", "16GB", + }; + int i, printed; + + if (tlb_type == cheetah) + seq_printf(m, "MMU Type\t: Cheetah\n"); + else if (tlb_type == cheetah_plus) + seq_printf(m, "MMU Type\t: Cheetah+\n"); + else if (tlb_type == spitfire) + seq_printf(m, "MMU Type\t: Spitfire\n"); + else if (tlb_type == hypervisor) + seq_printf(m, "MMU Type\t: Hypervisor (sun4v)\n"); + else + seq_printf(m, "MMU Type\t: ???\n"); + + seq_printf(m, "MMU PGSZs\t: "); + printed = 0; + for (i = 0; i < ARRAY_SIZE(pgsz_strings); i++) { + if (cpu_pgsz_mask & (1UL << i)) { + seq_printf(m, "%s%s", + printed ? "," : "", pgsz_strings[i]); + printed++; + } + } + seq_putc(m, '\n'); + +#ifdef CONFIG_DEBUG_DCFLUSH + seq_printf(m, "DCPageFlushes\t: %d\n", + atomic_read(&dcpage_flushes)); +#ifdef CONFIG_SMP + seq_printf(m, "DCPageFlushesXC\t: %d\n", + atomic_read(&dcpage_flushes_xcall)); +#endif /* CONFIG_SMP */ +#endif /* CONFIG_DEBUG_DCFLUSH */ +} + +struct linux_prom_translation prom_trans[512] __read_mostly; +unsigned int prom_trans_ents __read_mostly; + +unsigned long kern_locked_tte_data; + +/* The obp translations are saved based on 8k pagesize, since obp can + * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS -> + * HI_OBP_ADDRESS range are handled in ktlb.S. 
+ */ +static inline int in_obp_range(unsigned long vaddr) +{ + return (vaddr >= LOW_OBP_ADDRESS && + vaddr < HI_OBP_ADDRESS); +} + +static int cmp_ptrans(const void *a, const void *b) +{ + const struct linux_prom_translation *x = a, *y = b; + + if (x->virt > y->virt) + return 1; + if (x->virt < y->virt) + return -1; + return 0; +} + +/* Read OBP translations property into 'prom_trans[]'. */ +static void __init read_obp_translations(void) +{ + int n, node, ents, first, last, i; + + node = prom_finddevice("/virtual-memory"); + n = prom_getproplen(node, "translations"); + if (unlikely(n == 0 || n == -1)) { + prom_printf("prom_mappings: Couldn't get size.\n"); + prom_halt(); + } + if (unlikely(n > sizeof(prom_trans))) { + prom_printf("prom_mappings: Size %d is too big.\n", n); + prom_halt(); + } + + if ((n = prom_getproperty(node, "translations", + (char *)&prom_trans[0], + sizeof(prom_trans))) == -1) { + prom_printf("prom_mappings: Couldn't get property.\n"); + prom_halt(); + } + + n = n / sizeof(struct linux_prom_translation); + + ents = n; + + sort(prom_trans, ents, sizeof(struct linux_prom_translation), + cmp_ptrans, NULL); + + /* Now kick out all the non-OBP entries. */ + for (i = 0; i < ents; i++) { + if (in_obp_range(prom_trans[i].virt)) + break; + } + first = i; + for (; i < ents; i++) { + if (!in_obp_range(prom_trans[i].virt)) + break; + } + last = i; + + for (i = 0; i < (last - first); i++) { + struct linux_prom_translation *src = &prom_trans[i + first]; + struct linux_prom_translation *dest = &prom_trans[i]; + + *dest = *src; + } + for (; i < ents; i++) { + struct linux_prom_translation *dest = &prom_trans[i]; + dest->virt = dest->size = dest->data = 0x0UL; + } + + prom_trans_ents = last - first; + + if (tlb_type == spitfire) { + /* Clear diag TTE bits. */ + for (i = 0; i < prom_trans_ents; i++) + prom_trans[i].data &= ~0x0003fe0000000000UL; + } + + /* Force execute bit on. 
*/ + for (i = 0; i < prom_trans_ents; i++) + prom_trans[i].data |= (tlb_type == hypervisor ? + _PAGE_EXEC_4V : _PAGE_EXEC_4U); +} + +static void __init hypervisor_tlb_lock(unsigned long vaddr, + unsigned long pte, + unsigned long mmu) +{ + unsigned long ret = sun4v_mmu_map_perm_addr(vaddr, 0, pte, mmu); + + if (ret != 0) { + prom_printf("hypervisor_tlb_lock[%lx:%x:%lx:%lx]: " + "errors with %lx\n", vaddr, 0, pte, mmu, ret); + prom_halt(); + } +} + +static unsigned long kern_large_tte(unsigned long paddr); + +static void __init remap_kernel(void) +{ + unsigned long phys_page, tte_vaddr, tte_data; + int i, tlb_ent = sparc64_highest_locked_tlbent(); + + tte_vaddr = (unsigned long) KERNBASE; + phys_page = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB; + tte_data = kern_large_tte(phys_page); + + kern_locked_tte_data = tte_data; + + /* Now lock us into the TLBs via Hypervisor or OBP. */ + if (tlb_type == hypervisor) { + for (i = 0; i < num_kernel_image_mappings; i++) { + hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU); + hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU); + tte_vaddr += 0x400000; + tte_data += 0x400000; + } + } else { + for (i = 0; i < num_kernel_image_mappings; i++) { + prom_dtlb_load(tlb_ent - i, tte_data, tte_vaddr); + prom_itlb_load(tlb_ent - i, tte_data, tte_vaddr); + tte_vaddr += 0x400000; + tte_data += 0x400000; + } + sparc64_highest_unlocked_tlb_ent = tlb_ent - i; + } + if (tlb_type == cheetah_plus) { + sparc64_kern_pri_context = (CTX_CHEETAH_PLUS_CTX0 | + CTX_CHEETAH_PLUS_NUC); + sparc64_kern_pri_nuc_bits = CTX_CHEETAH_PLUS_NUC; + sparc64_kern_sec_context = CTX_CHEETAH_PLUS_CTX0; + } +} + + +static void __init inherit_prom_mappings(void) +{ + /* Now fixup OBP's idea about where we really are mapped. */ + printk("Remapping the kernel... 
"); + remap_kernel(); + printk("done.\n"); +} + +void prom_world(int enter) +{ + if (!enter) + set_fs(get_fs()); + + __asm__ __volatile__("flushw"); +} + +void __flush_dcache_range(unsigned long start, unsigned long end) +{ + unsigned long va; + + if (tlb_type == spitfire) { + int n = 0; + + for (va = start; va < end; va += 32) { + spitfire_put_dcache_tag(va & 0x3fe0, 0x0); + if (++n >= 512) + break; + } + } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { + start = __pa(start); + end = __pa(end); + for (va = start; va < end; va += 32) + __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (va), + "i" (ASI_DCACHE_INVALIDATE)); + } +} +EXPORT_SYMBOL(__flush_dcache_range); + +/* get_new_mmu_context() uses "cache + 1". */ +DEFINE_SPINLOCK(ctx_alloc_lock); +unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1; +#define MAX_CTX_NR (1UL << CTX_NR_BITS) +#define CTX_BMAP_SLOTS BITS_TO_LONGS(MAX_CTX_NR) +DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR); + +/* Caller does TLB context flushing on local CPU if necessary. + * The caller also ensures that CTX_VALID(mm->context) is false. + * + * We must be careful about boundary cases so that we never + * let the user have CTX 0 (nucleus) or we ever use a CTX + * version of zero (and thus NO_CONTEXT would not be caught + * by version mis-match tests in mmu_context.h). + * + * Always invoked with interrupts disabled. 
+ */ +void get_new_mmu_context(struct mm_struct *mm) +{ + unsigned long ctx, new_ctx; + unsigned long orig_pgsz_bits; + int new_version; + + spin_lock(&ctx_alloc_lock); + orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK); + ctx = (tlb_context_cache + 1) & CTX_NR_MASK; + new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx); + new_version = 0; + if (new_ctx >= (1 << CTX_NR_BITS)) { + new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1); + if (new_ctx >= ctx) { + int i; + new_ctx = (tlb_context_cache & CTX_VERSION_MASK) + + CTX_FIRST_VERSION; + if (new_ctx == 1) + new_ctx = CTX_FIRST_VERSION; + + /* Don't call memset, for 16 entries that's just + * plain silly... + */ + mmu_context_bmap[0] = 3; + mmu_context_bmap[1] = 0; + mmu_context_bmap[2] = 0; + mmu_context_bmap[3] = 0; + for (i = 4; i < CTX_BMAP_SLOTS; i += 4) { + mmu_context_bmap[i + 0] = 0; + mmu_context_bmap[i + 1] = 0; + mmu_context_bmap[i + 2] = 0; + mmu_context_bmap[i + 3] = 0; + } + new_version = 1; + goto out; + } + } + mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63)); + new_ctx |= (tlb_context_cache & CTX_VERSION_MASK); +out: + tlb_context_cache = new_ctx; + mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits; + spin_unlock(&ctx_alloc_lock); + + if (unlikely(new_version)) + smp_new_mmu_context_version(); +} + +static int numa_enabled = 1; +static int numa_debug; + +static int __init early_numa(char *p) +{ + if (!p) + return 0; + + if (strstr(p, "off")) + numa_enabled = 0; + + if (strstr(p, "debug")) + numa_debug = 1; + + return 0; +} +early_param("numa", early_numa); + +#define numadbg(f, a...) 
\ +do { if (numa_debug) \ + printk(KERN_INFO f, ## a); \ +} while (0) + +static void __init find_ramdisk(unsigned long phys_base) +{ +#ifdef CONFIG_BLK_DEV_INITRD + if (sparc_ramdisk_image || sparc_ramdisk_image64) { + unsigned long ramdisk_image; + + /* Older versions of the bootloader only supported a + * 32-bit physical address for the ramdisk image + * location, stored at sparc_ramdisk_image. Newer + * SILO versions set sparc_ramdisk_image to zero and + * provide a full 64-bit physical address at + * sparc_ramdisk_image64. + */ + ramdisk_image = sparc_ramdisk_image; + if (!ramdisk_image) + ramdisk_image = sparc_ramdisk_image64; + + /* Another bootloader quirk. The bootloader normalizes + * the physical address to KERNBASE, so we have to + * factor that back out and add in the lowest valid + * physical page address to get the true physical address. + */ + ramdisk_image -= KERNBASE; + ramdisk_image += phys_base; + + numadbg("Found ramdisk at physical address 0x%lx, size %u\n", + ramdisk_image, sparc_ramdisk_size); + + initrd_start = ramdisk_image; + initrd_end = ramdisk_image + sparc_ramdisk_size; + + memblock_reserve(initrd_start, sparc_ramdisk_size); + + initrd_start += PAGE_OFFSET; + initrd_end += PAGE_OFFSET; + } +#endif +} + +struct node_mem_mask { + unsigned long mask; + unsigned long val; +}; +static struct node_mem_mask node_masks[MAX_NUMNODES]; +static int num_node_masks; + +#ifdef CONFIG_NEED_MULTIPLE_NODES + +int numa_cpu_lookup_table[NR_CPUS]; +cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES]; + +struct mdesc_mblock { + u64 base; + u64 size; + u64 offset; /* RA-to-PA */ +}; +static struct mdesc_mblock *mblocks; +static int num_mblocks; + +static unsigned long ra_to_pa(unsigned long addr) +{ + int i; + + for (i = 0; i < num_mblocks; i++) { + struct mdesc_mblock *m = &mblocks[i]; + + if (addr >= m->base && + addr < (m->base + m->size)) { + addr += m->offset; + break; + } + } + return addr; +} + +static int find_node(unsigned long addr) +{ + int i; + + 
addr = ra_to_pa(addr); + for (i = 0; i < num_node_masks; i++) { + struct node_mem_mask *p = &node_masks[i]; + + if ((addr & p->mask) == p->val) + return i; + } + return -1; +} + +static u64 memblock_nid_range(u64 start, u64 end, int *nid) +{ + *nid = find_node(start); + start += PAGE_SIZE; + while (start < end) { + int n = find_node(start); + + if (n != *nid) + break; + start += PAGE_SIZE; + } + + if (start > end) + start = end; + + return start; +} +#endif + +/* This must be invoked after performing all of the necessary + * memblock_set_node() calls for 'nid'. We need to be able to get + * correct data from get_pfn_range_for_nid(). + */ +static void __init allocate_node_data(int nid) +{ + struct pglist_data *p; + unsigned long start_pfn, end_pfn; +#ifdef CONFIG_NEED_MULTIPLE_NODES + unsigned long paddr; + + paddr = memblock_alloc_try_nid(sizeof(struct pglist_data), SMP_CACHE_BYTES, nid); + if (!paddr) { + prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid); + prom_halt(); + } + NODE_DATA(nid) = __va(paddr); + memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); + + NODE_DATA(nid)->node_id = nid; +#endif + + p = NODE_DATA(nid); + + get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); + p->node_start_pfn = start_pfn; + p->node_spanned_pages = end_pfn - start_pfn; +} + +static void init_node_masks_nonnuma(void) +{ +#ifdef CONFIG_NEED_MULTIPLE_NODES + int i; +#endif + + numadbg("Initializing tables for non-numa.\n"); + + node_masks[0].mask = node_masks[0].val = 0; + num_node_masks = 1; + +#ifdef CONFIG_NEED_MULTIPLE_NODES + for (i = 0; i < NR_CPUS; i++) + numa_cpu_lookup_table[i] = 0; + + cpumask_setall(&numa_cpumask_lookup_table[0]); +#endif +} + +#ifdef CONFIG_NEED_MULTIPLE_NODES +struct pglist_data *node_data[MAX_NUMNODES]; + +EXPORT_SYMBOL(numa_cpu_lookup_table); +EXPORT_SYMBOL(numa_cpumask_lookup_table); +EXPORT_SYMBOL(node_data); + +struct mdesc_mlgroup { + u64 node; + u64 latency; + u64 match; + u64 mask; +}; +static struct mdesc_mlgroup *mlgroups; 
+static int num_mlgroups; + +static int scan_pio_for_cfg_handle(struct mdesc_handle *md, u64 pio, + u32 cfg_handle) +{ + u64 arc; + + mdesc_for_each_arc(arc, md, pio, MDESC_ARC_TYPE_FWD) { + u64 target = mdesc_arc_target(md, arc); + const u64 *val; + + val = mdesc_get_property(md, target, + "cfg-handle", NULL); + if (val && *val == cfg_handle) + return 0; + } + return -ENODEV; +} + +static int scan_arcs_for_cfg_handle(struct mdesc_handle *md, u64 grp, + u32 cfg_handle) +{ + u64 arc, candidate, best_latency = ~(u64)0; + + candidate = MDESC_NODE_NULL; + mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) { + u64 target = mdesc_arc_target(md, arc); + const char *name = mdesc_node_name(md, target); + const u64 *val; + + if (strcmp(name, "pio-latency-group")) + continue; + + val = mdesc_get_property(md, target, "latency", NULL); + if (!val) + continue; + + if (*val < best_latency) { + candidate = target; + best_latency = *val; + } + } + + if (candidate == MDESC_NODE_NULL) + return -ENODEV; + + return scan_pio_for_cfg_handle(md, candidate, cfg_handle); +} + +int of_node_to_nid(struct device_node *dp) +{ + const struct linux_prom64_registers *regs; + struct mdesc_handle *md; + u32 cfg_handle; + int count, nid; + u64 grp; + + /* This is the right thing to do on currently supported + * SUN4U NUMA platforms as well, as the PCI controller does + * not sit behind any particular memory controller. 
+ */ + if (!mlgroups) + return -1; + + regs = of_get_property(dp, "reg", NULL); + if (!regs) + return -1; + + cfg_handle = (regs->phys_addr >> 32UL) & 0x0fffffff; + + md = mdesc_grab(); + + count = 0; + nid = -1; + mdesc_for_each_node_by_name(md, grp, "group") { + if (!scan_arcs_for_cfg_handle(md, grp, cfg_handle)) { + nid = count; + break; + } + count++; + } + + mdesc_release(md); + + return nid; +} + +static void __init add_node_ranges(void) +{ + struct memblock_region *reg; + + for_each_memblock(memory, reg) { + unsigned long size = reg->size; + unsigned long start, end; + + start = reg->base; + end = start + size; + while (start < end) { + unsigned long this_end; + int nid; + + this_end = memblock_nid_range(start, end, &nid); + + numadbg("Setting memblock NUMA node nid[%d] " + "start[%lx] end[%lx]\n", + nid, start, this_end); + + memblock_set_node(start, this_end - start, + &memblock.memory, nid); + start = this_end; + } + } +} + +static int __init grab_mlgroups(struct mdesc_handle *md) +{ + unsigned long paddr; + int count = 0; + u64 node; + + mdesc_for_each_node_by_name(md, node, "memory-latency-group") + count++; + if (!count) + return -ENOENT; + + paddr = memblock_alloc(count * sizeof(struct mdesc_mlgroup), + SMP_CACHE_BYTES); + if (!paddr) + return -ENOMEM; + + mlgroups = __va(paddr); + num_mlgroups = count; + + count = 0; + mdesc_for_each_node_by_name(md, node, "memory-latency-group") { + struct mdesc_mlgroup *m = &mlgroups[count++]; + const u64 *val; + + m->node = node; + + val = mdesc_get_property(md, node, "latency", NULL); + m->latency = *val; + val = mdesc_get_property(md, node, "address-match", NULL); + m->match = *val; + val = mdesc_get_property(md, node, "address-mask", NULL); + m->mask = *val; + + numadbg("MLGROUP[%d]: node[%llx] latency[%llx] " + "match[%llx] mask[%llx]\n", + count - 1, m->node, m->latency, m->match, m->mask); + } + + return 0; +} + +static int __init grab_mblocks(struct mdesc_handle *md) +{ + unsigned long paddr; + int count = 
0; + u64 node; + + mdesc_for_each_node_by_name(md, node, "mblock") + count++; + if (!count) + return -ENOENT; + + paddr = memblock_alloc(count * sizeof(struct mdesc_mblock), + SMP_CACHE_BYTES); + if (!paddr) + return -ENOMEM; + + mblocks = __va(paddr); + num_mblocks = count; + + count = 0; + mdesc_for_each_node_by_name(md, node, "mblock") { + struct mdesc_mblock *m = &mblocks[count++]; + const u64 *val; + + val = mdesc_get_property(md, node, "base", NULL); + m->base = *val; + val = mdesc_get_property(md, node, "size", NULL); + m->size = *val; + val = mdesc_get_property(md, node, + "address-congruence-offset", NULL); + + /* The address-congruence-offset property is optional. + * Explicity zero it be identifty this. + */ + if (val) + m->offset = *val; + else + m->offset = 0UL; + + numadbg("MBLOCK[%d]: base[%llx] size[%llx] offset[%llx]\n", + count - 1, m->base, m->size, m->offset); + } + + return 0; +} + +static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md, + u64 grp, cpumask_t *mask) +{ + u64 arc; + + cpumask_clear(mask); + + mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) { + u64 target = mdesc_arc_target(md, arc); + const char *name = mdesc_node_name(md, target); + const u64 *id; + + if (strcmp(name, "cpu")) + continue; + id = mdesc_get_property(md, target, "id", NULL); + if (*id < nr_cpu_ids) + cpumask_set_cpu(*id, mask); + } +} + +static struct mdesc_mlgroup * __init find_mlgroup(u64 node) +{ + int i; + + for (i = 0; i < num_mlgroups; i++) { + struct mdesc_mlgroup *m = &mlgroups[i]; + if (m->node == node) + return m; + } + return NULL; +} + +static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp, + int index) +{ + struct mdesc_mlgroup *candidate = NULL; + u64 arc, best_latency = ~(u64)0; + struct node_mem_mask *n; + + mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) { + u64 target = mdesc_arc_target(md, arc); + struct mdesc_mlgroup *m = find_mlgroup(target); + if (!m) + continue; + if (m->latency < best_latency) { + 
candidate = m; + best_latency = m->latency; + } + } + if (!candidate) + return -ENOENT; + + if (num_node_masks != index) { + printk(KERN_ERR "Inconsistent NUMA state, " + "index[%d] != num_node_masks[%d]\n", + index, num_node_masks); + return -EINVAL; + } + + n = &node_masks[num_node_masks++]; + + n->mask = candidate->mask; + n->val = candidate->match; + + numadbg("NUMA NODE[%d]: mask[%lx] val[%lx] (latency[%llx])\n", + index, n->mask, n->val, candidate->latency); + + return 0; +} + +static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp, + int index) +{ + cpumask_t mask; + int cpu; + + numa_parse_mdesc_group_cpus(md, grp, &mask); + + for_each_cpu(cpu, &mask) + numa_cpu_lookup_table[cpu] = index; + cpumask_copy(&numa_cpumask_lookup_table[index], &mask); + + if (numa_debug) { + printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index); + for_each_cpu(cpu, &mask) + printk("%d ", cpu); + printk("]\n"); + } + + return numa_attach_mlgroup(md, grp, index); +} + +static int __init numa_parse_mdesc(void) +{ + struct mdesc_handle *md = mdesc_grab(); + int i, err, count; + u64 node; + + node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups"); + if (node == MDESC_NODE_NULL) { + mdesc_release(md); + return -ENOENT; + } + + err = grab_mblocks(md); + if (err < 0) + goto out; + + err = grab_mlgroups(md); + if (err < 0) + goto out; + + count = 0; + mdesc_for_each_node_by_name(md, node, "group") { + err = numa_parse_mdesc_group(md, node, count); + if (err < 0) + break; + count++; + } + + add_node_ranges(); + + for (i = 0; i < num_node_masks; i++) { + allocate_node_data(i); + node_set_online(i); + } + + err = 0; +out: + mdesc_release(md); + return err; +} + +static int __init numa_parse_jbus(void) +{ + unsigned long cpu, index; + + /* NUMA node id is encoded in bits 36 and higher, and there is + * a 1-to-1 mapping from CPU ID to NUMA node ID. 
+ */ + index = 0; + for_each_present_cpu(cpu) { + numa_cpu_lookup_table[cpu] = index; + cpumask_copy(&numa_cpumask_lookup_table[index], cpumask_of(cpu)); + node_masks[index].mask = ~((1UL << 36UL) - 1UL); + node_masks[index].val = cpu << 36UL; + + index++; + } + num_node_masks = index; + + add_node_ranges(); + + for (index = 0; index < num_node_masks; index++) { + allocate_node_data(index); + node_set_online(index); + } + + return 0; +} + +static int __init numa_parse_sun4u(void) +{ + if (tlb_type == cheetah || tlb_type == cheetah_plus) { + unsigned long ver; + + __asm__ ("rdpr %%ver, %0" : "=r" (ver)); + if ((ver >> 32UL) == __JALAPENO_ID || + (ver >> 32UL) == __SERRANO_ID) + return numa_parse_jbus(); + } + return -1; +} + +static int __init bootmem_init_numa(void) +{ + int err = -1; + + numadbg("bootmem_init_numa()\n"); + + if (numa_enabled) { + if (tlb_type == hypervisor) + err = numa_parse_mdesc(); + else + err = numa_parse_sun4u(); + } + return err; +} + +#else + +static int bootmem_init_numa(void) +{ + return -1; +} + +#endif + +static void __init bootmem_init_nonnuma(void) +{ + unsigned long top_of_ram = memblock_end_of_DRAM(); + unsigned long total_ram = memblock_phys_mem_size(); + + numadbg("bootmem_init_nonnuma()\n"); + + printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", + top_of_ram, total_ram); + printk(KERN_INFO "Memory hole size: %ldMB\n", + (top_of_ram - total_ram) >> 20); + + init_node_masks_nonnuma(); + memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); + allocate_node_data(0); + node_set_online(0); +} + +static unsigned long __init bootmem_init(unsigned long phys_base) +{ + unsigned long end_pfn; + + end_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; + max_pfn = max_low_pfn = end_pfn; + min_low_pfn = (phys_base >> PAGE_SHIFT); + + if (bootmem_init_numa() < 0) + bootmem_init_nonnuma(); + + /* Dump memblock with node info. 
*/ + memblock_dump_all(); + + /* XXX cpu notifier XXX */ + + sparse_memory_present_with_active_regions(MAX_NUMNODES); + sparse_init(); + + return end_pfn; +} + +static struct linux_prom64_registers pall[MAX_BANKS] __initdata; +static int pall_ents __initdata; + +#ifdef CONFIG_DEBUG_PAGEALLOC +static unsigned long __ref kernel_map_range(unsigned long pstart, + unsigned long pend, pgprot_t prot) +{ + unsigned long vstart = PAGE_OFFSET + pstart; + unsigned long vend = PAGE_OFFSET + pend; + unsigned long alloc_bytes = 0UL; + + if ((vstart & ~PAGE_MASK) || (vend & ~PAGE_MASK)) { + prom_printf("kernel_map: Unaligned physmem[%lx:%lx]\n", + vstart, vend); + prom_halt(); + } + + while (vstart < vend) { + unsigned long this_end, paddr = __pa(vstart); + pgd_t *pgd = pgd_offset_k(vstart); + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pud = pud_offset(pgd, vstart); + if (pud_none(*pud)) { + pmd_t *new; + + new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + alloc_bytes += PAGE_SIZE; + pud_populate(&init_mm, pud, new); + } + + pmd = pmd_offset(pud, vstart); + if (!pmd_present(*pmd)) { + pte_t *new; + + new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + alloc_bytes += PAGE_SIZE; + pmd_populate_kernel(&init_mm, pmd, new); + } + + pte = pte_offset_kernel(pmd, vstart); + this_end = (vstart + PMD_SIZE) & PMD_MASK; + if (this_end > vend) + this_end = vend; + + while (vstart < this_end) { + pte_val(*pte) = (paddr | pgprot_val(prot)); + + vstart += PAGE_SIZE; + paddr += PAGE_SIZE; + pte++; + } + } + + return alloc_bytes; +} + +extern unsigned int kvmap_linear_patch[1]; +#endif /* CONFIG_DEBUG_PAGEALLOC */ + +static void __init kpte_set_val(unsigned long index, unsigned long val) +{ + unsigned long *ptr = kpte_linear_bitmap; + + val <<= ((index % (BITS_PER_LONG / 2)) * 2); + ptr += (index / (BITS_PER_LONG / 2)); + + *ptr |= val; +} + +static const unsigned long kpte_shift_min = 28; /* 256MB */ +static const unsigned long kpte_shift_max = 34; /* 16GB */ +static const unsigned 
long kpte_shift_incr = 3; + +static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end, + unsigned long shift) +{ + unsigned long size = (1UL << shift); + unsigned long mask = (size - 1UL); + unsigned long remains = end - start; + unsigned long val; + + if (remains < size || (start & mask)) + return start; + + /* VAL maps: + * + * shift 28 --> kern_linear_pte_xor index 1 + * shift 31 --> kern_linear_pte_xor index 2 + * shift 34 --> kern_linear_pte_xor index 3 + */ + val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1; + + remains &= ~mask; + if (shift != kpte_shift_max) + remains = size; + + while (remains) { + unsigned long index = start >> kpte_shift_min; + + kpte_set_val(index, val); + + start += 1UL << kpte_shift_min; + remains -= 1UL << kpte_shift_min; + } + + return start; +} + +static void __init mark_kpte_bitmap(unsigned long start, unsigned long end) +{ + unsigned long smallest_size, smallest_mask; + unsigned long s; + + smallest_size = (1UL << kpte_shift_min); + smallest_mask = (smallest_size - 1UL); + + while (start < end) { + unsigned long orig_start = start; + + for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) { + start = kpte_mark_using_shift(start, end, s); + + if (start != orig_start) + break; + } + + if (start == orig_start) + start = (start + smallest_size) & ~smallest_mask; + } +} + +static void __init init_kpte_bitmap(void) +{ + unsigned long i; + + for (i = 0; i < pall_ents; i++) { + unsigned long phys_start, phys_end; + + phys_start = pall[i].phys_addr; + phys_end = phys_start + pall[i].reg_size; + + mark_kpte_bitmap(phys_start, phys_end); + } +} + +static void __init kernel_physical_mapping_init(void) +{ +#ifdef CONFIG_DEBUG_PAGEALLOC + unsigned long i, mem_alloced = 0UL; + + for (i = 0; i < pall_ents; i++) { + unsigned long phys_start, phys_end; + + phys_start = pall[i].phys_addr; + phys_end = phys_start + pall[i].reg_size; + + mem_alloced += kernel_map_range(phys_start, phys_end, + PAGE_KERNEL); 
+ } + + printk("Allocated %ld bytes for kernel page tables.\n", + mem_alloced); + + kvmap_linear_patch[0] = 0x01000000; /* nop */ + flushi(&kvmap_linear_patch[0]); + + __flush_tlb_all(); +#endif +} + +#ifdef CONFIG_DEBUG_PAGEALLOC +void kernel_map_pages(struct page *page, int numpages, int enable) +{ + unsigned long phys_start = page_to_pfn(page) << PAGE_SHIFT; + unsigned long phys_end = phys_start + (numpages * PAGE_SIZE); + + kernel_map_range(phys_start, phys_end, + (enable ? PAGE_KERNEL : __pgprot(0))); + + flush_tsb_kernel_range(PAGE_OFFSET + phys_start, + PAGE_OFFSET + phys_end); + + /* we should perform an IPI and flush all tlbs, + * but that can deadlock->flush only current cpu. + */ + __flush_tlb_kernel_range(PAGE_OFFSET + phys_start, + PAGE_OFFSET + phys_end); +} +#endif + +unsigned long __init find_ecache_flush_span(unsigned long size) +{ + int i; + + for (i = 0; i < pavail_ents; i++) { + if (pavail[i].reg_size >= size) + return pavail[i].phys_addr; + } + + return ~0UL; +} + +unsigned long PAGE_OFFSET; +EXPORT_SYMBOL(PAGE_OFFSET); + +static void __init page_offset_shift_patch_one(unsigned int *insn, unsigned long phys_bits) +{ + unsigned long final_shift; + unsigned int val = *insn; + unsigned int cnt; + + /* We are patching in ilog2(max_supported_phys_address), and + * we are doing so in a manner similar to a relocation addend. + * That is, we are adding the shift value to whatever value + * is in the shift instruction count field already. + */ + cnt = (val & 0x3f); + val &= ~0x3f; + + /* If we are trying to shift >= 64 bits, clear the destination + * register. This can happen when phys_bits ends up being equal + * to MAX_PHYS_ADDRESS_BITS. 
+ */ + final_shift = (cnt + (64 - phys_bits)); + if (final_shift >= 64) { + unsigned int rd = (val >> 25) & 0x1f; + + val = 0x80100000 | (rd << 25); + } else { + val |= final_shift; + } + *insn = val; + + __asm__ __volatile__("flush %0" + : /* no outputs */ + : "r" (insn)); +} + +static void __init page_offset_shift_patch(unsigned long phys_bits) +{ + extern unsigned int __page_offset_shift_patch; + extern unsigned int __page_offset_shift_patch_end; + unsigned int *p; + + p = &__page_offset_shift_patch; + while (p < &__page_offset_shift_patch_end) { + unsigned int *insn = (unsigned int *)(unsigned long)*p; + + page_offset_shift_patch_one(insn, phys_bits); + + p++; + } +} + +static void __init setup_page_offset(void) +{ + unsigned long max_phys_bits = 40; + + if (tlb_type == cheetah || tlb_type == cheetah_plus) { + max_phys_bits = 42; + } else if (tlb_type == hypervisor) { + switch (sun4v_chip_type) { + case SUN4V_CHIP_NIAGARA1: + case SUN4V_CHIP_NIAGARA2: + max_phys_bits = 39; + break; + case SUN4V_CHIP_NIAGARA3: + max_phys_bits = 43; + break; + case SUN4V_CHIP_NIAGARA4: + case SUN4V_CHIP_NIAGARA5: + case SUN4V_CHIP_SPARC64X: + default: + max_phys_bits = 47; + break; + } + } + + if (max_phys_bits > MAX_PHYS_ADDRESS_BITS) { + prom_printf("MAX_PHYS_ADDRESS_BITS is too small, need %lu\n", + max_phys_bits); + prom_halt(); + } + + PAGE_OFFSET = PAGE_OFFSET_BY_BITS(max_phys_bits); + + pr_info("PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n", + PAGE_OFFSET, max_phys_bits); + + page_offset_shift_patch(max_phys_bits); +} + +static void __init tsb_phys_patch(void) +{ + struct tsb_ldquad_phys_patch_entry *pquad; + struct tsb_phys_patch_entry *p; + + pquad = &__tsb_ldquad_phys_patch; + while (pquad < &__tsb_ldquad_phys_patch_end) { + unsigned long addr = pquad->addr; + + if (tlb_type == hypervisor) + *(unsigned int *) addr = pquad->sun4v_insn; + else + *(unsigned int *) addr = pquad->sun4u_insn; + wmb(); + __asm__ __volatile__("flush %0" + : /* no outputs */ + : "r" (addr)); 
+ + pquad++; + } + + p = &__tsb_phys_patch; + while (p < &__tsb_phys_patch_end) { + unsigned long addr = p->addr; + + *(unsigned int *) addr = p->insn; + wmb(); + __asm__ __volatile__("flush %0" + : /* no outputs */ + : "r" (addr)); + + p++; + } +} + +/* Don't mark as init, we give this to the Hypervisor. */ +#ifndef CONFIG_DEBUG_PAGEALLOC +#define NUM_KTSB_DESCR 2 +#else +#define NUM_KTSB_DESCR 1 +#endif +static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR]; +extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; + +static void patch_one_ktsb_phys(unsigned int *start, unsigned int *end, unsigned long pa) +{ + pa >>= KTSB_PHYS_SHIFT; + + while (start < end) { + unsigned int *ia = (unsigned int *)(unsigned long)*start; + + ia[0] = (ia[0] & ~0x3fffff) | (pa >> 10); + __asm__ __volatile__("flush %0" : : "r" (ia)); + + ia[1] = (ia[1] & ~0x3ff) | (pa & 0x3ff); + __asm__ __volatile__("flush %0" : : "r" (ia + 1)); + + start++; + } +} + +static void ktsb_phys_patch(void) +{ + extern unsigned int __swapper_tsb_phys_patch; + extern unsigned int __swapper_tsb_phys_patch_end; + unsigned long ktsb_pa; + + ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE); + patch_one_ktsb_phys(&__swapper_tsb_phys_patch, + &__swapper_tsb_phys_patch_end, ktsb_pa); +#ifndef CONFIG_DEBUG_PAGEALLOC + { + extern unsigned int __swapper_4m_tsb_phys_patch; + extern unsigned int __swapper_4m_tsb_phys_patch_end; + ktsb_pa = (kern_base + + ((unsigned long)&swapper_4m_tsb[0] - KERNBASE)); + patch_one_ktsb_phys(&__swapper_4m_tsb_phys_patch, + &__swapper_4m_tsb_phys_patch_end, ktsb_pa); + } +#endif +} + +static void __init sun4v_ktsb_init(void) +{ + unsigned long ktsb_pa; + + /* First KTSB for PAGE_SIZE mappings. 
*/ + ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE); + + switch (PAGE_SIZE) { + case 8 * 1024: + default: + ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_8K; + ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_8K; + break; + + case 64 * 1024: + ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_64K; + ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_64K; + break; + + case 512 * 1024: + ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_512K; + ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_512K; + break; + + case 4 * 1024 * 1024: + ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_4MB; + ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_4MB; + break; + } + + ktsb_descr[0].assoc = 1; + ktsb_descr[0].num_ttes = KERNEL_TSB_NENTRIES; + ktsb_descr[0].ctx_idx = 0; + ktsb_descr[0].tsb_base = ktsb_pa; + ktsb_descr[0].resv = 0; + +#ifndef CONFIG_DEBUG_PAGEALLOC + /* Second KTSB for 4MB/256MB/2GB/16GB mappings. */ + ktsb_pa = (kern_base + + ((unsigned long)&swapper_4m_tsb[0] - KERNBASE)); + + ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB; + ktsb_descr[1].pgsz_mask = ((HV_PGSZ_MASK_4MB | + HV_PGSZ_MASK_256MB | + HV_PGSZ_MASK_2GB | + HV_PGSZ_MASK_16GB) & + cpu_pgsz_mask); + ktsb_descr[1].assoc = 1; + ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES; + ktsb_descr[1].ctx_idx = 0; + ktsb_descr[1].tsb_base = ktsb_pa; + ktsb_descr[1].resv = 0; +#endif +} + +void sun4v_ktsb_register(void) +{ + unsigned long pa, ret; + + pa = kern_base + ((unsigned long)&ktsb_descr[0] - KERNBASE); + + ret = sun4v_mmu_tsb_ctx0(NUM_KTSB_DESCR, pa); + if (ret != 0) { + prom_printf("hypervisor_mmu_tsb_ctx0[%lx]: " + "errors with %lx\n", pa, ret); + prom_halt(); + } +} + +static void __init sun4u_linear_pte_xor_finalize(void) +{ +#ifndef CONFIG_DEBUG_PAGEALLOC + /* This is where we would add Panther support for + * 32MB and 256MB pages. 
+ */ +#endif +} + +static void __init sun4v_linear_pte_xor_finalize(void) +{ +#ifndef CONFIG_DEBUG_PAGEALLOC + if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) { + kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ + PAGE_OFFSET; + kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | + _PAGE_P_4V | _PAGE_W_4V); + } else { + kern_linear_pte_xor[1] = kern_linear_pte_xor[0]; + } + + if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) { + kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^ + PAGE_OFFSET; + kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V | + _PAGE_P_4V | _PAGE_W_4V); + } else { + kern_linear_pte_xor[2] = kern_linear_pte_xor[1]; + } + + if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) { + kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^ + PAGE_OFFSET; + kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V | + _PAGE_P_4V | _PAGE_W_4V); + } else { + kern_linear_pte_xor[3] = kern_linear_pte_xor[2]; + } +#endif +} + +/* paging_init() sets up the page tables */ + +static unsigned long last_valid_pfn; +pgd_t swapper_pg_dir[PTRS_PER_PGD]; + +static void sun4u_pgprot_init(void); +static void sun4v_pgprot_init(void); + +void __init paging_init(void) +{ + unsigned long end_pfn, shift, phys_base; + unsigned long real_end, i; + int node; + + setup_page_offset(); + + /* These build time checkes make sure that the dcache_dirty_cpu() + * page->flags usage will work. + * + * When a page gets marked as dcache-dirty, we store the + * cpu number starting at bit 32 in the page->flags. Also, + * functions like clear_dcache_dirty_cpu use the cpu mask + * in 13-bit signed-immediate instruction fields. + */ + + /* + * Page flags must not reach into upper 32 bits that are used + * for the cpu number + */ + BUILD_BUG_ON(NR_PAGEFLAGS > 32); + + /* + * The bit fields placed in the high range must not reach below + * the 32 bit boundary. Otherwise we cannot place the cpu field + * at the 32 bit boundary. 
+ */ + BUILD_BUG_ON(SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH + + ilog2(roundup_pow_of_two(NR_CPUS)) > 32); + + BUILD_BUG_ON(NR_CPUS > 4096); + + kern_base = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB; + kern_size = (unsigned long)&_end - (unsigned long)KERNBASE; + + /* Invalidate both kernel TSBs. */ + memset(swapper_tsb, 0x40, sizeof(swapper_tsb)); +#ifndef CONFIG_DEBUG_PAGEALLOC + memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb)); +#endif + + if (tlb_type == hypervisor) + sun4v_pgprot_init(); + else + sun4u_pgprot_init(); + + if (tlb_type == cheetah_plus || + tlb_type == hypervisor) { + tsb_phys_patch(); + ktsb_phys_patch(); + } + + if (tlb_type == hypervisor) + sun4v_patch_tlb_handlers(); + + /* Find available physical memory... + * + * Read it twice in order to work around a bug in openfirmware. + * The call to grab this table itself can cause openfirmware to + * allocate memory, which in turn can take away some space from + * the list of available memory. Reading it twice makes sure + * we really do get the final value. 
+ */ + read_obp_translations(); + read_obp_memory("reg", &pall[0], &pall_ents); + read_obp_memory("available", &pavail[0], &pavail_ents); + read_obp_memory("available", &pavail[0], &pavail_ents); + + phys_base = 0xffffffffffffffffUL; + for (i = 0; i < pavail_ents; i++) { + phys_base = min(phys_base, pavail[i].phys_addr); + memblock_add(pavail[i].phys_addr, pavail[i].reg_size); + } + + memblock_reserve(kern_base, kern_size); + + find_ramdisk(phys_base); + + memblock_enforce_memory_limit(cmdline_memory_size); + + memblock_allow_resize(); + memblock_dump_all(); + + set_bit(0, mmu_context_bmap); + + shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE); + + real_end = (unsigned long)_end; + num_kernel_image_mappings = DIV_ROUND_UP(real_end - KERNBASE, 1 << ILOG2_4MB); + printk("Kernel: Using %d locked TLB entries for main kernel image.\n", + num_kernel_image_mappings); + + /* Set kernel pgd to upper alias so physical page computations + * work. + */ + init_mm.pgd += ((shift) / (sizeof(pgd_t))); + + memset(swapper_low_pmd_dir, 0, sizeof(swapper_low_pmd_dir)); + + /* Now can init the kernel/bad page tables. */ + pud_set(pud_offset(&swapper_pg_dir[0], 0), + swapper_low_pmd_dir + (shift / sizeof(pgd_t))); + + inherit_prom_mappings(); + + init_kpte_bitmap(); + + /* Ok, we can use our TLB miss and window trap handlers safely. 
*/ + setup_tba(); + + __flush_tlb_all(); + + prom_build_devicetree(); + of_populate_present_mask(); +#ifndef CONFIG_SMP + of_fill_in_cpu_data(); +#endif + + if (tlb_type == hypervisor) { + sun4v_mdesc_init(); + mdesc_populate_present_mask(cpu_all_mask); +#ifndef CONFIG_SMP + mdesc_fill_in_cpu_data(cpu_all_mask); +#endif + mdesc_get_page_sizes(cpu_all_mask, &cpu_pgsz_mask); + + sun4v_linear_pte_xor_finalize(); + + sun4v_ktsb_init(); + sun4v_ktsb_register(); + } else { + unsigned long impl, ver; + + cpu_pgsz_mask = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K | + HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB); + + __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver)); + impl = ((ver >> 32) & 0xffff); + if (impl == PANTHER_IMPL) + cpu_pgsz_mask |= (HV_PGSZ_MASK_32MB | + HV_PGSZ_MASK_256MB); + + sun4u_linear_pte_xor_finalize(); + } + + /* Flush the TLBs and the 4M TSB so that the updated linear + * pte XOR settings are realized for all mappings. + */ + __flush_tlb_all(); +#ifndef CONFIG_DEBUG_PAGEALLOC + memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb)); +#endif + __flush_tlb_all(); + + /* Setup bootmem... */ + last_valid_pfn = end_pfn = bootmem_init(phys_base); + + /* Once the OF device tree and MDESC have been setup, we know + * the list of possible cpus. Therefore we can allocate the + * IRQ stacks. 
+ */ + for_each_possible_cpu(i) { + node = cpu_to_node(i); + + softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node), + THREAD_SIZE, + THREAD_SIZE, 0); + hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node), + THREAD_SIZE, + THREAD_SIZE, 0); + } + + kernel_physical_mapping_init(); + + { + unsigned long max_zone_pfns[MAX_NR_ZONES]; + + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + + max_zone_pfns[ZONE_NORMAL] = end_pfn; + + free_area_init_nodes(max_zone_pfns); + } + + printk("Booting Linux...\n"); +} + +int page_in_phys_avail(unsigned long paddr) +{ + int i; + + paddr &= PAGE_MASK; + + for (i = 0; i < pavail_ents; i++) { + unsigned long start, end; + + start = pavail[i].phys_addr; + end = start + pavail[i].reg_size; + + if (paddr >= start && paddr < end) + return 1; + } + if (paddr >= kern_base && paddr < (kern_base + kern_size)) + return 1; +#ifdef CONFIG_BLK_DEV_INITRD + if (paddr >= __pa(initrd_start) && + paddr < __pa(PAGE_ALIGN(initrd_end))) + return 1; +#endif + + return 0; +} + +static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata; +static int pavail_rescan_ents __initdata; + +/* Certain OBP calls, such as fetching "available" properties, can + * claim physical memory. So, along with initializing the valid + * address bitmap, what we do here is refetch the physical available + * memory list again, and make sure it provides at least as much + * memory as 'pavail' does. 
+ */ +static void __init setup_valid_addr_bitmap_from_pavail(unsigned long *bitmap) +{ + int i; + + read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents); + + for (i = 0; i < pavail_ents; i++) { + unsigned long old_start, old_end; + + old_start = pavail[i].phys_addr; + old_end = old_start + pavail[i].reg_size; + while (old_start < old_end) { + int n; + + for (n = 0; n < pavail_rescan_ents; n++) { + unsigned long new_start, new_end; + + new_start = pavail_rescan[n].phys_addr; + new_end = new_start + + pavail_rescan[n].reg_size; + + if (new_start <= old_start && + new_end >= (old_start + PAGE_SIZE)) { + set_bit(old_start >> ILOG2_4MB, bitmap); + goto do_next_page; + } + } + + prom_printf("mem_init: Lost memory in pavail\n"); + prom_printf("mem_init: OLD start[%lx] size[%lx]\n", + pavail[i].phys_addr, + pavail[i].reg_size); + prom_printf("mem_init: NEW start[%lx] size[%lx]\n", + pavail_rescan[i].phys_addr, + pavail_rescan[i].reg_size); + prom_printf("mem_init: Cannot continue, aborting.\n"); + prom_halt(); + + do_next_page: + old_start += PAGE_SIZE; + } + } +} + +static void __init patch_tlb_miss_handler_bitmap(void) +{ + extern unsigned int valid_addr_bitmap_insn[]; + extern unsigned int valid_addr_bitmap_patch[]; + + valid_addr_bitmap_insn[1] = valid_addr_bitmap_patch[1]; + mb(); + valid_addr_bitmap_insn[0] = valid_addr_bitmap_patch[0]; + flushi(&valid_addr_bitmap_insn[0]); +} + +static void __init register_page_bootmem_info(void) +{ +#ifdef CONFIG_NEED_MULTIPLE_NODES + int i; + + for_each_online_node(i) + if (NODE_DATA(i)->node_spanned_pages) + register_page_bootmem_info_node(NODE_DATA(i)); +#endif +} +void __init mem_init(void) +{ + unsigned long addr, last; + + addr = PAGE_OFFSET + kern_base; + last = PAGE_ALIGN(kern_size) + addr; + while (addr < last) { + set_bit(__pa(addr) >> ILOG2_4MB, sparc64_valid_addr_bitmap); + addr += PAGE_SIZE; + } + + setup_valid_addr_bitmap_from_pavail(sparc64_valid_addr_bitmap); + patch_tlb_miss_handler_bitmap(); + + 
high_memory = __va(last_valid_pfn << PAGE_SHIFT); + + register_page_bootmem_info(); + free_all_bootmem(); + + /* + * Set up the zero page, mark it reserved, so that page count + * is not manipulated when freeing the page from user ptes. + */ + mem_map_zero = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0); + if (mem_map_zero == NULL) { + prom_printf("paging_init: Cannot alloc zero page.\n"); + prom_halt(); + } + mark_page_reserved(mem_map_zero); + + mem_init_print_info(NULL); + + if (tlb_type == cheetah || tlb_type == cheetah_plus) + cheetah_ecache_flush_init(); +} + +void free_initmem(void) +{ + unsigned long addr, initend; + int do_free = 1; + + /* If the physical memory maps were trimmed by kernel command + * line options, don't even try freeing this initmem stuff up. + * The kernel image could have been in the trimmed out region + * and if so the freeing below will free invalid page structs. + */ + if (cmdline_memory_size) + do_free = 0; + + /* + * The init section is aligned to 8k in vmlinux.lds. Page align for >8k pagesizes. 
+ */ + addr = PAGE_ALIGN((unsigned long)(__init_begin)); + initend = (unsigned long)(__init_end) & PAGE_MASK; + for (; addr < initend; addr += PAGE_SIZE) { + unsigned long page; + + page = (addr + + ((unsigned long) __va(kern_base)) - + ((unsigned long) KERNBASE)); + memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); + + if (do_free) + free_reserved_page(virt_to_page(page)); + } +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); +} +#endif + +#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U) +#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V) +#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U) +#define __DIRTY_BITS_4V (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V) +#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R) +#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R) + +pgprot_t PAGE_KERNEL __read_mostly; +EXPORT_SYMBOL(PAGE_KERNEL); + +pgprot_t PAGE_KERNEL_LOCKED __read_mostly; +pgprot_t PAGE_COPY __read_mostly; + +pgprot_t PAGE_SHARED __read_mostly; +EXPORT_SYMBOL(PAGE_SHARED); + +unsigned long pg_iobits __read_mostly; + +unsigned long _PAGE_IE __read_mostly; +EXPORT_SYMBOL(_PAGE_IE); + +unsigned long _PAGE_E __read_mostly; +EXPORT_SYMBOL(_PAGE_E); + +unsigned long _PAGE_CACHE __read_mostly; +EXPORT_SYMBOL(_PAGE_CACHE); + +#ifdef CONFIG_SPARSEMEM_VMEMMAP +unsigned long vmemmap_table[VMEMMAP_SIZE]; + +static long __meminitdata addr_start, addr_end; +static int __meminitdata node_start; + +int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, + int node) +{ + unsigned long phys_start = (vstart - VMEMMAP_BASE); + unsigned long phys_end = (vend - VMEMMAP_BASE); + unsigned long addr = phys_start & VMEMMAP_CHUNK_MASK; + unsigned long end = VMEMMAP_ALIGN(phys_end); + unsigned long pte_base; + + pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U | + _PAGE_CP_4U | 
_PAGE_CV_4U | + _PAGE_P_4U | _PAGE_W_4U); + if (tlb_type == hypervisor) + pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V | + _PAGE_CP_4V | _PAGE_CV_4V | + _PAGE_P_4V | _PAGE_W_4V); + + for (; addr < end; addr += VMEMMAP_CHUNK) { + unsigned long *vmem_pp = + vmemmap_table + (addr >> VMEMMAP_CHUNK_SHIFT); + void *block; + + if (!(*vmem_pp & _PAGE_VALID)) { + block = vmemmap_alloc_block(1UL << ILOG2_4MB, node); + if (!block) + return -ENOMEM; + + *vmem_pp = pte_base | __pa(block); + + /* check to see if we have contiguous blocks */ + if (addr_end != addr || node_start != node) { + if (addr_start) + printk(KERN_DEBUG " [%lx-%lx] on node %d\n", + addr_start, addr_end-1, node_start); + addr_start = addr; + node_start = node; + } + addr_end = addr + VMEMMAP_CHUNK; + } + } + return 0; +} + +void __meminit vmemmap_populate_print_last(void) +{ + if (addr_start) { + printk(KERN_DEBUG " [%lx-%lx] on node %d\n", + addr_start, addr_end-1, node_start); + addr_start = 0; + addr_end = 0; + node_start = 0; + } +} + +void vmemmap_free(unsigned long start, unsigned long end) +{ +} + +#endif /* CONFIG_SPARSEMEM_VMEMMAP */ + +static void prot_init_common(unsigned long page_none, + unsigned long page_shared, + unsigned long page_copy, + unsigned long page_readonly, + unsigned long page_exec_bit) +{ + PAGE_COPY = __pgprot(page_copy); + PAGE_SHARED = __pgprot(page_shared); + + protection_map[0x0] = __pgprot(page_none); + protection_map[0x1] = __pgprot(page_readonly & ~page_exec_bit); + protection_map[0x2] = __pgprot(page_copy & ~page_exec_bit); + protection_map[0x3] = __pgprot(page_copy & ~page_exec_bit); + protection_map[0x4] = __pgprot(page_readonly); + protection_map[0x5] = __pgprot(page_readonly); + protection_map[0x6] = __pgprot(page_copy); + protection_map[0x7] = __pgprot(page_copy); + protection_map[0x8] = __pgprot(page_none); + protection_map[0x9] = __pgprot(page_readonly & ~page_exec_bit); + protection_map[0xa] = __pgprot(page_shared & ~page_exec_bit); + protection_map[0xb] = 
__pgprot(page_shared & ~page_exec_bit); + protection_map[0xc] = __pgprot(page_readonly); + protection_map[0xd] = __pgprot(page_readonly); + protection_map[0xe] = __pgprot(page_shared); + protection_map[0xf] = __pgprot(page_shared); +} + +static void __init sun4u_pgprot_init(void) +{ + unsigned long page_none, page_shared, page_copy, page_readonly; + unsigned long page_exec_bit; + int i; + + PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID | + _PAGE_CACHE_4U | _PAGE_P_4U | + __ACCESS_BITS_4U | __DIRTY_BITS_4U | + _PAGE_EXEC_4U); + PAGE_KERNEL_LOCKED = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID | + _PAGE_CACHE_4U | _PAGE_P_4U | + __ACCESS_BITS_4U | __DIRTY_BITS_4U | + _PAGE_EXEC_4U | _PAGE_L_4U); + + _PAGE_IE = _PAGE_IE_4U; + _PAGE_E = _PAGE_E_4U; + _PAGE_CACHE = _PAGE_CACHE_4U; + + pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4U | __DIRTY_BITS_4U | + __ACCESS_BITS_4U | _PAGE_E_4U); + +#ifdef CONFIG_DEBUG_PAGEALLOC + kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET; +#else + kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^ + PAGE_OFFSET; +#endif + kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U | + _PAGE_P_4U | _PAGE_W_4U); + + for (i = 1; i < 4; i++) + kern_linear_pte_xor[i] = kern_linear_pte_xor[0]; + + _PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U | + _PAGE_SZ64K_4U | _PAGE_SZ8K_4U | + _PAGE_SZ32MB_4U | _PAGE_SZ256MB_4U); + + + page_none = _PAGE_PRESENT_4U | _PAGE_ACCESSED_4U | _PAGE_CACHE_4U; + page_shared = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U | + __ACCESS_BITS_4U | _PAGE_WRITE_4U | _PAGE_EXEC_4U); + page_copy = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U | + __ACCESS_BITS_4U | _PAGE_EXEC_4U); + page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U | + __ACCESS_BITS_4U | _PAGE_EXEC_4U); + + page_exec_bit = _PAGE_EXEC_4U; + + prot_init_common(page_none, page_shared, page_copy, page_readonly, + page_exec_bit); +} + +static void __init sun4v_pgprot_init(void) +{ + unsigned long page_none, page_shared, page_copy, 
page_readonly; + unsigned long page_exec_bit; + int i; + + PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID | + _PAGE_CACHE_4V | _PAGE_P_4V | + __ACCESS_BITS_4V | __DIRTY_BITS_4V | + _PAGE_EXEC_4V); + PAGE_KERNEL_LOCKED = PAGE_KERNEL; + + _PAGE_IE = _PAGE_IE_4V; + _PAGE_E = _PAGE_E_4V; + _PAGE_CACHE = _PAGE_CACHE_4V; + +#ifdef CONFIG_DEBUG_PAGEALLOC + kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET; +#else + kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^ + PAGE_OFFSET; +#endif + kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V | + _PAGE_P_4V | _PAGE_W_4V); + + for (i = 1; i < 4; i++) + kern_linear_pte_xor[i] = kern_linear_pte_xor[0]; + + pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V | + __ACCESS_BITS_4V | _PAGE_E_4V); + + _PAGE_ALL_SZ_BITS = (_PAGE_SZ16GB_4V | _PAGE_SZ2GB_4V | + _PAGE_SZ256MB_4V | _PAGE_SZ32MB_4V | + _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V | + _PAGE_SZ64K_4V | _PAGE_SZ8K_4V); + + page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | _PAGE_CACHE_4V; + page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + __ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V); + page_copy = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + __ACCESS_BITS_4V | _PAGE_EXEC_4V); + page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | _PAGE_CACHE_4V | + __ACCESS_BITS_4V | _PAGE_EXEC_4V); + + page_exec_bit = _PAGE_EXEC_4V; + + prot_init_common(page_none, page_shared, page_copy, page_readonly, + page_exec_bit); +} + +unsigned long pte_sz_bits(unsigned long sz) +{ + if (tlb_type == hypervisor) { + switch (sz) { + case 8 * 1024: + default: + return _PAGE_SZ8K_4V; + case 64 * 1024: + return _PAGE_SZ64K_4V; + case 512 * 1024: + return _PAGE_SZ512K_4V; + case 4 * 1024 * 1024: + return _PAGE_SZ4MB_4V; + } + } else { + switch (sz) { + case 8 * 1024: + default: + return _PAGE_SZ8K_4U; + case 64 * 1024: + return _PAGE_SZ64K_4U; + case 512 * 1024: + return _PAGE_SZ512K_4U; + case 4 * 1024 * 1024: + return _PAGE_SZ4MB_4U; + } + } +} + +pte_t 
mk_pte_io(unsigned long page, pgprot_t prot, int space, unsigned long page_size) +{ + pte_t pte; + + pte_val(pte) = page | pgprot_val(pgprot_noncached(prot)); + pte_val(pte) |= (((unsigned long)space) << 32); + pte_val(pte) |= pte_sz_bits(page_size); + + return pte; +} + +static unsigned long kern_large_tte(unsigned long paddr) +{ + unsigned long val; + + val = (_PAGE_VALID | _PAGE_SZ4MB_4U | + _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_P_4U | + _PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U); + if (tlb_type == hypervisor) + val = (_PAGE_VALID | _PAGE_SZ4MB_4V | + _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | + _PAGE_EXEC_4V | _PAGE_W_4V); + + return val | paddr; +} + +/* If not locked, zap it. */ +void __flush_tlb_all(void) +{ + unsigned long pstate; + int i; + + __asm__ __volatile__("flushw\n\t" + "rdpr %%pstate, %0\n\t" + "wrpr %0, %1, %%pstate" + : "=r" (pstate) + : "i" (PSTATE_IE)); + if (tlb_type == hypervisor) { + sun4v_mmu_demap_all(); + } else if (tlb_type == spitfire) { + for (i = 0; i < 64; i++) { + /* Spitfire Errata #32 workaround */ + /* NOTE: Always runs on spitfire, so no + * cheetah+ page size encodings. + */ + __asm__ __volatile__("stxa %0, [%1] %2\n\t" + "flush %%g6" + : /* No outputs */ + : "r" (0), + "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); + + if (!(spitfire_get_dtlb_data(i) & _PAGE_L_4U)) { + __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU)); + spitfire_put_dtlb_data(i, 0x0UL); + } + + /* Spitfire Errata #32 workaround */ + /* NOTE: Always runs on spitfire, so no + * cheetah+ page size encodings. 
+ */ + __asm__ __volatile__("stxa %0, [%1] %2\n\t" + "flush %%g6" + : /* No outputs */ + : "r" (0), + "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU)); + + if (!(spitfire_get_itlb_data(i) & _PAGE_L_4U)) { + __asm__ __volatile__("stxa %%g0, [%0] %1\n\t" + "membar #Sync" + : /* no outputs */ + : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU)); + spitfire_put_itlb_data(i, 0x0UL); + } + } + } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { + cheetah_flush_dtlb_all(); + cheetah_flush_itlb_all(); + } + __asm__ __volatile__("wrpr %0, 0, %%pstate" + : : "r" (pstate)); +} + +pte_t *pte_alloc_one_kernel(struct mm_struct *mm, + unsigned long address) +{ + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | + __GFP_REPEAT | __GFP_ZERO); + pte_t *pte = NULL; + + if (page) + pte = (pte_t *) page_address(page); + + return pte; +} + +pgtable_t pte_alloc_one(struct mm_struct *mm, + unsigned long address) +{ + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | + __GFP_REPEAT | __GFP_ZERO); + if (!page) + return NULL; + if (!pgtable_page_ctor(page)) { + free_hot_cold_page(page, 0); + return NULL; + } + return (pte_t *) page_address(page); +} + +void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +{ + free_page((unsigned long)pte); +} + +static void __pte_free(pgtable_t pte) +{ + struct page *page = virt_to_page(pte); + + pgtable_page_dtor(page); + __free_page(page); +} + +void pte_free(struct mm_struct *mm, pgtable_t pte) +{ + __pte_free(pte); +} + +void pgtable_free(void *table, bool is_page) +{ + if (is_page) + __pte_free(table); + else + kmem_cache_free(pgtable_cache, table); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd) +{ + unsigned long pte, flags; + struct mm_struct *mm; + pmd_t entry = *pmd; + + if (!pmd_large(entry) || !pmd_young(entry)) + return; + + pte = pmd_val(entry); + + /* We are fabricating 8MB pages using 4MB real hw pages. 
*/ + pte |= (addr & (1UL << REAL_HPAGE_SHIFT)); + + mm = vma->vm_mm; + + spin_lock_irqsave(&mm->context.lock, flags); + + if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) + __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, + addr, pte); + + spin_unlock_irqrestore(&mm->context.lock, flags); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +static void context_reload(void *__data) +{ + struct mm_struct *mm = __data; + + if (mm == current->mm) + load_secondary_context(mm); +} + +void hugetlb_setup(struct pt_regs *regs) +{ + struct mm_struct *mm = current->mm; + struct tsb_config *tp; + + if (in_atomic() || !mm) { + const struct exception_table_entry *entry; + + entry = search_exception_tables(regs->tpc); + if (entry) { + regs->tpc = entry->fixup; + regs->tnpc = regs->tpc + 4; + return; + } + pr_alert("Unexpected HugeTLB setup in atomic context.\n"); + die_if_kernel("HugeTSB in atomic", regs); + } + + tp = &mm->context.tsb_block[MM_TSB_HUGE]; + if (likely(tp->tsb == NULL)) + tsb_grow(mm, MM_TSB_HUGE, 0); + + tsb_context_switch(mm); + smp_tsb_sync(mm); + + /* On UltraSPARC-III+ and later, configure the second half of + * the Data-TLB for huge pages. + */ + if (tlb_type == cheetah_plus) { + unsigned long ctx; + + spin_lock(&ctx_alloc_lock); + ctx = mm->context.sparc64_ctx_val; + ctx &= ~CTX_PGSZ_MASK; + ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT; + ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT; + + if (ctx != mm->context.sparc64_ctx_val) { + /* When changing the page size fields, we + * must perform a context flush so that no + * stale entries match. This flush must + * occur with the original context register + * settings. + */ + do_flush_tlb_mm(mm); + + /* Reload the context register of all processors + * also executing in this address space. 
+ */ + mm->context.sparc64_ctx_val = ctx; + on_each_cpu(context_reload, mm, 0); + } + spin_unlock(&ctx_alloc_lock); + } +} +#endif diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h new file mode 100644 index 00000000000..0668b364f44 --- /dev/null +++ b/arch/sparc/mm/init_64.h @@ -0,0 +1,52 @@ +#ifndef _SPARC64_MM_INIT_H +#define _SPARC64_MM_INIT_H + +#include <asm/page.h> + +/* Most of the symbols in this file are defined in init.c and + * marked non-static so that assembler code can get at them. + */ + +#define MAX_PHYS_ADDRESS (1UL << MAX_PHYS_ADDRESS_BITS) +#define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL) +#define KPTE_BITMAP_BYTES \ + ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4) +#define VALID_ADDR_BITMAP_CHUNK_SZ (4UL * 1024UL * 1024UL) +#define VALID_ADDR_BITMAP_BYTES \ + ((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8) + +extern unsigned long kern_linear_pte_xor[4]; +extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; +extern unsigned int sparc64_highest_unlocked_tlb_ent; +extern unsigned long sparc64_kern_pri_context; +extern unsigned long sparc64_kern_pri_nuc_bits; +extern unsigned long sparc64_kern_sec_context; +void mmu_info(struct seq_file *m); + +struct linux_prom_translation { + unsigned long virt; + unsigned long size; + unsigned long data; +}; + +/* Exported for kernel TLB miss handling in ktlb.S */ +extern struct linux_prom_translation prom_trans[512]; +extern unsigned int prom_trans_ents; + +/* Exported for SMP bootup purposes. 
*/ +extern unsigned long kern_locked_tte_data; + +void prom_world(int enter); + +#ifdef CONFIG_SPARSEMEM_VMEMMAP +#define VMEMMAP_CHUNK_SHIFT 22 +#define VMEMMAP_CHUNK (1UL << VMEMMAP_CHUNK_SHIFT) +#define VMEMMAP_CHUNK_MASK ~(VMEMMAP_CHUNK - 1UL) +#define VMEMMAP_ALIGN(x) (((x)+VMEMMAP_CHUNK-1UL)&VMEMMAP_CHUNK_MASK) + +#define VMEMMAP_SIZE ((((1UL << MAX_PHYSADDR_BITS) >> PAGE_SHIFT) * \ + sizeof(struct page)) >> VMEMMAP_CHUNK_SHIFT) +extern unsigned long vmemmap_table[VMEMMAP_SIZE]; +#endif + +#endif /* _SPARC64_MM_INIT_H */ diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c index b86dfce8eee..f311bf21901 100644 --- a/arch/sparc/mm/io-unit.c +++ b/arch/sparc/mm/io-unit.c @@ -1,4 +1,4 @@ -/* $Id: io-unit.c,v 1.24 2001/12/17 07:05:09 davem Exp $ +/* * io-unit.c: IO-UNIT specific routines for memory management. * * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) @@ -12,10 +12,11 @@ #include <linux/highmem.h> /* pte_offset_map => kmap_atomic */ #include <linux/bitops.h> #include <linux/scatterlist.h> +#include <linux/of.h> +#include <linux/of_device.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> -#include <asm/sbus.h> #include <asm/io.h> #include <asm/io-unit.h> #include <asm/mxcc.h> @@ -24,6 +25,8 @@ #include <asm/dma.h> #include <asm/oplib.h> +#include "mm_32.h" + /* #define IOUNIT_DEBUG */ #ifdef IOUNIT_DEBUG #define IOD(x) printk(x) @@ -34,13 +37,11 @@ #define IOPERM (IOUPTE_CACHE | IOUPTE_WRITE | IOUPTE_VALID) #define MKIOPTE(phys) __iopte((((phys)>>4) & IOUPTE_PAGE) | IOPERM) -void __init -iounit_init(int sbi_node, int io_node, struct sbus_bus *sbus) +static void __init iounit_iommu_init(struct platform_device *op) { - iopte_t *xpt, *xptend; struct iounit_struct *iounit; - struct linux_prom_registers iommu_promregs[PROMREG_MAX]; - struct resource r; + iopte_t __iomem *xpt; + iopte_t __iomem *xptend; iounit = kzalloc(sizeof(struct iounit_struct), GFP_ATOMIC); if (!iounit) { @@ -55,26 +56,40 @@ iounit_init(int sbi_node, int 
io_node, struct sbus_bus *sbus) iounit->rotor[1] = IOUNIT_BMAP2_START; iounit->rotor[2] = IOUNIT_BMAPM_START; - xpt = NULL; - if(prom_getproperty(sbi_node, "reg", (void *) iommu_promregs, - sizeof(iommu_promregs)) != -1) { - prom_apply_generic_ranges(io_node, 0, iommu_promregs, 3); - memset(&r, 0, sizeof(r)); - r.flags = iommu_promregs[2].which_io; - r.start = iommu_promregs[2].phys_addr; - xpt = (iopte_t *) sbus_ioremap(&r, 0, PAGE_SIZE * 16, "XPT"); + xpt = of_ioremap(&op->resource[2], 0, PAGE_SIZE * 16, "XPT"); + if (!xpt) { + prom_printf("SUN4D: Cannot map External Page Table."); + prom_halt(); } - if(!xpt) panic("Cannot map External Page Table."); - sbus->ofdev.dev.archdata.iommu = iounit; + op->dev.archdata.iommu = iounit; iounit->page_table = xpt; spin_lock_init(&iounit->lock); - - for (xptend = iounit->page_table + (16 * PAGE_SIZE) / sizeof(iopte_t); - xpt < xptend;) - iopte_val(*xpt++) = 0; + + xptend = iounit->page_table + (16 * PAGE_SIZE) / sizeof(iopte_t); + for (; xpt < xptend; xpt++) + sbus_writel(0, xpt); +} + +static int __init iounit_init(void) +{ + extern void sun4d_init_sbi_irq(void); + struct device_node *dp; + + for_each_node_by_name(dp, "sbi") { + struct platform_device *op = of_find_device_by_node(dp); + + iounit_iommu_init(op); + of_propagate_archdata(op); + } + + sun4d_init_sbi_irq(); + + return 0; } +subsys_initcall(iounit_init); + /* One has to hold iounit->lock to call this */ static unsigned long iounit_get_area(struct iounit_struct *iounit, unsigned long vaddr, int size) { @@ -118,16 +133,16 @@ nexti: scan = find_next_zero_bit(iounit->bmap, limit, scan); vaddr = IOUNIT_DMA_BASE + (scan << PAGE_SHIFT) + (vaddr & ~PAGE_MASK); for (k = 0; k < npages; k++, iopte = __iopte(iopte_val(iopte) + 0x100), scan++) { set_bit(scan, iounit->bmap); - iounit->page_table[scan] = iopte; + sbus_writel(iopte, &iounit->page_table[scan]); } IOD(("%08lx\n", vaddr)); return vaddr; } -static __u32 iounit_get_scsi_one(char *vaddr, unsigned long len, struct 
sbus_bus *sbus) +static __u32 iounit_get_scsi_one(struct device *dev, char *vaddr, unsigned long len) { + struct iounit_struct *iounit = dev->archdata.iommu; unsigned long ret, flags; - struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; spin_lock_irqsave(&iounit->lock, flags); ret = iounit_get_area(iounit, (unsigned long)vaddr, len); @@ -135,26 +150,26 @@ static __u32 iounit_get_scsi_one(char *vaddr, unsigned long len, struct sbus_bus return ret; } -static void iounit_get_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_bus *sbus) +static void iounit_get_scsi_sgl(struct device *dev, struct scatterlist *sg, int sz) { + struct iounit_struct *iounit = dev->archdata.iommu; unsigned long flags; - struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; /* FIXME: Cache some resolved pages - often several sg entries are to the same page */ spin_lock_irqsave(&iounit->lock, flags); while (sz != 0) { --sz; - sg->dvma_address = iounit_get_area(iounit, (unsigned long) sg_virt(sg), sg->length); - sg->dvma_length = sg->length; + sg->dma_address = iounit_get_area(iounit, (unsigned long) sg_virt(sg), sg->length); + sg->dma_length = sg->length; sg = sg_next(sg); } spin_unlock_irqrestore(&iounit->lock, flags); } -static void iounit_release_scsi_one(__u32 vaddr, unsigned long len, struct sbus_bus *sbus) +static void iounit_release_scsi_one(struct device *dev, __u32 vaddr, unsigned long len) { + struct iounit_struct *iounit = dev->archdata.iommu; unsigned long flags; - struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; spin_lock_irqsave(&iounit->lock, flags); len = ((vaddr & ~PAGE_MASK) + len + (PAGE_SIZE-1)) >> PAGE_SHIFT; @@ -165,17 +180,17 @@ static void iounit_release_scsi_one(__u32 vaddr, unsigned long len, struct sbus_ spin_unlock_irqrestore(&iounit->lock, flags); } -static void iounit_release_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_bus *sbus) +static void iounit_release_scsi_sgl(struct device *dev, struct scatterlist *sg, int sz) { 
+ struct iounit_struct *iounit = dev->archdata.iommu; unsigned long flags; unsigned long vaddr, len; - struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; spin_lock_irqsave(&iounit->lock, flags); while (sz != 0) { --sz; - len = ((sg->dvma_address & ~PAGE_MASK) + sg->length + (PAGE_SIZE-1)) >> PAGE_SHIFT; - vaddr = (sg->dvma_address - IOUNIT_DMA_BASE) >> PAGE_SHIFT; + len = ((sg->dma_address & ~PAGE_MASK) + sg->length + (PAGE_SIZE-1)) >> PAGE_SHIFT; + vaddr = (sg->dma_address - IOUNIT_DMA_BASE) >> PAGE_SHIFT; IOD(("iounit_release %08lx-%08lx\n", (long)vaddr, (long)len+vaddr)); for (len += vaddr; vaddr < len; vaddr++) clear_bit(vaddr, iounit->bmap); @@ -185,12 +200,12 @@ static void iounit_release_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_ } #ifdef CONFIG_SBUS -static int iounit_map_dma_area(dma_addr_t *pba, unsigned long va, __u32 addr, int len) +static int iounit_map_dma_area(struct device *dev, dma_addr_t *pba, unsigned long va, unsigned long addr, int len) { + struct iounit_struct *iounit = dev->archdata.iommu; unsigned long page, end; pgprot_t dvma_prot; - iopte_t *iopte; - struct sbus_bus *sbus; + iopte_t __iomem *iopte; *pba = addr; @@ -212,12 +227,8 @@ static int iounit_map_dma_area(dma_addr_t *pba, unsigned long va, __u32 addr, in i = ((addr - IOUNIT_DMA_BASE) >> PAGE_SHIFT); - for_each_sbus(sbus) { - struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; - - iopte = (iopte_t *)(iounit->page_table + i); - *iopte = MKIOPTE(__pa(page)); - } + iopte = iounit->page_table + i; + sbus_writel(MKIOPTE(__pa(page)), iopte); } addr += PAGE_SIZE; va += PAGE_SIZE; @@ -228,97 +239,24 @@ static int iounit_map_dma_area(dma_addr_t *pba, unsigned long va, __u32 addr, in return 0; } -static void iounit_unmap_dma_area(unsigned long addr, int len) +static void iounit_unmap_dma_area(struct device *dev, unsigned long addr, int len) { /* XXX Somebody please fill this in */ } - -/* XXX We do not pass sbus device here, bad. 
*/ -static struct page *iounit_translate_dvma(unsigned long addr) -{ - struct sbus_bus *sbus = sbus_root; /* They are all the same */ - struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; - int i; - iopte_t *iopte; - - i = ((addr - IOUNIT_DMA_BASE) >> PAGE_SHIFT); - iopte = (iopte_t *)(iounit->page_table + i); - return pfn_to_page(iopte_val(*iopte) >> (PAGE_SHIFT-4)); /* XXX sun4d guru, help */ -} #endif -static char *iounit_lockarea(char *vaddr, unsigned long len) -{ -/* FIXME: Write this */ - return vaddr; -} - -static void iounit_unlockarea(char *vaddr, unsigned long len) -{ -/* FIXME: Write this */ -} - -void __init ld_mmu_iounit(void) -{ - BTFIXUPSET_CALL(mmu_lockarea, iounit_lockarea, BTFIXUPCALL_RETO0); - BTFIXUPSET_CALL(mmu_unlockarea, iounit_unlockarea, BTFIXUPCALL_NOP); - - BTFIXUPSET_CALL(mmu_get_scsi_one, iounit_get_scsi_one, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_get_scsi_sgl, iounit_get_scsi_sgl, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_release_scsi_one, iounit_release_scsi_one, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_release_scsi_sgl, iounit_release_scsi_sgl, BTFIXUPCALL_NORM); - +static const struct sparc32_dma_ops iounit_dma_ops = { + .get_scsi_one = iounit_get_scsi_one, + .get_scsi_sgl = iounit_get_scsi_sgl, + .release_scsi_one = iounit_release_scsi_one, + .release_scsi_sgl = iounit_release_scsi_sgl, #ifdef CONFIG_SBUS - BTFIXUPSET_CALL(mmu_map_dma_area, iounit_map_dma_area, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_unmap_dma_area, iounit_unmap_dma_area, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_translate_dvma, iounit_translate_dvma, BTFIXUPCALL_NORM); + .map_dma_area = iounit_map_dma_area, + .unmap_dma_area = iounit_unmap_dma_area, #endif -} +}; -__u32 iounit_map_dma_init(struct sbus_bus *sbus, int size) -{ - int i, j, k, npages; - unsigned long rotor, scan, limit; - unsigned long flags; - __u32 ret; - struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; - - npages = (size + (PAGE_SIZE-1)) >> PAGE_SHIFT; - i = 0x0213; - 
spin_lock_irqsave(&iounit->lock, flags); -next: j = (i & 15); - rotor = iounit->rotor[j - 1]; - limit = iounit->limit[j]; - scan = rotor; -nexti: scan = find_next_zero_bit(iounit->bmap, limit, scan); - if (scan + npages > limit) { - if (limit != rotor) { - limit = rotor; - scan = iounit->limit[j - 1]; - goto nexti; - } - i >>= 4; - if (!(i & 15)) - panic("iounit_map_dma_init: Couldn't find free iopte slots for %d bytes\n", size); - goto next; - } - for (k = 1, scan++; k < npages; k++) - if (test_bit(scan++, iounit->bmap)) - goto nexti; - iounit->rotor[j - 1] = (scan < limit) ? scan : iounit->limit[j - 1]; - scan -= npages; - ret = IOUNIT_DMA_BASE + (scan << PAGE_SHIFT); - for (k = 0; k < npages; k++, scan++) - set_bit(scan, iounit->bmap); - spin_unlock_irqrestore(&iounit->lock, flags); - return ret; -} - -__u32 iounit_map_dma_page(__u32 vaddr, void *addr, struct sbus_bus *sbus) +void __init ld_mmu_iounit(void) { - int scan = (vaddr - IOUNIT_DMA_BASE) >> PAGE_SHIFT; - struct iounit_struct *iounit = sbus->ofdev.dev.archdata.iommu; - - iounit->page_table[scan] = MKIOPTE(__pa(((unsigned long)addr) & PAGE_MASK)); - return vaddr + (((unsigned long)addr) & ~PAGE_MASK); + sparc32_dma_ops = &iounit_dma_ops; } diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 4b934270f05..491511d37e3 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -13,10 +13,11 @@ #include <linux/slab.h> #include <linux/highmem.h> /* pte_offset_map => kmap_atomic */ #include <linux/scatterlist.h> +#include <linux/of.h> +#include <linux/of_device.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> -#include <asm/sbus.h> #include <asm/io.h> #include <asm/mxcc.h> #include <asm/mbus.h> @@ -26,6 +27,8 @@ #include <asm/iommu.h> #include <asm/dma.h> +#include "mm_32.h" + /* * This can be sized dynamically, but we will do this * only when we have a guidance about actual I/O pressures. 
@@ -33,14 +36,9 @@ #define IOMMU_RNGE IOMMU_RNGE_256MB #define IOMMU_START 0xF0000000 #define IOMMU_WINSIZE (256*1024*1024U) -#define IOMMU_NPTES (IOMMU_WINSIZE/PAGE_SIZE) /* 64K PTEs, 265KB */ +#define IOMMU_NPTES (IOMMU_WINSIZE/PAGE_SIZE) /* 64K PTEs, 256KB */ #define IOMMU_ORDER 6 /* 4096 * (1<<6) */ -/* srmmu.c */ -extern int viking_mxcc_present; -BTFIXUPDEF_CALL(void, flush_page_for_dma, unsigned long) -#define flush_page_for_dma(page) BTFIXUP_CALL(flush_page_for_dma)(page) -extern int flush_page_for_dma_global; static int viking_flush; /* viking.S */ extern void viking_flush_page(unsigned long page); @@ -55,40 +53,35 @@ static pgprot_t dvma_prot; /* Consistent mapping pte flags */ #define IOPERM (IOPTE_CACHE | IOPTE_WRITE | IOPTE_VALID) #define MKIOPTE(pfn, perm) (((((pfn)<<8) & IOPTE_PAGE) | (perm)) & ~IOPTE_WAZ) -void __init -iommu_init(int iommund, struct sbus_bus *sbus) +static void __init sbus_iommu_init(struct platform_device *op) { - unsigned int impl, vers; - unsigned long tmp; struct iommu_struct *iommu; - struct linux_prom_registers iommu_promregs[PROMREG_MAX]; - struct resource r; + unsigned int impl, vers; unsigned long *bitmap; + unsigned long control; + unsigned long base; + unsigned long tmp; - iommu = kmalloc(sizeof(struct iommu_struct), GFP_ATOMIC); + iommu = kmalloc(sizeof(struct iommu_struct), GFP_KERNEL); if (!iommu) { prom_printf("Unable to allocate iommu structure\n"); prom_halt(); } - iommu->regs = NULL; - if (prom_getproperty(iommund, "reg", (void *) iommu_promregs, - sizeof(iommu_promregs)) != -1) { - memset(&r, 0, sizeof(r)); - r.flags = iommu_promregs[0].which_io; - r.start = iommu_promregs[0].phys_addr; - iommu->regs = (struct iommu_regs *) - sbus_ioremap(&r, 0, PAGE_SIZE * 3, "iommu_regs"); - } + + iommu->regs = of_ioremap(&op->resource[0], 0, PAGE_SIZE * 3, + "iommu_regs"); if (!iommu->regs) { prom_printf("Cannot map IOMMU registers\n"); prom_halt(); } - impl = (iommu->regs->control & IOMMU_CTRL_IMPL) >> 28; - vers = 
(iommu->regs->control & IOMMU_CTRL_VERS) >> 24; - tmp = iommu->regs->control; - tmp &= ~(IOMMU_CTRL_RNGE); - tmp |= (IOMMU_RNGE_256MB | IOMMU_CTRL_ENAB); - iommu->regs->control = tmp; + + control = sbus_readl(&iommu->regs->control); + impl = (control & IOMMU_CTRL_IMPL) >> 28; + vers = (control & IOMMU_CTRL_VERS) >> 24; + control &= ~(IOMMU_CTRL_RNGE); + control |= (IOMMU_RNGE_256MB | IOMMU_CTRL_ENAB); + sbus_writel(control, &iommu->regs->control); + iommu_invalidate(iommu->regs); iommu->start = IOMMU_START; iommu->end = 0xffffffff; @@ -100,8 +93,8 @@ iommu_init(int iommund, struct sbus_bus *sbus) it to us. */ tmp = __get_free_pages(GFP_KERNEL, IOMMU_ORDER); if (!tmp) { - prom_printf("Unable to allocate iommu table [0x%08x]\n", - IOMMU_NPTES*sizeof(iopte_t)); + prom_printf("Unable to allocate iommu table [0x%lx]\n", + IOMMU_NPTES * sizeof(iopte_t)); prom_halt(); } iommu->page_table = (iopte_t *)tmp; @@ -110,7 +103,9 @@ iommu_init(int iommund, struct sbus_bus *sbus) memset(iommu->page_table, 0, IOMMU_NPTES*sizeof(iopte_t)); flush_cache_all(); flush_tlb_all(); - iommu->regs->base = __pa((unsigned long) iommu->page_table) >> 4; + + base = __pa((unsigned long)iommu->page_table) >> 4; + sbus_writel(base, &iommu->regs->base); iommu_invalidate(iommu->regs); bitmap = kmalloc(IOMMU_NPTES>>3, GFP_KERNEL); @@ -128,14 +123,29 @@ iommu_init(int iommund, struct sbus_bus *sbus) else iommu->usemap.num_colors = 1; - printk("IOMMU: impl %d vers %d table 0x%p[%d B] map [%d b]\n", - impl, vers, iommu->page_table, - (int)(IOMMU_NPTES*sizeof(iopte_t)), (int)IOMMU_NPTES); + printk(KERN_INFO "IOMMU: impl %d vers %d table 0x%p[%d B] map [%d b]\n", + impl, vers, iommu->page_table, + (int)(IOMMU_NPTES*sizeof(iopte_t)), (int)IOMMU_NPTES); + + op->dev.archdata.iommu = iommu; +} + +static int __init iommu_init(void) +{ + struct device_node *dp; + + for_each_node_by_name(dp, "iommu") { + struct platform_device *op = of_find_device_by_node(dp); - sbus->ofdev.dev.archdata.iommu = iommu; + 
sbus_iommu_init(op); + of_propagate_archdata(op); + } + + return 0; } -/* This begs to be btfixup-ed by srmmu. */ +subsys_initcall(iommu_init); + /* Flush the iotlb entries to ram. */ /* This could be better if we didn't have to flush whole pages. */ static void iommu_flush_iotlb(iopte_t *iopte, unsigned int niopte) @@ -164,9 +174,9 @@ static void iommu_flush_iotlb(iopte_t *iopte, unsigned int niopte) } } -static u32 iommu_get_one(struct page *page, int npages, struct sbus_bus *sbus) +static u32 iommu_get_one(struct device *dev, struct page *page, int npages) { - struct iommu_struct *iommu = sbus->ofdev.dev.archdata.iommu; + struct iommu_struct *iommu = dev->archdata.iommu; int ioptex; iopte_t *iopte, *iopte0; unsigned int busa, busa0; @@ -194,8 +204,7 @@ static u32 iommu_get_one(struct page *page, int npages, struct sbus_bus *sbus) return busa0; } -static u32 iommu_get_scsi_one(char *vaddr, unsigned int len, - struct sbus_bus *sbus) +static u32 iommu_get_scsi_one(struct device *dev, char *vaddr, unsigned int len) { unsigned long off; int npages; @@ -205,22 +214,17 @@ static u32 iommu_get_scsi_one(char *vaddr, unsigned int len, off = (unsigned long)vaddr & ~PAGE_MASK; npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT; page = virt_to_page((unsigned long)vaddr & PAGE_MASK); - busa = iommu_get_one(page, npages, sbus); + busa = iommu_get_one(dev, page, npages); return busa + off; } -static __u32 iommu_get_scsi_one_noflush(char *vaddr, unsigned long len, struct sbus_bus *sbus) -{ - return iommu_get_scsi_one(vaddr, len, sbus); -} - -static __u32 iommu_get_scsi_one_gflush(char *vaddr, unsigned long len, struct sbus_bus *sbus) +static __u32 iommu_get_scsi_one_gflush(struct device *dev, char *vaddr, unsigned long len) { flush_page_for_dma(0); - return iommu_get_scsi_one(vaddr, len, sbus); + return iommu_get_scsi_one(dev, vaddr, len); } -static __u32 iommu_get_scsi_one_pflush(char *vaddr, unsigned long len, struct sbus_bus *sbus) +static __u32 iommu_get_scsi_one_pflush(struct 
device *dev, char *vaddr, unsigned long len) { unsigned long page = ((unsigned long) vaddr) & PAGE_MASK; @@ -228,23 +232,10 @@ static __u32 iommu_get_scsi_one_pflush(char *vaddr, unsigned long len, struct sb flush_page_for_dma(page); page += PAGE_SIZE; } - return iommu_get_scsi_one(vaddr, len, sbus); -} - -static void iommu_get_scsi_sgl_noflush(struct scatterlist *sg, int sz, struct sbus_bus *sbus) -{ - int n; - - while (sz != 0) { - --sz; - n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; - sg->dvma_address = iommu_get_one(sg_page(sg), n, sbus) + sg->offset; - sg->dvma_length = (__u32) sg->length; - sg = sg_next(sg); - } + return iommu_get_scsi_one(dev, vaddr, len); } -static void iommu_get_scsi_sgl_gflush(struct scatterlist *sg, int sz, struct sbus_bus *sbus) +static void iommu_get_scsi_sgl_gflush(struct device *dev, struct scatterlist *sg, int sz) { int n; @@ -252,13 +243,13 @@ static void iommu_get_scsi_sgl_gflush(struct scatterlist *sg, int sz, struct sbu while (sz != 0) { --sz; n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; - sg->dvma_address = iommu_get_one(sg_page(sg), n, sbus) + sg->offset; - sg->dvma_length = (__u32) sg->length; + sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset; + sg->dma_length = sg->length; sg = sg_next(sg); } } -static void iommu_get_scsi_sgl_pflush(struct scatterlist *sg, int sz, struct sbus_bus *sbus) +static void iommu_get_scsi_sgl_pflush(struct device *dev, struct scatterlist *sg, int sz) { unsigned long page, oldpage = 0; int n, i; @@ -283,15 +274,15 @@ static void iommu_get_scsi_sgl_pflush(struct scatterlist *sg, int sz, struct sbu } } - sg->dvma_address = iommu_get_one(sg_page(sg), n, sbus) + sg->offset; - sg->dvma_length = (__u32) sg->length; + sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset; + sg->dma_length = sg->length; sg = sg_next(sg); } } -static void iommu_release_one(u32 busa, int npages, struct sbus_bus *sbus) +static void iommu_release_one(struct device *dev, 
u32 busa, int npages) { - struct iommu_struct *iommu = sbus->ofdev.dev.archdata.iommu; + struct iommu_struct *iommu = dev->archdata.iommu; int ioptex; int i; @@ -305,17 +296,17 @@ static void iommu_release_one(u32 busa, int npages, struct sbus_bus *sbus) bit_map_clear(&iommu->usemap, ioptex, npages); } -static void iommu_release_scsi_one(__u32 vaddr, unsigned long len, struct sbus_bus *sbus) +static void iommu_release_scsi_one(struct device *dev, __u32 vaddr, unsigned long len) { unsigned long off; int npages; off = vaddr & ~PAGE_MASK; npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT; - iommu_release_one(vaddr & PAGE_MASK, npages, sbus); + iommu_release_one(dev, vaddr & PAGE_MASK, npages); } -static void iommu_release_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_bus *sbus) +static void iommu_release_scsi_sgl(struct device *dev, struct scatterlist *sg, int sz) { int n; @@ -323,18 +314,18 @@ static void iommu_release_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_b --sz; n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; - iommu_release_one(sg->dvma_address & PAGE_MASK, n, sbus); - sg->dvma_address = 0x21212121; + iommu_release_one(dev, sg->dma_address & PAGE_MASK, n); + sg->dma_address = 0x21212121; sg = sg_next(sg); } } #ifdef CONFIG_SBUS -static int iommu_map_dma_area(dma_addr_t *pba, unsigned long va, - unsigned long addr, int len) +static int iommu_map_dma_area(struct device *dev, dma_addr_t *pba, unsigned long va, + unsigned long addr, int len) { + struct iommu_struct *iommu = dev->archdata.iommu; unsigned long page, end; - struct iommu_struct *iommu = sbus_root->ofdev.dev.archdata.iommu; iopte_t *iopte = iommu->page_table; iopte_t *first; int ioptex; @@ -397,9 +388,9 @@ static int iommu_map_dma_area(dma_addr_t *pba, unsigned long va, return 0; } -static void iommu_unmap_dma_area(unsigned long busa, int len) +static void iommu_unmap_dma_area(struct device *dev, unsigned long busa, int len) { - struct iommu_struct *iommu = 
sbus_root->ofdev.dev.archdata.iommu; + struct iommu_struct *iommu = dev->archdata.iommu; iopte_t *iopte = iommu->page_table; unsigned long end; int ioptex = (busa - iommu->start) >> PAGE_SHIFT; @@ -417,52 +408,38 @@ static void iommu_unmap_dma_area(unsigned long busa, int len) iommu_invalidate(iommu->regs); bit_map_clear(&iommu->usemap, ioptex, len >> PAGE_SHIFT); } - -static struct page *iommu_translate_dvma(unsigned long busa) -{ - struct iommu_struct *iommu = sbus_root->ofdev.dev.archdata.iommu; - iopte_t *iopte = iommu->page_table; - - iopte += ((busa - iommu->start) >> PAGE_SHIFT); - return pfn_to_page((iopte_val(*iopte) & IOPTE_PAGE) >> (PAGE_SHIFT-4)); -} #endif -static char *iommu_lockarea(char *vaddr, unsigned long len) -{ - return vaddr; -} +static const struct sparc32_dma_ops iommu_dma_gflush_ops = { + .get_scsi_one = iommu_get_scsi_one_gflush, + .get_scsi_sgl = iommu_get_scsi_sgl_gflush, + .release_scsi_one = iommu_release_scsi_one, + .release_scsi_sgl = iommu_release_scsi_sgl, +#ifdef CONFIG_SBUS + .map_dma_area = iommu_map_dma_area, + .unmap_dma_area = iommu_unmap_dma_area, +#endif +}; -static void iommu_unlockarea(char *vaddr, unsigned long len) -{ -} +static const struct sparc32_dma_ops iommu_dma_pflush_ops = { + .get_scsi_one = iommu_get_scsi_one_pflush, + .get_scsi_sgl = iommu_get_scsi_sgl_pflush, + .release_scsi_one = iommu_release_scsi_one, + .release_scsi_sgl = iommu_release_scsi_sgl, +#ifdef CONFIG_SBUS + .map_dma_area = iommu_map_dma_area, + .unmap_dma_area = iommu_unmap_dma_area, +#endif +}; void __init ld_mmu_iommu(void) { - viking_flush = (BTFIXUPVAL_CALL(flush_page_for_dma) == (unsigned long)viking_flush_page); - BTFIXUPSET_CALL(mmu_lockarea, iommu_lockarea, BTFIXUPCALL_RETO0); - BTFIXUPSET_CALL(mmu_unlockarea, iommu_unlockarea, BTFIXUPCALL_NOP); - - if (!BTFIXUPVAL_CALL(flush_page_for_dma)) { - /* IO coherent chip */ - BTFIXUPSET_CALL(mmu_get_scsi_one, iommu_get_scsi_one_noflush, BTFIXUPCALL_RETO0); - BTFIXUPSET_CALL(mmu_get_scsi_sgl, 
iommu_get_scsi_sgl_noflush, BTFIXUPCALL_NORM); - } else if (flush_page_for_dma_global) { + if (flush_page_for_dma_global) { /* flush_page_for_dma flushes everything, no matter of what page is it */ - BTFIXUPSET_CALL(mmu_get_scsi_one, iommu_get_scsi_one_gflush, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_get_scsi_sgl, iommu_get_scsi_sgl_gflush, BTFIXUPCALL_NORM); + sparc32_dma_ops = &iommu_dma_gflush_ops; } else { - BTFIXUPSET_CALL(mmu_get_scsi_one, iommu_get_scsi_one_pflush, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_get_scsi_sgl, iommu_get_scsi_sgl_pflush, BTFIXUPCALL_NORM); + sparc32_dma_ops = &iommu_dma_pflush_ops; } - BTFIXUPSET_CALL(mmu_release_scsi_one, iommu_release_scsi_one, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_release_scsi_sgl, iommu_release_scsi_sgl, BTFIXUPCALL_NORM); - -#ifdef CONFIG_SBUS - BTFIXUPSET_CALL(mmu_map_dma_area, iommu_map_dma_area, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_unmap_dma_area, iommu_unmap_dma_area, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_translate_dvma, iommu_translate_dvma, BTFIXUPCALL_NORM); -#endif if (viking_mxcc_present || srmmu_modtype == HyperSparc) { dvma_prot = __pgprot(SRMMU_CACHE | SRMMU_ET_PTE | SRMMU_PRIV); diff --git a/arch/sparc/mm/leon_mm.c b/arch/sparc/mm/leon_mm.c new file mode 100644 index 00000000000..3b17b6f7895 --- /dev/null +++ b/arch/sparc/mm/leon_mm.c @@ -0,0 +1,351 @@ +/* + * linux/arch/sparc/mm/leon_m.c + * + * Copyright (C) 2004 Konrad Eisele (eiselekd@web.de, konrad@gaisler.com) Gaisler Research + * Copyright (C) 2009 Daniel Hellstrom (daniel@gaisler.com) Aeroflex Gaisler AB + * Copyright (C) 2009 Konrad Eisele (konrad@gaisler.com) Aeroflex Gaisler AB + * + * do srmmu probe in software + * + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <asm/asi.h> +#include <asm/leon.h> +#include <asm/tlbflush.h> + +#include "mm_32.h" + +int leon_flush_during_switch = 1; +static int srmmu_swprobe_trace; + +static inline unsigned long leon_get_ctable_ptr(void) +{ + unsigned int retval; + + __asm__ 
__volatile__("lda [%1] %2, %0\n\t" : + "=r" (retval) : + "r" (SRMMU_CTXTBL_PTR), + "i" (ASI_LEON_MMUREGS)); + return (retval & SRMMU_CTX_PMASK) << 4; +} + + +unsigned long leon_swprobe(unsigned long vaddr, unsigned long *paddr) +{ + + unsigned int ctxtbl; + unsigned int pgd, pmd, ped; + unsigned int ptr; + unsigned int lvl, pte, paddrbase; + unsigned int ctx; + unsigned int paddr_calc; + + paddrbase = 0; + + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: trace on\n"); + + ctxtbl = leon_get_ctable_ptr(); + if (!(ctxtbl)) { + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: leon_get_ctable_ptr returned 0=>0\n"); + return 0; + } + if (!_pfn_valid(PFN(ctxtbl))) { + if (srmmu_swprobe_trace) + printk(KERN_INFO + "swprobe: !_pfn_valid(%x)=>0\n", + PFN(ctxtbl)); + return 0; + } + + ctx = srmmu_get_context(); + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: --- ctx (%x) ---\n", ctx); + + pgd = LEON_BYPASS_LOAD_PA(ctxtbl + (ctx * 4)); + + if (((pgd & SRMMU_ET_MASK) == SRMMU_ET_PTE)) { + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: pgd is entry level 3\n"); + lvl = 3; + pte = pgd; + paddrbase = pgd & _SRMMU_PTE_PMASK_LEON; + goto ready; + } + if (((pgd & SRMMU_ET_MASK) != SRMMU_ET_PTD)) { + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: pgd is invalid => 0\n"); + return 0; + } + + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: --- pgd (%x) ---\n", pgd); + + ptr = (pgd & SRMMU_PTD_PMASK) << 4; + ptr += ((((vaddr) >> LEON_PGD_SH) & LEON_PGD_M) * 4); + if (!_pfn_valid(PFN(ptr))) + return 0; + + pmd = LEON_BYPASS_LOAD_PA(ptr); + if (((pmd & SRMMU_ET_MASK) == SRMMU_ET_PTE)) { + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: pmd is entry level 2\n"); + lvl = 2; + pte = pmd; + paddrbase = pmd & _SRMMU_PTE_PMASK_LEON; + goto ready; + } + if (((pmd & SRMMU_ET_MASK) != SRMMU_ET_PTD)) { + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: pmd is invalid => 0\n"); + return 0; + } + + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: 
--- pmd (%x) ---\n", pmd); + + ptr = (pmd & SRMMU_PTD_PMASK) << 4; + ptr += (((vaddr >> LEON_PMD_SH) & LEON_PMD_M) * 4); + if (!_pfn_valid(PFN(ptr))) { + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: !_pfn_valid(%x)=>0\n", + PFN(ptr)); + return 0; + } + + ped = LEON_BYPASS_LOAD_PA(ptr); + + if (((ped & SRMMU_ET_MASK) == SRMMU_ET_PTE)) { + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: ped is entry level 1\n"); + lvl = 1; + pte = ped; + paddrbase = ped & _SRMMU_PTE_PMASK_LEON; + goto ready; + } + if (((ped & SRMMU_ET_MASK) != SRMMU_ET_PTD)) { + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: ped is invalid => 0\n"); + return 0; + } + + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: --- ped (%x) ---\n", ped); + + ptr = (ped & SRMMU_PTD_PMASK) << 4; + ptr += (((vaddr >> LEON_PTE_SH) & LEON_PTE_M) * 4); + if (!_pfn_valid(PFN(ptr))) + return 0; + + ptr = LEON_BYPASS_LOAD_PA(ptr); + if (((ptr & SRMMU_ET_MASK) == SRMMU_ET_PTE)) { + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: ptr is entry level 0\n"); + lvl = 0; + pte = ptr; + paddrbase = ptr & _SRMMU_PTE_PMASK_LEON; + goto ready; + } + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: ptr is invalid => 0\n"); + return 0; + +ready: + switch (lvl) { + case 0: + paddr_calc = + (vaddr & ~(-1 << LEON_PTE_SH)) | ((pte & ~0xff) << 4); + break; + case 1: + paddr_calc = + (vaddr & ~(-1 << LEON_PMD_SH)) | ((pte & ~0xff) << 4); + break; + case 2: + paddr_calc = + (vaddr & ~(-1 << LEON_PGD_SH)) | ((pte & ~0xff) << 4); + break; + default: + case 3: + paddr_calc = vaddr; + break; + } + if (srmmu_swprobe_trace) + printk(KERN_INFO "swprobe: padde %x\n", paddr_calc); + if (paddr) + *paddr = paddr_calc; + return pte; +} + +void leon_flush_icache_all(void) +{ + __asm__ __volatile__(" flush "); /*iflush*/ +} + +void leon_flush_dcache_all(void) +{ + __asm__ __volatile__("sta %%g0, [%%g0] %0\n\t" : : + "i"(ASI_LEON_DFLUSH) : "memory"); +} + +void leon_flush_pcache_all(struct vm_area_struct *vma, 
unsigned long page) +{ + if (vma->vm_flags & VM_EXEC) + leon_flush_icache_all(); + leon_flush_dcache_all(); +} + +void leon_flush_cache_all(void) +{ + __asm__ __volatile__(" flush "); /*iflush*/ + __asm__ __volatile__("sta %%g0, [%%g0] %0\n\t" : : + "i"(ASI_LEON_DFLUSH) : "memory"); +} + +void leon_flush_tlb_all(void) +{ + leon_flush_cache_all(); + __asm__ __volatile__("sta %%g0, [%0] %1\n\t" : : "r"(0x400), + "i"(ASI_LEON_MMUFLUSH) : "memory"); +} + +/* get all cache regs */ +void leon3_getCacheRegs(struct leon3_cacheregs *regs) +{ + unsigned long ccr, iccr, dccr; + + if (!regs) + return; + /* Get Cache regs from "Cache ASI" address 0x0, 0x8 and 0xC */ + __asm__ __volatile__("lda [%%g0] %3, %0\n\t" + "mov 0x08, %%g1\n\t" + "lda [%%g1] %3, %1\n\t" + "mov 0x0c, %%g1\n\t" + "lda [%%g1] %3, %2\n\t" + : "=r"(ccr), "=r"(iccr), "=r"(dccr) + /* output */ + : "i"(ASI_LEON_CACHEREGS) /* input */ + : "g1" /* clobber list */ + ); + regs->ccr = ccr; + regs->iccr = iccr; + regs->dccr = dccr; +} + +/* Due to virtual cache we need to check cache configuration if + * it is possible to skip flushing in some cases. + * + * Leon2 and Leon3 differ in their way of telling cache information + * + */ +int __init leon_flush_needed(void) +{ + int flush_needed = -1; + unsigned int ssize, sets; + char *setStr[4] = + { "direct mapped", "2-way associative", "3-way associative", + "4-way associative" + }; + /* leon 3 */ + struct leon3_cacheregs cregs; + leon3_getCacheRegs(&cregs); + sets = (cregs.dccr & LEON3_XCCR_SETS_MASK) >> 24; + /* (ssize=>realsize) 0=>1k, 1=>2k, 2=>4k, 3=>8k ... */ + ssize = 1 << ((cregs.dccr & LEON3_XCCR_SSIZE_MASK) >> 20); + + printk(KERN_INFO "CACHE: %s cache, set size %dk\n", + sets > 3 ? "unknown" : setStr[sets], ssize); + if ((ssize <= (PAGE_SIZE / 1024)) && (sets == 0)) { + /* Set Size <= Page size ==> + flush on every context switch not needed. 
*/ + flush_needed = 0; + printk(KERN_INFO "CACHE: not flushing on every context switch\n"); + } + return flush_needed; +} + +void leon_switch_mm(void) +{ + flush_tlb_mm((void *)0); + if (leon_flush_during_switch) + leon_flush_cache_all(); +} + +static void leon_flush_cache_mm(struct mm_struct *mm) +{ + leon_flush_cache_all(); +} + +static void leon_flush_cache_page(struct vm_area_struct *vma, unsigned long page) +{ + leon_flush_pcache_all(vma, page); +} + +static void leon_flush_cache_range(struct vm_area_struct *vma, + unsigned long start, + unsigned long end) +{ + leon_flush_cache_all(); +} + +static void leon_flush_tlb_mm(struct mm_struct *mm) +{ + leon_flush_tlb_all(); +} + +static void leon_flush_tlb_page(struct vm_area_struct *vma, + unsigned long page) +{ + leon_flush_tlb_all(); +} + +static void leon_flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, + unsigned long end) +{ + leon_flush_tlb_all(); +} + +static void leon_flush_page_to_ram(unsigned long page) +{ + leon_flush_cache_all(); +} + +static void leon_flush_sig_insns(struct mm_struct *mm, unsigned long page) +{ + leon_flush_cache_all(); +} + +static void leon_flush_page_for_dma(unsigned long page) +{ + leon_flush_dcache_all(); +} + +void __init poke_leonsparc(void) +{ +} + +static const struct sparc32_cachetlb_ops leon_ops = { + .cache_all = leon_flush_cache_all, + .cache_mm = leon_flush_cache_mm, + .cache_page = leon_flush_cache_page, + .cache_range = leon_flush_cache_range, + .tlb_all = leon_flush_tlb_all, + .tlb_mm = leon_flush_tlb_mm, + .tlb_page = leon_flush_tlb_page, + .tlb_range = leon_flush_tlb_range, + .page_to_ram = leon_flush_page_to_ram, + .sig_insns = leon_flush_sig_insns, + .page_for_dma = leon_flush_page_for_dma, +}; + +void __init init_leon(void) +{ + srmmu_name = "LEON"; + sparc32_cachetlb_ops = &leon_ops; + poke_srmmu = poke_leonsparc; + + leon_flush_during_switch = leon_flush_needed(); +} diff --git a/arch/sparc/mm/loadmmu.c b/arch/sparc/mm/loadmmu.c deleted file 
mode 100644 index 2d9cd65160a..00000000000 --- a/arch/sparc/mm/loadmmu.c +++ /dev/null @@ -1,43 +0,0 @@ -/* $Id: loadmmu.c,v 1.56 2000/02/08 20:24:21 davem Exp $ - * loadmmu.c: This code loads up all the mm function pointers once the - * machine type has been determined. It also sets the static - * mmu values such as PAGE_NONE, etc. - * - * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) - * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - */ - -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/init.h> - -#include <asm/system.h> -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/mmu_context.h> -#include <asm/oplib.h> - -struct ctx_list *ctx_list_pool; -struct ctx_list ctx_free; -struct ctx_list ctx_used; - -extern void ld_mmu_sun4c(void); -extern void ld_mmu_srmmu(void); - -void __init load_mmu(void) -{ - switch(sparc_cpu_model) { - case sun4c: - case sun4: - ld_mmu_sun4c(); - break; - case sun4m: - case sun4d: - ld_mmu_srmmu(); - break; - default: - prom_printf("load_mmu: %d unsupported\n", (int)sparc_cpu_model); - prom_halt(); - } - btfixup(); -} diff --git a/arch/sparc/mm/mm_32.h b/arch/sparc/mm/mm_32.h new file mode 100644 index 00000000000..a6c27ca9a72 --- /dev/null +++ b/arch/sparc/mm/mm_32.h @@ -0,0 +1,24 @@ +/* fault_32.c - visible as they are called from assembler */ +asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc, + unsigned long address); +asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, + unsigned long address); + +void window_overflow_fault(void); +void window_underflow_fault(unsigned long sp); +void window_ret_fault(struct pt_regs *regs); + +/* srmmu.c */ +extern char *srmmu_name; +extern int viking_mxcc_present; +extern int flush_page_for_dma_global; + +extern void (*poke_srmmu)(void); + +void __init srmmu_paging_init(void); + +/* iommu.c */ +void ld_mmu_iommu(void); + +/* io-unit.c */ +void ld_mmu_iounit(void); diff --git a/arch/sparc/mm/nosrmmu.c 
b/arch/sparc/mm/nosrmmu.c deleted file mode 100644 index 9e215659697..00000000000 --- a/arch/sparc/mm/nosrmmu.c +++ /dev/null @@ -1,59 +0,0 @@ -/* $Id: nosrmmu.c,v 1.5 1999/11/19 04:11:54 davem Exp $ - * nosrmmu.c: This file is a bunch of dummies for sun4 compiles, - * so that it does not need srmmu and avoid ifdefs. - * - * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - */ - -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/init.h> -#include <asm/mbus.h> -#include <asm/sbus.h> - -static char shouldnothappen[] __initdata = "SUN4 kernel can only run on SUN4\n"; - -enum mbus_module srmmu_modtype; -void *srmmu_nocache_pool; - -int vac_cache_size = 0; - -static void __init should_not_happen(void) -{ - prom_printf(shouldnothappen); - prom_halt(); -} - -void __init srmmu_frob_mem_map(unsigned long start_mem) -{ - should_not_happen(); -} - -unsigned long __init srmmu_paging_init(unsigned long start_mem, unsigned long end_mem) -{ - should_not_happen(); - return 0; -} - -void __init ld_mmu_srmmu(void) -{ - should_not_happen(); -} - -void srmmu_mapioaddr(unsigned long physaddr, unsigned long virt_addr, int bus_type, int rdonly) -{ -} - -void srmmu_unmapioaddr(unsigned long virt_addr) -{ -} - -__u32 iounit_map_dma_init(struct sbus_bus *sbus, int size) -{ - return 0; -} - -__u32 iounit_map_dma_page(__u32 vaddr, void *addr, struct sbus_bus *sbus) -{ - return 0; -} diff --git a/arch/sparc/mm/nosun4c.c b/arch/sparc/mm/nosun4c.c deleted file mode 100644 index ea2e2105341..00000000000 --- a/arch/sparc/mm/nosun4c.c +++ /dev/null @@ -1,77 +0,0 @@ -/* $Id: nosun4c.c,v 1.3 2000/02/14 04:52:36 jj Exp $ - * nosun4c.c: This file is a bunch of dummies for SMP compiles, - * so that it does not need sun4c and avoid ifdefs. 
- * - * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - */ - -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/init.h> -#include <asm/pgtable.h> - -static char shouldnothappen[] __initdata = "32bit SMP kernel only supports sun4m and sun4d\n"; - -/* Dummies */ -struct sun4c_mmu_ring { - unsigned long xxx1[3]; - unsigned char xxx2[2]; - int xxx3; -}; -struct sun4c_mmu_ring sun4c_kernel_ring; -struct sun4c_mmu_ring sun4c_kfree_ring; -unsigned long sun4c_kernel_faults; -unsigned long *sun4c_memerr_reg; - -static void __init should_not_happen(void) -{ - prom_printf(shouldnothappen); - prom_halt(); -} - -unsigned long __init sun4c_paging_init(unsigned long start_mem, unsigned long end_mem) -{ - should_not_happen(); - return 0; -} - -void __init ld_mmu_sun4c(void) -{ - should_not_happen(); -} - -void sun4c_mapioaddr(unsigned long physaddr, unsigned long virt_addr, int bus_type, int rdonly) -{ -} - -void sun4c_unmapioaddr(unsigned long virt_addr) -{ -} - -void sun4c_complete_all_stores(void) -{ -} - -pte_t *sun4c_pte_offset(pmd_t * dir, unsigned long address) -{ - return NULL; -} - -pte_t *sun4c_pte_offset_kernel(pmd_t *dir, unsigned long address) -{ - return NULL; -} - -void sun4c_update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte) -{ -} - -void __init sun4c_probe_vac(void) -{ - should_not_happen(); -} - -void __init sun4c_probe_memerr_reg(void) -{ - should_not_happen(); -} diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index 23d3291a3e8..be65f035d18 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -8,49 +8,51 @@ * Copyright (C) 1999,2000 Anton Blanchard (anton@samba.org) */ -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/pagemap.h> -#include <linux/init.h> +#include <linux/seq_file.h> #include <linux/spinlock.h> #include <linux/bootmem.h> -#include <linux/fs.h> -#include <linux/seq_file.h> +#include <linux/pagemap.h> 
+#include <linux/vmalloc.h> #include <linux/kdebug.h> +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/log2.h> +#include <linux/gfp.h> +#include <linux/fs.h> +#include <linux/mm.h> -#include <asm/bitext.h> -#include <asm/page.h> +#include <asm/mmu_context.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include <asm/io-unit.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> -#include <asm/io.h> +#include <asm/bitext.h> #include <asm/vaddrs.h> -#include <asm/traps.h> -#include <asm/smp.h> -#include <asm/mbus.h> #include <asm/cache.h> +#include <asm/traps.h> #include <asm/oplib.h> -#include <asm/sbus.h> +#include <asm/mbus.h> +#include <asm/page.h> #include <asm/asi.h> #include <asm/msi.h> -#include <asm/mmu_context.h> -#include <asm/io-unit.h> -#include <asm/cacheflush.h> -#include <asm/tlbflush.h> +#include <asm/smp.h> +#include <asm/io.h> /* Now the cpu specific definitions. */ +#include <asm/turbosparc.h> +#include <asm/tsunami.h> #include <asm/viking.h> +#include <asm/swift.h> +#include <asm/leon.h> #include <asm/mxcc.h> #include <asm/ross.h> -#include <asm/tsunami.h> -#include <asm/swift.h> -#include <asm/turbosparc.h> -#include <asm/btfixup.h> +#include "mm_32.h" enum mbus_module srmmu_modtype; -unsigned int hwbug_bitmask; +static unsigned int hwbug_bitmask; int vac_cache_size; int vac_line_size; @@ -58,65 +60,38 @@ extern struct resource sparc_iomap; extern unsigned long last_valid_pfn; -extern unsigned long page_kernel; +static pgd_t *srmmu_swapper_pg_dir; -pgd_t *srmmu_swapper_pg_dir; +const struct sparc32_cachetlb_ops *sparc32_cachetlb_ops; +EXPORT_SYMBOL(sparc32_cachetlb_ops); #ifdef CONFIG_SMP +const struct sparc32_cachetlb_ops *local_ops; + #define FLUSH_BEGIN(mm) #define FLUSH_END #else -#define FLUSH_BEGIN(mm) if((mm)->context != NO_CONTEXT) { +#define FLUSH_BEGIN(mm) if ((mm)->context != NO_CONTEXT) { #define FLUSH_END } #endif -BTFIXUPDEF_CALL(void, flush_page_for_dma, unsigned long) 
-#define flush_page_for_dma(page) BTFIXUP_CALL(flush_page_for_dma)(page) - int flush_page_for_dma_global = 1; -#ifdef CONFIG_SMP -BTFIXUPDEF_CALL(void, local_flush_page_for_dma, unsigned long) -#define local_flush_page_for_dma(page) BTFIXUP_CALL(local_flush_page_for_dma)(page) -#endif - char *srmmu_name; ctxd_t *srmmu_ctx_table_phys; -ctxd_t *srmmu_context_table; +static ctxd_t *srmmu_context_table; int viking_mxcc_present; static DEFINE_SPINLOCK(srmmu_context_spinlock); -int is_hypersparc; +static int is_hypersparc; -/* - * In general all page table modifications should use the V8 atomic - * swap instruction. This insures the mmu and the cpu are in sync - * with respect to ref/mod bits in the page tables. - */ -static inline unsigned long srmmu_swap(unsigned long *addr, unsigned long value) -{ - __asm__ __volatile__("swap [%2], %0" : "=&r" (value) : "0" (value), "r" (addr)); - return value; -} - -static inline void srmmu_set_pte(pte_t *ptep, pte_t pteval) -{ - srmmu_swap((unsigned long *)ptep, pte_val(pteval)); -} - -/* The very generic SRMMU page table operations. */ -static inline int srmmu_device_memory(unsigned long x) -{ - return ((x & 0xF0000000) != 0); -} - -int srmmu_cache_pagetables; +static int srmmu_cache_pagetables; /* these will be initialized in srmmu_nocache_calcsize() */ -unsigned long srmmu_nocache_size; -unsigned long srmmu_nocache_end; +static unsigned long srmmu_nocache_size; +static unsigned long srmmu_nocache_end; /* 1 bit <=> 256 bytes of nocache <=> 64 PTEs */ #define SRMMU_NOCACHE_BITMAP_SHIFT (PAGE_SHIFT - 4) @@ -125,148 +100,41 @@ unsigned long srmmu_nocache_end; #define SRMMU_NOCACHE_ALIGN_MAX (sizeof(ctxd_t)*SRMMU_MAX_CONTEXTS) void *srmmu_nocache_pool; -void *srmmu_nocache_bitmap; static struct bit_map srmmu_nocache_map; -static unsigned long srmmu_pte_pfn(pte_t pte) -{ - if (srmmu_device_memory(pte_val(pte))) { - /* Just return something that will cause - * pfn_valid() to return false. 
This makes - * copy_one_pte() to just directly copy to - * PTE over. - */ - return ~0UL; - } - return (pte_val(pte) & SRMMU_PTE_PMASK) >> (PAGE_SHIFT-4); -} - -static struct page *srmmu_pmd_page(pmd_t pmd) -{ - - if (srmmu_device_memory(pmd_val(pmd))) - BUG(); - return pfn_to_page((pmd_val(pmd) & SRMMU_PTD_PMASK) >> (PAGE_SHIFT-4)); -} - -static inline unsigned long srmmu_pgd_page(pgd_t pgd) -{ return srmmu_device_memory(pgd_val(pgd))?~0:(unsigned long)__nocache_va((pgd_val(pgd) & SRMMU_PTD_PMASK) << 4); } - - -static inline int srmmu_pte_none(pte_t pte) -{ return !(pte_val(pte) & 0xFFFFFFF); } - -static inline int srmmu_pte_present(pte_t pte) -{ return ((pte_val(pte) & SRMMU_ET_MASK) == SRMMU_ET_PTE); } - -static inline void srmmu_pte_clear(pte_t *ptep) -{ srmmu_set_pte(ptep, __pte(0)); } - static inline int srmmu_pmd_none(pmd_t pmd) { return !(pmd_val(pmd) & 0xFFFFFFF); } -static inline int srmmu_pmd_bad(pmd_t pmd) -{ return (pmd_val(pmd) & SRMMU_ET_MASK) != SRMMU_ET_PTD; } - -static inline int srmmu_pmd_present(pmd_t pmd) -{ return ((pmd_val(pmd) & SRMMU_ET_MASK) == SRMMU_ET_PTD); } - -static inline void srmmu_pmd_clear(pmd_t *pmdp) { - int i; - for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++) - srmmu_set_pte((pte_t *)&pmdp->pmdv[i], __pte(0)); -} - -static inline int srmmu_pgd_none(pgd_t pgd) -{ return !(pgd_val(pgd) & 0xFFFFFFF); } - -static inline int srmmu_pgd_bad(pgd_t pgd) -{ return (pgd_val(pgd) & SRMMU_ET_MASK) != SRMMU_ET_PTD; } - -static inline int srmmu_pgd_present(pgd_t pgd) -{ return ((pgd_val(pgd) & SRMMU_ET_MASK) == SRMMU_ET_PTD); } - -static inline void srmmu_pgd_clear(pgd_t * pgdp) -{ srmmu_set_pte((pte_t *)pgdp, __pte(0)); } - -static inline pte_t srmmu_pte_wrprotect(pte_t pte) -{ return __pte(pte_val(pte) & ~SRMMU_WRITE);} - -static inline pte_t srmmu_pte_mkclean(pte_t pte) -{ return __pte(pte_val(pte) & ~SRMMU_DIRTY);} - -static inline pte_t srmmu_pte_mkold(pte_t pte) -{ return __pte(pte_val(pte) & ~SRMMU_REF);} - -static inline 
pte_t srmmu_pte_mkwrite(pte_t pte) -{ return __pte(pte_val(pte) | SRMMU_WRITE);} - -static inline pte_t srmmu_pte_mkdirty(pte_t pte) -{ return __pte(pte_val(pte) | SRMMU_DIRTY);} - -static inline pte_t srmmu_pte_mkyoung(pte_t pte) -{ return __pte(pte_val(pte) | SRMMU_REF);} - -/* - * Conversion functions: convert a page and protection to a page entry, - * and a page entry and page directory to the page they refer to. - */ -static pte_t srmmu_mk_pte(struct page *page, pgprot_t pgprot) -{ return __pte((page_to_pfn(page) << (PAGE_SHIFT-4)) | pgprot_val(pgprot)); } - -static pte_t srmmu_mk_pte_phys(unsigned long page, pgprot_t pgprot) -{ return __pte(((page) >> 4) | pgprot_val(pgprot)); } - -static pte_t srmmu_mk_pte_io(unsigned long page, pgprot_t pgprot, int space) -{ return __pte(((page) >> 4) | (space << 28) | pgprot_val(pgprot)); } - /* XXX should we hyper_flush_whole_icache here - Anton */ static inline void srmmu_ctxd_set(ctxd_t *ctxp, pgd_t *pgdp) -{ srmmu_set_pte((pte_t *)ctxp, (SRMMU_ET_PTD | (__nocache_pa((unsigned long) pgdp) >> 4))); } +{ set_pte((pte_t *)ctxp, (SRMMU_ET_PTD | (__nocache_pa((unsigned long) pgdp) >> 4))); } -static inline void srmmu_pgd_set(pgd_t * pgdp, pmd_t * pmdp) -{ srmmu_set_pte((pte_t *)pgdp, (SRMMU_ET_PTD | (__nocache_pa((unsigned long) pmdp) >> 4))); } - -static void srmmu_pmd_set(pmd_t *pmdp, pte_t *ptep) +void pmd_set(pmd_t *pmdp, pte_t *ptep) { unsigned long ptp; /* Physical address, shifted right by 4 */ int i; ptp = __nocache_pa((unsigned long) ptep) >> 4; for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++) { - srmmu_set_pte((pte_t *)&pmdp->pmdv[i], SRMMU_ET_PTD | ptp); + set_pte((pte_t *)&pmdp->pmdv[i], SRMMU_ET_PTD | ptp); ptp += (SRMMU_REAL_PTRS_PER_PTE*sizeof(pte_t) >> 4); } } -static void srmmu_pmd_populate(pmd_t *pmdp, struct page *ptep) +void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep) { unsigned long ptp; /* Physical address, shifted right by 4 */ int i; ptp = page_to_pfn(ptep) << 
(PAGE_SHIFT-4); /* watch for overflow */ for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++) { - srmmu_set_pte((pte_t *)&pmdp->pmdv[i], SRMMU_ET_PTD | ptp); + set_pte((pte_t *)&pmdp->pmdv[i], SRMMU_ET_PTD | ptp); ptp += (SRMMU_REAL_PTRS_PER_PTE*sizeof(pte_t) >> 4); } } -static inline pte_t srmmu_pte_modify(pte_t pte, pgprot_t newprot) -{ return __pte((pte_val(pte) & SRMMU_CHG_MASK) | pgprot_val(newprot)); } - -/* to find an entry in a top-level page table... */ -static inline pgd_t *srmmu_pgd_offset(struct mm_struct * mm, unsigned long address) -{ return mm->pgd + (address >> SRMMU_PGDIR_SHIFT); } - -/* Find an entry in the second-level page table.. */ -static inline pmd_t *srmmu_pmd_offset(pgd_t * dir, unsigned long address) -{ - return (pmd_t *) srmmu_pgd_page(*dir) + - ((address >> PMD_SHIFT) & (PTRS_PER_PMD - 1)); -} - -/* Find an entry in the third-level page table.. */ -static inline pte_t *srmmu_pte_offset(pmd_t * dir, unsigned long address) +/* Find an entry in the third-level page table.. */ +pte_t *pte_offset_kernel(pmd_t *dir, unsigned long address) { void *pte; @@ -275,82 +143,71 @@ static inline pte_t *srmmu_pte_offset(pmd_t * dir, unsigned long address) ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)); } -static unsigned long srmmu_swp_type(swp_entry_t entry) -{ - return (entry.val >> SRMMU_SWP_TYPE_SHIFT) & SRMMU_SWP_TYPE_MASK; -} - -static unsigned long srmmu_swp_offset(swp_entry_t entry) -{ - return (entry.val >> SRMMU_SWP_OFF_SHIFT) & SRMMU_SWP_OFF_MASK; -} - -static swp_entry_t srmmu_swp_entry(unsigned long type, unsigned long offset) -{ - return (swp_entry_t) { - (type & SRMMU_SWP_TYPE_MASK) << SRMMU_SWP_TYPE_SHIFT - | (offset & SRMMU_SWP_OFF_MASK) << SRMMU_SWP_OFF_SHIFT }; -} - /* * size: bytes to allocate in the nocache area. * align: bytes, number to align at. * Returns the virtual address of the allocated area. 
*/ -static unsigned long __srmmu_get_nocache(int size, int align) +static void *__srmmu_get_nocache(int size, int align) { int offset; + unsigned long addr; if (size < SRMMU_NOCACHE_BITMAP_SHIFT) { - printk("Size 0x%x too small for nocache request\n", size); + printk(KERN_ERR "Size 0x%x too small for nocache request\n", + size); size = SRMMU_NOCACHE_BITMAP_SHIFT; } - if (size & (SRMMU_NOCACHE_BITMAP_SHIFT-1)) { - printk("Size 0x%x unaligned int nocache request\n", size); - size += SRMMU_NOCACHE_BITMAP_SHIFT-1; + if (size & (SRMMU_NOCACHE_BITMAP_SHIFT - 1)) { + printk(KERN_ERR "Size 0x%x unaligned int nocache request\n", + size); + size += SRMMU_NOCACHE_BITMAP_SHIFT - 1; } BUG_ON(align > SRMMU_NOCACHE_ALIGN_MAX); offset = bit_map_string_get(&srmmu_nocache_map, - size >> SRMMU_NOCACHE_BITMAP_SHIFT, - align >> SRMMU_NOCACHE_BITMAP_SHIFT); + size >> SRMMU_NOCACHE_BITMAP_SHIFT, + align >> SRMMU_NOCACHE_BITMAP_SHIFT); if (offset == -1) { - printk("srmmu: out of nocache %d: %d/%d\n", - size, (int) srmmu_nocache_size, - srmmu_nocache_map.used << SRMMU_NOCACHE_BITMAP_SHIFT); - return 0; + printk(KERN_ERR "srmmu: out of nocache %d: %d/%d\n", + size, (int) srmmu_nocache_size, + srmmu_nocache_map.used << SRMMU_NOCACHE_BITMAP_SHIFT); + return NULL; } - return (SRMMU_NOCACHE_VADDR + (offset << SRMMU_NOCACHE_BITMAP_SHIFT)); + addr = SRMMU_NOCACHE_VADDR + (offset << SRMMU_NOCACHE_BITMAP_SHIFT); + return (void *)addr; } -unsigned inline long srmmu_get_nocache(int size, int align) +void *srmmu_get_nocache(int size, int align) { - unsigned long tmp; + void *tmp; tmp = __srmmu_get_nocache(size, align); if (tmp) - memset((void *)tmp, 0, size); + memset(tmp, 0, size); return tmp; } -void srmmu_free_nocache(unsigned long vaddr, int size) +void srmmu_free_nocache(void *addr, int size) { + unsigned long vaddr; int offset; + vaddr = (unsigned long)addr; if (vaddr < SRMMU_NOCACHE_VADDR) { printk("Vaddr %lx is smaller than nocache base 0x%lx\n", vaddr, (unsigned long)SRMMU_NOCACHE_VADDR); 
BUG(); } - if (vaddr+size > srmmu_nocache_end) { + if (vaddr + size > srmmu_nocache_end) { printk("Vaddr %lx is bigger than nocache end 0x%lx\n", vaddr, srmmu_nocache_end); BUG(); } - if (size & (size-1)) { + if (!is_power_of_2(size)) { printk("Size 0x%x is not a power of 2\n", size); BUG(); } @@ -358,7 +215,7 @@ void srmmu_free_nocache(unsigned long vaddr, int size) printk("Size 0x%x is too small\n", size); BUG(); } - if (vaddr & (size-1)) { + if (vaddr & (size - 1)) { printk("Vaddr %lx is not aligned to size 0x%x\n", vaddr, size); BUG(); } @@ -369,15 +226,26 @@ void srmmu_free_nocache(unsigned long vaddr, int size) bit_map_clear(&srmmu_nocache_map, offset, size); } -void srmmu_early_allocate_ptable_skeleton(unsigned long start, unsigned long end); +static void srmmu_early_allocate_ptable_skeleton(unsigned long start, + unsigned long end); -extern unsigned long probe_memory(void); /* in fault.c */ +/* Return how much physical memory we have. */ +static unsigned long __init probe_memory(void) +{ + unsigned long total = 0; + int i; + + for (i = 0; sp_banks[i].num_bytes; i++) + total += sp_banks[i].num_bytes; + + return total; +} /* * Reserve nocache dynamically proportionally to the amount of * system RAM. 
-- Tomas Szepe <szepe@pinerecords.com>, June 2002 */ -void srmmu_nocache_calcsize(void) +static void __init srmmu_nocache_calcsize(void) { unsigned long sysmemavail = probe_memory() / 1024; int srmmu_nocache_npages; @@ -398,8 +266,9 @@ void srmmu_nocache_calcsize(void) srmmu_nocache_end = SRMMU_NOCACHE_VADDR + srmmu_nocache_size; } -void __init srmmu_nocache_init(void) +static void __init srmmu_nocache_init(void) { + void *srmmu_nocache_bitmap; unsigned int bitmap_bits; pgd_t *pgd; pmd_t *pmd; @@ -413,10 +282,12 @@ void __init srmmu_nocache_init(void) SRMMU_NOCACHE_ALIGN_MAX, 0UL); memset(srmmu_nocache_pool, 0, srmmu_nocache_size); - srmmu_nocache_bitmap = __alloc_bootmem(bitmap_bits >> 3, SMP_CACHE_BYTES, 0UL); + srmmu_nocache_bitmap = + __alloc_bootmem(BITS_TO_LONGS(bitmap_bits) * sizeof(long), + SMP_CACHE_BYTES, 0UL); bit_map_init(&srmmu_nocache_map, srmmu_nocache_bitmap, bitmap_bits); - srmmu_swapper_pg_dir = (pgd_t *)__srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE); + srmmu_swapper_pg_dir = __srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE); memset(__nocache_fix(srmmu_swapper_pg_dir), 0, SRMMU_PGD_TABLE_SIZE); init_mm.pgd = srmmu_swapper_pg_dir; @@ -427,15 +298,15 @@ void __init srmmu_nocache_init(void) while (vaddr < srmmu_nocache_end) { pgd = pgd_offset_k(vaddr); - pmd = srmmu_pmd_offset(__nocache_fix(pgd), vaddr); - pte = srmmu_pte_offset(__nocache_fix(pmd), vaddr); + pmd = pmd_offset(__nocache_fix(pgd), vaddr); + pte = pte_offset_kernel(__nocache_fix(pmd), vaddr); pteval = ((paddr >> 4) | SRMMU_ET_PTE | SRMMU_PRIV); if (srmmu_cache_pagetables) pteval |= SRMMU_CACHE; - srmmu_set_pte(__nocache_fix(pte), __pte(pteval)); + set_pte(__nocache_fix(pte), __pte(pteval)); vaddr += PAGE_SIZE; paddr += PAGE_SIZE; @@ -445,11 +316,11 @@ void __init srmmu_nocache_init(void) flush_tlb_all(); } -static inline pgd_t *srmmu_get_pgd_fast(void) +pgd_t *get_pgd_fast(void) { pgd_t *pgd = NULL; - pgd = (pgd_t *)__srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, 
SRMMU_PGD_TABLE_SIZE); + pgd = __srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE); if (pgd) { pgd_t *init = pgd_offset_k(0); memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); @@ -460,21 +331,6 @@ static inline pgd_t *srmmu_get_pgd_fast(void) return pgd; } -static void srmmu_free_pgd_fast(pgd_t *pgd) -{ - srmmu_free_nocache((unsigned long)pgd, SRMMU_PGD_TABLE_SIZE); -} - -static pmd_t *srmmu_pmd_alloc_one(struct mm_struct *mm, unsigned long address) -{ - return (pmd_t *)srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE); -} - -static void srmmu_pmd_free(pmd_t * pmd) -{ - srmmu_free_nocache((unsigned long)pmd, SRMMU_PMD_TABLE_SIZE); -} - /* * Hardware needs alignment to 256 only, but we align to whole page size * to reduce fragmentation problems due to the buddy principle. @@ -483,31 +339,22 @@ static void srmmu_pmd_free(pmd_t * pmd) * Alignments up to the page size are the same for physical and virtual * addresses of the nocache area. */ -static pte_t * -srmmu_pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) -{ - return (pte_t *)srmmu_get_nocache(PTE_SIZE, PTE_SIZE); -} - -static pgtable_t -srmmu_pte_alloc_one(struct mm_struct *mm, unsigned long address) +pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { unsigned long pte; struct page *page; - if ((pte = (unsigned long)srmmu_pte_alloc_one_kernel(mm, address)) == 0) + if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0) return NULL; - page = pfn_to_page( __nocache_pa(pte) >> PAGE_SHIFT ); - pgtable_page_ctor(page); + page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT); + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } return page; } -static void srmmu_free_pte_fast(pte_t *pte) -{ - srmmu_free_nocache((unsigned long)pte, PTE_SIZE); -} - -static void srmmu_pte_free(pgtable_t pte) +void pte_free(struct mm_struct *mm, pgtable_t pte) { unsigned long p; @@ -516,18 +363,50 @@ static void srmmu_pte_free(pgtable_t pte) if (p 
== 0) BUG(); p = page_to_pfn(pte) << PAGE_SHIFT; /* Physical address */ - p = (unsigned long) __nocache_va(p); /* Nocached virtual */ - srmmu_free_nocache(p, PTE_SIZE); + + /* free non cached virtual address*/ + srmmu_free_nocache(__nocache_va(p), PTE_SIZE); } -/* - */ +/* context handling - a dynamically sized pool is used */ +#define NO_CONTEXT -1 + +struct ctx_list { + struct ctx_list *next; + struct ctx_list *prev; + unsigned int ctx_number; + struct mm_struct *ctx_mm; +}; + +static struct ctx_list *ctx_list_pool; +static struct ctx_list ctx_free; +static struct ctx_list ctx_used; + +/* At boot time we determine the number of contexts */ +static int num_contexts; + +static inline void remove_from_ctx_list(struct ctx_list *entry) +{ + entry->next->prev = entry->prev; + entry->prev->next = entry->next; +} + +static inline void add_to_ctx_list(struct ctx_list *head, struct ctx_list *entry) +{ + entry->next = head; + (entry->prev = head->prev)->next = entry; + head->prev = entry; +} +#define add_to_free_ctxlist(entry) add_to_ctx_list(&ctx_free, entry) +#define add_to_used_ctxlist(entry) add_to_ctx_list(&ctx_used, entry) + + static inline void alloc_context(struct mm_struct *old_mm, struct mm_struct *mm) { struct ctx_list *ctxp; ctxp = ctx_free.next; - if(ctxp != &ctx_free) { + if (ctxp != &ctx_free) { remove_from_ctx_list(ctxp); add_to_used_ctxlist(ctxp); mm->context = ctxp->ctx_number; @@ -535,9 +414,9 @@ static inline void alloc_context(struct mm_struct *old_mm, struct mm_struct *mm) return; } ctxp = ctx_used.next; - if(ctxp->ctx_mm == old_mm) + if (ctxp->ctx_mm == old_mm) ctxp = ctxp->next; - if(ctxp == &ctx_used) + if (ctxp == &ctx_used) panic("out of mmu contexts"); flush_cache_mm(ctxp->ctx_mm); flush_tlb_mm(ctxp->ctx_mm); @@ -557,17 +436,40 @@ static inline void free_context(int context) add_to_free_ctxlist(ctx_old); } +static void __init sparc_context_init(int numctx) +{ + int ctx; + unsigned long size; -static void srmmu_switch_mm(struct mm_struct *old_mm, 
struct mm_struct *mm, - struct task_struct *tsk, int cpu) + size = numctx * sizeof(struct ctx_list); + ctx_list_pool = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL); + + for (ctx = 0; ctx < numctx; ctx++) { + struct ctx_list *clist; + + clist = (ctx_list_pool + ctx); + clist->ctx_number = ctx; + clist->ctx_mm = NULL; + } + ctx_free.next = ctx_free.prev = &ctx_free; + ctx_used.next = ctx_used.prev = &ctx_used; + for (ctx = 0; ctx < numctx; ctx++) + add_to_free_ctxlist(ctx_list_pool + ctx); +} + +void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, + struct task_struct *tsk) { - if(mm->context == NO_CONTEXT) { + if (mm->context == NO_CONTEXT) { spin_lock(&srmmu_context_spinlock); alloc_context(old_mm, mm); spin_unlock(&srmmu_context_spinlock); srmmu_ctxd_set(&srmmu_context_table[mm->context], mm->pgd); } + if (sparc_cpu_model == sparc_leon) + leon_switch_mm(); + if (is_hypersparc) hyper_flush_whole_icache(); @@ -576,7 +478,7 @@ static void srmmu_switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, /* Low level IO area allocation on the SRMMU. */ static inline void srmmu_mapioaddr(unsigned long physaddr, - unsigned long virt_addr, int bus_type) + unsigned long virt_addr, int bus_type) { pgd_t *pgdp; pmd_t *pmdp; @@ -585,23 +487,22 @@ static inline void srmmu_mapioaddr(unsigned long physaddr, physaddr &= PAGE_MASK; pgdp = pgd_offset_k(virt_addr); - pmdp = srmmu_pmd_offset(pgdp, virt_addr); - ptep = srmmu_pte_offset(pmdp, virt_addr); + pmdp = pmd_offset(pgdp, virt_addr); + ptep = pte_offset_kernel(pmdp, virt_addr); tmp = (physaddr >> 4) | SRMMU_ET_PTE; - /* - * I need to test whether this is consistent over all + /* I need to test whether this is consistent over all * sun4m's. The bus_type represents the upper 4 bits of * 36-bit physical address on the I/O space lines... 
*/ tmp |= (bus_type << 28); tmp |= SRMMU_PRIV; __flush_page_to_ram(virt_addr); - srmmu_set_pte(ptep, __pte(tmp)); + set_pte(ptep, __pte(tmp)); } -static void srmmu_mapiorange(unsigned int bus, unsigned long xpa, - unsigned long xva, unsigned int len) +void srmmu_mapiorange(unsigned int bus, unsigned long xpa, + unsigned long xva, unsigned int len) { while (len != 0) { len -= PAGE_SIZE; @@ -619,14 +520,14 @@ static inline void srmmu_unmapioaddr(unsigned long virt_addr) pte_t *ptep; pgdp = pgd_offset_k(virt_addr); - pmdp = srmmu_pmd_offset(pgdp, virt_addr); - ptep = srmmu_pte_offset(pmdp, virt_addr); + pmdp = pmd_offset(pgdp, virt_addr); + ptep = pte_offset_kernel(pmdp, virt_addr); /* No need to flush uncacheable page. */ - srmmu_pte_clear(ptep); + __pte_clear(ptep); } -static void srmmu_unmapiorange(unsigned long virt_addr, unsigned int len) +void srmmu_unmapiorange(unsigned long virt_addr, unsigned int len) { while (len != 0) { len -= PAGE_SIZE; @@ -636,34 +537,6 @@ static void srmmu_unmapiorange(unsigned long virt_addr, unsigned int len) flush_tlb_all(); } -/* - * On the SRMMU we do not have the problems with limited tlb entries - * for mapping kernel pages, so we just take things from the free page - * pool. As a side effect we are putting a little too much pressure - * on the gfp() subsystem. This setup also makes the logic of the - * iommu mapping code a lot easier as we can transparently handle - * mappings on the kernel stack without any special code as we did - * need on the sun4c. 
- */ -struct thread_info *srmmu_alloc_thread_info(void) -{ - struct thread_info *ret; - - ret = (struct thread_info *)__get_free_pages(GFP_KERNEL, - THREAD_INFO_ORDER); -#ifdef CONFIG_DEBUG_STACK_USAGE - if (ret) - memset(ret, 0, PAGE_SIZE << THREAD_INFO_ORDER); -#endif /* DEBUG_STACK_USAGE */ - - return ret; -} - -static void srmmu_free_thread_info(struct thread_info *ti) -{ - free_pages((unsigned long)ti, THREAD_INFO_ORDER); -} - /* tsunami.S */ extern void tsunami_flush_cache_all(void); extern void tsunami_flush_cache_mm(struct mm_struct *mm); @@ -678,38 +551,6 @@ extern void tsunami_flush_tlb_range(struct vm_area_struct *vma, unsigned long st extern void tsunami_flush_tlb_page(struct vm_area_struct *vma, unsigned long page); extern void tsunami_setup_blockops(void); -/* - * Workaround, until we find what's going on with Swift. When low on memory, - * it sometimes loops in fault/handle_mm_fault incl. flush_tlb_page to find - * out it is already in page tables/ fault again on the same instruction. - * I really don't understand it, have checked it and contexts - * are right, flush_tlb_all is done as well, and it faults again... - * Strange. -jj - * - * The following code is a deadwood that may be necessary when - * we start to make precise page flushes again. 
--zaitcev - */ -static void swift_update_mmu_cache(struct vm_area_struct * vma, unsigned long address, pte_t pte) -{ -#if 0 - static unsigned long last; - unsigned int val; - /* unsigned int n; */ - - if (address == last) { - val = srmmu_hwprobe(address); - if (val != 0 && pte_val(pte) != val) { - printk("swift_update_mmu_cache: " - "addr %lx put %08x probed %08x from %p\n", - address, pte_val(pte), val, - __builtin_return_address(0)); - srmmu_flush_whole_tlb(); - } - } - last = address; -#endif -} - /* swift.S */ extern void swift_flush_cache_all(void); extern void swift_flush_cache_mm(struct mm_struct *mm); @@ -762,244 +603,6 @@ void swift_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) * with respect to cache coherency. */ -/* Cypress flushes. */ -static void cypress_flush_cache_all(void) -{ - volatile unsigned long cypress_sucks; - unsigned long faddr, tagval; - - flush_user_windows(); - for(faddr = 0; faddr < 0x10000; faddr += 0x20) { - __asm__ __volatile__("lda [%1 + %2] %3, %0\n\t" : - "=r" (tagval) : - "r" (faddr), "r" (0x40000), - "i" (ASI_M_DATAC_TAG)); - - /* If modified and valid, kick it. 
*/ - if((tagval & 0x60) == 0x60) - cypress_sucks = *(unsigned long *)(0xf0020000 + faddr); - } -} - -static void cypress_flush_cache_mm(struct mm_struct *mm) -{ - register unsigned long a, b, c, d, e, f, g; - unsigned long flags, faddr; - int octx; - - FLUSH_BEGIN(mm) - flush_user_windows(); - local_irq_save(flags); - octx = srmmu_get_context(); - srmmu_set_context(mm->context); - a = 0x20; b = 0x40; c = 0x60; - d = 0x80; e = 0xa0; f = 0xc0; g = 0xe0; - - faddr = (0x10000 - 0x100); - goto inside; - do { - faddr -= 0x100; - inside: - __asm__ __volatile__("sta %%g0, [%0] %1\n\t" - "sta %%g0, [%0 + %2] %1\n\t" - "sta %%g0, [%0 + %3] %1\n\t" - "sta %%g0, [%0 + %4] %1\n\t" - "sta %%g0, [%0 + %5] %1\n\t" - "sta %%g0, [%0 + %6] %1\n\t" - "sta %%g0, [%0 + %7] %1\n\t" - "sta %%g0, [%0 + %8] %1\n\t" : : - "r" (faddr), "i" (ASI_M_FLUSH_CTX), - "r" (a), "r" (b), "r" (c), "r" (d), - "r" (e), "r" (f), "r" (g)); - } while(faddr); - srmmu_set_context(octx); - local_irq_restore(flags); - FLUSH_END -} - -static void cypress_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) -{ - struct mm_struct *mm = vma->vm_mm; - register unsigned long a, b, c, d, e, f, g; - unsigned long flags, faddr; - int octx; - - FLUSH_BEGIN(mm) - flush_user_windows(); - local_irq_save(flags); - octx = srmmu_get_context(); - srmmu_set_context(mm->context); - a = 0x20; b = 0x40; c = 0x60; - d = 0x80; e = 0xa0; f = 0xc0; g = 0xe0; - - start &= SRMMU_REAL_PMD_MASK; - while(start < end) { - faddr = (start + (0x10000 - 0x100)); - goto inside; - do { - faddr -= 0x100; - inside: - __asm__ __volatile__("sta %%g0, [%0] %1\n\t" - "sta %%g0, [%0 + %2] %1\n\t" - "sta %%g0, [%0 + %3] %1\n\t" - "sta %%g0, [%0 + %4] %1\n\t" - "sta %%g0, [%0 + %5] %1\n\t" - "sta %%g0, [%0 + %6] %1\n\t" - "sta %%g0, [%0 + %7] %1\n\t" - "sta %%g0, [%0 + %8] %1\n\t" : : - "r" (faddr), - "i" (ASI_M_FLUSH_SEG), - "r" (a), "r" (b), "r" (c), "r" (d), - "r" (e), "r" (f), "r" (g)); - } while (faddr != start); - 
start += SRMMU_REAL_PMD_SIZE; - } - srmmu_set_context(octx); - local_irq_restore(flags); - FLUSH_END -} - -static void cypress_flush_cache_page(struct vm_area_struct *vma, unsigned long page) -{ - register unsigned long a, b, c, d, e, f, g; - struct mm_struct *mm = vma->vm_mm; - unsigned long flags, line; - int octx; - - FLUSH_BEGIN(mm) - flush_user_windows(); - local_irq_save(flags); - octx = srmmu_get_context(); - srmmu_set_context(mm->context); - a = 0x20; b = 0x40; c = 0x60; - d = 0x80; e = 0xa0; f = 0xc0; g = 0xe0; - - page &= PAGE_MASK; - line = (page + PAGE_SIZE) - 0x100; - goto inside; - do { - line -= 0x100; - inside: - __asm__ __volatile__("sta %%g0, [%0] %1\n\t" - "sta %%g0, [%0 + %2] %1\n\t" - "sta %%g0, [%0 + %3] %1\n\t" - "sta %%g0, [%0 + %4] %1\n\t" - "sta %%g0, [%0 + %5] %1\n\t" - "sta %%g0, [%0 + %6] %1\n\t" - "sta %%g0, [%0 + %7] %1\n\t" - "sta %%g0, [%0 + %8] %1\n\t" : : - "r" (line), - "i" (ASI_M_FLUSH_PAGE), - "r" (a), "r" (b), "r" (c), "r" (d), - "r" (e), "r" (f), "r" (g)); - } while(line != page); - srmmu_set_context(octx); - local_irq_restore(flags); - FLUSH_END -} - -/* Cypress is copy-back, at least that is how we configure it. */ -static void cypress_flush_page_to_ram(unsigned long page) -{ - register unsigned long a, b, c, d, e, f, g; - unsigned long line; - - a = 0x20; b = 0x40; c = 0x60; d = 0x80; e = 0xa0; f = 0xc0; g = 0xe0; - page &= PAGE_MASK; - line = (page + PAGE_SIZE) - 0x100; - goto inside; - do { - line -= 0x100; - inside: - __asm__ __volatile__("sta %%g0, [%0] %1\n\t" - "sta %%g0, [%0 + %2] %1\n\t" - "sta %%g0, [%0 + %3] %1\n\t" - "sta %%g0, [%0 + %4] %1\n\t" - "sta %%g0, [%0 + %5] %1\n\t" - "sta %%g0, [%0 + %6] %1\n\t" - "sta %%g0, [%0 + %7] %1\n\t" - "sta %%g0, [%0 + %8] %1\n\t" : : - "r" (line), - "i" (ASI_M_FLUSH_PAGE), - "r" (a), "r" (b), "r" (c), "r" (d), - "r" (e), "r" (f), "r" (g)); - } while(line != page); -} - -/* Cypress is also IO cache coherent. 
*/ -static void cypress_flush_page_for_dma(unsigned long page) -{ -} - -/* Cypress has unified L2 VIPT, from which both instructions and data - * are stored. It does not have an onboard icache of any sort, therefore - * no flush is necessary. - */ -static void cypress_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr) -{ -} - -static void cypress_flush_tlb_all(void) -{ - srmmu_flush_whole_tlb(); -} - -static void cypress_flush_tlb_mm(struct mm_struct *mm) -{ - FLUSH_BEGIN(mm) - __asm__ __volatile__( - "lda [%0] %3, %%g5\n\t" - "sta %2, [%0] %3\n\t" - "sta %%g0, [%1] %4\n\t" - "sta %%g5, [%0] %3\n" - : /* no outputs */ - : "r" (SRMMU_CTX_REG), "r" (0x300), "r" (mm->context), - "i" (ASI_M_MMUREGS), "i" (ASI_M_FLUSH_PROBE) - : "g5"); - FLUSH_END -} - -static void cypress_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) -{ - struct mm_struct *mm = vma->vm_mm; - unsigned long size; - - FLUSH_BEGIN(mm) - start &= SRMMU_PGDIR_MASK; - size = SRMMU_PGDIR_ALIGN(end) - start; - __asm__ __volatile__( - "lda [%0] %5, %%g5\n\t" - "sta %1, [%0] %5\n" - "1:\n\t" - "subcc %3, %4, %3\n\t" - "bne 1b\n\t" - " sta %%g0, [%2 + %3] %6\n\t" - "sta %%g5, [%0] %5\n" - : /* no outputs */ - : "r" (SRMMU_CTX_REG), "r" (mm->context), "r" (start | 0x200), - "r" (size), "r" (SRMMU_PGDIR_SIZE), "i" (ASI_M_MMUREGS), - "i" (ASI_M_FLUSH_PROBE) - : "g5", "cc"); - FLUSH_END -} - -static void cypress_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) -{ - struct mm_struct *mm = vma->vm_mm; - - FLUSH_BEGIN(mm) - __asm__ __volatile__( - "lda [%0] %3, %%g5\n\t" - "sta %1, [%0] %3\n\t" - "sta %%g0, [%2] %4\n\t" - "sta %%g5, [%0] %3\n" - : /* no outputs */ - : "r" (SRMMU_CTX_REG), "r" (mm->context), "r" (page & PAGE_MASK), - "i" (ASI_M_MMUREGS), "i" (ASI_M_FLUSH_PROBE) - : "g5"); - FLUSH_END -} - /* viking.S */ extern void viking_flush_cache_all(void); extern void viking_flush_cache_mm(struct mm_struct *mm); @@ -1045,35 +648,36 @@ extern void 
hypersparc_setup_blockops(void); * around 8mb mapped for us. */ -void __init early_pgtable_allocfail(char *type) +static void __init early_pgtable_allocfail(char *type) { prom_printf("inherit_prom_mappings: Cannot alloc kernel %s.\n", type); prom_halt(); } -void __init srmmu_early_allocate_ptable_skeleton(unsigned long start, unsigned long end) +static void __init srmmu_early_allocate_ptable_skeleton(unsigned long start, + unsigned long end) { pgd_t *pgdp; pmd_t *pmdp; pte_t *ptep; - while(start < end) { + while (start < end) { pgdp = pgd_offset_k(start); - if(srmmu_pgd_none(*(pgd_t *)__nocache_fix(pgdp))) { - pmdp = (pmd_t *) __srmmu_get_nocache( + if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) { + pmdp = __srmmu_get_nocache( SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE); if (pmdp == NULL) early_pgtable_allocfail("pmd"); memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE); - srmmu_pgd_set(__nocache_fix(pgdp), pmdp); + pgd_set(__nocache_fix(pgdp), pmdp); } - pmdp = srmmu_pmd_offset(__nocache_fix(pgdp), start); - if(srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) { - ptep = (pte_t *)__srmmu_get_nocache(PTE_SIZE, PTE_SIZE); + pmdp = pmd_offset(__nocache_fix(pgdp), start); + if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) { + ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE); if (ptep == NULL) early_pgtable_allocfail("pte"); memset(__nocache_fix(ptep), 0, PTE_SIZE); - srmmu_pmd_set(__nocache_fix(pmdp), ptep); + pmd_set(__nocache_fix(pmdp), ptep); } if (start > (0xffffffffUL - PMD_SIZE)) break; @@ -1081,29 +685,30 @@ void __init srmmu_early_allocate_ptable_skeleton(unsigned long start, unsigned l } } -void __init srmmu_allocate_ptable_skeleton(unsigned long start, unsigned long end) +static void __init srmmu_allocate_ptable_skeleton(unsigned long start, + unsigned long end) { pgd_t *pgdp; pmd_t *pmdp; pte_t *ptep; - while(start < end) { + while (start < end) { pgdp = pgd_offset_k(start); - if(srmmu_pgd_none(*pgdp)) { - pmdp = (pmd_t 
*)__srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE); + if (pgd_none(*pgdp)) { + pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE); if (pmdp == NULL) early_pgtable_allocfail("pmd"); memset(pmdp, 0, SRMMU_PMD_TABLE_SIZE); - srmmu_pgd_set(pgdp, pmdp); + pgd_set(pgdp, pmdp); } - pmdp = srmmu_pmd_offset(pgdp, start); - if(srmmu_pmd_none(*pmdp)) { - ptep = (pte_t *) __srmmu_get_nocache(PTE_SIZE, + pmdp = pmd_offset(pgdp, start); + if (srmmu_pmd_none(*pmdp)) { + ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE); if (ptep == NULL) early_pgtable_allocfail("pte"); memset(ptep, 0, PTE_SIZE); - srmmu_pmd_set(pmdp, ptep); + pmd_set(pmdp, ptep); } if (start > (0xffffffffUL - PMD_SIZE)) break; @@ -1111,79 +716,101 @@ void __init srmmu_allocate_ptable_skeleton(unsigned long start, unsigned long en } } +/* These flush types are not available on all chips... */ +static inline unsigned long srmmu_probe(unsigned long vaddr) +{ + unsigned long retval; + + if (sparc_cpu_model != sparc_leon) { + + vaddr &= PAGE_MASK; + __asm__ __volatile__("lda [%1] %2, %0\n\t" : + "=r" (retval) : + "r" (vaddr | 0x400), "i" (ASI_M_FLUSH_PROBE)); + } else { + retval = leon_swprobe(vaddr, NULL); + } + return retval; +} + /* * This is much cleaner than poking around physical address space * looking at the prom's page table directly which is what most * other OS's do. Yuck... this is much better. 
*/ -void __init srmmu_inherit_prom_mappings(unsigned long start,unsigned long end) +static void __init srmmu_inherit_prom_mappings(unsigned long start, + unsigned long end) { + unsigned long probed; + unsigned long addr; pgd_t *pgdp; pmd_t *pmdp; pte_t *ptep; - int what = 0; /* 0 = normal-pte, 1 = pmd-level pte, 2 = pgd-level pte */ - unsigned long prompte; + int what; /* 0 = normal-pte, 1 = pmd-level pte, 2 = pgd-level pte */ - while(start <= end) { + while (start <= end) { if (start == 0) break; /* probably wrap around */ - if(start == 0xfef00000) + if (start == 0xfef00000) start = KADB_DEBUGGER_BEGVM; - if(!(prompte = srmmu_hwprobe(start))) { + probed = srmmu_probe(start); + if (!probed) { + /* continue probing until we find an entry */ start += PAGE_SIZE; continue; } - + /* A red snapper, see what it really is. */ what = 0; - - if(!(start & ~(SRMMU_REAL_PMD_MASK))) { - if(srmmu_hwprobe((start-PAGE_SIZE) + SRMMU_REAL_PMD_SIZE) == prompte) + addr = start - PAGE_SIZE; + + if (!(start & ~(SRMMU_REAL_PMD_MASK))) { + if (srmmu_probe(addr + SRMMU_REAL_PMD_SIZE) == probed) what = 1; } - - if(!(start & ~(SRMMU_PGDIR_MASK))) { - if(srmmu_hwprobe((start-PAGE_SIZE) + SRMMU_PGDIR_SIZE) == - prompte) + + if (!(start & ~(SRMMU_PGDIR_MASK))) { + if (srmmu_probe(addr + SRMMU_PGDIR_SIZE) == probed) what = 2; } - + pgdp = pgd_offset_k(start); - if(what == 2) { - *(pgd_t *)__nocache_fix(pgdp) = __pgd(prompte); + if (what == 2) { + *(pgd_t *)__nocache_fix(pgdp) = __pgd(probed); start += SRMMU_PGDIR_SIZE; continue; } - if(srmmu_pgd_none(*(pgd_t *)__nocache_fix(pgdp))) { - pmdp = (pmd_t *)__srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE); + if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) { + pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, + SRMMU_PMD_TABLE_SIZE); if (pmdp == NULL) early_pgtable_allocfail("pmd"); memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE); - srmmu_pgd_set(__nocache_fix(pgdp), pmdp); + pgd_set(__nocache_fix(pgdp), pmdp); } - pmdp = 
srmmu_pmd_offset(__nocache_fix(pgdp), start); - if(srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) { - ptep = (pte_t *) __srmmu_get_nocache(PTE_SIZE, - PTE_SIZE); + pmdp = pmd_offset(__nocache_fix(pgdp), start); + if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) { + ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE); if (ptep == NULL) early_pgtable_allocfail("pte"); memset(__nocache_fix(ptep), 0, PTE_SIZE); - srmmu_pmd_set(__nocache_fix(pmdp), ptep); + pmd_set(__nocache_fix(pmdp), ptep); } - if(what == 1) { - /* - * We bend the rule where all 16 PTPs in a pmd_t point + if (what == 1) { + /* We bend the rule where all 16 PTPs in a pmd_t point * inside the same PTE page, and we leak a perfectly * good hardware PTE piece. Alternatives seem worse. */ unsigned int x; /* Index of HW PMD in soft cluster */ + unsigned long *val; x = (start >> PMD_SHIFT) & 15; - *(unsigned long *)__nocache_fix(&pmdp->pmdv[x]) = prompte; + val = &pmdp->pmdv[x]; + *(unsigned long *)__nocache_fix(val) = probed; start += SRMMU_REAL_PMD_SIZE; continue; } - ptep = srmmu_pte_offset(__nocache_fix(pmdp), start); - *(pte_t *)__nocache_fix(ptep) = __pte(prompte); + ptep = pte_offset_kernel(__nocache_fix(pmdp), start); + *(pte_t *)__nocache_fix(ptep) = __pte(probed); start += PAGE_SIZE; } } @@ -1212,25 +839,18 @@ static unsigned long __init map_spbank(unsigned long vbase, int sp_entry) if (vstart < min_vaddr || vstart >= max_vaddr) return vstart; - + if (vend > max_vaddr || vend < min_vaddr) vend = max_vaddr; - while(vstart < vend) { + while (vstart < vend) { do_large_mapping(vstart, pstart); vstart += SRMMU_PGDIR_SIZE; pstart += SRMMU_PGDIR_SIZE; } return vstart; } -static inline void memprobe_error(char *msg) -{ - prom_printf(msg); - prom_printf("Halting now...\n"); - prom_halt(); -} - -static inline void map_kernel(void) +static void __init map_kernel(void) { int i; @@ -1241,26 +861,21 @@ static inline void map_kernel(void) for (i = 0; sp_banks[i].num_bytes != 0; i++) { map_spbank((unsigned 
long)__va(sp_banks[i].base_addr), i); } - - BTFIXUPSET_SIMM13(user_ptrs_per_pgd, PAGE_OFFSET / SRMMU_PGDIR_SIZE); } -/* Paging initialization on the Sparc Reference MMU. */ -extern void sparc_context_init(int); - -void (*poke_srmmu)(void) __initdata = NULL; - -extern unsigned long bootmem_init(unsigned long *pages_avail); +void (*poke_srmmu)(void) = NULL; void __init srmmu_paging_init(void) { - int i, cpunode; + int i; + phandle cpunode; char node_str[128]; pgd_t *pgd; pmd_t *pmd; pte_t *pte; unsigned long pages_avail; + init_mm.context = (unsigned long) NO_CONTEXT; sparc_iomap.start = SUN4M_IOBASE_VADDR; /* 16MB of IOSPACE on all sun4m's. */ if (sparc_cpu_model == sun4d) @@ -1269,9 +884,9 @@ void __init srmmu_paging_init(void) /* Find the number of contexts on the srmmu. */ cpunode = prom_getchild(prom_root_node); num_contexts = 0; - while(cpunode != 0) { + while (cpunode != 0) { prom_getstring(cpunode, "device_type", node_str, sizeof(node_str)); - if(!strcmp(node_str, "cpu")) { + if (!strcmp(node_str, "cpu")) { num_contexts = prom_getintdefault(cpunode, "mmu-nctx", 0x8); break; } @@ -1279,7 +894,7 @@ void __init srmmu_paging_init(void) } } - if(!num_contexts) { + if (!num_contexts) { prom_printf("Something wrong, can't find cpu node in paging_init.\n"); prom_halt(); } @@ -1289,38 +904,36 @@ void __init srmmu_paging_init(void) srmmu_nocache_calcsize(); srmmu_nocache_init(); - srmmu_inherit_prom_mappings(0xfe400000,(LINUX_OPPROM_ENDVM-PAGE_SIZE)); + srmmu_inherit_prom_mappings(0xfe400000, (LINUX_OPPROM_ENDVM - PAGE_SIZE)); map_kernel(); /* ctx table has to be physically aligned to its size */ - srmmu_context_table = (ctxd_t *)__srmmu_get_nocache(num_contexts*sizeof(ctxd_t), num_contexts*sizeof(ctxd_t)); + srmmu_context_table = __srmmu_get_nocache(num_contexts * sizeof(ctxd_t), num_contexts * sizeof(ctxd_t)); srmmu_ctx_table_phys = (ctxd_t *)__nocache_pa((unsigned long)srmmu_context_table); - for(i = 0; i < num_contexts; i++) + for (i = 0; i < num_contexts; i++) 
srmmu_ctxd_set((ctxd_t *)__nocache_fix(&srmmu_context_table[i]), srmmu_swapper_pg_dir); flush_cache_all(); srmmu_set_ctable_ptr((unsigned long)srmmu_ctx_table_phys); #ifdef CONFIG_SMP /* Stop from hanging here... */ - local_flush_tlb_all(); + local_ops->tlb_all(); #else flush_tlb_all(); #endif poke_srmmu(); -#ifdef CONFIG_SUN_IO srmmu_allocate_ptable_skeleton(sparc_iomap.start, IOBASE_END); srmmu_allocate_ptable_skeleton(DVMA_VADDR, DVMA_END); -#endif srmmu_allocate_ptable_skeleton( __fix_to_virt(__end_of_fixed_addresses - 1), FIXADDR_TOP); srmmu_allocate_ptable_skeleton(PKMAP_BASE, PKMAP_END); pgd = pgd_offset_k(PKMAP_BASE); - pmd = srmmu_pmd_offset(pgd, PKMAP_BASE); - pte = srmmu_pte_offset(pmd, PKMAP_BASE); + pmd = pmd_offset(pgd, PKMAP_BASE); + pte = pte_offset_kernel(pmd, PKMAP_BASE); pkmap_page_table = pte; flush_cache_all(); @@ -1348,14 +961,13 @@ void __init srmmu_paging_init(void) zones_size[ZONE_HIGHMEM] = npages; zholes_size[ZONE_HIGHMEM] = npages - calc_highpages(); - free_area_init_node(0, &contig_page_data, zones_size, - pfn_base, zholes_size); + free_area_init_node(0, zones_size, pfn_base, zholes_size); } } -static void srmmu_mmu_info(struct seq_file *m) +void mmu_info(struct seq_file *m) { - seq_printf(m, + seq_printf(m, "MMU type\t: %s\n" "contexts\t: %d\n" "nocache total\t: %ld\n" @@ -1366,14 +978,16 @@ static void srmmu_mmu_info(struct seq_file *m) srmmu_nocache_map.used << SRMMU_NOCACHE_BITMAP_SHIFT); } -static void srmmu_update_mmu_cache(struct vm_area_struct * vma, unsigned long address, pte_t pte) +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + mm->context = NO_CONTEXT; + return 0; } -static void srmmu_destroy_context(struct mm_struct *mm) +void destroy_context(struct mm_struct *mm) { - if(mm->context != NO_CONTEXT) { + if (mm->context != NO_CONTEXT) { flush_cache_mm(mm); srmmu_ctxd_set(&srmmu_context_table[mm->context], srmmu_swapper_pg_dir); flush_tlb_mm(mm); @@ -1393,7 +1007,8 @@ static void __init 
srmmu_is_bad(void) static void __init init_vac_layout(void) { - int nd, cache_lines; + phandle nd; + int cache_lines; char node_str[128]; #ifdef CONFIG_SMP int cpu = 0; @@ -1402,13 +1017,12 @@ static void __init init_vac_layout(void) #endif nd = prom_getchild(prom_root_node); - while((nd = prom_getsibling(nd)) != 0) { + while ((nd = prom_getsibling(nd)) != 0) { prom_getstring(nd, "device_type", node_str, sizeof(node_str)); - if(!strcmp(node_str, "cpu")) { + if (!strcmp(node_str, "cpu")) { vac_line_size = prom_getint(nd, "cache-line-size"); if (vac_line_size == -1) { - prom_printf("can't determine cache-line-size, " - "halting.\n"); + prom_printf("can't determine cache-line-size, halting.\n"); prom_halt(); } cache_lines = prom_getint(nd, "cache-nlines"); @@ -1419,20 +1033,20 @@ static void __init init_vac_layout(void) vac_cache_size = cache_lines * vac_line_size; #ifdef CONFIG_SMP - if(vac_cache_size > max_size) + if (vac_cache_size > max_size) max_size = vac_cache_size; - if(vac_line_size < min_line_size) + if (vac_line_size < min_line_size) min_line_size = vac_line_size; //FIXME: cpus not contiguous!! 
cpu++; - if (cpu >= NR_CPUS || !cpu_online(cpu)) + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) break; #else break; #endif } } - if(nd == 0) { + if (nd == 0) { prom_printf("No CPU nodes found, halting.\n"); prom_halt(); } @@ -1444,7 +1058,7 @@ static void __init init_vac_layout(void) (int)vac_cache_size, (int)vac_line_size); } -static void __init poke_hypersparc(void) +static void poke_hypersparc(void) { volatile unsigned long clear; unsigned long mreg = srmmu_get_mmureg(); @@ -1467,6 +1081,20 @@ static void __init poke_hypersparc(void) clear = srmmu_get_fstatus(); } +static const struct sparc32_cachetlb_ops hypersparc_ops = { + .cache_all = hypersparc_flush_cache_all, + .cache_mm = hypersparc_flush_cache_mm, + .cache_page = hypersparc_flush_cache_page, + .cache_range = hypersparc_flush_cache_range, + .tlb_all = hypersparc_flush_tlb_all, + .tlb_mm = hypersparc_flush_tlb_mm, + .tlb_page = hypersparc_flush_tlb_page, + .tlb_range = hypersparc_flush_tlb_range, + .page_to_ram = hypersparc_flush_page_to_ram, + .sig_insns = hypersparc_flush_sig_insns, + .page_for_dma = hypersparc_flush_page_for_dma, +}; + static void __init init_hypersparc(void) { srmmu_name = "ROSS HyperSparc"; @@ -1475,119 +1103,14 @@ static void __init init_hypersparc(void) init_vac_layout(); is_hypersparc = 1; - - BTFIXUPSET_CALL(pte_clear, srmmu_pte_clear, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pmd_clear, srmmu_pmd_clear, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pgd_clear, srmmu_pgd_clear, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_all, hypersparc_flush_cache_all, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_mm, hypersparc_flush_cache_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_range, hypersparc_flush_cache_range, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_page, hypersparc_flush_cache_page, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(flush_tlb_all, hypersparc_flush_tlb_all, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_mm, hypersparc_flush_tlb_mm, BTFIXUPCALL_NORM); - 
BTFIXUPSET_CALL(flush_tlb_range, hypersparc_flush_tlb_range, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_page, hypersparc_flush_tlb_page, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(__flush_page_to_ram, hypersparc_flush_page_to_ram, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_sig_insns, hypersparc_flush_sig_insns, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_page_for_dma, hypersparc_flush_page_for_dma, BTFIXUPCALL_NOP); - + sparc32_cachetlb_ops = &hypersparc_ops; poke_srmmu = poke_hypersparc; hypersparc_setup_blockops(); } -static void __init poke_cypress(void) -{ - unsigned long mreg = srmmu_get_mmureg(); - unsigned long faddr, tagval; - volatile unsigned long cypress_sucks; - volatile unsigned long clear; - - clear = srmmu_get_faddr(); - clear = srmmu_get_fstatus(); - - if (!(mreg & CYPRESS_CENABLE)) { - for(faddr = 0x0; faddr < 0x10000; faddr += 20) { - __asm__ __volatile__("sta %%g0, [%0 + %1] %2\n\t" - "sta %%g0, [%0] %2\n\t" : : - "r" (faddr), "r" (0x40000), - "i" (ASI_M_DATAC_TAG)); - } - } else { - for(faddr = 0; faddr < 0x10000; faddr += 0x20) { - __asm__ __volatile__("lda [%1 + %2] %3, %0\n\t" : - "=r" (tagval) : - "r" (faddr), "r" (0x40000), - "i" (ASI_M_DATAC_TAG)); - - /* If modified and valid, kick it. */ - if((tagval & 0x60) == 0x60) - cypress_sucks = *(unsigned long *) - (0xf0020000 + faddr); - } - } - - /* And one more, for our good neighbor, Mr. Broken Cypress. 
*/ - clear = srmmu_get_faddr(); - clear = srmmu_get_fstatus(); - - mreg |= (CYPRESS_CENABLE | CYPRESS_CMODE); - srmmu_set_mmureg(mreg); -} - -static void __init init_cypress_common(void) -{ - init_vac_layout(); - - BTFIXUPSET_CALL(pte_clear, srmmu_pte_clear, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pmd_clear, srmmu_pmd_clear, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pgd_clear, srmmu_pgd_clear, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_all, cypress_flush_cache_all, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_mm, cypress_flush_cache_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_range, cypress_flush_cache_range, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_page, cypress_flush_cache_page, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(flush_tlb_all, cypress_flush_tlb_all, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_mm, cypress_flush_tlb_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_page, cypress_flush_tlb_page, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_range, cypress_flush_tlb_range, BTFIXUPCALL_NORM); - - - BTFIXUPSET_CALL(__flush_page_to_ram, cypress_flush_page_to_ram, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_sig_insns, cypress_flush_sig_insns, BTFIXUPCALL_NOP); - BTFIXUPSET_CALL(flush_page_for_dma, cypress_flush_page_for_dma, BTFIXUPCALL_NOP); - - poke_srmmu = poke_cypress; -} - -static void __init init_cypress_604(void) -{ - srmmu_name = "ROSS Cypress-604(UP)"; - srmmu_modtype = Cypress; - init_cypress_common(); -} - -static void __init init_cypress_605(unsigned long mrev) -{ - srmmu_name = "ROSS Cypress-605(MP)"; - if(mrev == 0xe) { - srmmu_modtype = Cypress_vE; - hwbug_bitmask |= HWBUG_COPYBACK_BROKEN; - } else { - if(mrev == 0xd) { - srmmu_modtype = Cypress_vD; - hwbug_bitmask |= HWBUG_ASIFLUSH_BROKEN; - } else { - srmmu_modtype = Cypress; - } - } - init_cypress_common(); -} - -static void __init poke_swift(void) +static void poke_swift(void) { unsigned long mreg; @@ -1610,6 +1133,20 @@ static void __init poke_swift(void) 
srmmu_set_mmureg(mreg); } +static const struct sparc32_cachetlb_ops swift_ops = { + .cache_all = swift_flush_cache_all, + .cache_mm = swift_flush_cache_mm, + .cache_page = swift_flush_cache_page, + .cache_range = swift_flush_cache_range, + .tlb_all = swift_flush_tlb_all, + .tlb_mm = swift_flush_tlb_mm, + .tlb_page = swift_flush_tlb_page, + .tlb_range = swift_flush_tlb_range, + .page_to_ram = swift_flush_page_to_ram, + .sig_insns = swift_flush_sig_insns, + .page_for_dma = swift_flush_page_for_dma, +}; + #define SWIFT_MASKID_ADDR 0x10003018 static void __init init_swift(void) { @@ -1620,7 +1157,7 @@ static void __init init_swift(void) "=r" (swift_rev) : "r" (SWIFT_MASKID_ADDR), "i" (ASI_M_BYPASS)); srmmu_name = "Fujitsu Swift"; - switch(swift_rev) { + switch (swift_rev) { case 0x11: case 0x20: case 0x23: @@ -1658,25 +1195,9 @@ static void __init init_swift(void) default: srmmu_modtype = Swift_ok; break; - }; - - BTFIXUPSET_CALL(flush_cache_all, swift_flush_cache_all, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_mm, swift_flush_cache_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_page, swift_flush_cache_page, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_range, swift_flush_cache_range, BTFIXUPCALL_NORM); - - - BTFIXUPSET_CALL(flush_tlb_all, swift_flush_tlb_all, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_mm, swift_flush_tlb_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_page, swift_flush_tlb_page, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_range, swift_flush_tlb_range, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(__flush_page_to_ram, swift_flush_page_to_ram, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_sig_insns, swift_flush_sig_insns, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_page_for_dma, swift_flush_page_for_dma, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(update_mmu_cache, swift_update_mmu_cache, BTFIXUPCALL_NORM); + } + sparc32_cachetlb_ops = &swift_ops; flush_page_for_dma_global = 0; /* @@ -1727,7 +1248,7 @@ static void 
turbosparc_flush_page_to_ram(unsigned long page) #ifdef TURBOSPARC_WRITEBACK volatile unsigned long clear; - if (srmmu_hwprobe(page)) + if (srmmu_probe(page)) turbosparc_flush_page_cache(page); clear = srmmu_get_fstatus(); #endif @@ -1769,17 +1290,18 @@ static void turbosparc_flush_tlb_page(struct vm_area_struct *vma, unsigned long } -static void __init poke_turbosparc(void) +static void poke_turbosparc(void) { unsigned long mreg = srmmu_get_mmureg(); unsigned long ccreg; /* Clear any crap from the cache or else... */ turbosparc_flush_cache_all(); - mreg &= ~(TURBOSPARC_ICENABLE | TURBOSPARC_DCENABLE); /* Temporarily disable I & D caches */ + /* Temporarily disable I & D caches */ + mreg &= ~(TURBOSPARC_ICENABLE | TURBOSPARC_DCENABLE); mreg &= ~(TURBOSPARC_PCENABLE); /* Don't check parity */ srmmu_set_mmureg(mreg); - + ccreg = turbosparc_get_ccreg(); #ifdef TURBOSPARC_WRITEBACK @@ -1802,37 +1324,36 @@ static void __init poke_turbosparc(void) default: ccreg |= (TURBOSPARC_SCENABLE); } - turbosparc_set_ccreg (ccreg); + turbosparc_set_ccreg(ccreg); mreg |= (TURBOSPARC_ICENABLE | TURBOSPARC_DCENABLE); /* I & D caches on */ mreg |= (TURBOSPARC_ICSNOOP); /* Icache snooping on */ srmmu_set_mmureg(mreg); } +static const struct sparc32_cachetlb_ops turbosparc_ops = { + .cache_all = turbosparc_flush_cache_all, + .cache_mm = turbosparc_flush_cache_mm, + .cache_page = turbosparc_flush_cache_page, + .cache_range = turbosparc_flush_cache_range, + .tlb_all = turbosparc_flush_tlb_all, + .tlb_mm = turbosparc_flush_tlb_mm, + .tlb_page = turbosparc_flush_tlb_page, + .tlb_range = turbosparc_flush_tlb_range, + .page_to_ram = turbosparc_flush_page_to_ram, + .sig_insns = turbosparc_flush_sig_insns, + .page_for_dma = turbosparc_flush_page_for_dma, +}; + static void __init init_turbosparc(void) { srmmu_name = "Fujitsu TurboSparc"; srmmu_modtype = TurboSparc; - - BTFIXUPSET_CALL(flush_cache_all, turbosparc_flush_cache_all, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_mm, 
turbosparc_flush_cache_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_page, turbosparc_flush_cache_page, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_range, turbosparc_flush_cache_range, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(flush_tlb_all, turbosparc_flush_tlb_all, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_mm, turbosparc_flush_tlb_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_page, turbosparc_flush_tlb_page, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_range, turbosparc_flush_tlb_range, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(__flush_page_to_ram, turbosparc_flush_page_to_ram, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(flush_sig_insns, turbosparc_flush_sig_insns, BTFIXUPCALL_NOP); - BTFIXUPSET_CALL(flush_page_for_dma, turbosparc_flush_page_for_dma, BTFIXUPCALL_NORM); - + sparc32_cachetlb_ops = &turbosparc_ops; poke_srmmu = poke_turbosparc; } -static void __init poke_tsunami(void) +static void poke_tsunami(void) { unsigned long mreg = srmmu_get_mmureg(); @@ -1843,6 +1364,20 @@ static void __init poke_tsunami(void) srmmu_set_mmureg(mreg); } +static const struct sparc32_cachetlb_ops tsunami_ops = { + .cache_all = tsunami_flush_cache_all, + .cache_mm = tsunami_flush_cache_mm, + .cache_page = tsunami_flush_cache_page, + .cache_range = tsunami_flush_cache_range, + .tlb_all = tsunami_flush_tlb_all, + .tlb_mm = tsunami_flush_tlb_mm, + .tlb_page = tsunami_flush_tlb_page, + .tlb_range = tsunami_flush_tlb_range, + .page_to_ram = tsunami_flush_page_to_ram, + .sig_insns = tsunami_flush_sig_insns, + .page_for_dma = tsunami_flush_page_for_dma, +}; + static void __init init_tsunami(void) { /* @@ -1853,33 +1388,18 @@ static void __init init_tsunami(void) srmmu_name = "TI Tsunami"; srmmu_modtype = Tsunami; - - BTFIXUPSET_CALL(flush_cache_all, tsunami_flush_cache_all, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_mm, tsunami_flush_cache_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_page, tsunami_flush_cache_page, BTFIXUPCALL_NORM); - 
BTFIXUPSET_CALL(flush_cache_range, tsunami_flush_cache_range, BTFIXUPCALL_NORM); - - - BTFIXUPSET_CALL(flush_tlb_all, tsunami_flush_tlb_all, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_mm, tsunami_flush_tlb_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_page, tsunami_flush_tlb_page, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_range, tsunami_flush_tlb_range, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(__flush_page_to_ram, tsunami_flush_page_to_ram, BTFIXUPCALL_NOP); - BTFIXUPSET_CALL(flush_sig_insns, tsunami_flush_sig_insns, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_page_for_dma, tsunami_flush_page_for_dma, BTFIXUPCALL_NORM); - + sparc32_cachetlb_ops = &tsunami_ops; poke_srmmu = poke_tsunami; tsunami_setup_blockops(); } -static void __init poke_viking(void) +static void poke_viking(void) { unsigned long mreg = srmmu_get_mmureg(); static int smp_catch; - if(viking_mxcc_present) { + if (viking_mxcc_present) { unsigned long mxcc_control = mxcc_get_creg(); mxcc_control |= (MXCC_CTL_ECE | MXCC_CTL_PRE | MXCC_CTL_MCE); @@ -1898,7 +1418,7 @@ static void __init poke_viking(void) unsigned long bpreg; mreg &= ~(VIKING_TCENABLE); - if(smp_catch++) { + if (smp_catch++) { /* Must disable mixed-cmd mode here for other cpu's. */ bpreg = viking_get_bpreg(); bpreg &= ~(VIKING_ACTION_MIX); @@ -1914,34 +1434,64 @@ static void __init poke_viking(void) mreg |= VIKING_SBENABLE; mreg &= ~(VIKING_ACENABLE); srmmu_set_mmureg(mreg); +} + +static struct sparc32_cachetlb_ops viking_ops = { + .cache_all = viking_flush_cache_all, + .cache_mm = viking_flush_cache_mm, + .cache_page = viking_flush_cache_page, + .cache_range = viking_flush_cache_range, + .tlb_all = viking_flush_tlb_all, + .tlb_mm = viking_flush_tlb_mm, + .tlb_page = viking_flush_tlb_page, + .tlb_range = viking_flush_tlb_range, + .page_to_ram = viking_flush_page_to_ram, + .sig_insns = viking_flush_sig_insns, + .page_for_dma = viking_flush_page_for_dma, +}; #ifdef CONFIG_SMP - /* Avoid unnecessary cross calls. 
*/ - BTFIXUPCOPY_CALL(flush_cache_all, local_flush_cache_all); - BTFIXUPCOPY_CALL(flush_cache_mm, local_flush_cache_mm); - BTFIXUPCOPY_CALL(flush_cache_range, local_flush_cache_range); - BTFIXUPCOPY_CALL(flush_cache_page, local_flush_cache_page); - BTFIXUPCOPY_CALL(__flush_page_to_ram, local_flush_page_to_ram); - BTFIXUPCOPY_CALL(flush_sig_insns, local_flush_sig_insns); - BTFIXUPCOPY_CALL(flush_page_for_dma, local_flush_page_for_dma); - btfixup(); +/* On sun4d the cpu broadcasts local TLB flushes, so we can just + * perform the local TLB flush and all the other cpus will see it. + * But, unfortunately, there is a bug in the sun4d XBUS backplane + * that requires that we add some synchronization to these flushes. + * + * The bug is that the fifo which keeps track of all the pending TLB + * broadcasts in the system is an entry or two too small, so if we + * have too many going at once we'll overflow that fifo and lose a TLB + * flush resulting in corruption. + * + * Our workaround is to take a global spinlock around the TLB flushes, + * which guarentees we won't ever have too many pending. It's a big + * hammer, but a semaphore like system to make sure we only have N TLB + * flushes going at once will require SMP locking anyways so there's + * no real value in trying any harder than this. + */ +static struct sparc32_cachetlb_ops viking_sun4d_smp_ops = { + .cache_all = viking_flush_cache_all, + .cache_mm = viking_flush_cache_mm, + .cache_page = viking_flush_cache_page, + .cache_range = viking_flush_cache_range, + .tlb_all = sun4dsmp_flush_tlb_all, + .tlb_mm = sun4dsmp_flush_tlb_mm, + .tlb_page = sun4dsmp_flush_tlb_page, + .tlb_range = sun4dsmp_flush_tlb_range, + .page_to_ram = viking_flush_page_to_ram, + .sig_insns = viking_flush_sig_insns, + .page_for_dma = viking_flush_page_for_dma, +}; #endif -} static void __init init_viking(void) { unsigned long mreg = srmmu_get_mmureg(); /* Ahhh, the viking. SRMMU VLSI abortion number two... 
*/ - if(mreg & VIKING_MMODE) { + if (mreg & VIKING_MMODE) { srmmu_name = "TI Viking"; viking_mxcc_present = 0; msi_set_sync(); - BTFIXUPSET_CALL(pte_clear, srmmu_pte_clear, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pmd_clear, srmmu_pmd_clear, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pgd_clear, srmmu_pgd_clear, BTFIXUPCALL_NORM); - /* * We need this to make sure old viking takes no hits * on it's cache for dma snoops to workaround the @@ -1949,41 +1499,24 @@ static void __init init_viking(void) * This is only necessary because of the new way in * which we use the IOMMU. */ - BTFIXUPSET_CALL(flush_page_for_dma, viking_flush_page, BTFIXUPCALL_NORM); - + viking_ops.page_for_dma = viking_flush_page; +#ifdef CONFIG_SMP + viking_sun4d_smp_ops.page_for_dma = viking_flush_page; +#endif flush_page_for_dma_global = 0; } else { srmmu_name = "TI Viking/MXCC"; viking_mxcc_present = 1; - srmmu_cache_pagetables = 1; - - /* MXCC vikings lack the DMA snooping bug. */ - BTFIXUPSET_CALL(flush_page_for_dma, viking_flush_page_for_dma, BTFIXUPCALL_NOP); } - BTFIXUPSET_CALL(flush_cache_all, viking_flush_cache_all, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_mm, viking_flush_cache_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_page, viking_flush_cache_page, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_range, viking_flush_cache_range, BTFIXUPCALL_NORM); - + sparc32_cachetlb_ops = (const struct sparc32_cachetlb_ops *) + &viking_ops; #ifdef CONFIG_SMP - if (sparc_cpu_model == sun4d) { - BTFIXUPSET_CALL(flush_tlb_all, sun4dsmp_flush_tlb_all, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_mm, sun4dsmp_flush_tlb_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_page, sun4dsmp_flush_tlb_page, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_range, sun4dsmp_flush_tlb_range, BTFIXUPCALL_NORM); - } else + if (sparc_cpu_model == sun4d) + sparc32_cachetlb_ops = (const struct sparc32_cachetlb_ops *) + &viking_sun4d_smp_ops; #endif - { - BTFIXUPSET_CALL(flush_tlb_all, viking_flush_tlb_all, 
BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_mm, viking_flush_tlb_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_page, viking_flush_tlb_page, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_range, viking_flush_tlb_range, BTFIXUPCALL_NORM); - } - - BTFIXUPSET_CALL(__flush_page_to_ram, viking_flush_page_to_ram, BTFIXUPCALL_NOP); - BTFIXUPSET_CALL(flush_sig_insns, viking_flush_sig_insns, BTFIXUPCALL_NOP); poke_srmmu = poke_viking; } @@ -2003,38 +1536,36 @@ static void __init get_srmmu_type(void) psr_typ = (psr >> 28) & 0xf; psr_vers = (psr >> 24) & 0xf; - /* First, check for HyperSparc or Cypress. */ - if(mod_typ == 1) { - switch(mod_rev) { + /* First, check for sparc-leon. */ + if (sparc_cpu_model == sparc_leon) { + init_leon(); + return; + } + + /* Second, check for HyperSparc or Cypress. */ + if (mod_typ == 1) { + switch (mod_rev) { case 7: /* UP or MP Hypersparc */ init_hypersparc(); break; case 0: case 2: - /* Uniprocessor Cypress */ - init_cypress_604(); - break; case 10: case 11: case 12: - /* _REALLY OLD_ Cypress MP chips... */ case 13: case 14: case 15: - /* MP Cypress mmu/cache-controller */ - init_cypress_605(mod_rev); - break; default: - /* Some other Cypress revision, assume a 605. */ - init_cypress_605(mod_rev); + prom_printf("Sparc-Linux Cypress support does not longer exit.\n"); + prom_halt(); break; - }; + } return; } - - /* - * Now Fujitsu TurboSparc. It might happen that it is + + /* Now Fujitsu TurboSparc. It might happen that it is * in Swift emulation mode, so we will check later... */ if (psr_typ == 0 && psr_vers == 5) { @@ -2043,15 +1574,15 @@ static void __init get_srmmu_type(void) } /* Next check for Fujitsu Swift. */ - if(psr_typ == 0 && psr_vers == 4) { - int cpunode; + if (psr_typ == 0 && psr_vers == 4) { + phandle cpunode; char node_str[128]; /* Look if it is not a TurboSparc emulating Swift... 
*/ cpunode = prom_getchild(prom_root_node); - while((cpunode = prom_getsibling(cpunode)) != 0) { + while ((cpunode = prom_getsibling(cpunode)) != 0) { prom_getstring(cpunode, "device_type", node_str, sizeof(node_str)); - if(!strcmp(node_str, "cpu")) { + if (!strcmp(node_str, "cpu")) { if (!prom_getintdefault(cpunode, "psr-implementation", 1) && prom_getintdefault(cpunode, "psr-version", 1) == 5) { init_turbosparc(); @@ -2060,13 +1591,13 @@ static void __init get_srmmu_type(void) break; } } - + init_swift(); return; } /* Now the Viking family of srmmu. */ - if(psr_typ == 4 && + if (psr_typ == 4 && ((psr_vers == 0) || ((psr_vers == 1) && (mod_typ == 0) && (mod_rev == 0)))) { init_viking(); @@ -2074,7 +1605,7 @@ static void __init get_srmmu_type(void) } /* Finally the Tsunami. */ - if(psr_typ == 4 && psr_vers == 1 && (mod_typ || mod_rev)) { + if (psr_typ == 4 && psr_vers == 1 && (mod_typ || mod_rev)) { init_tsunami(); return; } @@ -2083,193 +1614,190 @@ static void __init get_srmmu_type(void) srmmu_is_bad(); } -/* don't laugh, static pagetables */ -static void srmmu_check_pgt_cache(int low, int high) +#ifdef CONFIG_SMP +/* Local cross-calls. 
*/ +static void smp_flush_page_for_dma(unsigned long page) { + xc1((smpfunc_t) local_ops->page_for_dma, page); + local_ops->page_for_dma(page); } -extern unsigned long spwin_mmu_patchme, fwin_mmu_patchme, - tsetup_mmu_patchme, rtrap_mmu_patchme; - -extern unsigned long spwin_srmmu_stackchk, srmmu_fwin_stackchk, - tsetup_srmmu_stackchk, srmmu_rett_stackchk; - -extern unsigned long srmmu_fault; - -#define PATCH_BRANCH(insn, dest) do { \ - iaddr = &(insn); \ - daddr = &(dest); \ - *iaddr = SPARC_BRANCH((unsigned long) daddr, (unsigned long) iaddr); \ - } while(0) - -static void __init patch_window_trap_handlers(void) +static void smp_flush_cache_all(void) { - unsigned long *iaddr, *daddr; - - PATCH_BRANCH(spwin_mmu_patchme, spwin_srmmu_stackchk); - PATCH_BRANCH(fwin_mmu_patchme, srmmu_fwin_stackchk); - PATCH_BRANCH(tsetup_mmu_patchme, tsetup_srmmu_stackchk); - PATCH_BRANCH(rtrap_mmu_patchme, srmmu_rett_stackchk); - PATCH_BRANCH(sparc_ttable[SP_TRAP_TFLT].inst_three, srmmu_fault); - PATCH_BRANCH(sparc_ttable[SP_TRAP_DFLT].inst_three, srmmu_fault); - PATCH_BRANCH(sparc_ttable[SP_TRAP_DACC].inst_three, srmmu_fault); + xc0((smpfunc_t) local_ops->cache_all); + local_ops->cache_all(); } -#ifdef CONFIG_SMP -/* Local cross-calls. 
*/ -static void smp_flush_page_for_dma(unsigned long page) +static void smp_flush_tlb_all(void) { - xc1((smpfunc_t) BTFIXUP_CALL(local_flush_page_for_dma), page); - local_flush_page_for_dma(page); + xc0((smpfunc_t) local_ops->tlb_all); + local_ops->tlb_all(); } -#endif +static void smp_flush_cache_mm(struct mm_struct *mm) +{ + if (mm->context != NO_CONTEXT) { + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) + xc1((smpfunc_t) local_ops->cache_mm, (unsigned long) mm); + local_ops->cache_mm(mm); + } +} -static pte_t srmmu_pgoff_to_pte(unsigned long pgoff) +static void smp_flush_tlb_mm(struct mm_struct *mm) { - return __pte((pgoff << SRMMU_PTE_FILE_SHIFT) | SRMMU_FILE); + if (mm->context != NO_CONTEXT) { + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) { + xc1((smpfunc_t) local_ops->tlb_mm, (unsigned long) mm); + if (atomic_read(&mm->mm_users) == 1 && current->active_mm == mm) + cpumask_copy(mm_cpumask(mm), + cpumask_of(smp_processor_id())); + } + local_ops->tlb_mm(mm); + } } -static unsigned long srmmu_pte_to_pgoff(pte_t pte) +static void smp_flush_cache_range(struct vm_area_struct *vma, + unsigned long start, + unsigned long end) { - return pte_val(pte) >> SRMMU_PTE_FILE_SHIFT; + struct mm_struct *mm = vma->vm_mm; + + if (mm->context != NO_CONTEXT) { + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) + xc3((smpfunc_t) local_ops->cache_range, + (unsigned long) vma, start, end); + local_ops->cache_range(vma, start, end); + } } -static pgprot_t srmmu_pgprot_noncached(pgprot_t prot) +static void smp_flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, + unsigned long end) { - prot &= ~__pgprot(SRMMU_CACHE); + struct mm_struct *mm = vma->vm_mm; - return prot; + 
if (mm->context != NO_CONTEXT) { + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) + xc3((smpfunc_t) local_ops->tlb_range, + (unsigned long) vma, start, end); + local_ops->tlb_range(vma, start, end); + } } -/* Load up routines and constants for sun4m and sun4d mmu */ -void __init ld_mmu_srmmu(void) +static void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page) { - extern void ld_mmu_iommu(void); - extern void ld_mmu_iounit(void); - extern void ___xchg32_sun4md(void); + struct mm_struct *mm = vma->vm_mm; - BTFIXUPSET_SIMM13(pgdir_shift, SRMMU_PGDIR_SHIFT); - BTFIXUPSET_SETHI(pgdir_size, SRMMU_PGDIR_SIZE); - BTFIXUPSET_SETHI(pgdir_mask, SRMMU_PGDIR_MASK); + if (mm->context != NO_CONTEXT) { + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) + xc2((smpfunc_t) local_ops->cache_page, + (unsigned long) vma, page); + local_ops->cache_page(vma, page); + } +} - BTFIXUPSET_SIMM13(ptrs_per_pmd, SRMMU_PTRS_PER_PMD); - BTFIXUPSET_SIMM13(ptrs_per_pgd, SRMMU_PTRS_PER_PGD); +static void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + struct mm_struct *mm = vma->vm_mm; - BTFIXUPSET_INT(page_none, pgprot_val(SRMMU_PAGE_NONE)); - PAGE_SHARED = pgprot_val(SRMMU_PAGE_SHARED); - BTFIXUPSET_INT(page_copy, pgprot_val(SRMMU_PAGE_COPY)); - BTFIXUPSET_INT(page_readonly, pgprot_val(SRMMU_PAGE_RDONLY)); - BTFIXUPSET_INT(page_kernel, pgprot_val(SRMMU_PAGE_KERNEL)); - page_kernel = pgprot_val(SRMMU_PAGE_KERNEL); + if (mm->context != NO_CONTEXT) { + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) + xc2((smpfunc_t) local_ops->tlb_page, + (unsigned long) vma, page); + local_ops->tlb_page(vma, page); + } +} - /* Functions */ - BTFIXUPSET_CALL(pgprot_noncached, 
srmmu_pgprot_noncached, BTFIXUPCALL_NORM); -#ifndef CONFIG_SMP - BTFIXUPSET_CALL(___xchg32, ___xchg32_sun4md, BTFIXUPCALL_SWAPG1G2); +static void smp_flush_page_to_ram(unsigned long page) +{ + /* Current theory is that those who call this are the one's + * who have just dirtied their cache with the pages contents + * in kernel space, therefore we only run this on local cpu. + * + * XXX This experiment failed, research further... -DaveM + */ +#if 1 + xc1((smpfunc_t) local_ops->page_to_ram, page); +#endif + local_ops->page_to_ram(page); +} + +static void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr) +{ + cpumask_t cpu_mask; + cpumask_copy(&cpu_mask, mm_cpumask(mm)); + cpumask_clear_cpu(smp_processor_id(), &cpu_mask); + if (!cpumask_empty(&cpu_mask)) + xc2((smpfunc_t) local_ops->sig_insns, + (unsigned long) mm, insn_addr); + local_ops->sig_insns(mm, insn_addr); +} + +static struct sparc32_cachetlb_ops smp_cachetlb_ops = { + .cache_all = smp_flush_cache_all, + .cache_mm = smp_flush_cache_mm, + .cache_page = smp_flush_cache_page, + .cache_range = smp_flush_cache_range, + .tlb_all = smp_flush_tlb_all, + .tlb_mm = smp_flush_tlb_mm, + .tlb_page = smp_flush_tlb_page, + .tlb_range = smp_flush_tlb_range, + .page_to_ram = smp_flush_page_to_ram, + .sig_insns = smp_flush_sig_insns, + .page_for_dma = smp_flush_page_for_dma, +}; #endif - BTFIXUPSET_CALL(do_check_pgt_cache, srmmu_check_pgt_cache, BTFIXUPCALL_NOP); - - BTFIXUPSET_CALL(set_pte, srmmu_set_pte, BTFIXUPCALL_SWAPO0O1); - BTFIXUPSET_CALL(switch_mm, srmmu_switch_mm, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(pte_pfn, srmmu_pte_pfn, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pmd_page, srmmu_pmd_page, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pgd_page_vaddr, srmmu_pgd_page, BTFIXUPCALL_NORM); - - BTFIXUPSET_SETHI(none_mask, 0xF0000000); - - BTFIXUPSET_CALL(pte_present, srmmu_pte_present, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_clear, srmmu_pte_clear, BTFIXUPCALL_SWAPO0G0); - - BTFIXUPSET_CALL(pmd_bad, srmmu_pmd_bad, 
BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pmd_present, srmmu_pmd_present, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pmd_clear, srmmu_pmd_clear, BTFIXUPCALL_SWAPO0G0); - - BTFIXUPSET_CALL(pgd_none, srmmu_pgd_none, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pgd_bad, srmmu_pgd_bad, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pgd_present, srmmu_pgd_present, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pgd_clear, srmmu_pgd_clear, BTFIXUPCALL_SWAPO0G0); - - BTFIXUPSET_CALL(mk_pte, srmmu_mk_pte, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mk_pte_phys, srmmu_mk_pte_phys, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mk_pte_io, srmmu_mk_pte_io, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pgd_set, srmmu_pgd_set, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pmd_set, srmmu_pmd_set, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pmd_populate, srmmu_pmd_populate, BTFIXUPCALL_NORM); - - BTFIXUPSET_INT(pte_modify_mask, SRMMU_CHG_MASK); - BTFIXUPSET_CALL(pmd_offset, srmmu_pmd_offset, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_offset_kernel, srmmu_pte_offset, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(free_pte_fast, srmmu_free_pte_fast, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_free, srmmu_pte_free, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_alloc_one_kernel, srmmu_pte_alloc_one_kernel, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_alloc_one, srmmu_pte_alloc_one, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(free_pmd_fast, srmmu_pmd_free, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pmd_alloc_one, srmmu_pmd_alloc_one, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(free_pgd_fast, srmmu_free_pgd_fast, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(get_pgd_fast, srmmu_get_pgd_fast, BTFIXUPCALL_NORM); - - BTFIXUPSET_HALF(pte_writei, SRMMU_WRITE); - BTFIXUPSET_HALF(pte_dirtyi, SRMMU_DIRTY); - BTFIXUPSET_HALF(pte_youngi, SRMMU_REF); - BTFIXUPSET_HALF(pte_filei, SRMMU_FILE); - BTFIXUPSET_HALF(pte_wrprotecti, SRMMU_WRITE); - BTFIXUPSET_HALF(pte_mkcleani, SRMMU_DIRTY); - BTFIXUPSET_HALF(pte_mkoldi, SRMMU_REF); - BTFIXUPSET_CALL(pte_mkwrite, srmmu_pte_mkwrite, BTFIXUPCALL_ORINT(SRMMU_WRITE)); - 
BTFIXUPSET_CALL(pte_mkdirty, srmmu_pte_mkdirty, BTFIXUPCALL_ORINT(SRMMU_DIRTY)); - BTFIXUPSET_CALL(pte_mkyoung, srmmu_pte_mkyoung, BTFIXUPCALL_ORINT(SRMMU_REF)); - BTFIXUPSET_CALL(update_mmu_cache, srmmu_update_mmu_cache, BTFIXUPCALL_NOP); - BTFIXUPSET_CALL(destroy_context, srmmu_destroy_context, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(sparc_mapiorange, srmmu_mapiorange, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(sparc_unmapiorange, srmmu_unmapiorange, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(__swp_type, srmmu_swp_type, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(__swp_offset, srmmu_swp_offset, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(__swp_entry, srmmu_swp_entry, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(mmu_info, srmmu_mmu_info, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(alloc_thread_info, srmmu_alloc_thread_info, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(free_thread_info, srmmu_free_thread_info, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(pte_to_pgoff, srmmu_pte_to_pgoff, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pgoff_to_pte, srmmu_pgoff_to_pte, BTFIXUPCALL_NORM); +/* Load up routines and constants for sun4m and sun4d mmu */ +void __init load_mmu(void) +{ + /* Functions */ get_srmmu_type(); - patch_window_trap_handlers(); #ifdef CONFIG_SMP /* El switcheroo... */ + local_ops = sparc32_cachetlb_ops; + + if (sparc_cpu_model == sun4d || sparc_cpu_model == sparc_leon) { + smp_cachetlb_ops.tlb_all = local_ops->tlb_all; + smp_cachetlb_ops.tlb_mm = local_ops->tlb_mm; + smp_cachetlb_ops.tlb_range = local_ops->tlb_range; + smp_cachetlb_ops.tlb_page = local_ops->tlb_page; + } + + if (poke_srmmu == poke_viking) { + /* Avoid unnecessary cross calls. 
*/ + smp_cachetlb_ops.cache_all = local_ops->cache_all; + smp_cachetlb_ops.cache_mm = local_ops->cache_mm; + smp_cachetlb_ops.cache_range = local_ops->cache_range; + smp_cachetlb_ops.cache_page = local_ops->cache_page; - BTFIXUPCOPY_CALL(local_flush_cache_all, flush_cache_all); - BTFIXUPCOPY_CALL(local_flush_cache_mm, flush_cache_mm); - BTFIXUPCOPY_CALL(local_flush_cache_range, flush_cache_range); - BTFIXUPCOPY_CALL(local_flush_cache_page, flush_cache_page); - BTFIXUPCOPY_CALL(local_flush_tlb_all, flush_tlb_all); - BTFIXUPCOPY_CALL(local_flush_tlb_mm, flush_tlb_mm); - BTFIXUPCOPY_CALL(local_flush_tlb_range, flush_tlb_range); - BTFIXUPCOPY_CALL(local_flush_tlb_page, flush_tlb_page); - BTFIXUPCOPY_CALL(local_flush_page_to_ram, __flush_page_to_ram); - BTFIXUPCOPY_CALL(local_flush_sig_insns, flush_sig_insns); - BTFIXUPCOPY_CALL(local_flush_page_for_dma, flush_page_for_dma); - - BTFIXUPSET_CALL(flush_cache_all, smp_flush_cache_all, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_mm, smp_flush_cache_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_range, smp_flush_cache_range, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_page, smp_flush_cache_page, BTFIXUPCALL_NORM); - if (sparc_cpu_model != sun4d) { - BTFIXUPSET_CALL(flush_tlb_all, smp_flush_tlb_all, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_mm, smp_flush_tlb_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_range, smp_flush_tlb_range, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_page, smp_flush_tlb_page, BTFIXUPCALL_NORM); + smp_cachetlb_ops.page_to_ram = local_ops->page_to_ram; + smp_cachetlb_ops.sig_insns = local_ops->sig_insns; + smp_cachetlb_ops.page_for_dma = local_ops->page_for_dma; } - BTFIXUPSET_CALL(__flush_page_to_ram, smp_flush_page_to_ram, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_sig_insns, smp_flush_sig_insns, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_page_for_dma, smp_flush_page_for_dma, BTFIXUPCALL_NORM); + + /* It really is const after this point. 
*/ + sparc32_cachetlb_ops = (const struct sparc32_cachetlb_ops *) + &smp_cachetlb_ops; #endif if (sparc_cpu_model == sun4d) @@ -2279,6 +1807,8 @@ void __init ld_mmu_srmmu(void) #ifdef CONFIG_SMP if (sparc_cpu_model == sun4d) sun4d_init_smp(); + else if (sparc_cpu_model == sparc_leon) + leon_init_smp(); else sun4m_init_smp(); #endif diff --git a/arch/sparc/mm/srmmu_access.S b/arch/sparc/mm/srmmu_access.S new file mode 100644 index 00000000000..d0a67b2c238 --- /dev/null +++ b/arch/sparc/mm/srmmu_access.S @@ -0,0 +1,82 @@ +/* Assembler variants of srmmu access functions. + * Implemented in assembler to allow run-time patching. + * LEON uses a different ASI for MMUREGS than SUN. + * + * The leon_1insn_patch infrastructure is used + * for the run-time patching. + */ + +#include <linux/linkage.h> + +#include <asm/asmmacro.h> +#include <asm/pgtsrmmu.h> +#include <asm/asi.h> + +/* unsigned int srmmu_get_mmureg(void) */ +ENTRY(srmmu_get_mmureg) +LEON_PI(lda [%g0] ASI_LEON_MMUREGS, %o0) +SUN_PI_(lda [%g0] ASI_M_MMUREGS, %o0) + retl + nop +ENDPROC(srmmu_get_mmureg) + +/* void srmmu_set_mmureg(unsigned long regval) */ +ENTRY(srmmu_set_mmureg) +LEON_PI(sta %o0, [%g0] ASI_LEON_MMUREGS) +SUN_PI_(sta %o0, [%g0] ASI_M_MMUREGS) + retl + nop +ENDPROC(srmmu_set_mmureg) + +/* void srmmu_set_ctable_ptr(unsigned long paddr) */ +ENTRY(srmmu_set_ctable_ptr) + /* paddr = ((paddr >> 4) & SRMMU_CTX_PMASK); */ + srl %o0, 4, %g1 + and %g1, SRMMU_CTX_PMASK, %g1 + + mov SRMMU_CTXTBL_PTR, %g2 +LEON_PI(sta %g1, [%g2] ASI_LEON_MMUREGS) +SUN_PI_(sta %g1, [%g2] ASI_M_MMUREGS) + retl + nop +ENDPROC(srmmu_set_ctable_ptr) + + +/* void srmmu_set_context(int context) */ +ENTRY(srmmu_set_context) + mov SRMMU_CTX_REG, %g1 +LEON_PI(sta %o0, [%g1] ASI_LEON_MMUREGS) +SUN_PI_(sta %o0, [%g1] ASI_M_MMUREGS) + retl + nop +ENDPROC(srmmu_set_context) + + +/* int srmmu_get_context(void) */ +ENTRY(srmmu_get_context) + mov SRMMU_CTX_REG, %o0 +LEON_PI(lda [%o0] ASI_LEON_MMUREGS, %o0) +SUN_PI_(lda [%o0] ASI_M_MMUREGS, 
%o0) + retl + nop +ENDPROC(srmmu_get_context) + + +/* unsigned int srmmu_get_fstatus(void) */ +ENTRY(srmmu_get_fstatus) + mov SRMMU_FAULT_STATUS, %o0 +LEON_PI(lda [%o0] ASI_LEON_MMUREGS, %o0) +SUN_PI_(lda [%o0] ASI_M_MMUREGS, %o0) + retl + nop +ENDPROC(srmmu_get_fstatus) + + +/* unsigned int srmmu_get_faddr(void) */ +ENTRY(srmmu_get_faddr) + mov SRMMU_FAULT_ADDR, %o0 +LEON_PI(lda [%o0] ASI_LEON_MMUREGS, %o0) +SUN_PI_(lda [%o0] ASI_M_MMUREGS, %o0) + retl + nop +ENDPROC(srmmu_get_faddr) diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c deleted file mode 100644 index 2375fe9dc31..00000000000 --- a/arch/sparc/mm/sun4c.c +++ /dev/null @@ -1,2284 +0,0 @@ -/* sun4c.c: Doing in software what should be done in hardware. - * - * Copyright (C) 1996 David S. Miller (davem@davemloft.net) - * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) - * Copyright (C) 1996 Andrew Tridgell (Andrew.Tridgell@anu.edu.au) - * Copyright (C) 1997-2000 Anton Blanchard (anton@samba.org) - * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - */ - -#define NR_TASK_BUCKETS 512 - -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/highmem.h> -#include <linux/fs.h> -#include <linux/seq_file.h> -#include <linux/scatterlist.h> - -#include <asm/page.h> -#include <asm/pgalloc.h> -#include <asm/pgtable.h> -#include <asm/vaddrs.h> -#include <asm/idprom.h> -#include <asm/machines.h> -#include <asm/memreg.h> -#include <asm/processor.h> -#include <asm/auxio.h> -#include <asm/io.h> -#include <asm/oplib.h> -#include <asm/openprom.h> -#include <asm/mmu_context.h> -#include <asm/sun4paddr.h> -#include <asm/highmem.h> -#include <asm/btfixup.h> -#include <asm/cacheflush.h> -#include <asm/tlbflush.h> - -/* Because of our dynamic kernel TLB miss strategy, and how - * our DVMA mapping allocation works, you _MUST_: - * - * 1) Disable interrupts _and_ not touch any dynamic kernel - * memory while messing with kernel MMU state. 
By - * dynamic memory I mean any object which is not in - * the kernel image itself or a thread_union (both of - * which are locked into the MMU). - * 2) Disable interrupts while messing with user MMU state. - */ - -extern int num_segmaps, num_contexts; - -extern unsigned long page_kernel; - -#ifdef CONFIG_SUN4 -#define SUN4C_VAC_SIZE sun4c_vacinfo.num_bytes -#else -/* That's it, we prom_halt() on sun4c if the cache size is something other than 65536. - * So let's save some cycles and just use that everywhere except for that bootup - * sanity check. - */ -#define SUN4C_VAC_SIZE 65536 -#endif - -#define SUN4C_KERNEL_BUCKETS 32 - -/* Flushing the cache. */ -struct sun4c_vac_props sun4c_vacinfo; -unsigned long sun4c_kernel_faults; - -/* Invalidate every sun4c cache line tag. */ -static void __init sun4c_flush_all(void) -{ - unsigned long begin, end; - - if (sun4c_vacinfo.on) - panic("SUN4C: AIEEE, trying to invalidate vac while it is on."); - - /* Clear 'valid' bit in all cache line tags */ - begin = AC_CACHETAGS; - end = (AC_CACHETAGS + SUN4C_VAC_SIZE); - while (begin < end) { - __asm__ __volatile__("sta %%g0, [%0] %1\n\t" : : - "r" (begin), "i" (ASI_CONTROL)); - begin += sun4c_vacinfo.linesize; - } -} - -static void sun4c_flush_context_hw(void) -{ - unsigned long end = SUN4C_VAC_SIZE; - - __asm__ __volatile__( - "1: addcc %0, -4096, %0\n\t" - " bne 1b\n\t" - " sta %%g0, [%0] %2" - : "=&r" (end) - : "0" (end), "i" (ASI_HWFLUSHCONTEXT) - : "cc"); -} - -/* Must be called minimally with IRQs disabled. */ -static void sun4c_flush_segment_hw(unsigned long addr) -{ - if (sun4c_get_segmap(addr) != invalid_segment) { - unsigned long vac_size = SUN4C_VAC_SIZE; - - __asm__ __volatile__( - "1: addcc %0, -4096, %0\n\t" - " bne 1b\n\t" - " sta %%g0, [%2 + %0] %3" - : "=&r" (vac_size) - : "0" (vac_size), "r" (addr), "i" (ASI_HWFLUSHSEG) - : "cc"); - } -} - -/* File local boot time fixups. 
*/ -BTFIXUPDEF_CALL(void, sun4c_flush_page, unsigned long) -BTFIXUPDEF_CALL(void, sun4c_flush_segment, unsigned long) -BTFIXUPDEF_CALL(void, sun4c_flush_context, void) - -#define sun4c_flush_page(addr) BTFIXUP_CALL(sun4c_flush_page)(addr) -#define sun4c_flush_segment(addr) BTFIXUP_CALL(sun4c_flush_segment)(addr) -#define sun4c_flush_context() BTFIXUP_CALL(sun4c_flush_context)() - -/* Must be called minimally with interrupts disabled. */ -static void sun4c_flush_page_hw(unsigned long addr) -{ - addr &= PAGE_MASK; - if ((int)sun4c_get_pte(addr) < 0) - __asm__ __volatile__("sta %%g0, [%0] %1" - : : "r" (addr), "i" (ASI_HWFLUSHPAGE)); -} - -/* Don't inline the software version as it eats too many cache lines if expanded. */ -static void sun4c_flush_context_sw(void) -{ - unsigned long nbytes = SUN4C_VAC_SIZE; - unsigned long lsize = sun4c_vacinfo.linesize; - - __asm__ __volatile__( - "add %2, %2, %%g1\n\t" - "add %2, %%g1, %%g2\n\t" - "add %2, %%g2, %%g3\n\t" - "add %2, %%g3, %%g4\n\t" - "add %2, %%g4, %%g5\n\t" - "add %2, %%g5, %%o4\n\t" - "add %2, %%o4, %%o5\n" - "1:\n\t" - "subcc %0, %%o5, %0\n\t" - "sta %%g0, [%0] %3\n\t" - "sta %%g0, [%0 + %2] %3\n\t" - "sta %%g0, [%0 + %%g1] %3\n\t" - "sta %%g0, [%0 + %%g2] %3\n\t" - "sta %%g0, [%0 + %%g3] %3\n\t" - "sta %%g0, [%0 + %%g4] %3\n\t" - "sta %%g0, [%0 + %%g5] %3\n\t" - "bg 1b\n\t" - " sta %%g0, [%1 + %%o4] %3\n" - : "=&r" (nbytes) - : "0" (nbytes), "r" (lsize), "i" (ASI_FLUSHCTX) - : "g1", "g2", "g3", "g4", "g5", "o4", "o5", "cc"); -} - -/* Don't inline the software version as it eats too many cache lines if expanded. 
*/ -static void sun4c_flush_segment_sw(unsigned long addr) -{ - if (sun4c_get_segmap(addr) != invalid_segment) { - unsigned long nbytes = SUN4C_VAC_SIZE; - unsigned long lsize = sun4c_vacinfo.linesize; - - __asm__ __volatile__( - "add %2, %2, %%g1\n\t" - "add %2, %%g1, %%g2\n\t" - "add %2, %%g2, %%g3\n\t" - "add %2, %%g3, %%g4\n\t" - "add %2, %%g4, %%g5\n\t" - "add %2, %%g5, %%o4\n\t" - "add %2, %%o4, %%o5\n" - "1:\n\t" - "subcc %1, %%o5, %1\n\t" - "sta %%g0, [%0] %6\n\t" - "sta %%g0, [%0 + %2] %6\n\t" - "sta %%g0, [%0 + %%g1] %6\n\t" - "sta %%g0, [%0 + %%g2] %6\n\t" - "sta %%g0, [%0 + %%g3] %6\n\t" - "sta %%g0, [%0 + %%g4] %6\n\t" - "sta %%g0, [%0 + %%g5] %6\n\t" - "sta %%g0, [%0 + %%o4] %6\n\t" - "bg 1b\n\t" - " add %0, %%o5, %0\n" - : "=&r" (addr), "=&r" (nbytes), "=&r" (lsize) - : "0" (addr), "1" (nbytes), "2" (lsize), - "i" (ASI_FLUSHSEG) - : "g1", "g2", "g3", "g4", "g5", "o4", "o5", "cc"); - } -} - -/* Don't inline the software version as it eats too many cache lines if expanded. */ -static void sun4c_flush_page_sw(unsigned long addr) -{ - addr &= PAGE_MASK; - if ((sun4c_get_pte(addr) & (_SUN4C_PAGE_NOCACHE | _SUN4C_PAGE_VALID)) == - _SUN4C_PAGE_VALID) { - unsigned long left = PAGE_SIZE; - unsigned long lsize = sun4c_vacinfo.linesize; - - __asm__ __volatile__( - "add %2, %2, %%g1\n\t" - "add %2, %%g1, %%g2\n\t" - "add %2, %%g2, %%g3\n\t" - "add %2, %%g3, %%g4\n\t" - "add %2, %%g4, %%g5\n\t" - "add %2, %%g5, %%o4\n\t" - "add %2, %%o4, %%o5\n" - "1:\n\t" - "subcc %1, %%o5, %1\n\t" - "sta %%g0, [%0] %6\n\t" - "sta %%g0, [%0 + %2] %6\n\t" - "sta %%g0, [%0 + %%g1] %6\n\t" - "sta %%g0, [%0 + %%g2] %6\n\t" - "sta %%g0, [%0 + %%g3] %6\n\t" - "sta %%g0, [%0 + %%g4] %6\n\t" - "sta %%g0, [%0 + %%g5] %6\n\t" - "sta %%g0, [%0 + %%o4] %6\n\t" - "bg 1b\n\t" - " add %0, %%o5, %0\n" - : "=&r" (addr), "=&r" (left), "=&r" (lsize) - : "0" (addr), "1" (left), "2" (lsize), - "i" (ASI_FLUSHPG) - : "g1", "g2", "g3", "g4", "g5", "o4", "o5", "cc"); - } -} - -/* The sun4c's do have an 
on chip store buffer. And the way you - * clear them out isn't so obvious. The only way I can think of - * to accomplish this is to read the current context register, - * store the same value there, then read an external hardware - * register. - */ -void sun4c_complete_all_stores(void) -{ - volatile int _unused; - - _unused = sun4c_get_context(); - sun4c_set_context(_unused); -#ifdef CONFIG_SUN_AUXIO - _unused = get_auxio(); -#endif -} - -/* Bootup utility functions. */ -static inline void sun4c_init_clean_segmap(unsigned char pseg) -{ - unsigned long vaddr; - - sun4c_put_segmap(0, pseg); - for (vaddr = 0; vaddr < SUN4C_REAL_PGDIR_SIZE; vaddr += PAGE_SIZE) - sun4c_put_pte(vaddr, 0); - sun4c_put_segmap(0, invalid_segment); -} - -static inline void sun4c_init_clean_mmu(unsigned long kernel_end) -{ - unsigned long vaddr; - unsigned char savectx, ctx; - - savectx = sun4c_get_context(); - for (ctx = 0; ctx < num_contexts; ctx++) { - sun4c_set_context(ctx); - for (vaddr = 0; vaddr < 0x20000000; vaddr += SUN4C_REAL_PGDIR_SIZE) - sun4c_put_segmap(vaddr, invalid_segment); - for (vaddr = 0xe0000000; vaddr < KERNBASE; vaddr += SUN4C_REAL_PGDIR_SIZE) - sun4c_put_segmap(vaddr, invalid_segment); - for (vaddr = kernel_end; vaddr < KADB_DEBUGGER_BEGVM; vaddr += SUN4C_REAL_PGDIR_SIZE) - sun4c_put_segmap(vaddr, invalid_segment); - for (vaddr = LINUX_OPPROM_ENDVM; vaddr; vaddr += SUN4C_REAL_PGDIR_SIZE) - sun4c_put_segmap(vaddr, invalid_segment); - } - sun4c_set_context(savectx); -} - -void __init sun4c_probe_vac(void) -{ - sun4c_disable_vac(); - - if (ARCH_SUN4) { - switch (idprom->id_machtype) { - - case (SM_SUN4|SM_4_110): - sun4c_vacinfo.type = VAC_NONE; - sun4c_vacinfo.num_bytes = 0; - sun4c_vacinfo.linesize = 0; - sun4c_vacinfo.do_hwflushes = 0; - prom_printf("No VAC. 
Get some bucks and buy a real computer."); - prom_halt(); - break; - - case (SM_SUN4|SM_4_260): - sun4c_vacinfo.type = VAC_WRITE_BACK; - sun4c_vacinfo.num_bytes = 128 * 1024; - sun4c_vacinfo.linesize = 16; - sun4c_vacinfo.do_hwflushes = 0; - break; - - case (SM_SUN4|SM_4_330): - sun4c_vacinfo.type = VAC_WRITE_THROUGH; - sun4c_vacinfo.num_bytes = 128 * 1024; - sun4c_vacinfo.linesize = 16; - sun4c_vacinfo.do_hwflushes = 0; - break; - - case (SM_SUN4|SM_4_470): - sun4c_vacinfo.type = VAC_WRITE_BACK; - sun4c_vacinfo.num_bytes = 128 * 1024; - sun4c_vacinfo.linesize = 32; - sun4c_vacinfo.do_hwflushes = 0; - break; - - default: - prom_printf("Cannot initialize VAC - weird sun4 model idprom->id_machtype = %d", idprom->id_machtype); - prom_halt(); - }; - } else { - sun4c_vacinfo.type = VAC_WRITE_THROUGH; - - if ((idprom->id_machtype == (SM_SUN4C | SM_4C_SS1)) || - (idprom->id_machtype == (SM_SUN4C | SM_4C_SS1PLUS))) { - /* PROM on SS1 lacks this info, to be super safe we - * hard code it here since this arch is cast in stone. 
- */ - sun4c_vacinfo.num_bytes = 65536; - sun4c_vacinfo.linesize = 16; - } else { - sun4c_vacinfo.num_bytes = - prom_getintdefault(prom_root_node, "vac-size", 65536); - sun4c_vacinfo.linesize = - prom_getintdefault(prom_root_node, "vac-linesize", 16); - } - sun4c_vacinfo.do_hwflushes = - prom_getintdefault(prom_root_node, "vac-hwflush", 0); - - if (sun4c_vacinfo.do_hwflushes == 0) - sun4c_vacinfo.do_hwflushes = - prom_getintdefault(prom_root_node, "vac_hwflush", 0); - - if (sun4c_vacinfo.num_bytes != 65536) { - prom_printf("WEIRD Sun4C VAC cache size, " - "tell sparclinux@vger.kernel.org"); - prom_halt(); - } - } - - sun4c_vacinfo.num_lines = - (sun4c_vacinfo.num_bytes / sun4c_vacinfo.linesize); - switch (sun4c_vacinfo.linesize) { - case 16: - sun4c_vacinfo.log2lsize = 4; - break; - case 32: - sun4c_vacinfo.log2lsize = 5; - break; - default: - prom_printf("probe_vac: Didn't expect vac-linesize of %d, halting\n", - sun4c_vacinfo.linesize); - prom_halt(); - }; - - sun4c_flush_all(); - sun4c_enable_vac(); -} - -/* Patch instructions for the low level kernel fault handler. */ -extern unsigned long invalid_segment_patch1, invalid_segment_patch1_ff; -extern unsigned long invalid_segment_patch2, invalid_segment_patch2_ff; -extern unsigned long invalid_segment_patch1_1ff, invalid_segment_patch2_1ff; -extern unsigned long num_context_patch1, num_context_patch1_16; -extern unsigned long num_context_patch2_16; -extern unsigned long vac_linesize_patch, vac_linesize_patch_32; -extern unsigned long vac_hwflush_patch1, vac_hwflush_patch1_on; -extern unsigned long vac_hwflush_patch2, vac_hwflush_patch2_on; - -#define PATCH_INSN(src, dst) do { \ - daddr = &(dst); \ - iaddr = &(src); \ - *daddr = *iaddr; \ - } while (0) - -static void __init patch_kernel_fault_handler(void) -{ - unsigned long *iaddr, *daddr; - - switch (num_segmaps) { - case 128: - /* Default, nothing to do. 
*/ - break; - case 256: - PATCH_INSN(invalid_segment_patch1_ff, - invalid_segment_patch1); - PATCH_INSN(invalid_segment_patch2_ff, - invalid_segment_patch2); - break; - case 512: - PATCH_INSN(invalid_segment_patch1_1ff, - invalid_segment_patch1); - PATCH_INSN(invalid_segment_patch2_1ff, - invalid_segment_patch2); - break; - default: - prom_printf("Unhandled number of segmaps: %d\n", - num_segmaps); - prom_halt(); - }; - switch (num_contexts) { - case 8: - /* Default, nothing to do. */ - break; - case 16: - PATCH_INSN(num_context_patch1_16, - num_context_patch1); - break; - default: - prom_printf("Unhandled number of contexts: %d\n", - num_contexts); - prom_halt(); - }; - - if (sun4c_vacinfo.do_hwflushes != 0) { - PATCH_INSN(vac_hwflush_patch1_on, vac_hwflush_patch1); - PATCH_INSN(vac_hwflush_patch2_on, vac_hwflush_patch2); - } else { - switch (sun4c_vacinfo.linesize) { - case 16: - /* Default, nothing to do. */ - break; - case 32: - PATCH_INSN(vac_linesize_patch_32, vac_linesize_patch); - break; - default: - prom_printf("Impossible VAC linesize %d, halting...\n", - sun4c_vacinfo.linesize); - prom_halt(); - }; - } -} - -static void __init sun4c_probe_mmu(void) -{ - if (ARCH_SUN4) { - switch (idprom->id_machtype) { - case (SM_SUN4|SM_4_110): - prom_printf("No support for 4100 yet\n"); - prom_halt(); - num_segmaps = 256; - num_contexts = 8; - break; - - case (SM_SUN4|SM_4_260): - /* should be 512 segmaps. when it get fixed */ - num_segmaps = 256; - num_contexts = 16; - break; - - case (SM_SUN4|SM_4_330): - num_segmaps = 256; - num_contexts = 16; - break; - - case (SM_SUN4|SM_4_470): - /* should be 1024 segmaps. 
when it get fixed */ - num_segmaps = 256; - num_contexts = 64; - break; - default: - prom_printf("Invalid SUN4 model\n"); - prom_halt(); - }; - } else { - if ((idprom->id_machtype == (SM_SUN4C | SM_4C_SS1)) || - (idprom->id_machtype == (SM_SUN4C | SM_4C_SS1PLUS))) { - /* Hardcode these just to be safe, PROM on SS1 does - * not have this info available in the root node. - */ - num_segmaps = 128; - num_contexts = 8; - } else { - num_segmaps = - prom_getintdefault(prom_root_node, "mmu-npmg", 128); - num_contexts = - prom_getintdefault(prom_root_node, "mmu-nctx", 0x8); - } - } - patch_kernel_fault_handler(); -} - -volatile unsigned long __iomem *sun4c_memerr_reg = NULL; - -void __init sun4c_probe_memerr_reg(void) -{ - int node; - struct linux_prom_registers regs[1]; - - if (ARCH_SUN4) { - sun4c_memerr_reg = ioremap(sun4_memreg_physaddr, PAGE_SIZE); - } else { - node = prom_getchild(prom_root_node); - node = prom_searchsiblings(prom_root_node, "memory-error"); - if (!node) - return; - if (prom_getproperty(node, "reg", (char *)regs, sizeof(regs)) <= 0) - return; - /* hmm I think regs[0].which_io is zero here anyways */ - sun4c_memerr_reg = ioremap(regs[0].phys_addr, regs[0].reg_size); - } -} - -static inline void sun4c_init_ss2_cache_bug(void) -{ - extern unsigned long start; - - if ((idprom->id_machtype == (SM_SUN4C | SM_4C_SS2)) || - (idprom->id_machtype == (SM_SUN4C | SM_4C_IPX)) || - (idprom->id_machtype == (SM_SUN4 | SM_4_330)) || - (idprom->id_machtype == (SM_SUN4C | SM_4C_ELC))) { - /* Whee.. */ - printk("SS2 cache bug detected, uncaching trap table page\n"); - sun4c_flush_page((unsigned int) &start); - sun4c_put_pte(((unsigned long) &start), - (sun4c_get_pte((unsigned long) &start) | _SUN4C_PAGE_NOCACHE)); - } -} - -/* Addr is always aligned on a page boundary for us already. 
*/ -static int sun4c_map_dma_area(dma_addr_t *pba, unsigned long va, - unsigned long addr, int len) -{ - unsigned long page, end; - - *pba = addr; - - end = PAGE_ALIGN((addr + len)); - while (addr < end) { - page = va; - sun4c_flush_page(page); - page -= PAGE_OFFSET; - page >>= PAGE_SHIFT; - page |= (_SUN4C_PAGE_VALID | _SUN4C_PAGE_DIRTY | - _SUN4C_PAGE_NOCACHE | _SUN4C_PAGE_PRIV); - sun4c_put_pte(addr, page); - addr += PAGE_SIZE; - va += PAGE_SIZE; - } - - return 0; -} - -static struct page *sun4c_translate_dvma(unsigned long busa) -{ - /* Fortunately for us, bus_addr == uncached_virt in sun4c. */ - unsigned long pte = sun4c_get_pte(busa); - return pfn_to_page(pte & SUN4C_PFN_MASK); -} - -static void sun4c_unmap_dma_area(unsigned long busa, int len) -{ - /* Fortunately for us, bus_addr == uncached_virt in sun4c. */ - /* XXX Implement this */ -} - -/* TLB management. */ - -/* Don't change this struct without changing entry.S. This is used - * in the in-window kernel fault handler, and you don't want to mess - * with that. (See sun4c_fault in entry.S). - */ -struct sun4c_mmu_entry { - struct sun4c_mmu_entry *next; - struct sun4c_mmu_entry *prev; - unsigned long vaddr; - unsigned char pseg; - unsigned char locked; - - /* For user mappings only, and completely hidden from kernel - * TLB miss code. 
- */ - unsigned char ctx; - struct sun4c_mmu_entry *lru_next; - struct sun4c_mmu_entry *lru_prev; -}; - -static struct sun4c_mmu_entry mmu_entry_pool[SUN4C_MAX_SEGMAPS]; - -static void __init sun4c_init_mmu_entry_pool(void) -{ - int i; - - for (i=0; i < SUN4C_MAX_SEGMAPS; i++) { - mmu_entry_pool[i].pseg = i; - mmu_entry_pool[i].next = NULL; - mmu_entry_pool[i].prev = NULL; - mmu_entry_pool[i].vaddr = 0; - mmu_entry_pool[i].locked = 0; - mmu_entry_pool[i].ctx = 0; - mmu_entry_pool[i].lru_next = NULL; - mmu_entry_pool[i].lru_prev = NULL; - } - mmu_entry_pool[invalid_segment].locked = 1; -} - -static inline void fix_permissions(unsigned long vaddr, unsigned long bits_on, - unsigned long bits_off) -{ - unsigned long start, end; - - end = vaddr + SUN4C_REAL_PGDIR_SIZE; - for (start = vaddr; start < end; start += PAGE_SIZE) - if (sun4c_get_pte(start) & _SUN4C_PAGE_VALID) - sun4c_put_pte(start, (sun4c_get_pte(start) | bits_on) & - ~bits_off); -} - -static inline void sun4c_init_map_kernelprom(unsigned long kernel_end) -{ - unsigned long vaddr; - unsigned char pseg, ctx; -#ifdef CONFIG_SUN4 - /* sun4/110 and 260 have no kadb. 
*/ - if ((idprom->id_machtype != (SM_SUN4 | SM_4_260)) && - (idprom->id_machtype != (SM_SUN4 | SM_4_110))) { -#endif - for (vaddr = KADB_DEBUGGER_BEGVM; - vaddr < LINUX_OPPROM_ENDVM; - vaddr += SUN4C_REAL_PGDIR_SIZE) { - pseg = sun4c_get_segmap(vaddr); - if (pseg != invalid_segment) { - mmu_entry_pool[pseg].locked = 1; - for (ctx = 0; ctx < num_contexts; ctx++) - prom_putsegment(ctx, vaddr, pseg); - fix_permissions(vaddr, _SUN4C_PAGE_PRIV, 0); - } - } -#ifdef CONFIG_SUN4 - } -#endif - for (vaddr = KERNBASE; vaddr < kernel_end; vaddr += SUN4C_REAL_PGDIR_SIZE) { - pseg = sun4c_get_segmap(vaddr); - mmu_entry_pool[pseg].locked = 1; - for (ctx = 0; ctx < num_contexts; ctx++) - prom_putsegment(ctx, vaddr, pseg); - fix_permissions(vaddr, _SUN4C_PAGE_PRIV, _SUN4C_PAGE_NOCACHE); - } -} - -static void __init sun4c_init_lock_area(unsigned long start, unsigned long end) -{ - int i, ctx; - - while (start < end) { - for (i = 0; i < invalid_segment; i++) - if (!mmu_entry_pool[i].locked) - break; - mmu_entry_pool[i].locked = 1; - sun4c_init_clean_segmap(i); - for (ctx = 0; ctx < num_contexts; ctx++) - prom_putsegment(ctx, start, mmu_entry_pool[i].pseg); - start += SUN4C_REAL_PGDIR_SIZE; - } -} - -/* Don't change this struct without changing entry.S. This is used - * in the in-window kernel fault handler, and you don't want to mess - * with that. (See sun4c_fault in entry.S). 
- */ -struct sun4c_mmu_ring { - struct sun4c_mmu_entry ringhd; - int num_entries; -}; - -static struct sun4c_mmu_ring sun4c_context_ring[SUN4C_MAX_CONTEXTS]; /* used user entries */ -static struct sun4c_mmu_ring sun4c_ufree_ring; /* free user entries */ -static struct sun4c_mmu_ring sun4c_ulru_ring; /* LRU user entries */ -struct sun4c_mmu_ring sun4c_kernel_ring; /* used kernel entries */ -struct sun4c_mmu_ring sun4c_kfree_ring; /* free kernel entries */ - -static inline void sun4c_init_rings(void) -{ - int i; - - for (i = 0; i < SUN4C_MAX_CONTEXTS; i++) { - sun4c_context_ring[i].ringhd.next = - sun4c_context_ring[i].ringhd.prev = - &sun4c_context_ring[i].ringhd; - sun4c_context_ring[i].num_entries = 0; - } - sun4c_ufree_ring.ringhd.next = sun4c_ufree_ring.ringhd.prev = - &sun4c_ufree_ring.ringhd; - sun4c_ufree_ring.num_entries = 0; - sun4c_ulru_ring.ringhd.lru_next = sun4c_ulru_ring.ringhd.lru_prev = - &sun4c_ulru_ring.ringhd; - sun4c_ulru_ring.num_entries = 0; - sun4c_kernel_ring.ringhd.next = sun4c_kernel_ring.ringhd.prev = - &sun4c_kernel_ring.ringhd; - sun4c_kernel_ring.num_entries = 0; - sun4c_kfree_ring.ringhd.next = sun4c_kfree_ring.ringhd.prev = - &sun4c_kfree_ring.ringhd; - sun4c_kfree_ring.num_entries = 0; -} - -static void add_ring(struct sun4c_mmu_ring *ring, - struct sun4c_mmu_entry *entry) -{ - struct sun4c_mmu_entry *head = &ring->ringhd; - - entry->prev = head; - (entry->next = head->next)->prev = entry; - head->next = entry; - ring->num_entries++; -} - -static inline void add_lru(struct sun4c_mmu_entry *entry) -{ - struct sun4c_mmu_ring *ring = &sun4c_ulru_ring; - struct sun4c_mmu_entry *head = &ring->ringhd; - - entry->lru_next = head; - (entry->lru_prev = head->lru_prev)->lru_next = entry; - head->lru_prev = entry; -} - -static void add_ring_ordered(struct sun4c_mmu_ring *ring, - struct sun4c_mmu_entry *entry) -{ - struct sun4c_mmu_entry *head = &ring->ringhd; - unsigned long addr = entry->vaddr; - - while ((head->next != &ring->ringhd) && 
(head->next->vaddr < addr)) - head = head->next; - - entry->prev = head; - (entry->next = head->next)->prev = entry; - head->next = entry; - ring->num_entries++; - - add_lru(entry); -} - -static inline void remove_ring(struct sun4c_mmu_ring *ring, - struct sun4c_mmu_entry *entry) -{ - struct sun4c_mmu_entry *next = entry->next; - - (next->prev = entry->prev)->next = next; - ring->num_entries--; -} - -static void remove_lru(struct sun4c_mmu_entry *entry) -{ - struct sun4c_mmu_entry *next = entry->lru_next; - - (next->lru_prev = entry->lru_prev)->lru_next = next; -} - -static void free_user_entry(int ctx, struct sun4c_mmu_entry *entry) -{ - remove_ring(sun4c_context_ring+ctx, entry); - remove_lru(entry); - add_ring(&sun4c_ufree_ring, entry); -} - -static void free_kernel_entry(struct sun4c_mmu_entry *entry, - struct sun4c_mmu_ring *ring) -{ - remove_ring(ring, entry); - add_ring(&sun4c_kfree_ring, entry); -} - -static void __init sun4c_init_fill_kernel_ring(int howmany) -{ - int i; - - while (howmany) { - for (i = 0; i < invalid_segment; i++) - if (!mmu_entry_pool[i].locked) - break; - mmu_entry_pool[i].locked = 1; - sun4c_init_clean_segmap(i); - add_ring(&sun4c_kfree_ring, &mmu_entry_pool[i]); - howmany--; - } -} - -static void __init sun4c_init_fill_user_ring(void) -{ - int i; - - for (i = 0; i < invalid_segment; i++) { - if (mmu_entry_pool[i].locked) - continue; - sun4c_init_clean_segmap(i); - add_ring(&sun4c_ufree_ring, &mmu_entry_pool[i]); - } -} - -static void sun4c_kernel_unmap(struct sun4c_mmu_entry *kentry) -{ - int savectx, ctx; - - savectx = sun4c_get_context(); - for (ctx = 0; ctx < num_contexts; ctx++) { - sun4c_set_context(ctx); - sun4c_put_segmap(kentry->vaddr, invalid_segment); - } - sun4c_set_context(savectx); -} - -static void sun4c_kernel_map(struct sun4c_mmu_entry *kentry) -{ - int savectx, ctx; - - savectx = sun4c_get_context(); - for (ctx = 0; ctx < num_contexts; ctx++) { - sun4c_set_context(ctx); - sun4c_put_segmap(kentry->vaddr, kentry->pseg); 
- } - sun4c_set_context(savectx); -} - -#define sun4c_user_unmap(__entry) \ - sun4c_put_segmap((__entry)->vaddr, invalid_segment) - -static void sun4c_demap_context(struct sun4c_mmu_ring *crp, unsigned char ctx) -{ - struct sun4c_mmu_entry *head = &crp->ringhd; - unsigned long flags; - - local_irq_save(flags); - if (head->next != head) { - struct sun4c_mmu_entry *entry = head->next; - int savectx = sun4c_get_context(); - - flush_user_windows(); - sun4c_set_context(ctx); - sun4c_flush_context(); - do { - struct sun4c_mmu_entry *next = entry->next; - - sun4c_user_unmap(entry); - free_user_entry(ctx, entry); - - entry = next; - } while (entry != head); - sun4c_set_context(savectx); - } - local_irq_restore(flags); -} - -static int sun4c_user_taken_entries; /* This is how much we have. */ -static int max_user_taken_entries; /* This limits us and prevents deadlock. */ - -static struct sun4c_mmu_entry *sun4c_kernel_strategy(void) -{ - struct sun4c_mmu_entry *this_entry; - - /* If some are free, return first one. */ - if (sun4c_kfree_ring.num_entries) { - this_entry = sun4c_kfree_ring.ringhd.next; - return this_entry; - } - - /* Else free one up. */ - this_entry = sun4c_kernel_ring.ringhd.prev; - sun4c_flush_segment(this_entry->vaddr); - sun4c_kernel_unmap(this_entry); - free_kernel_entry(this_entry, &sun4c_kernel_ring); - this_entry = sun4c_kfree_ring.ringhd.next; - - return this_entry; -} - -/* Using this method to free up mmu entries eliminates a lot of - * potential races since we have a kernel that incurs tlb - * replacement faults. There may be performance penalties. - * - * NOTE: Must be called with interrupts disabled. - */ -static struct sun4c_mmu_entry *sun4c_user_strategy(void) -{ - struct sun4c_mmu_entry *entry; - unsigned char ctx; - int savectx; - - /* If some are free, return first one. 
*/ - if (sun4c_ufree_ring.num_entries) { - entry = sun4c_ufree_ring.ringhd.next; - goto unlink_out; - } - - if (sun4c_user_taken_entries) { - entry = sun4c_kernel_strategy(); - sun4c_user_taken_entries--; - goto kunlink_out; - } - - /* Grab from the beginning of the LRU list. */ - entry = sun4c_ulru_ring.ringhd.lru_next; - ctx = entry->ctx; - - savectx = sun4c_get_context(); - flush_user_windows(); - sun4c_set_context(ctx); - sun4c_flush_segment(entry->vaddr); - sun4c_user_unmap(entry); - remove_ring(sun4c_context_ring + ctx, entry); - remove_lru(entry); - sun4c_set_context(savectx); - - return entry; - -unlink_out: - remove_ring(&sun4c_ufree_ring, entry); - return entry; -kunlink_out: - remove_ring(&sun4c_kfree_ring, entry); - return entry; -} - -/* NOTE: Must be called with interrupts disabled. */ -void sun4c_grow_kernel_ring(void) -{ - struct sun4c_mmu_entry *entry; - - /* Prevent deadlock condition. */ - if (sun4c_user_taken_entries >= max_user_taken_entries) - return; - - if (sun4c_ufree_ring.num_entries) { - entry = sun4c_ufree_ring.ringhd.next; - remove_ring(&sun4c_ufree_ring, entry); - add_ring(&sun4c_kfree_ring, entry); - sun4c_user_taken_entries++; - } -} - -/* 2 page buckets for task struct and kernel stack allocation. - * - * TASK_STACK_BEGIN - * bucket[0] - * bucket[1] - * [ ... 
] - * bucket[NR_TASK_BUCKETS-1] - * TASK_STACK_BEGIN + (sizeof(struct task_bucket) * NR_TASK_BUCKETS) - * - * Each slot looks like: - * - * page 1 -- task struct + beginning of kernel stack - * page 2 -- rest of kernel stack - */ - -union task_union *sun4c_bucket[NR_TASK_BUCKETS]; - -static int sun4c_lowbucket_avail; - -#define BUCKET_EMPTY ((union task_union *) 0) -#define BUCKET_SHIFT (PAGE_SHIFT + 1) /* log2(sizeof(struct task_bucket)) */ -#define BUCKET_SIZE (1 << BUCKET_SHIFT) -#define BUCKET_NUM(addr) ((((addr) - SUN4C_LOCK_VADDR) >> BUCKET_SHIFT)) -#define BUCKET_ADDR(num) (((num) << BUCKET_SHIFT) + SUN4C_LOCK_VADDR) -#define BUCKET_PTE(page) \ - ((((page) - PAGE_OFFSET) >> PAGE_SHIFT) | pgprot_val(SUN4C_PAGE_KERNEL)) -#define BUCKET_PTE_PAGE(pte) \ - (PAGE_OFFSET + (((pte) & SUN4C_PFN_MASK) << PAGE_SHIFT)) - -static void get_locked_segment(unsigned long addr) -{ - struct sun4c_mmu_entry *stolen; - unsigned long flags; - - local_irq_save(flags); - addr &= SUN4C_REAL_PGDIR_MASK; - stolen = sun4c_user_strategy(); - max_user_taken_entries--; - stolen->vaddr = addr; - flush_user_windows(); - sun4c_kernel_map(stolen); - local_irq_restore(flags); -} - -static void free_locked_segment(unsigned long addr) -{ - struct sun4c_mmu_entry *entry; - unsigned long flags; - unsigned char pseg; - - local_irq_save(flags); - addr &= SUN4C_REAL_PGDIR_MASK; - pseg = sun4c_get_segmap(addr); - entry = &mmu_entry_pool[pseg]; - - flush_user_windows(); - sun4c_flush_segment(addr); - sun4c_kernel_unmap(entry); - add_ring(&sun4c_ufree_ring, entry); - max_user_taken_entries++; - local_irq_restore(flags); -} - -static inline void garbage_collect(int entry) -{ - int start, end; - - /* 32 buckets per segment... */ - entry &= ~31; - start = entry; - for (end = (start + 32); start < end; start++) - if (sun4c_bucket[start] != BUCKET_EMPTY) - return; - - /* Entire segment empty, release it. 
*/ - free_locked_segment(BUCKET_ADDR(entry)); -} - -static struct thread_info *sun4c_alloc_thread_info(void) -{ - unsigned long addr, pages; - int entry; - - pages = __get_free_pages(GFP_KERNEL, THREAD_INFO_ORDER); - if (!pages) - return NULL; - - for (entry = sun4c_lowbucket_avail; entry < NR_TASK_BUCKETS; entry++) - if (sun4c_bucket[entry] == BUCKET_EMPTY) - break; - if (entry == NR_TASK_BUCKETS) { - free_pages(pages, THREAD_INFO_ORDER); - return NULL; - } - if (entry >= sun4c_lowbucket_avail) - sun4c_lowbucket_avail = entry + 1; - - addr = BUCKET_ADDR(entry); - sun4c_bucket[entry] = (union task_union *) addr; - if(sun4c_get_segmap(addr) == invalid_segment) - get_locked_segment(addr); - - /* We are changing the virtual color of the page(s) - * so we must flush the cache to guarantee consistency. - */ - sun4c_flush_page(pages); -#ifndef CONFIG_SUN4 - sun4c_flush_page(pages + PAGE_SIZE); -#endif - - sun4c_put_pte(addr, BUCKET_PTE(pages)); -#ifndef CONFIG_SUN4 - sun4c_put_pte(addr + PAGE_SIZE, BUCKET_PTE(pages + PAGE_SIZE)); -#endif - -#ifdef CONFIG_DEBUG_STACK_USAGE - memset((void *)addr, 0, PAGE_SIZE << THREAD_INFO_ORDER); -#endif /* DEBUG_STACK_USAGE */ - - return (struct thread_info *) addr; -} - -static void sun4c_free_thread_info(struct thread_info *ti) -{ - unsigned long tiaddr = (unsigned long) ti; - unsigned long pages = BUCKET_PTE_PAGE(sun4c_get_pte(tiaddr)); - int entry = BUCKET_NUM(tiaddr); - - /* We are deleting a mapping, so the flush here is mandatory. 
*/ - sun4c_flush_page(tiaddr); -#ifndef CONFIG_SUN4 - sun4c_flush_page(tiaddr + PAGE_SIZE); -#endif - sun4c_put_pte(tiaddr, 0); -#ifndef CONFIG_SUN4 - sun4c_put_pte(tiaddr + PAGE_SIZE, 0); -#endif - sun4c_bucket[entry] = BUCKET_EMPTY; - if (entry < sun4c_lowbucket_avail) - sun4c_lowbucket_avail = entry; - - free_pages(pages, THREAD_INFO_ORDER); - garbage_collect(entry); -} - -static void __init sun4c_init_buckets(void) -{ - int entry; - - if (sizeof(union thread_union) != (PAGE_SIZE << THREAD_INFO_ORDER)) { - extern void thread_info_size_is_bolixed_pete(void); - thread_info_size_is_bolixed_pete(); - } - - for (entry = 0; entry < NR_TASK_BUCKETS; entry++) - sun4c_bucket[entry] = BUCKET_EMPTY; - sun4c_lowbucket_avail = 0; -} - -static unsigned long sun4c_iobuffer_start; -static unsigned long sun4c_iobuffer_end; -static unsigned long sun4c_iobuffer_high; -static unsigned long *sun4c_iobuffer_map; -static int iobuffer_map_size; - -/* - * Alias our pages so they do not cause a trap. - * Also one page may be aliased into several I/O areas and we may - * finish these I/O separately. 
- */ -static char *sun4c_lockarea(char *vaddr, unsigned long size) -{ - unsigned long base, scan; - unsigned long npages; - unsigned long vpage; - unsigned long pte; - unsigned long apage; - unsigned long high; - unsigned long flags; - - npages = (((unsigned long)vaddr & ~PAGE_MASK) + - size + (PAGE_SIZE-1)) >> PAGE_SHIFT; - - scan = 0; - local_irq_save(flags); - for (;;) { - scan = find_next_zero_bit(sun4c_iobuffer_map, - iobuffer_map_size, scan); - if ((base = scan) + npages > iobuffer_map_size) goto abend; - for (;;) { - if (scan >= base + npages) goto found; - if (test_bit(scan, sun4c_iobuffer_map)) break; - scan++; - } - } - -found: - high = ((base + npages) << PAGE_SHIFT) + sun4c_iobuffer_start; - high = SUN4C_REAL_PGDIR_ALIGN(high); - while (high > sun4c_iobuffer_high) { - get_locked_segment(sun4c_iobuffer_high); - sun4c_iobuffer_high += SUN4C_REAL_PGDIR_SIZE; - } - - vpage = ((unsigned long) vaddr) & PAGE_MASK; - for (scan = base; scan < base+npages; scan++) { - pte = ((vpage-PAGE_OFFSET) >> PAGE_SHIFT); - pte |= pgprot_val(SUN4C_PAGE_KERNEL); - pte |= _SUN4C_PAGE_NOCACHE; - set_bit(scan, sun4c_iobuffer_map); - apage = (scan << PAGE_SHIFT) + sun4c_iobuffer_start; - - /* Flush original mapping so we see the right things later. 
*/ - sun4c_flush_page(vpage); - - sun4c_put_pte(apage, pte); - vpage += PAGE_SIZE; - } - local_irq_restore(flags); - return (char *) ((base << PAGE_SHIFT) + sun4c_iobuffer_start + - (((unsigned long) vaddr) & ~PAGE_MASK)); - -abend: - local_irq_restore(flags); - printk("DMA vaddr=0x%p size=%08lx\n", vaddr, size); - panic("Out of iobuffer table"); - return NULL; -} - -static void sun4c_unlockarea(char *vaddr, unsigned long size) -{ - unsigned long vpage, npages; - unsigned long flags; - int scan, high; - - vpage = (unsigned long)vaddr & PAGE_MASK; - npages = (((unsigned long)vaddr & ~PAGE_MASK) + - size + (PAGE_SIZE-1)) >> PAGE_SHIFT; - - local_irq_save(flags); - while (npages != 0) { - --npages; - - /* This mapping is marked non-cachable, no flush necessary. */ - sun4c_put_pte(vpage, 0); - clear_bit((vpage - sun4c_iobuffer_start) >> PAGE_SHIFT, - sun4c_iobuffer_map); - vpage += PAGE_SIZE; - } - - /* garbage collect */ - scan = (sun4c_iobuffer_high - sun4c_iobuffer_start) >> PAGE_SHIFT; - while (scan >= 0 && !sun4c_iobuffer_map[scan >> 5]) - scan -= 32; - scan += 32; - high = sun4c_iobuffer_start + (scan << PAGE_SHIFT); - high = SUN4C_REAL_PGDIR_ALIGN(high) + SUN4C_REAL_PGDIR_SIZE; - while (high < sun4c_iobuffer_high) { - sun4c_iobuffer_high -= SUN4C_REAL_PGDIR_SIZE; - free_locked_segment(sun4c_iobuffer_high); - } - local_irq_restore(flags); -} - -/* Note the scsi code at init time passes to here buffers - * which sit on the kernel stack, those are already locked - * by implication and fool the page locking code above - * if passed to by mistake. 
- */ -static __u32 sun4c_get_scsi_one(char *bufptr, unsigned long len, struct sbus_bus *sbus) -{ - unsigned long page; - - page = ((unsigned long)bufptr) & PAGE_MASK; - if (!virt_addr_valid(page)) { - sun4c_flush_page(page); - return (__u32)bufptr; /* already locked */ - } - return (__u32)sun4c_lockarea(bufptr, len); -} - -static void sun4c_get_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_bus *sbus) -{ - while (sz != 0) { - --sz; - sg->dvma_address = (__u32)sun4c_lockarea(sg_virt(sg), sg->length); - sg->dvma_length = sg->length; - sg = sg_next(sg); - } -} - -static void sun4c_release_scsi_one(__u32 bufptr, unsigned long len, struct sbus_bus *sbus) -{ - if (bufptr < sun4c_iobuffer_start) - return; /* On kernel stack or similar, see above */ - sun4c_unlockarea((char *)bufptr, len); -} - -static void sun4c_release_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_bus *sbus) -{ - while (sz != 0) { - --sz; - sun4c_unlockarea((char *)sg->dvma_address, sg->length); - sg = sg_next(sg); - } -} - -#define TASK_ENTRY_SIZE BUCKET_SIZE /* see above */ -#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1)) - -struct vm_area_struct sun4c_kstack_vma; - -static void __init sun4c_init_lock_areas(void) -{ - unsigned long sun4c_taskstack_start; - unsigned long sun4c_taskstack_end; - int bitmap_size; - - sun4c_init_buckets(); - sun4c_taskstack_start = SUN4C_LOCK_VADDR; - sun4c_taskstack_end = (sun4c_taskstack_start + - (TASK_ENTRY_SIZE * NR_TASK_BUCKETS)); - if (sun4c_taskstack_end >= SUN4C_LOCK_END) { - prom_printf("Too many tasks, decrease NR_TASK_BUCKETS please.\n"); - prom_halt(); - } - - sun4c_iobuffer_start = sun4c_iobuffer_high = - SUN4C_REAL_PGDIR_ALIGN(sun4c_taskstack_end); - sun4c_iobuffer_end = SUN4C_LOCK_END; - bitmap_size = (sun4c_iobuffer_end - sun4c_iobuffer_start) >> PAGE_SHIFT; - bitmap_size = (bitmap_size + 7) >> 3; - bitmap_size = LONG_ALIGN(bitmap_size); - iobuffer_map_size = bitmap_size << 3; - sun4c_iobuffer_map = 
__alloc_bootmem(bitmap_size, SMP_CACHE_BYTES, 0UL); - memset((void *) sun4c_iobuffer_map, 0, bitmap_size); - - sun4c_kstack_vma.vm_mm = &init_mm; - sun4c_kstack_vma.vm_start = sun4c_taskstack_start; - sun4c_kstack_vma.vm_end = sun4c_taskstack_end; - sun4c_kstack_vma.vm_page_prot = PAGE_SHARED; - sun4c_kstack_vma.vm_flags = VM_READ | VM_WRITE | VM_EXEC; - insert_vm_struct(&init_mm, &sun4c_kstack_vma); -} - -/* Cache flushing on the sun4c. */ -static void sun4c_flush_cache_all(void) -{ - unsigned long begin, end; - - flush_user_windows(); - begin = (KERNBASE + SUN4C_REAL_PGDIR_SIZE); - end = (begin + SUN4C_VAC_SIZE); - - if (sun4c_vacinfo.linesize == 32) { - while (begin < end) { - __asm__ __volatile__( - "ld [%0 + 0x00], %%g0\n\t" - "ld [%0 + 0x20], %%g0\n\t" - "ld [%0 + 0x40], %%g0\n\t" - "ld [%0 + 0x60], %%g0\n\t" - "ld [%0 + 0x80], %%g0\n\t" - "ld [%0 + 0xa0], %%g0\n\t" - "ld [%0 + 0xc0], %%g0\n\t" - "ld [%0 + 0xe0], %%g0\n\t" - "ld [%0 + 0x100], %%g0\n\t" - "ld [%0 + 0x120], %%g0\n\t" - "ld [%0 + 0x140], %%g0\n\t" - "ld [%0 + 0x160], %%g0\n\t" - "ld [%0 + 0x180], %%g0\n\t" - "ld [%0 + 0x1a0], %%g0\n\t" - "ld [%0 + 0x1c0], %%g0\n\t" - "ld [%0 + 0x1e0], %%g0\n" - : : "r" (begin)); - begin += 512; - } - } else { - while (begin < end) { - __asm__ __volatile__( - "ld [%0 + 0x00], %%g0\n\t" - "ld [%0 + 0x10], %%g0\n\t" - "ld [%0 + 0x20], %%g0\n\t" - "ld [%0 + 0x30], %%g0\n\t" - "ld [%0 + 0x40], %%g0\n\t" - "ld [%0 + 0x50], %%g0\n\t" - "ld [%0 + 0x60], %%g0\n\t" - "ld [%0 + 0x70], %%g0\n\t" - "ld [%0 + 0x80], %%g0\n\t" - "ld [%0 + 0x90], %%g0\n\t" - "ld [%0 + 0xa0], %%g0\n\t" - "ld [%0 + 0xb0], %%g0\n\t" - "ld [%0 + 0xc0], %%g0\n\t" - "ld [%0 + 0xd0], %%g0\n\t" - "ld [%0 + 0xe0], %%g0\n\t" - "ld [%0 + 0xf0], %%g0\n" - : : "r" (begin)); - begin += 256; - } - } -} - -static void sun4c_flush_cache_mm(struct mm_struct *mm) -{ - int new_ctx = mm->context; - - if (new_ctx != NO_CONTEXT) { - flush_user_windows(); - - if (sun4c_context_ring[new_ctx].num_entries) { - struct 
sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd; - unsigned long flags; - - local_irq_save(flags); - if (head->next != head) { - struct sun4c_mmu_entry *entry = head->next; - int savectx = sun4c_get_context(); - - sun4c_set_context(new_ctx); - sun4c_flush_context(); - do { - struct sun4c_mmu_entry *next = entry->next; - - sun4c_user_unmap(entry); - free_user_entry(new_ctx, entry); - - entry = next; - } while (entry != head); - sun4c_set_context(savectx); - } - local_irq_restore(flags); - } - } -} - -static void sun4c_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) -{ - struct mm_struct *mm = vma->vm_mm; - int new_ctx = mm->context; - - if (new_ctx != NO_CONTEXT) { - struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd; - struct sun4c_mmu_entry *entry; - unsigned long flags; - - flush_user_windows(); - - local_irq_save(flags); - /* All user segmap chains are ordered on entry->vaddr. */ - for (entry = head->next; - (entry != head) && ((entry->vaddr+SUN4C_REAL_PGDIR_SIZE) < start); - entry = entry->next) - ; - - /* Tracing various job mixtures showed that this conditional - * only passes ~35% of the time for most worse case situations, - * therefore we avoid all of this gross overhead ~65% of the time. - */ - if ((entry != head) && (entry->vaddr < end)) { - int octx = sun4c_get_context(); - sun4c_set_context(new_ctx); - - /* At this point, always, (start >= entry->vaddr) and - * (entry->vaddr < end), once the latter condition - * ceases to hold, or we hit the end of the list, we - * exit the loop. The ordering of all user allocated - * segmaps makes this all work out so beautifully. 
- */ - do { - struct sun4c_mmu_entry *next = entry->next; - unsigned long realend; - - /* "realstart" is always >= entry->vaddr */ - realend = entry->vaddr + SUN4C_REAL_PGDIR_SIZE; - if (end < realend) - realend = end; - if ((realend - entry->vaddr) <= (PAGE_SIZE << 3)) { - unsigned long page = entry->vaddr; - while (page < realend) { - sun4c_flush_page(page); - page += PAGE_SIZE; - } - } else { - sun4c_flush_segment(entry->vaddr); - sun4c_user_unmap(entry); - free_user_entry(new_ctx, entry); - } - entry = next; - } while ((entry != head) && (entry->vaddr < end)); - sun4c_set_context(octx); - } - local_irq_restore(flags); - } -} - -static void sun4c_flush_cache_page(struct vm_area_struct *vma, unsigned long page) -{ - struct mm_struct *mm = vma->vm_mm; - int new_ctx = mm->context; - - /* Sun4c has no separate I/D caches so cannot optimize for non - * text page flushes. - */ - if (new_ctx != NO_CONTEXT) { - int octx = sun4c_get_context(); - unsigned long flags; - - flush_user_windows(); - local_irq_save(flags); - sun4c_set_context(new_ctx); - sun4c_flush_page(page); - sun4c_set_context(octx); - local_irq_restore(flags); - } -} - -static void sun4c_flush_page_to_ram(unsigned long page) -{ - unsigned long flags; - - local_irq_save(flags); - sun4c_flush_page(page); - local_irq_restore(flags); -} - -/* Sun4c cache is unified, both instructions and data live there, so - * no need to flush the on-stack instructions for new signal handlers. - */ -static void sun4c_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr) -{ -} - -/* TLB flushing on the sun4c. These routines count on the cache - * flushing code to flush the user register windows so that we need - * not do so when we get here. 
- */ - -static void sun4c_flush_tlb_all(void) -{ - struct sun4c_mmu_entry *this_entry, *next_entry; - unsigned long flags; - int savectx, ctx; - - local_irq_save(flags); - this_entry = sun4c_kernel_ring.ringhd.next; - savectx = sun4c_get_context(); - flush_user_windows(); - while (sun4c_kernel_ring.num_entries) { - next_entry = this_entry->next; - sun4c_flush_segment(this_entry->vaddr); - for (ctx = 0; ctx < num_contexts; ctx++) { - sun4c_set_context(ctx); - sun4c_put_segmap(this_entry->vaddr, invalid_segment); - } - free_kernel_entry(this_entry, &sun4c_kernel_ring); - this_entry = next_entry; - } - sun4c_set_context(savectx); - local_irq_restore(flags); -} - -static void sun4c_flush_tlb_mm(struct mm_struct *mm) -{ - int new_ctx = mm->context; - - if (new_ctx != NO_CONTEXT) { - struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd; - unsigned long flags; - - local_irq_save(flags); - if (head->next != head) { - struct sun4c_mmu_entry *entry = head->next; - int savectx = sun4c_get_context(); - - sun4c_set_context(new_ctx); - sun4c_flush_context(); - do { - struct sun4c_mmu_entry *next = entry->next; - - sun4c_user_unmap(entry); - free_user_entry(new_ctx, entry); - - entry = next; - } while (entry != head); - sun4c_set_context(savectx); - } - local_irq_restore(flags); - } -} - -static void sun4c_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) -{ - struct mm_struct *mm = vma->vm_mm; - int new_ctx = mm->context; - - if (new_ctx != NO_CONTEXT) { - struct sun4c_mmu_entry *head = &sun4c_context_ring[new_ctx].ringhd; - struct sun4c_mmu_entry *entry; - unsigned long flags; - - local_irq_save(flags); - /* See commentary in sun4c_flush_cache_range(). 
*/ - for (entry = head->next; - (entry != head) && ((entry->vaddr+SUN4C_REAL_PGDIR_SIZE) < start); - entry = entry->next) - ; - - if ((entry != head) && (entry->vaddr < end)) { - int octx = sun4c_get_context(); - - sun4c_set_context(new_ctx); - do { - struct sun4c_mmu_entry *next = entry->next; - - sun4c_flush_segment(entry->vaddr); - sun4c_user_unmap(entry); - free_user_entry(new_ctx, entry); - - entry = next; - } while ((entry != head) && (entry->vaddr < end)); - sun4c_set_context(octx); - } - local_irq_restore(flags); - } -} - -static void sun4c_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) -{ - struct mm_struct *mm = vma->vm_mm; - int new_ctx = mm->context; - - if (new_ctx != NO_CONTEXT) { - int savectx = sun4c_get_context(); - unsigned long flags; - - local_irq_save(flags); - sun4c_set_context(new_ctx); - page &= PAGE_MASK; - sun4c_flush_page(page); - sun4c_put_pte(page, 0); - sun4c_set_context(savectx); - local_irq_restore(flags); - } -} - -static inline void sun4c_mapioaddr(unsigned long physaddr, unsigned long virt_addr) -{ - unsigned long page_entry, pg_iobits; - - pg_iobits = _SUN4C_PAGE_PRESENT | _SUN4C_READABLE | _SUN4C_WRITEABLE | - _SUN4C_PAGE_IO | _SUN4C_PAGE_NOCACHE; - - page_entry = ((physaddr >> PAGE_SHIFT) & SUN4C_PFN_MASK); - page_entry |= ((pg_iobits | _SUN4C_PAGE_PRIV) & ~(_SUN4C_PAGE_PRESENT)); - sun4c_put_pte(virt_addr, page_entry); -} - -static void sun4c_mapiorange(unsigned int bus, unsigned long xpa, - unsigned long xva, unsigned int len) -{ - while (len != 0) { - len -= PAGE_SIZE; - sun4c_mapioaddr(xpa, xva); - xva += PAGE_SIZE; - xpa += PAGE_SIZE; - } -} - -static void sun4c_unmapiorange(unsigned long virt_addr, unsigned int len) -{ - while (len != 0) { - len -= PAGE_SIZE; - sun4c_put_pte(virt_addr, 0); - virt_addr += PAGE_SIZE; - } -} - -static void sun4c_alloc_context(struct mm_struct *old_mm, struct mm_struct *mm) -{ - struct ctx_list *ctxp; - - ctxp = ctx_free.next; - if (ctxp != &ctx_free) { - 
remove_from_ctx_list(ctxp); - add_to_used_ctxlist(ctxp); - mm->context = ctxp->ctx_number; - ctxp->ctx_mm = mm; - return; - } - ctxp = ctx_used.next; - if (ctxp->ctx_mm == old_mm) - ctxp = ctxp->next; - remove_from_ctx_list(ctxp); - add_to_used_ctxlist(ctxp); - ctxp->ctx_mm->context = NO_CONTEXT; - ctxp->ctx_mm = mm; - mm->context = ctxp->ctx_number; - sun4c_demap_context(&sun4c_context_ring[ctxp->ctx_number], - ctxp->ctx_number); -} - -/* Switch the current MM context. */ -static void sun4c_switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk, int cpu) -{ - struct ctx_list *ctx; - int dirty = 0; - - if (mm->context == NO_CONTEXT) { - dirty = 1; - sun4c_alloc_context(old_mm, mm); - } else { - /* Update the LRU ring of contexts. */ - ctx = ctx_list_pool + mm->context; - remove_from_ctx_list(ctx); - add_to_used_ctxlist(ctx); - } - if (dirty || old_mm != mm) - sun4c_set_context(mm->context); -} - -static void sun4c_destroy_context(struct mm_struct *mm) -{ - struct ctx_list *ctx_old; - - if (mm->context != NO_CONTEXT) { - sun4c_demap_context(&sun4c_context_ring[mm->context], mm->context); - ctx_old = ctx_list_pool + mm->context; - remove_from_ctx_list(ctx_old); - add_to_free_ctxlist(ctx_old); - mm->context = NO_CONTEXT; - } -} - -static void sun4c_mmu_info(struct seq_file *m) -{ - int used_user_entries, i; - - used_user_entries = 0; - for (i = 0; i < num_contexts; i++) - used_user_entries += sun4c_context_ring[i].num_entries; - - seq_printf(m, - "vacsize\t\t: %d bytes\n" - "vachwflush\t: %s\n" - "vaclinesize\t: %d bytes\n" - "mmuctxs\t\t: %d\n" - "mmupsegs\t: %d\n" - "kernelpsegs\t: %d\n" - "kfreepsegs\t: %d\n" - "usedpsegs\t: %d\n" - "ufreepsegs\t: %d\n" - "user_taken\t: %d\n" - "max_taken\t: %d\n", - sun4c_vacinfo.num_bytes, - (sun4c_vacinfo.do_hwflushes ? 
"yes" : "no"), - sun4c_vacinfo.linesize, - num_contexts, - (invalid_segment + 1), - sun4c_kernel_ring.num_entries, - sun4c_kfree_ring.num_entries, - used_user_entries, - sun4c_ufree_ring.num_entries, - sun4c_user_taken_entries, - max_user_taken_entries); -} - -/* Nothing below here should touch the mmu hardware nor the mmu_entry - * data structures. - */ - -/* First the functions which the mid-level code uses to directly - * manipulate the software page tables. Some defines since we are - * emulating the i386 page directory layout. - */ -#define PGD_PRESENT 0x001 -#define PGD_RW 0x002 -#define PGD_USER 0x004 -#define PGD_ACCESSED 0x020 -#define PGD_DIRTY 0x040 -#define PGD_TABLE (PGD_PRESENT | PGD_RW | PGD_USER | PGD_ACCESSED | PGD_DIRTY) - -static void sun4c_set_pte(pte_t *ptep, pte_t pte) -{ - *ptep = pte; -} - -static void sun4c_pgd_set(pgd_t * pgdp, pmd_t * pmdp) -{ -} - -static void sun4c_pmd_set(pmd_t * pmdp, pte_t * ptep) -{ - pmdp->pmdv[0] = PGD_TABLE | (unsigned long) ptep; -} - -static void sun4c_pmd_populate(pmd_t * pmdp, struct page * ptep) -{ - if (page_address(ptep) == NULL) BUG(); /* No highmem on sun4c */ - pmdp->pmdv[0] = PGD_TABLE | (unsigned long) page_address(ptep); -} - -static int sun4c_pte_present(pte_t pte) -{ - return ((pte_val(pte) & (_SUN4C_PAGE_PRESENT | _SUN4C_PAGE_PRIV)) != 0); -} -static void sun4c_pte_clear(pte_t *ptep) { *ptep = __pte(0); } - -static int sun4c_pmd_bad(pmd_t pmd) -{ - return (((pmd_val(pmd) & ~PAGE_MASK) != PGD_TABLE) || - (!virt_addr_valid(pmd_val(pmd)))); -} - -static int sun4c_pmd_present(pmd_t pmd) -{ - return ((pmd_val(pmd) & PGD_PRESENT) != 0); -} - -#if 0 /* if PMD takes one word */ -static void sun4c_pmd_clear(pmd_t *pmdp) { *pmdp = __pmd(0); } -#else /* if pmd_t is a longish aggregate */ -static void sun4c_pmd_clear(pmd_t *pmdp) { - memset((void *)pmdp, 0, sizeof(pmd_t)); -} -#endif - -static int sun4c_pgd_none(pgd_t pgd) { return 0; } -static int sun4c_pgd_bad(pgd_t pgd) { return 0; } -static int 
sun4c_pgd_present(pgd_t pgd) { return 1; } -static void sun4c_pgd_clear(pgd_t * pgdp) { } - -/* - * The following only work if pte_present() is true. - * Undefined behaviour if not.. - */ -static pte_t sun4c_pte_mkwrite(pte_t pte) -{ - pte = __pte(pte_val(pte) | _SUN4C_PAGE_WRITE); - if (pte_val(pte) & _SUN4C_PAGE_MODIFIED) - pte = __pte(pte_val(pte) | _SUN4C_PAGE_SILENT_WRITE); - return pte; -} - -static pte_t sun4c_pte_mkdirty(pte_t pte) -{ - pte = __pte(pte_val(pte) | _SUN4C_PAGE_MODIFIED); - if (pte_val(pte) & _SUN4C_PAGE_WRITE) - pte = __pte(pte_val(pte) | _SUN4C_PAGE_SILENT_WRITE); - return pte; -} - -static pte_t sun4c_pte_mkyoung(pte_t pte) -{ - pte = __pte(pte_val(pte) | _SUN4C_PAGE_ACCESSED); - if (pte_val(pte) & _SUN4C_PAGE_READ) - pte = __pte(pte_val(pte) | _SUN4C_PAGE_SILENT_READ); - return pte; -} - -/* - * Conversion functions: convert a page and protection to a page entry, - * and a page entry and page directory to the page they refer to. - */ -static pte_t sun4c_mk_pte(struct page *page, pgprot_t pgprot) -{ - return __pte(page_to_pfn(page) | pgprot_val(pgprot)); -} - -static pte_t sun4c_mk_pte_phys(unsigned long phys_page, pgprot_t pgprot) -{ - return __pte((phys_page >> PAGE_SHIFT) | pgprot_val(pgprot)); -} - -static pte_t sun4c_mk_pte_io(unsigned long page, pgprot_t pgprot, int space) -{ - return __pte(((page - PAGE_OFFSET) >> PAGE_SHIFT) | pgprot_val(pgprot)); -} - -static unsigned long sun4c_pte_pfn(pte_t pte) -{ - return pte_val(pte) & SUN4C_PFN_MASK; -} - -static pte_t sun4c_pgoff_to_pte(unsigned long pgoff) -{ - return __pte(pgoff | _SUN4C_PAGE_FILE); -} - -static unsigned long sun4c_pte_to_pgoff(pte_t pte) -{ - return pte_val(pte) & ((1UL << PTE_FILE_MAX_BITS) - 1); -} - - -static inline unsigned long sun4c_pmd_page_v(pmd_t pmd) -{ - return (pmd_val(pmd) & PAGE_MASK); -} - -static struct page *sun4c_pmd_page(pmd_t pmd) -{ - return virt_to_page(sun4c_pmd_page_v(pmd)); -} - -static unsigned long sun4c_pgd_page(pgd_t pgd) { return 0; } - -/* 
to find an entry in a page-table-directory */ -static inline pgd_t *sun4c_pgd_offset(struct mm_struct * mm, unsigned long address) -{ - return mm->pgd + (address >> SUN4C_PGDIR_SHIFT); -} - -/* Find an entry in the second-level page table.. */ -static pmd_t *sun4c_pmd_offset(pgd_t * dir, unsigned long address) -{ - return (pmd_t *) dir; -} - -/* Find an entry in the third-level page table.. */ -pte_t *sun4c_pte_offset_kernel(pmd_t * dir, unsigned long address) -{ - return (pte_t *) sun4c_pmd_page_v(*dir) + - ((address >> PAGE_SHIFT) & (SUN4C_PTRS_PER_PTE - 1)); -} - -static unsigned long sun4c_swp_type(swp_entry_t entry) -{ - return (entry.val & SUN4C_SWP_TYPE_MASK); -} - -static unsigned long sun4c_swp_offset(swp_entry_t entry) -{ - return (entry.val >> SUN4C_SWP_OFF_SHIFT) & SUN4C_SWP_OFF_MASK; -} - -static swp_entry_t sun4c_swp_entry(unsigned long type, unsigned long offset) -{ - return (swp_entry_t) { - (offset & SUN4C_SWP_OFF_MASK) << SUN4C_SWP_OFF_SHIFT - | (type & SUN4C_SWP_TYPE_MASK) }; -} - -static void sun4c_free_pte_slow(pte_t *pte) -{ - free_page((unsigned long)pte); -} - -static void sun4c_free_pgd_slow(pgd_t *pgd) -{ - free_page((unsigned long)pgd); -} - -static pgd_t *sun4c_get_pgd_fast(void) -{ - unsigned long *ret; - - if ((ret = pgd_quicklist) != NULL) { - pgd_quicklist = (unsigned long *)(*ret); - ret[0] = ret[1]; - pgtable_cache_size--; - } else { - pgd_t *init; - - ret = (unsigned long *)__get_free_page(GFP_KERNEL); - memset (ret, 0, (KERNBASE / SUN4C_PGDIR_SIZE) * sizeof(pgd_t)); - init = sun4c_pgd_offset(&init_mm, 0); - memcpy (((pgd_t *)ret) + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); - } - return (pgd_t *)ret; -} - -static void sun4c_free_pgd_fast(pgd_t *pgd) -{ - *(unsigned long *)pgd = (unsigned long) pgd_quicklist; - pgd_quicklist = (unsigned long *) pgd; - pgtable_cache_size++; -} - - -static inline pte_t * -sun4c_pte_alloc_one_fast(struct mm_struct *mm, unsigned long address) -{ 
- unsigned long *ret; - - if ((ret = (unsigned long *)pte_quicklist) != NULL) { - pte_quicklist = (unsigned long *)(*ret); - ret[0] = ret[1]; - pgtable_cache_size--; - } - return (pte_t *)ret; -} - -static pte_t *sun4c_pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) -{ - pte_t *pte; - - if ((pte = sun4c_pte_alloc_one_fast(mm, address)) != NULL) - return pte; - - pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); - return pte; -} - -static pgtable_t sun4c_pte_alloc_one(struct mm_struct *mm, unsigned long address) -{ - pte_t *pte; - struct page *page; - - pte = sun4c_pte_alloc_one_kernel(mm, address); - if (pte == NULL) - return NULL; - page = virt_to_page(pte); - pgtable_page_ctor(page); - return page; -} - -static inline void sun4c_free_pte_fast(pte_t *pte) -{ - *(unsigned long *)pte = (unsigned long) pte_quicklist; - pte_quicklist = (unsigned long *) pte; - pgtable_cache_size++; -} - -static void sun4c_pte_free(pgtable_t pte) -{ - pgtable_page_dtor(pte); - sun4c_free_pte_fast(page_address(pte)); -} - -/* - * allocating and freeing a pmd is trivial: the 1-entry pmd is - * inside the pgd, so has no extra memory associated with it. - */ -static pmd_t *sun4c_pmd_alloc_one(struct mm_struct *mm, unsigned long address) -{ - BUG(); - return NULL; -} - -static void sun4c_free_pmd_fast(pmd_t * pmd) { } - -static void sun4c_check_pgt_cache(int low, int high) -{ - if (pgtable_cache_size > high) { - do { - if (pgd_quicklist) - sun4c_free_pgd_slow(sun4c_get_pgd_fast()); - if (pte_quicklist) - sun4c_free_pte_slow(sun4c_pte_alloc_one_fast(NULL, 0)); - } while (pgtable_cache_size > low); - } -} - -/* An experiment, turn off by default for now... 
-DaveM */ -#define SUN4C_PRELOAD_PSEG - -void sun4c_update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte) -{ - unsigned long flags; - int pseg; - - if (vma->vm_mm->context == NO_CONTEXT) - return; - - local_irq_save(flags); - address &= PAGE_MASK; - if ((pseg = sun4c_get_segmap(address)) == invalid_segment) { - struct sun4c_mmu_entry *entry = sun4c_user_strategy(); - struct mm_struct *mm = vma->vm_mm; - unsigned long start, end; - - entry->vaddr = start = (address & SUN4C_REAL_PGDIR_MASK); - entry->ctx = mm->context; - add_ring_ordered(sun4c_context_ring + mm->context, entry); - sun4c_put_segmap(entry->vaddr, entry->pseg); - end = start + SUN4C_REAL_PGDIR_SIZE; - while (start < end) { -#ifdef SUN4C_PRELOAD_PSEG - pgd_t *pgdp = sun4c_pgd_offset(mm, start); - pte_t *ptep; - - if (!pgdp) - goto no_mapping; - ptep = sun4c_pte_offset_kernel((pmd_t *) pgdp, start); - if (!ptep || !(pte_val(*ptep) & _SUN4C_PAGE_PRESENT)) - goto no_mapping; - sun4c_put_pte(start, pte_val(*ptep)); - goto next; - - no_mapping: -#endif - sun4c_put_pte(start, 0); -#ifdef SUN4C_PRELOAD_PSEG - next: -#endif - start += PAGE_SIZE; - } -#ifndef SUN4C_PRELOAD_PSEG - sun4c_put_pte(address, pte_val(pte)); -#endif - local_irq_restore(flags); - return; - } else { - struct sun4c_mmu_entry *entry = &mmu_entry_pool[pseg]; - - remove_lru(entry); - add_lru(entry); - } - - sun4c_put_pte(address, pte_val(pte)); - local_irq_restore(flags); -} - -extern void sparc_context_init(int); -extern unsigned long end; -extern unsigned long bootmem_init(unsigned long *pages_avail); -extern unsigned long last_valid_pfn; - -void __init sun4c_paging_init(void) -{ - int i, cnt; - unsigned long kernel_end, vaddr; - extern struct resource sparc_iomap; - unsigned long end_pfn, pages_avail; - - kernel_end = (unsigned long) &end; - kernel_end = SUN4C_REAL_PGDIR_ALIGN(kernel_end); - - pages_avail = 0; - last_valid_pfn = bootmem_init(&pages_avail); - end_pfn = last_valid_pfn; - - sun4c_probe_mmu(); - 
invalid_segment = (num_segmaps - 1); - sun4c_init_mmu_entry_pool(); - sun4c_init_rings(); - sun4c_init_map_kernelprom(kernel_end); - sun4c_init_clean_mmu(kernel_end); - sun4c_init_fill_kernel_ring(SUN4C_KERNEL_BUCKETS); - sun4c_init_lock_area(sparc_iomap.start, IOBASE_END); - sun4c_init_lock_area(DVMA_VADDR, DVMA_END); - sun4c_init_lock_areas(); - sun4c_init_fill_user_ring(); - - sun4c_set_context(0); - memset(swapper_pg_dir, 0, PAGE_SIZE); - memset(pg0, 0, PAGE_SIZE); - memset(pg1, 0, PAGE_SIZE); - memset(pg2, 0, PAGE_SIZE); - memset(pg3, 0, PAGE_SIZE); - - /* Save work later. */ - vaddr = VMALLOC_START; - swapper_pg_dir[vaddr>>SUN4C_PGDIR_SHIFT] = __pgd(PGD_TABLE | (unsigned long) pg0); - vaddr += SUN4C_PGDIR_SIZE; - swapper_pg_dir[vaddr>>SUN4C_PGDIR_SHIFT] = __pgd(PGD_TABLE | (unsigned long) pg1); - vaddr += SUN4C_PGDIR_SIZE; - swapper_pg_dir[vaddr>>SUN4C_PGDIR_SHIFT] = __pgd(PGD_TABLE | (unsigned long) pg2); - vaddr += SUN4C_PGDIR_SIZE; - swapper_pg_dir[vaddr>>SUN4C_PGDIR_SHIFT] = __pgd(PGD_TABLE | (unsigned long) pg3); - sun4c_init_ss2_cache_bug(); - sparc_context_init(num_contexts); - - { - unsigned long zones_size[MAX_NR_ZONES]; - unsigned long zholes_size[MAX_NR_ZONES]; - unsigned long npages; - int znum; - - for (znum = 0; znum < MAX_NR_ZONES; znum++) - zones_size[znum] = zholes_size[znum] = 0; - - npages = max_low_pfn - pfn_base; - - zones_size[ZONE_DMA] = npages; - zholes_size[ZONE_DMA] = npages - pages_avail; - - npages = highend_pfn - max_low_pfn; - zones_size[ZONE_HIGHMEM] = npages; - zholes_size[ZONE_HIGHMEM] = npages - calc_highpages(); - - free_area_init_node(0, &contig_page_data, zones_size, - pfn_base, zholes_size); - } - - cnt = 0; - for (i = 0; i < num_segmaps; i++) - if (mmu_entry_pool[i].locked) - cnt++; - - max_user_taken_entries = num_segmaps - cnt - 40 - 1; - - printk("SUN4C: %d mmu entries for the kernel\n", cnt); -} - -static pgprot_t sun4c_pgprot_noncached(pgprot_t prot) -{ - prot |= __pgprot(_SUN4C_PAGE_IO | _SUN4C_PAGE_NOCACHE); - - 
return prot; -} - -/* Load up routines and constants for sun4c mmu */ -void __init ld_mmu_sun4c(void) -{ - extern void ___xchg32_sun4c(void); - - printk("Loading sun4c MMU routines\n"); - - /* First the constants */ - BTFIXUPSET_SIMM13(pgdir_shift, SUN4C_PGDIR_SHIFT); - BTFIXUPSET_SETHI(pgdir_size, SUN4C_PGDIR_SIZE); - BTFIXUPSET_SETHI(pgdir_mask, SUN4C_PGDIR_MASK); - - BTFIXUPSET_SIMM13(ptrs_per_pmd, SUN4C_PTRS_PER_PMD); - BTFIXUPSET_SIMM13(ptrs_per_pgd, SUN4C_PTRS_PER_PGD); - BTFIXUPSET_SIMM13(user_ptrs_per_pgd, KERNBASE / SUN4C_PGDIR_SIZE); - - BTFIXUPSET_INT(page_none, pgprot_val(SUN4C_PAGE_NONE)); - PAGE_SHARED = pgprot_val(SUN4C_PAGE_SHARED); - BTFIXUPSET_INT(page_copy, pgprot_val(SUN4C_PAGE_COPY)); - BTFIXUPSET_INT(page_readonly, pgprot_val(SUN4C_PAGE_READONLY)); - BTFIXUPSET_INT(page_kernel, pgprot_val(SUN4C_PAGE_KERNEL)); - page_kernel = pgprot_val(SUN4C_PAGE_KERNEL); - - /* Functions */ - BTFIXUPSET_CALL(pgprot_noncached, sun4c_pgprot_noncached, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(___xchg32, ___xchg32_sun4c, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(do_check_pgt_cache, sun4c_check_pgt_cache, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(flush_cache_all, sun4c_flush_cache_all, BTFIXUPCALL_NORM); - - if (sun4c_vacinfo.do_hwflushes) { - BTFIXUPSET_CALL(sun4c_flush_page, sun4c_flush_page_hw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(sun4c_flush_segment, sun4c_flush_segment_hw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(sun4c_flush_context, sun4c_flush_context_hw, BTFIXUPCALL_NORM); - } else { - BTFIXUPSET_CALL(sun4c_flush_page, sun4c_flush_page_sw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(sun4c_flush_segment, sun4c_flush_segment_sw, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(sun4c_flush_context, sun4c_flush_context_sw, BTFIXUPCALL_NORM); - } - - BTFIXUPSET_CALL(flush_tlb_mm, sun4c_flush_tlb_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_mm, sun4c_flush_cache_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(destroy_context, sun4c_destroy_context, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(switch_mm, 
sun4c_switch_mm, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_page, sun4c_flush_cache_page, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_page, sun4c_flush_tlb_page, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_range, sun4c_flush_tlb_range, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_cache_range, sun4c_flush_cache_range, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(__flush_page_to_ram, sun4c_flush_page_to_ram, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(flush_tlb_all, sun4c_flush_tlb_all, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(flush_sig_insns, sun4c_flush_sig_insns, BTFIXUPCALL_NOP); - - BTFIXUPSET_CALL(set_pte, sun4c_set_pte, BTFIXUPCALL_STO1O0); - - /* The 2.4.18 code does not set this on sun4c, how does it work? XXX */ - /* BTFIXUPSET_SETHI(none_mask, 0x00000000); */ /* Defaults to zero? */ - - BTFIXUPSET_CALL(pte_pfn, sun4c_pte_pfn, BTFIXUPCALL_NORM); -#if 0 /* PAGE_SHIFT <= 12 */ /* Eek. Investigate. XXX */ - BTFIXUPSET_CALL(pmd_page, sun4c_pmd_page, BTFIXUPCALL_ANDNINT(PAGE_SIZE - 1)); -#else - BTFIXUPSET_CALL(pmd_page, sun4c_pmd_page, BTFIXUPCALL_NORM); -#endif - BTFIXUPSET_CALL(pmd_set, sun4c_pmd_set, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pmd_populate, sun4c_pmd_populate, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(pte_present, sun4c_pte_present, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_clear, sun4c_pte_clear, BTFIXUPCALL_STG0O0); - - BTFIXUPSET_CALL(pmd_bad, sun4c_pmd_bad, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pmd_present, sun4c_pmd_present, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pmd_clear, sun4c_pmd_clear, BTFIXUPCALL_STG0O0); - - BTFIXUPSET_CALL(pgd_none, sun4c_pgd_none, BTFIXUPCALL_RETINT(0)); - BTFIXUPSET_CALL(pgd_bad, sun4c_pgd_bad, BTFIXUPCALL_RETINT(0)); - BTFIXUPSET_CALL(pgd_present, sun4c_pgd_present, BTFIXUPCALL_RETINT(1)); - BTFIXUPSET_CALL(pgd_clear, sun4c_pgd_clear, BTFIXUPCALL_NOP); - - BTFIXUPSET_CALL(mk_pte, sun4c_mk_pte, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mk_pte_phys, sun4c_mk_pte_phys, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mk_pte_io, sun4c_mk_pte_io, 
BTFIXUPCALL_NORM); - - BTFIXUPSET_INT(pte_modify_mask, _SUN4C_PAGE_CHG_MASK); - BTFIXUPSET_CALL(pmd_offset, sun4c_pmd_offset, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_offset_kernel, sun4c_pte_offset_kernel, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(free_pte_fast, sun4c_free_pte_fast, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_free, sun4c_pte_free, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_alloc_one_kernel, sun4c_pte_alloc_one_kernel, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_alloc_one, sun4c_pte_alloc_one, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(free_pmd_fast, sun4c_free_pmd_fast, BTFIXUPCALL_NOP); - BTFIXUPSET_CALL(pmd_alloc_one, sun4c_pmd_alloc_one, BTFIXUPCALL_RETO0); - BTFIXUPSET_CALL(free_pgd_fast, sun4c_free_pgd_fast, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(get_pgd_fast, sun4c_get_pgd_fast, BTFIXUPCALL_NORM); - - BTFIXUPSET_HALF(pte_writei, _SUN4C_PAGE_WRITE); - BTFIXUPSET_HALF(pte_dirtyi, _SUN4C_PAGE_MODIFIED); - BTFIXUPSET_HALF(pte_youngi, _SUN4C_PAGE_ACCESSED); - BTFIXUPSET_HALF(pte_filei, _SUN4C_PAGE_FILE); - BTFIXUPSET_HALF(pte_wrprotecti, _SUN4C_PAGE_WRITE|_SUN4C_PAGE_SILENT_WRITE); - BTFIXUPSET_HALF(pte_mkcleani, _SUN4C_PAGE_MODIFIED|_SUN4C_PAGE_SILENT_WRITE); - BTFIXUPSET_HALF(pte_mkoldi, _SUN4C_PAGE_ACCESSED|_SUN4C_PAGE_SILENT_READ); - BTFIXUPSET_CALL(pte_mkwrite, sun4c_pte_mkwrite, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_mkdirty, sun4c_pte_mkdirty, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pte_mkyoung, sun4c_pte_mkyoung, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(update_mmu_cache, sun4c_update_mmu_cache, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(pte_to_pgoff, sun4c_pte_to_pgoff, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(pgoff_to_pte, sun4c_pgoff_to_pte, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(mmu_lockarea, sun4c_lockarea, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_unlockarea, sun4c_unlockarea, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(mmu_get_scsi_one, sun4c_get_scsi_one, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_get_scsi_sgl, sun4c_get_scsi_sgl, BTFIXUPCALL_NORM); - 
BTFIXUPSET_CALL(mmu_release_scsi_one, sun4c_release_scsi_one, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_release_scsi_sgl, sun4c_release_scsi_sgl, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(mmu_map_dma_area, sun4c_map_dma_area, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_unmap_dma_area, sun4c_unmap_dma_area, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(mmu_translate_dvma, sun4c_translate_dvma, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(sparc_mapiorange, sun4c_mapiorange, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(sparc_unmapiorange, sun4c_unmapiorange, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(__swp_type, sun4c_swp_type, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(__swp_offset, sun4c_swp_offset, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(__swp_entry, sun4c_swp_entry, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(alloc_thread_info, sun4c_alloc_thread_info, BTFIXUPCALL_NORM); - BTFIXUPSET_CALL(free_thread_info, sun4c_free_thread_info, BTFIXUPCALL_NORM); - - BTFIXUPSET_CALL(mmu_info, sun4c_mmu_info, BTFIXUPCALL_NORM); - - /* These should _never_ get called with two level tables. */ - BTFIXUPSET_CALL(pgd_set, sun4c_pgd_set, BTFIXUPCALL_NOP); - BTFIXUPSET_CALL(pgd_page_vaddr, sun4c_pgd_page, BTFIXUPCALL_RETO0); -} diff --git a/arch/sparc/mm/swift.S b/arch/sparc/mm/swift.S index 9f4cd396a0f..5d2b88d3942 100644 --- a/arch/sparc/mm/swift.S +++ b/arch/sparc/mm/swift.S @@ -1,4 +1,4 @@ -/* $Id: swift.S,v 1.9 2002/01/08 11:11:59 davem Exp $ +/* * swift.S: MicroSparc-II mmu/cache operations. * * Copyright (C) 1999 David S. 
Miller (davem@redhat.com) @@ -105,7 +105,7 @@ swift_flush_cache_mm_out: .globl swift_flush_cache_range swift_flush_cache_range: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 sub %o2, %o1, %o2 sethi %hi(4096), %o3 cmp %o2, %o3 @@ -116,7 +116,7 @@ swift_flush_cache_range: .globl swift_flush_cache_page swift_flush_cache_page: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 70: ld [%o0 + AOFF_mm_context], %g2 cmp %g2, -1 @@ -219,7 +219,7 @@ swift_flush_sig_insns: .globl swift_flush_tlb_range .globl swift_flush_tlb_all swift_flush_tlb_range: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 swift_flush_tlb_mm: ld [%o0 + AOFF_mm_context], %g2 cmp %g2, -1 @@ -233,7 +233,7 @@ swift_flush_tlb_all_out: .globl swift_flush_tlb_page swift_flush_tlb_page: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 andn %o1, (PAGE_SIZE - 1), %o1 diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c new file mode 100644 index 00000000000..b89aba217e3 --- /dev/null +++ b/arch/sparc/mm/tlb.c @@ -0,0 +1,243 @@ +/* arch/sparc64/mm/tlb.c + * + * Copyright (C) 2004 David S. Miller <davem@redhat.com> + */ + +#include <linux/kernel.h> +#include <linux/percpu.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/preempt.h> + +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> +#include <asm/cacheflush.h> +#include <asm/mmu_context.h> +#include <asm/tlb.h> + +/* Heavily inspired by the ppc64 code. 
*/ + +static DEFINE_PER_CPU(struct tlb_batch, tlb_batch); + +void flush_tlb_pending(void) +{ + struct tlb_batch *tb = &get_cpu_var(tlb_batch); + struct mm_struct *mm = tb->mm; + + if (!tb->tlb_nr) + goto out; + + flush_tsb_user(tb); + + if (CTX_VALID(mm->context)) { + if (tb->tlb_nr == 1) { + global_flush_tlb_page(mm, tb->vaddrs[0]); + } else { +#ifdef CONFIG_SMP + smp_flush_tlb_pending(tb->mm, tb->tlb_nr, + &tb->vaddrs[0]); +#else + __flush_tlb_pending(CTX_HWBITS(tb->mm->context), + tb->tlb_nr, &tb->vaddrs[0]); +#endif + } + } + + tb->tlb_nr = 0; + +out: + put_cpu_var(tlb_batch); +} + +void arch_enter_lazy_mmu_mode(void) +{ + struct tlb_batch *tb = &__get_cpu_var(tlb_batch); + + tb->active = 1; +} + +void arch_leave_lazy_mmu_mode(void) +{ + struct tlb_batch *tb = &__get_cpu_var(tlb_batch); + + if (tb->tlb_nr) + flush_tlb_pending(); + tb->active = 0; +} + +static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr, + bool exec) +{ + struct tlb_batch *tb = &get_cpu_var(tlb_batch); + unsigned long nr; + + vaddr &= PAGE_MASK; + if (exec) + vaddr |= 0x1UL; + + nr = tb->tlb_nr; + + if (unlikely(nr != 0 && mm != tb->mm)) { + flush_tlb_pending(); + nr = 0; + } + + if (!tb->active) { + flush_tsb_user_page(mm, vaddr); + global_flush_tlb_page(mm, vaddr); + goto out; + } + + if (nr == 0) + tb->mm = mm; + + tb->vaddrs[nr] = vaddr; + tb->tlb_nr = ++nr; + if (nr >= TLB_BATCH_NR) + flush_tlb_pending(); + +out: + put_cpu_var(tlb_batch); +} + +void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, + pte_t *ptep, pte_t orig, int fullmm) +{ + if (tlb_type != hypervisor && + pte_dirty(orig)) { + unsigned long paddr, pfn = pte_pfn(orig); + struct address_space *mapping; + struct page *page; + + if (!pfn_valid(pfn)) + goto no_cache_flush; + + page = pfn_to_page(pfn); + if (PageReserved(page)) + goto no_cache_flush; + + /* A real file page? 
*/ + mapping = page_mapping(page); + if (!mapping) + goto no_cache_flush; + + paddr = (unsigned long) page_address(page); + if ((paddr ^ vaddr) & (1 << 13)) + flush_dcache_page_all(mm, page); + } + +no_cache_flush: + if (!fullmm) + tlb_batch_add_one(mm, vaddr, pte_exec(orig)); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr, + pmd_t pmd) +{ + unsigned long end; + pte_t *pte; + + pte = pte_offset_map(&pmd, vaddr); + end = vaddr + HPAGE_SIZE; + while (vaddr < end) { + if (pte_val(*pte) & _PAGE_VALID) { + bool exec = pte_exec(*pte); + + tlb_batch_add_one(mm, vaddr, exec); + } + pte++; + vaddr += PAGE_SIZE; + } + pte_unmap(pte); +} + +void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + pmd_t orig = *pmdp; + + *pmdp = pmd; + + if (mm == &init_mm) + return; + + if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) { + if (pmd_val(pmd) & _PAGE_PMD_HUGE) + mm->context.huge_pte_count++; + else + mm->context.huge_pte_count--; + + /* Do not try to allocate the TSB hash table if we + * don't have one already. We have various locks held + * and thus we'll end up doing a GFP_KERNEL allocation + * in an atomic context. + * + * Instead, we let the first TLB miss on a hugepage + * take care of this. 
+ */ + } + + if (!pmd_none(orig)) { + addr &= HPAGE_MASK; + if (pmd_trans_huge(orig)) { + pte_t orig_pte = __pte(pmd_val(orig)); + bool exec = pte_exec(orig_pte); + + tlb_batch_add_one(mm, addr, exec); + tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec); + } else { + tlb_batch_pmd_scan(mm, addr, orig); + } + } +} + +void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) +{ + pmd_t entry = *pmdp; + + pmd_val(entry) &= ~_PAGE_VALID; + + set_pmd_at(vma->vm_mm, address, pmdp, entry); + flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); +} + +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) +{ + struct list_head *lh = (struct list_head *) pgtable; + + assert_spin_locked(&mm->page_table_lock); + + /* FIFO */ + if (!pmd_huge_pte(mm, pmdp)) + INIT_LIST_HEAD(lh); + else + list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); + pmd_huge_pte(mm, pmdp) = pgtable; +} + +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) +{ + struct list_head *lh; + pgtable_t pgtable; + + assert_spin_locked(&mm->page_table_lock); + + /* FIFO */ + pgtable = pmd_huge_pte(mm, pmdp); + lh = (struct list_head *) pgtable; + if (list_empty(lh)) + pmd_huge_pte(mm, pmdp) = NULL; + else { + pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; + list_del(lh); + } + pte_val(pgtable[0]) = 0; + pte_val(pgtable[1]) = 0; + + return pgtable; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c new file mode 100644 index 00000000000..a06576683c3 --- /dev/null +++ b/arch/sparc/mm/tsb.c @@ -0,0 +1,538 @@ +/* arch/sparc64/mm/tsb.c + * + * Copyright (C) 2006, 2008 David S. 
Miller <davem@davemloft.net> + */ + +#include <linux/kernel.h> +#include <linux/preempt.h> +#include <linux/slab.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/mmu_context.h> +#include <asm/setup.h> +#include <asm/tsb.h> +#include <asm/tlb.h> +#include <asm/oplib.h> + +extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; + +static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries) +{ + vaddr >>= hash_shift; + return vaddr & (nentries - 1); +} + +static inline int tag_compare(unsigned long tag, unsigned long vaddr) +{ + return (tag == (vaddr >> 22)); +} + +/* TSB flushes need only occur on the processor initiating the address + * space modification, not on each cpu the address space has run on. + * Only the TLB flush needs that treatment. + */ + +void flush_tsb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long v; + + for (v = start; v < end; v += PAGE_SIZE) { + unsigned long hash = tsb_hash(v, PAGE_SHIFT, + KERNEL_TSB_NENTRIES); + struct tsb *ent = &swapper_tsb[hash]; + + if (tag_compare(ent->tag, v)) + ent->tag = (1UL << TSB_TAG_INVALID_BIT); + } +} + +static void __flush_tsb_one_entry(unsigned long tsb, unsigned long v, + unsigned long hash_shift, + unsigned long nentries) +{ + unsigned long tag, ent, hash; + + v &= ~0x1UL; + hash = tsb_hash(v, hash_shift, nentries); + ent = tsb + (hash * sizeof(struct tsb)); + tag = (v >> 22UL); + + tsb_flush(ent, tag); +} + +static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift, + unsigned long tsb, unsigned long nentries) +{ + unsigned long i; + + for (i = 0; i < tb->tlb_nr; i++) + __flush_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, nentries); +} + +void flush_tsb_user(struct tlb_batch *tb) +{ + struct mm_struct *mm = tb->mm; + unsigned long nentries, base, flags; + + spin_lock_irqsave(&mm->context.lock, flags); + + base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; + nentries = 
mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(base); + __flush_tsb_one(tb, PAGE_SHIFT, base, nentries); + +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + if (mm->context.tsb_block[MM_TSB_HUGE].tsb) { + base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb; + nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(base); + __flush_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries); + } +#endif + spin_unlock_irqrestore(&mm->context.lock, flags); +} + +void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr) +{ + unsigned long nentries, base, flags; + + spin_lock_irqsave(&mm->context.lock, flags); + + base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; + nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(base); + __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries); + +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + if (mm->context.tsb_block[MM_TSB_HUGE].tsb) { + base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb; + nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) + base = __pa(base); + __flush_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT, nentries); + } +#endif + spin_unlock_irqrestore(&mm->context.lock, flags); +} + +#define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K +#define HV_PGSZ_MASK_BASE HV_PGSZ_MASK_8K + +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +#define HV_PGSZ_IDX_HUGE HV_PGSZ_IDX_4MB +#define HV_PGSZ_MASK_HUGE HV_PGSZ_MASK_4MB +#endif + +static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes) +{ + unsigned long tsb_reg, base, tsb_paddr; + unsigned long page_sz, tte; + + 
mm->context.tsb_block[tsb_idx].tsb_nentries = + tsb_bytes / sizeof(struct tsb); + + switch (tsb_idx) { + case MM_TSB_BASE: + base = TSBMAP_8K_BASE; + break; +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + case MM_TSB_HUGE: + base = TSBMAP_4M_BASE; + break; +#endif + default: + BUG(); + } + + tte = pgprot_val(PAGE_KERNEL_LOCKED); + tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb); + BUG_ON(tsb_paddr & (tsb_bytes - 1UL)); + + /* Use the smallest page size that can map the whole TSB + * in one TLB entry. + */ + switch (tsb_bytes) { + case 8192 << 0: + tsb_reg = 0x0UL; +#ifdef DCACHE_ALIASING_POSSIBLE + base += (tsb_paddr & 8192); +#endif + page_sz = 8192; + break; + + case 8192 << 1: + tsb_reg = 0x1UL; + page_sz = 64 * 1024; + break; + + case 8192 << 2: + tsb_reg = 0x2UL; + page_sz = 64 * 1024; + break; + + case 8192 << 3: + tsb_reg = 0x3UL; + page_sz = 64 * 1024; + break; + + case 8192 << 4: + tsb_reg = 0x4UL; + page_sz = 512 * 1024; + break; + + case 8192 << 5: + tsb_reg = 0x5UL; + page_sz = 512 * 1024; + break; + + case 8192 << 6: + tsb_reg = 0x6UL; + page_sz = 512 * 1024; + break; + + case 8192 << 7: + tsb_reg = 0x7UL; + page_sz = 4 * 1024 * 1024; + break; + + default: + printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n", + current->comm, current->pid, tsb_bytes); + do_exit(SIGSEGV); + } + tte |= pte_sz_bits(page_sz); + + if (tlb_type == cheetah_plus || tlb_type == hypervisor) { + /* Physical mapping, no locked TLB entry for TSB. 
*/ + tsb_reg |= tsb_paddr; + + mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg; + mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0; + mm->context.tsb_block[tsb_idx].tsb_map_pte = 0; + } else { + tsb_reg |= base; + tsb_reg |= (tsb_paddr & (page_sz - 1UL)); + tte |= (tsb_paddr & ~(page_sz - 1UL)); + + mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg; + mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base; + mm->context.tsb_block[tsb_idx].tsb_map_pte = tte; + } + + /* Setup the Hypervisor TSB descriptor. */ + if (tlb_type == hypervisor) { + struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx]; + + switch (tsb_idx) { + case MM_TSB_BASE: + hp->pgsz_idx = HV_PGSZ_IDX_BASE; + break; +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + case MM_TSB_HUGE: + hp->pgsz_idx = HV_PGSZ_IDX_HUGE; + break; +#endif + default: + BUG(); + } + hp->assoc = 1; + hp->num_ttes = tsb_bytes / 16; + hp->ctx_idx = 0; + switch (tsb_idx) { + case MM_TSB_BASE: + hp->pgsz_mask = HV_PGSZ_MASK_BASE; + break; +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + case MM_TSB_HUGE: + hp->pgsz_mask = HV_PGSZ_MASK_HUGE; + break; +#endif + default: + BUG(); + } + hp->tsb_base = tsb_paddr; + hp->resv = 0; + } +} + +struct kmem_cache *pgtable_cache __read_mostly; + +static struct kmem_cache *tsb_caches[8] __read_mostly; + +static const char *tsb_cache_names[8] = { + "tsb_8KB", + "tsb_16KB", + "tsb_32KB", + "tsb_64KB", + "tsb_128KB", + "tsb_256KB", + "tsb_512KB", + "tsb_1MB", +}; + +void __init pgtable_cache_init(void) +{ + unsigned long i; + + pgtable_cache = kmem_cache_create("pgtable_cache", + PAGE_SIZE, PAGE_SIZE, + 0, + _clear_page); + if (!pgtable_cache) { + prom_printf("pgtable_cache_init(): Could not create!\n"); + prom_halt(); + } + + for (i = 0; i < ARRAY_SIZE(tsb_cache_names); i++) { + unsigned long size = 8192 << i; + const char *name = tsb_cache_names[i]; + + tsb_caches[i] = kmem_cache_create(name, + size, size, + 0, NULL); + if 
(!tsb_caches[i]) { + prom_printf("Could not create %s cache\n", name); + prom_halt(); + } + } +} + +int sysctl_tsb_ratio = -2; + +static unsigned long tsb_size_to_rss_limit(unsigned long new_size) +{ + unsigned long num_ents = (new_size / sizeof(struct tsb)); + + if (sysctl_tsb_ratio < 0) + return num_ents - (num_ents >> -sysctl_tsb_ratio); + else + return num_ents + (num_ents >> sysctl_tsb_ratio); +} + +/* When the RSS of an address space exceeds tsb_rss_limit for a TSB, + * do_sparc64_fault() invokes this routine to try and grow it. + * + * When we reach the maximum TSB size supported, we stick ~0UL into + * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault() + * will not trigger any longer. + * + * The TSB can be anywhere from 8K to 1MB in size, in increasing powers + * of two. The TSB must be aligned to it's size, so f.e. a 512K TSB + * must be 512K aligned. It also must be physically contiguous, so we + * cannot use vmalloc(). + * + * The idea here is to grow the TSB when the RSS of the process approaches + * the number of entries that the current TSB can hold at once. Currently, + * we trigger when the RSS hits 3/4 of the TSB capacity. 
+ */ +void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss) +{ + unsigned long max_tsb_size = 1 * 1024 * 1024; + unsigned long new_size, old_size, flags; + struct tsb *old_tsb, *new_tsb; + unsigned long new_cache_index, old_cache_index; + unsigned long new_rss_limit; + gfp_t gfp_flags; + + if (max_tsb_size > (PAGE_SIZE << MAX_ORDER)) + max_tsb_size = (PAGE_SIZE << MAX_ORDER); + + new_cache_index = 0; + for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) { + new_rss_limit = tsb_size_to_rss_limit(new_size); + if (new_rss_limit > rss) + break; + new_cache_index++; + } + + if (new_size == max_tsb_size) + new_rss_limit = ~0UL; + +retry_tsb_alloc: + gfp_flags = GFP_KERNEL; + if (new_size > (PAGE_SIZE * 2)) + gfp_flags |= __GFP_NOWARN | __GFP_NORETRY; + + new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index], + gfp_flags, numa_node_id()); + if (unlikely(!new_tsb)) { + /* Not being able to fork due to a high-order TSB + * allocation failure is very bad behavior. Just back + * down to a 0-order allocation and force no TSB + * growing for this address space. + */ + if (mm->context.tsb_block[tsb_index].tsb == NULL && + new_cache_index > 0) { + new_cache_index = 0; + new_size = 8192; + new_rss_limit = ~0UL; + goto retry_tsb_alloc; + } + + /* If we failed on a TSB grow, we are under serious + * memory pressure so don't try to grow any more. + */ + if (mm->context.tsb_block[tsb_index].tsb != NULL) + mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL; + return; + } + + /* Mark all tags as invalid. */ + tsb_init(new_tsb, new_size); + + /* Ok, we are about to commit the changes. If we are + * growing an existing TSB the locking is very tricky, + * so WATCH OUT! + * + * We have to hold mm->context.lock while committing to the + * new TSB, this synchronizes us with processors in + * flush_tsb_user() and switch_mm() for this address space. 
+ * + * But even with that lock held, processors run asynchronously + * accessing the old TSB via TLB miss handling. This is OK + * because those actions are just propagating state from the + * Linux page tables into the TSB, page table mappings are not + * being changed. If a real fault occurs, the processor will + * synchronize with us when it hits flush_tsb_user(), this is + * also true for the case where vmscan is modifying the page + * tables. The only thing we need to be careful with is to + * skip any locked TSB entries during copy_tsb(). + * + * When we finish committing to the new TSB, we have to drop + * the lock and ask all other cpus running this address space + * to run tsb_context_switch() to see the new TSB table. + */ + spin_lock_irqsave(&mm->context.lock, flags); + + old_tsb = mm->context.tsb_block[tsb_index].tsb; + old_cache_index = + (mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL); + old_size = (mm->context.tsb_block[tsb_index].tsb_nentries * + sizeof(struct tsb)); + + + /* Handle multiple threads trying to grow the TSB at the same time. + * One will get in here first, and bump the size and the RSS limit. + * The others will get in here next and hit this check. 
+ */ + if (unlikely(old_tsb && + (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) { + spin_unlock_irqrestore(&mm->context.lock, flags); + + kmem_cache_free(tsb_caches[new_cache_index], new_tsb); + return; + } + + mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit; + + if (old_tsb) { + extern void copy_tsb(unsigned long old_tsb_base, + unsigned long old_tsb_size, + unsigned long new_tsb_base, + unsigned long new_tsb_size); + unsigned long old_tsb_base = (unsigned long) old_tsb; + unsigned long new_tsb_base = (unsigned long) new_tsb; + + if (tlb_type == cheetah_plus || tlb_type == hypervisor) { + old_tsb_base = __pa(old_tsb_base); + new_tsb_base = __pa(new_tsb_base); + } + copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size); + } + + mm->context.tsb_block[tsb_index].tsb = new_tsb; + setup_tsb_params(mm, tsb_index, new_size); + + spin_unlock_irqrestore(&mm->context.lock, flags); + + /* If old_tsb is NULL, we're being invoked for the first time + * from init_new_context(). + */ + if (old_tsb) { + /* Reload it on the local cpu. */ + tsb_context_switch(mm); + + /* Now force other processors to do the same. */ + preempt_disable(); + smp_tsb_sync(mm); + preempt_enable(); + + /* Now it is safe to free the old tsb. */ + kmem_cache_free(tsb_caches[old_cache_index], old_tsb); + } +} + +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + unsigned long huge_pte_count; +#endif + unsigned int i; + + spin_lock_init(&mm->context.lock); + + mm->context.sparc64_ctx_val = 0UL; + +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + /* We reset it to zero because the fork() page copying + * will re-increment the counters as the parent PTEs are + * copied into the child address space. 
+ */ + huge_pte_count = mm->context.huge_pte_count; + mm->context.huge_pte_count = 0; +#endif + + /* copy_mm() copies over the parent's mm_struct before calling + * us, so we need to zero out the TSB pointer or else tsb_grow() + * will be confused and think there is an older TSB to free up. + */ + for (i = 0; i < MM_NUM_TSBS; i++) + mm->context.tsb_block[i].tsb = NULL; + + /* If this is fork, inherit the parent's TSB size. We would + * grow it to that size on the first page fault anyways. + */ + tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm)); + +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + if (unlikely(huge_pte_count)) + tsb_grow(mm, MM_TSB_HUGE, huge_pte_count); +#endif + + if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb)) + return -ENOMEM; + + return 0; +} + +static void tsb_destroy_one(struct tsb_config *tp) +{ + unsigned long cache_index; + + if (!tp->tsb) + return; + cache_index = tp->tsb_reg_val & 0x7UL; + kmem_cache_free(tsb_caches[cache_index], tp->tsb); + tp->tsb = NULL; + tp->tsb_reg_val = 0UL; +} + +void destroy_context(struct mm_struct *mm) +{ + unsigned long flags, i; + + for (i = 0; i < MM_NUM_TSBS; i++) + tsb_destroy_one(&mm->context.tsb_block[i]); + + spin_lock_irqsave(&ctx_alloc_lock, flags); + + if (CTX_VALID(mm->context)) { + unsigned long nr = CTX_NRBITS(mm->context); + mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63)); + } + + spin_unlock_irqrestore(&ctx_alloc_lock, flags); +} diff --git a/arch/sparc/mm/tsunami.S b/arch/sparc/mm/tsunami.S index 4988e6a310b..bf10a345fa8 100644 --- a/arch/sparc/mm/tsunami.S +++ b/arch/sparc/mm/tsunami.S @@ -1,4 +1,4 @@ -/* $Id: tsunami.S,v 1.7 2001/12/21 04:56:15 davem Exp $ +/* * tsunami.S: High speed MicroSparc-I mmu/cache operations. * * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) @@ -24,7 +24,7 @@ /* Sliiick... 
*/ tsunami_flush_cache_page: tsunami_flush_cache_range: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 tsunami_flush_cache_mm: ld [%o0 + AOFF_mm_context], %g2 cmp %g2, -1 @@ -46,7 +46,7 @@ tsunami_flush_sig_insns: /* More slick stuff... */ tsunami_flush_tlb_range: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 tsunami_flush_tlb_mm: ld [%o0 + AOFF_mm_context], %g2 cmp %g2, -1 @@ -65,7 +65,7 @@ tsunami_flush_tlb_out: /* This one can be done in a fine grained manner... */ tsunami_flush_tlb_page: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 andn %o1, (PAGE_SIZE - 1), %o1 @@ -93,7 +93,6 @@ tsunami_flush_tlb_page_out: ldd [src + offset + 0x00], t2; \ std t2, [dst + offset + 0x00]; - .globl tsunami_copy_1page tsunami_copy_1page: /* NOTE: This routine has to be shorter than 70insns --jj */ or %g0, (PAGE_SIZE >> 8), %g1 diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S new file mode 100644 index 00000000000..b4f4733abc6 --- /dev/null +++ b/arch/sparc/mm/ultra.S @@ -0,0 +1,878 @@ +/* + * ultra.S: Don't expand these all over the place... + * + * Copyright (C) 1997, 2000, 2008 David S. Miller (davem@davemloft.net) + */ + +#include <asm/asi.h> +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/spitfire.h> +#include <asm/mmu_context.h> +#include <asm/mmu.h> +#include <asm/pil.h> +#include <asm/head.h> +#include <asm/thread_info.h> +#include <asm/cacheflush.h> +#include <asm/hypervisor.h> +#include <asm/cpudata.h> + + /* Basically, most of the Spitfire vs. Cheetah madness + * has to do with the fact that Cheetah does not support + * IMMU flushes out of the secondary context. Someone needs + * to throw a south lake birthday party for the folks + * in Microelectronics who refused to fix this shit. + */ + + /* This file is meant to be read efficiently by the CPU, not humans. 
+ * Staraj sie tego nikomu nie pierdolnac... + */ + .text + .align 32 + .globl __flush_tlb_mm +__flush_tlb_mm: /* 18 insns */ + /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ + ldxa [%o1] ASI_DMMU, %g2 + cmp %g2, %o0 + bne,pn %icc, __spitfire_flush_tlb_mm_slow + mov 0x50, %g3 + stxa %g0, [%g3] ASI_DMMU_DEMAP + stxa %g0, [%g3] ASI_IMMU_DEMAP + sethi %hi(KERNBASE), %g3 + flush %g3 + retl + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + + .align 32 + .globl __flush_tlb_page +__flush_tlb_page: /* 22 insns */ + /* %o0 = context, %o1 = vaddr */ + rdpr %pstate, %g7 + andn %g7, PSTATE_IE, %g2 + wrpr %g2, %pstate + mov SECONDARY_CONTEXT, %o4 + ldxa [%o4] ASI_DMMU, %g2 + stxa %o0, [%o4] ASI_DMMU + andcc %o1, 1, %g0 + andn %o1, 1, %o3 + be,pn %icc, 1f + or %o3, 0x10, %o3 + stxa %g0, [%o3] ASI_IMMU_DEMAP +1: stxa %g0, [%o3] ASI_DMMU_DEMAP + membar #Sync + stxa %g2, [%o4] ASI_DMMU + sethi %hi(KERNBASE), %o4 + flush %o4 + retl + wrpr %g7, 0x0, %pstate + nop + nop + nop + nop + + .align 32 + .globl __flush_tlb_pending +__flush_tlb_pending: /* 26 insns */ + /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ + rdpr %pstate, %g7 + sllx %o1, 3, %o1 + andn %g7, PSTATE_IE, %g2 + wrpr %g2, %pstate + mov SECONDARY_CONTEXT, %o4 + ldxa [%o4] ASI_DMMU, %g2 + stxa %o0, [%o4] ASI_DMMU +1: sub %o1, (1 << 3), %o1 + ldx [%o2 + %o1], %o3 + andcc %o3, 1, %g0 + andn %o3, 1, %o3 + be,pn %icc, 2f + or %o3, 0x10, %o3 + stxa %g0, [%o3] ASI_IMMU_DEMAP +2: stxa %g0, [%o3] ASI_DMMU_DEMAP + membar #Sync + brnz,pt %o1, 1b + nop + stxa %g2, [%o4] ASI_DMMU + sethi %hi(KERNBASE), %o4 + flush %o4 + retl + wrpr %g7, 0x0, %pstate + nop + nop + nop + nop + + .align 32 + .globl __flush_tlb_kernel_range +__flush_tlb_kernel_range: /* 16 insns */ + /* %o0=start, %o1=end */ + cmp %o0, %o1 + be,pn %xcc, 2f + sethi %hi(PAGE_SIZE), %o4 + sub %o1, %o0, %o3 + sub %o3, %o4, %o3 + or %o0, 0x20, %o0 ! 
Nucleus +1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP + stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP + membar #Sync + brnz,pt %o3, 1b + sub %o3, %o4, %o3 +2: sethi %hi(KERNBASE), %o3 + flush %o3 + retl + nop + nop + +__spitfire_flush_tlb_mm_slow: + rdpr %pstate, %g1 + wrpr %g1, PSTATE_IE, %pstate + stxa %o0, [%o1] ASI_DMMU + stxa %g0, [%g3] ASI_DMMU_DEMAP + stxa %g0, [%g3] ASI_IMMU_DEMAP + flush %g6 + stxa %g2, [%o1] ASI_DMMU + sethi %hi(KERNBASE), %o1 + flush %o1 + retl + wrpr %g1, 0, %pstate + +/* + * The following code flushes one page_size worth. + */ + .section .kprobes.text, "ax" + .align 32 + .globl __flush_icache_page +__flush_icache_page: /* %o0 = phys_page */ + srlx %o0, PAGE_SHIFT, %o0 + sethi %hi(PAGE_OFFSET), %g1 + sllx %o0, PAGE_SHIFT, %o0 + sethi %hi(PAGE_SIZE), %g2 + ldx [%g1 + %lo(PAGE_OFFSET)], %g1 + add %o0, %g1, %o0 +1: subcc %g2, 32, %g2 + bne,pt %icc, 1b + flush %o0 + %g2 + retl + nop + +#ifdef DCACHE_ALIASING_POSSIBLE + +#if (PAGE_SHIFT != 13) +#error only page shift of 13 is supported by dcache flush +#endif + +#define DTAG_MASK 0x3 + + /* This routine is Spitfire specific so the hardcoded + * D-cache size and line-size are OK. + */ + .align 64 + .globl __flush_dcache_page +__flush_dcache_page: /* %o0=kaddr, %o1=flush_icache */ + sethi %hi(PAGE_OFFSET), %g1 + ldx [%g1 + %lo(PAGE_OFFSET)], %g1 + sub %o0, %g1, %o0 ! physical address + srlx %o0, 11, %o0 ! make D-cache TAG + sethi %hi(1 << 14), %o2 ! D-cache size + sub %o2, (1 << 5), %o2 ! D-cache line size +1: ldxa [%o2] ASI_DCACHE_TAG, %o3 ! load D-cache TAG + andcc %o3, DTAG_MASK, %g0 ! Valid? + be,pn %xcc, 2f ! Nope, branch + andn %o3, DTAG_MASK, %o3 ! Clear valid bits + cmp %o3, %o0 ! TAG match? + bne,pt %xcc, 2f ! Nope, branch + nop + stxa %g0, [%o2] ASI_DCACHE_TAG ! Invalidate TAG + membar #Sync +2: brnz,pt %o2, 1b + sub %o2, (1 << 5), %o2 ! D-cache line size + + /* The I-cache does not snoop local stores so we + * better flush that too when necessary. 
+ */ + brnz,pt %o1, __flush_icache_page + sllx %o0, 11, %o0 + retl + nop + +#endif /* DCACHE_ALIASING_POSSIBLE */ + + .previous + + /* Cheetah specific versions, patched at boot time. */ +__cheetah_flush_tlb_mm: /* 19 insns */ + rdpr %pstate, %g7 + andn %g7, PSTATE_IE, %g2 + wrpr %g2, 0x0, %pstate + wrpr %g0, 1, %tl + mov PRIMARY_CONTEXT, %o2 + mov 0x40, %g3 + ldxa [%o2] ASI_DMMU, %g2 + srlx %g2, CTX_PGSZ1_NUC_SHIFT, %o1 + sllx %o1, CTX_PGSZ1_NUC_SHIFT, %o1 + or %o0, %o1, %o0 /* Preserve nucleus page size fields */ + stxa %o0, [%o2] ASI_DMMU + stxa %g0, [%g3] ASI_DMMU_DEMAP + stxa %g0, [%g3] ASI_IMMU_DEMAP + stxa %g2, [%o2] ASI_DMMU + sethi %hi(KERNBASE), %o2 + flush %o2 + wrpr %g0, 0, %tl + retl + wrpr %g7, 0x0, %pstate + +__cheetah_flush_tlb_page: /* 22 insns */ + /* %o0 = context, %o1 = vaddr */ + rdpr %pstate, %g7 + andn %g7, PSTATE_IE, %g2 + wrpr %g2, 0x0, %pstate + wrpr %g0, 1, %tl + mov PRIMARY_CONTEXT, %o4 + ldxa [%o4] ASI_DMMU, %g2 + srlx %g2, CTX_PGSZ1_NUC_SHIFT, %o3 + sllx %o3, CTX_PGSZ1_NUC_SHIFT, %o3 + or %o0, %o3, %o0 /* Preserve nucleus page size fields */ + stxa %o0, [%o4] ASI_DMMU + andcc %o1, 1, %g0 + be,pn %icc, 1f + andn %o1, 1, %o3 + stxa %g0, [%o3] ASI_IMMU_DEMAP +1: stxa %g0, [%o3] ASI_DMMU_DEMAP + membar #Sync + stxa %g2, [%o4] ASI_DMMU + sethi %hi(KERNBASE), %o4 + flush %o4 + wrpr %g0, 0, %tl + retl + wrpr %g7, 0x0, %pstate + +__cheetah_flush_tlb_pending: /* 27 insns */ + /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ + rdpr %pstate, %g7 + sllx %o1, 3, %o1 + andn %g7, PSTATE_IE, %g2 + wrpr %g2, 0x0, %pstate + wrpr %g0, 1, %tl + mov PRIMARY_CONTEXT, %o4 + ldxa [%o4] ASI_DMMU, %g2 + srlx %g2, CTX_PGSZ1_NUC_SHIFT, %o3 + sllx %o3, CTX_PGSZ1_NUC_SHIFT, %o3 + or %o0, %o3, %o0 /* Preserve nucleus page size fields */ + stxa %o0, [%o4] ASI_DMMU +1: sub %o1, (1 << 3), %o1 + ldx [%o2 + %o1], %o3 + andcc %o3, 1, %g0 + be,pn %icc, 2f + andn %o3, 1, %o3 + stxa %g0, [%o3] ASI_IMMU_DEMAP +2: stxa %g0, [%o3] ASI_DMMU_DEMAP + membar #Sync + brnz,pt %o1, 1b + 
nop + stxa %g2, [%o4] ASI_DMMU + sethi %hi(KERNBASE), %o4 + flush %o4 + wrpr %g0, 0, %tl + retl + wrpr %g7, 0x0, %pstate + +#ifdef DCACHE_ALIASING_POSSIBLE +__cheetah_flush_dcache_page: /* 11 insns */ + sethi %hi(PAGE_OFFSET), %g1 + ldx [%g1 + %lo(PAGE_OFFSET)], %g1 + sub %o0, %g1, %o0 + sethi %hi(PAGE_SIZE), %o4 +1: subcc %o4, (1 << 5), %o4 + stxa %g0, [%o0 + %o4] ASI_DCACHE_INVALIDATE + membar #Sync + bne,pt %icc, 1b + nop + retl /* I-cache flush never needed on Cheetah, see callers. */ + nop +#endif /* DCACHE_ALIASING_POSSIBLE */ + + /* Hypervisor specific versions, patched at boot time. */ +__hypervisor_tlb_tl0_error: + save %sp, -192, %sp + mov %i0, %o0 + call hypervisor_tlbop_error + mov %i1, %o1 + ret + restore + +__hypervisor_flush_tlb_mm: /* 10 insns */ + mov %o0, %o2 /* ARG2: mmu context */ + mov 0, %o0 /* ARG0: CPU lists unimplemented */ + mov 0, %o1 /* ARG1: CPU lists unimplemented */ + mov HV_MMU_ALL, %o3 /* ARG3: flags */ + mov HV_FAST_MMU_DEMAP_CTX, %o5 + ta HV_FAST_TRAP + brnz,pn %o0, __hypervisor_tlb_tl0_error + mov HV_FAST_MMU_DEMAP_CTX, %o1 + retl + nop + +__hypervisor_flush_tlb_page: /* 11 insns */ + /* %o0 = context, %o1 = vaddr */ + mov %o0, %g2 + mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */ + mov %g2, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + srlx %o0, PAGE_SHIFT, %o0 + sllx %o0, PAGE_SHIFT, %o0 + ta HV_MMU_UNMAP_ADDR_TRAP + brnz,pn %o0, __hypervisor_tlb_tl0_error + mov HV_MMU_UNMAP_ADDR_TRAP, %o1 + retl + nop + +__hypervisor_flush_tlb_pending: /* 16 insns */ + /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ + sllx %o1, 3, %g1 + mov %o2, %g2 + mov %o0, %g3 +1: sub %g1, (1 << 3), %g1 + ldx [%g2 + %g1], %o0 /* ARG0: vaddr + IMMU-bit */ + mov %g3, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + srlx %o0, PAGE_SHIFT, %o0 + sllx %o0, PAGE_SHIFT, %o0 + ta HV_MMU_UNMAP_ADDR_TRAP + brnz,pn %o0, __hypervisor_tlb_tl0_error + mov HV_MMU_UNMAP_ADDR_TRAP, %o1 + brnz,pt %g1, 1b + nop + retl + nop + 
+__hypervisor_flush_tlb_kernel_range: /* 16 insns */ + /* %o0=start, %o1=end */ + cmp %o0, %o1 + be,pn %xcc, 2f + sethi %hi(PAGE_SIZE), %g3 + mov %o0, %g1 + sub %o1, %g1, %g2 + sub %g2, %g3, %g2 +1: add %g1, %g2, %o0 /* ARG0: virtual address */ + mov 0, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + ta HV_MMU_UNMAP_ADDR_TRAP + brnz,pn %o0, __hypervisor_tlb_tl0_error + mov HV_MMU_UNMAP_ADDR_TRAP, %o1 + brnz,pt %g2, 1b + sub %g2, %g3, %g2 +2: retl + nop + +#ifdef DCACHE_ALIASING_POSSIBLE + /* XXX Niagara and friends have an 8K cache, so no aliasing is + * XXX possible, but nothing explicit in the Hypervisor API + * XXX guarantees this. + */ +__hypervisor_flush_dcache_page: /* 2 insns */ + retl + nop +#endif + +tlb_patch_one: +1: lduw [%o1], %g1 + stw %g1, [%o0] + flush %o0 + subcc %o2, 1, %o2 + add %o1, 4, %o1 + bne,pt %icc, 1b + add %o0, 4, %o0 + retl + nop + + .globl cheetah_patch_cachetlbops +cheetah_patch_cachetlbops: + save %sp, -128, %sp + + sethi %hi(__flush_tlb_mm), %o0 + or %o0, %lo(__flush_tlb_mm), %o0 + sethi %hi(__cheetah_flush_tlb_mm), %o1 + or %o1, %lo(__cheetah_flush_tlb_mm), %o1 + call tlb_patch_one + mov 19, %o2 + + sethi %hi(__flush_tlb_page), %o0 + or %o0, %lo(__flush_tlb_page), %o0 + sethi %hi(__cheetah_flush_tlb_page), %o1 + or %o1, %lo(__cheetah_flush_tlb_page), %o1 + call tlb_patch_one + mov 22, %o2 + + sethi %hi(__flush_tlb_pending), %o0 + or %o0, %lo(__flush_tlb_pending), %o0 + sethi %hi(__cheetah_flush_tlb_pending), %o1 + or %o1, %lo(__cheetah_flush_tlb_pending), %o1 + call tlb_patch_one + mov 27, %o2 + +#ifdef DCACHE_ALIASING_POSSIBLE + sethi %hi(__flush_dcache_page), %o0 + or %o0, %lo(__flush_dcache_page), %o0 + sethi %hi(__cheetah_flush_dcache_page), %o1 + or %o1, %lo(__cheetah_flush_dcache_page), %o1 + call tlb_patch_one + mov 11, %o2 +#endif /* DCACHE_ALIASING_POSSIBLE */ + + ret + restore + +#ifdef CONFIG_SMP + /* These are all called by the slaves of a cross call, at + * trap level 1, with interrupts fully 
disabled. + * + * Register usage: + * %g5 mm->context (all tlb flushes) + * %g1 address arg 1 (tlb page and range flushes) + * %g7 address arg 2 (tlb range flush only) + * + * %g6 scratch 1 + * %g2 scratch 2 + * %g3 scratch 3 + * %g4 scratch 4 + */ + .align 32 + .globl xcall_flush_tlb_mm +xcall_flush_tlb_mm: /* 21 insns */ + mov PRIMARY_CONTEXT, %g2 + ldxa [%g2] ASI_DMMU, %g3 + srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4 + sllx %g4, CTX_PGSZ1_NUC_SHIFT, %g4 + or %g5, %g4, %g5 /* Preserve nucleus page size fields */ + stxa %g5, [%g2] ASI_DMMU + mov 0x40, %g4 + stxa %g0, [%g4] ASI_DMMU_DEMAP + stxa %g0, [%g4] ASI_IMMU_DEMAP + stxa %g3, [%g2] ASI_DMMU + retry + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + + .globl xcall_flush_tlb_page +xcall_flush_tlb_page: /* 17 insns */ + /* %g5=context, %g1=vaddr */ + mov PRIMARY_CONTEXT, %g4 + ldxa [%g4] ASI_DMMU, %g2 + srlx %g2, CTX_PGSZ1_NUC_SHIFT, %g4 + sllx %g4, CTX_PGSZ1_NUC_SHIFT, %g4 + or %g5, %g4, %g5 + mov PRIMARY_CONTEXT, %g4 + stxa %g5, [%g4] ASI_DMMU + andcc %g1, 0x1, %g0 + be,pn %icc, 2f + andn %g1, 0x1, %g5 + stxa %g0, [%g5] ASI_IMMU_DEMAP +2: stxa %g0, [%g5] ASI_DMMU_DEMAP + membar #Sync + stxa %g2, [%g4] ASI_DMMU + retry + nop + nop + + .globl xcall_flush_tlb_kernel_range +xcall_flush_tlb_kernel_range: /* 25 insns */ + sethi %hi(PAGE_SIZE - 1), %g2 + or %g2, %lo(PAGE_SIZE - 1), %g2 + andn %g1, %g2, %g1 + andn %g7, %g2, %g7 + sub %g7, %g1, %g3 + add %g2, 1, %g2 + sub %g3, %g2, %g3 + or %g1, 0x20, %g1 ! Nucleus +1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP + stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP + membar #Sync + brnz,pt %g3, 1b + sub %g3, %g2, %g3 + retry + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + + /* This runs in a very controlled environment, so we do + * not need to worry about BH races etc. 
+ */ + .globl xcall_sync_tick +xcall_sync_tick: + +661: rdpr %pstate, %g2 + wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate + .section .sun4v_2insn_patch, "ax" + .word 661b + nop + nop + .previous + + rdpr %pil, %g2 + wrpr %g0, PIL_NORMAL_MAX, %pil + sethi %hi(109f), %g7 + b,pt %xcc, etrap_irq +109: or %g7, %lo(109b), %g7 +#ifdef CONFIG_TRACE_IRQFLAGS + call trace_hardirqs_off + nop +#endif + call smp_synchronize_tick_client + nop + b rtrap_xcall + ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 + + .globl xcall_fetch_glob_regs +xcall_fetch_glob_regs: + sethi %hi(global_cpu_snapshot), %g1 + or %g1, %lo(global_cpu_snapshot), %g1 + __GET_CPUID(%g2) + sllx %g2, 6, %g3 + add %g1, %g3, %g1 + rdpr %tstate, %g7 + stx %g7, [%g1 + GR_SNAP_TSTATE] + rdpr %tpc, %g7 + stx %g7, [%g1 + GR_SNAP_TPC] + rdpr %tnpc, %g7 + stx %g7, [%g1 + GR_SNAP_TNPC] + stx %o7, [%g1 + GR_SNAP_O7] + stx %i7, [%g1 + GR_SNAP_I7] + /* Don't try this at home kids... */ + rdpr %cwp, %g3 + sub %g3, 1, %g7 + wrpr %g7, %cwp + mov %i7, %g7 + wrpr %g3, %cwp + stx %g7, [%g1 + GR_SNAP_RPC] + sethi %hi(trap_block), %g7 + or %g7, %lo(trap_block), %g7 + sllx %g2, TRAP_BLOCK_SZ_SHIFT, %g2 + add %g7, %g2, %g7 + ldx [%g7 + TRAP_PER_CPU_THREAD], %g3 + stx %g3, [%g1 + GR_SNAP_THREAD] + retry + + .globl xcall_fetch_glob_pmu +xcall_fetch_glob_pmu: + sethi %hi(global_cpu_snapshot), %g1 + or %g1, %lo(global_cpu_snapshot), %g1 + __GET_CPUID(%g2) + sllx %g2, 6, %g3 + add %g1, %g3, %g1 + rd %pic, %g7 + stx %g7, [%g1 + (4 * 8)] + rd %pcr, %g7 + stx %g7, [%g1 + (0 * 8)] + retry + + .globl xcall_fetch_glob_pmu_n4 +xcall_fetch_glob_pmu_n4: + sethi %hi(global_cpu_snapshot), %g1 + or %g1, %lo(global_cpu_snapshot), %g1 + __GET_CPUID(%g2) + sllx %g2, 6, %g3 + add %g1, %g3, %g1 + + ldxa [%g0] ASI_PIC, %g7 + stx %g7, [%g1 + (4 * 8)] + mov 0x08, %g3 + ldxa [%g3] ASI_PIC, %g7 + stx %g7, [%g1 + (5 * 8)] + mov 0x10, %g3 + ldxa [%g3] ASI_PIC, %g7 + stx %g7, [%g1 + (6 * 8)] + mov 0x18, %g3 + ldxa [%g3] ASI_PIC, %g7 + stx %g7, [%g1 + (7 * 8)] + + mov 
%o0, %g2 + mov %o1, %g3 + mov %o5, %g7 + + mov HV_FAST_VT_GET_PERFREG, %o5 + mov 3, %o0 + ta HV_FAST_TRAP + stx %o1, [%g1 + (3 * 8)] + mov HV_FAST_VT_GET_PERFREG, %o5 + mov 2, %o0 + ta HV_FAST_TRAP + stx %o1, [%g1 + (2 * 8)] + mov HV_FAST_VT_GET_PERFREG, %o5 + mov 1, %o0 + ta HV_FAST_TRAP + stx %o1, [%g1 + (1 * 8)] + mov HV_FAST_VT_GET_PERFREG, %o5 + mov 0, %o0 + ta HV_FAST_TRAP + stx %o1, [%g1 + (0 * 8)] + + mov %g2, %o0 + mov %g3, %o1 + mov %g7, %o5 + + retry + +#ifdef DCACHE_ALIASING_POSSIBLE + .align 32 + .globl xcall_flush_dcache_page_cheetah +xcall_flush_dcache_page_cheetah: /* %g1 == physical page address */ + sethi %hi(PAGE_SIZE), %g3 +1: subcc %g3, (1 << 5), %g3 + stxa %g0, [%g1 + %g3] ASI_DCACHE_INVALIDATE + membar #Sync + bne,pt %icc, 1b + nop + retry + nop +#endif /* DCACHE_ALIASING_POSSIBLE */ + + .globl xcall_flush_dcache_page_spitfire +xcall_flush_dcache_page_spitfire: /* %g1 == physical page address + %g7 == kernel page virtual address + %g5 == (page->mapping != NULL) */ +#ifdef DCACHE_ALIASING_POSSIBLE + srlx %g1, (13 - 2), %g1 ! Form tag comparitor + sethi %hi(L1DCACHE_SIZE), %g3 ! D$ size == 16K + sub %g3, (1 << 5), %g3 ! 
D$ linesize == 32 +1: ldxa [%g3] ASI_DCACHE_TAG, %g2 + andcc %g2, 0x3, %g0 + be,pn %xcc, 2f + andn %g2, 0x3, %g2 + cmp %g2, %g1 + + bne,pt %xcc, 2f + nop + stxa %g0, [%g3] ASI_DCACHE_TAG + membar #Sync +2: cmp %g3, 0 + bne,pt %xcc, 1b + sub %g3, (1 << 5), %g3 + + brz,pn %g5, 2f +#endif /* DCACHE_ALIASING_POSSIBLE */ + sethi %hi(PAGE_SIZE), %g3 + +1: flush %g7 + subcc %g3, (1 << 5), %g3 + bne,pt %icc, 1b + add %g7, (1 << 5), %g7 + +2: retry + nop + nop + + /* %g5: error + * %g6: tlb op + */ +__hypervisor_tlb_xcall_error: + mov %g5, %g4 + mov %g6, %g5 + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o0 + call hypervisor_tlbop_error_xcall + mov %l5, %o1 + ba,a,pt %xcc, rtrap + + .globl __hypervisor_xcall_flush_tlb_mm +__hypervisor_xcall_flush_tlb_mm: /* 21 insns */ + /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */ + mov %o0, %g2 + mov %o1, %g3 + mov %o2, %g4 + mov %o3, %g1 + mov %o5, %g7 + clr %o0 /* ARG0: CPU lists unimplemented */ + clr %o1 /* ARG1: CPU lists unimplemented */ + mov %g5, %o2 /* ARG2: mmu context */ + mov HV_MMU_ALL, %o3 /* ARG3: flags */ + mov HV_FAST_MMU_DEMAP_CTX, %o5 + ta HV_FAST_TRAP + mov HV_FAST_MMU_DEMAP_CTX, %g6 + brnz,pn %o0, __hypervisor_tlb_xcall_error + mov %o0, %g5 + mov %g2, %o0 + mov %g3, %o1 + mov %g4, %o2 + mov %g1, %o3 + mov %g7, %o5 + membar #Sync + retry + + .globl __hypervisor_xcall_flush_tlb_page +__hypervisor_xcall_flush_tlb_page: /* 17 insns */ + /* %g5=ctx, %g1=vaddr */ + mov %o0, %g2 + mov %o1, %g3 + mov %o2, %g4 + mov %g1, %o0 /* ARG0: virtual address */ + mov %g5, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + srlx %o0, PAGE_SHIFT, %o0 + sllx %o0, PAGE_SHIFT, %o0 + ta HV_MMU_UNMAP_ADDR_TRAP + mov HV_MMU_UNMAP_ADDR_TRAP, %g6 + brnz,a,pn %o0, __hypervisor_tlb_xcall_error + mov %o0, %g5 + mov %g2, %o0 + mov %g3, %o1 + mov %g4, %o2 + membar #Sync + retry + + .globl __hypervisor_xcall_flush_tlb_kernel_range +__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */ + /* %g1=start, %g7=end, 
g2,g3,g4,g5,g6=scratch */ + sethi %hi(PAGE_SIZE - 1), %g2 + or %g2, %lo(PAGE_SIZE - 1), %g2 + andn %g1, %g2, %g1 + andn %g7, %g2, %g7 + sub %g7, %g1, %g3 + add %g2, 1, %g2 + sub %g3, %g2, %g3 + mov %o0, %g2 + mov %o1, %g4 + mov %o2, %g7 +1: add %g1, %g3, %o0 /* ARG0: virtual address */ + mov 0, %o1 /* ARG1: mmu context */ + mov HV_MMU_ALL, %o2 /* ARG2: flags */ + ta HV_MMU_UNMAP_ADDR_TRAP + mov HV_MMU_UNMAP_ADDR_TRAP, %g6 + brnz,pn %o0, __hypervisor_tlb_xcall_error + mov %o0, %g5 + sethi %hi(PAGE_SIZE), %o2 + brnz,pt %g3, 1b + sub %g3, %o2, %g3 + mov %g2, %o0 + mov %g4, %o1 + mov %g7, %o2 + membar #Sync + retry + + /* These just get rescheduled to PIL vectors. */ + .globl xcall_call_function +xcall_call_function: + wr %g0, (1 << PIL_SMP_CALL_FUNC), %set_softint + retry + + .globl xcall_call_function_single +xcall_call_function_single: + wr %g0, (1 << PIL_SMP_CALL_FUNC_SNGL), %set_softint + retry + + .globl xcall_receive_signal +xcall_receive_signal: + wr %g0, (1 << PIL_SMP_RECEIVE_SIGNAL), %set_softint + retry + + .globl xcall_capture +xcall_capture: + wr %g0, (1 << PIL_SMP_CAPTURE), %set_softint + retry + + .globl xcall_new_mmu_context_version +xcall_new_mmu_context_version: + wr %g0, (1 << PIL_SMP_CTX_NEW_VERSION), %set_softint + retry + +#ifdef CONFIG_KGDB + .globl xcall_kgdb_capture +xcall_kgdb_capture: + wr %g0, (1 << PIL_KGDB_CAPTURE), %set_softint + retry +#endif + +#endif /* CONFIG_SMP */ + + + .globl hypervisor_patch_cachetlbops +hypervisor_patch_cachetlbops: + save %sp, -128, %sp + + sethi %hi(__flush_tlb_mm), %o0 + or %o0, %lo(__flush_tlb_mm), %o0 + sethi %hi(__hypervisor_flush_tlb_mm), %o1 + or %o1, %lo(__hypervisor_flush_tlb_mm), %o1 + call tlb_patch_one + mov 10, %o2 + + sethi %hi(__flush_tlb_page), %o0 + or %o0, %lo(__flush_tlb_page), %o0 + sethi %hi(__hypervisor_flush_tlb_page), %o1 + or %o1, %lo(__hypervisor_flush_tlb_page), %o1 + call tlb_patch_one + mov 11, %o2 + + sethi %hi(__flush_tlb_pending), %o0 + or %o0, %lo(__flush_tlb_pending), %o0 + 
sethi %hi(__hypervisor_flush_tlb_pending), %o1 + or %o1, %lo(__hypervisor_flush_tlb_pending), %o1 + call tlb_patch_one + mov 16, %o2 + + sethi %hi(__flush_tlb_kernel_range), %o0 + or %o0, %lo(__flush_tlb_kernel_range), %o0 + sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1 + or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1 + call tlb_patch_one + mov 16, %o2 + +#ifdef DCACHE_ALIASING_POSSIBLE + sethi %hi(__flush_dcache_page), %o0 + or %o0, %lo(__flush_dcache_page), %o0 + sethi %hi(__hypervisor_flush_dcache_page), %o1 + or %o1, %lo(__hypervisor_flush_dcache_page), %o1 + call tlb_patch_one + mov 2, %o2 +#endif /* DCACHE_ALIASING_POSSIBLE */ + +#ifdef CONFIG_SMP + sethi %hi(xcall_flush_tlb_mm), %o0 + or %o0, %lo(xcall_flush_tlb_mm), %o0 + sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1 + or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1 + call tlb_patch_one + mov 21, %o2 + + sethi %hi(xcall_flush_tlb_page), %o0 + or %o0, %lo(xcall_flush_tlb_page), %o0 + sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1 + or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1 + call tlb_patch_one + mov 17, %o2 + + sethi %hi(xcall_flush_tlb_kernel_range), %o0 + or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 + sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1 + or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1 + call tlb_patch_one + mov 25, %o2 +#endif /* CONFIG_SMP */ + + ret + restore diff --git a/arch/sparc/mm/viking.S b/arch/sparc/mm/viking.S index 754c622548a..852257fcc82 100644 --- a/arch/sparc/mm/viking.S +++ b/arch/sparc/mm/viking.S @@ -1,4 +1,4 @@ -/* $Id: viking.S,v 1.19 2001/12/21 04:56:15 davem Exp $ +/* * viking.S: High speed Viking cache/mmu operations * * Copyright (C) 1997 Eddie C. 
Dost (ecd@skynet.be) @@ -14,7 +14,6 @@ #include <asm/page.h> #include <asm/pgtsrmmu.h> #include <asm/viking.h> -#include <asm/btfixup.h> #ifdef CONFIG_SMP .data @@ -109,7 +108,7 @@ viking_mxcc_flush_page: viking_flush_cache_page: viking_flush_cache_range: #ifndef CONFIG_SMP - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 #endif viking_flush_cache_mm: #ifndef CONFIG_SMP @@ -149,7 +148,7 @@ viking_flush_tlb_mm: #endif viking_flush_tlb_range: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 @@ -174,7 +173,7 @@ viking_flush_tlb_range: #endif viking_flush_tlb_page: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 @@ -240,7 +239,7 @@ sun4dsmp_flush_tlb_range: tst %g5 bne 3f mov SRMMU_CTX_REG, %g1 - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 sethi %hi(~((1 << SRMMU_PGDIR_SHIFT) - 1)), %o4 @@ -266,7 +265,7 @@ sun4dsmp_flush_tlb_page: tst %g5 bne 2f mov SRMMU_CTX_REG, %g1 - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 and %o1, PAGE_MASK, %o1 |
