diff options
Diffstat (limited to 'arch/mips/mm')
45 files changed, 7017 insertions, 5301 deletions
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile index b0178da019f..7f4f93ab22b 100644 --- a/arch/mips/mm/Makefile +++ b/arch/mips/mm/Makefile @@ -2,43 +2,25 @@ # Makefile for the Linux/MIPS-specific parts of the memory manager. # -obj-y += cache.o extable.o fault.o init.o pgtable.o \ - tlbex.o tlbex-fault.o +obj-y += cache.o dma-default.o extable.o fault.o \ + gup.o init.o mmap.o page.o page-funcs.o \ + tlbex.o tlbex-fault.o tlb-funcs.o uasm-mips.o obj-$(CONFIG_32BIT) += ioremap.o pgtable-32.o obj-$(CONFIG_64BIT) += pgtable-64.o obj-$(CONFIG_HIGHMEM) += highmem.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_CPU_MIPS32) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_MIPS64) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_NEVADA) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_R10000) += c-r4k.o cex-gen.o pg-r4k.o tlb-andes.o -obj-$(CONFIG_CPU_R3000) += c-r3k.o tlb-r3k.o pg-r4k.o -obj-$(CONFIG_CPU_R4300) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_R4X00) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_R5000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_R5432) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_R8000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r8k.o -obj-$(CONFIG_CPU_RM7000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_RM9000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_SB1) += c-sb1.o cerr-sb1.o cex-sb1.o pg-sb1.o \ - tlb-r4k.o -obj-$(CONFIG_CPU_TX39XX) += c-tx39.o pg-r4k.o tlb-r3k.o -obj-$(CONFIG_CPU_TX49XX) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_VR41XX) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o +obj-$(CONFIG_CPU_R4K_CACHE_TLB) += c-r4k.o cex-gen.o tlb-r4k.o +obj-$(CONFIG_CPU_R3000) += c-r3k.o tlb-r3k.o +obj-$(CONFIG_CPU_R8000) += c-r4k.o cex-gen.o tlb-r8k.o +obj-$(CONFIG_CPU_SB1) += c-r4k.o cerr-sb1.o cex-sb1.o tlb-r4k.o +obj-$(CONFIG_CPU_TX39XX) += c-tx39.o tlb-r3k.o +obj-$(CONFIG_CPU_CAVIUM_OCTEON) += c-octeon.o cex-oct.o tlb-r4k.o obj-$(CONFIG_IP22_CPU_SCACHE) += sc-ip22.o -obj-$(CONFIG_R5000_CPU_SCACHE) += sc-r5k.o -obj-$(CONFIG_RM7000_CPU_SCACHE) += sc-rm7k.o +obj-$(CONFIG_R5000_CPU_SCACHE) += sc-r5k.o +obj-$(CONFIG_RM7000_CPU_SCACHE) += sc-rm7k.o +obj-$(CONFIG_MIPS_CPU_SCACHE) += sc-mips.o -# -# Choose one DMA coherency model -# -ifndef CONFIG_OWN_DMA -obj-$(CONFIG_DMA_COHERENT) += dma-coherent.o -obj-$(CONFIG_DMA_NONCOHERENT) += dma-noncoherent.o -endif -obj-$(CONFIG_DMA_IP27) += dma-ip27.o -obj-$(CONFIG_DMA_IP32) += dma-ip32.o - -EXTRA_AFLAGS := $(CFLAGS) +obj-$(CONFIG_SYS_SUPPORTS_MICROMIPS) += uasm-micromips.o diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c new file mode 100644 index 00000000000..05b1d7cf951 --- /dev/null +++ b/arch/mips/mm/c-octeon.c @@ -0,0 +1,369 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2005-2007 Cavium Networks + */ +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/mm.h> +#include <linux/bitops.h> +#include <linux/cpu.h> +#include <linux/io.h> + +#include <asm/bcache.h> +#include <asm/bootinfo.h> +#include <asm/cacheops.h> +#include <asm/cpu-features.h> +#include <asm/cpu-type.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/r4kcache.h> +#include <asm/traps.h> +#include <asm/mmu_context.h> +#include <asm/war.h> + +#include <asm/octeon/octeon.h> + +unsigned long long cache_err_dcache[NR_CPUS]; +EXPORT_SYMBOL_GPL(cache_err_dcache); + +/** + * Octeon automatically flushes the dcache on tlb changes, so + * from Linux's viewpoint it acts much like a physically + * tagged cache. No flushing is needed + * + */ +static void octeon_flush_data_cache_page(unsigned long addr) +{ + /* Nothing to do */ +} + +static inline void octeon_local_flush_icache(void) +{ + asm volatile ("synci 0($0)"); +} + +/* + * Flush local I-cache for the specified range. + */ +static void local_octeon_flush_icache_range(unsigned long start, + unsigned long end) +{ + octeon_local_flush_icache(); +} + +/** + * Flush caches as necessary for all cores affected by a + * vma. If no vma is supplied, all cores are flushed. + * + * @vma: VMA to flush or NULL to flush all icaches. + */ +static void octeon_flush_icache_all_cores(struct vm_area_struct *vma) +{ + extern void octeon_send_ipi_single(int cpu, unsigned int action); +#ifdef CONFIG_SMP + int cpu; + cpumask_t mask; +#endif + + mb(); + octeon_local_flush_icache(); +#ifdef CONFIG_SMP + preempt_disable(); + cpu = smp_processor_id(); + + /* + * If we have a vma structure, we only need to worry about + * cores it has been used on + */ + if (vma) + mask = *mm_cpumask(vma->vm_mm); + else + mask = *cpu_online_mask; + cpumask_clear_cpu(cpu, &mask); + for_each_cpu(cpu, &mask) + octeon_send_ipi_single(cpu, SMP_ICACHE_FLUSH); + + preempt_enable(); +#endif +} + + +/** + * Called to flush the icache on all cores + */ +static void octeon_flush_icache_all(void) +{ + octeon_flush_icache_all_cores(NULL); +} + + +/** + * Called to flush all memory associated with a memory + * context. + * + * @mm: Memory context to flush + */ +static void octeon_flush_cache_mm(struct mm_struct *mm) +{ + /* + * According to the R4K version of this file, CPUs without + * dcache aliases don't need to do anything here + */ +} + + +/** + * Flush a range of kernel addresses out of the icache + * + */ +static void octeon_flush_icache_range(unsigned long start, unsigned long end) +{ + octeon_flush_icache_all_cores(NULL); +} + + +/** + * Flush the icache for a trampoline. These are used for interrupt + * and exception hooking. + * + * @addr: Address to flush + */ +static void octeon_flush_cache_sigtramp(unsigned long addr) +{ + struct vm_area_struct *vma; + + down_read(¤t->mm->mmap_sem); + vma = find_vma(current->mm, addr); + octeon_flush_icache_all_cores(vma); + up_read(¤t->mm->mmap_sem); +} + + +/** + * Flush a range out of a vma + * + * @vma: VMA to flush + * @start: + * @end: + */ +static void octeon_flush_cache_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + if (vma->vm_flags & VM_EXEC) + octeon_flush_icache_all_cores(vma); +} + + +/** + * Flush a specific page of a vma + * + * @vma: VMA to flush page for + * @page: Page to flush + * @pfn: + */ +static void octeon_flush_cache_page(struct vm_area_struct *vma, + unsigned long page, unsigned long pfn) +{ + if (vma->vm_flags & VM_EXEC) + octeon_flush_icache_all_cores(vma); +} + +static void octeon_flush_kernel_vmap_range(unsigned long vaddr, int size) +{ + BUG(); +} + +/** + * Probe Octeon's caches + * + */ +static void probe_octeon(void) +{ + unsigned long icache_size; + unsigned long dcache_size; + unsigned int config1; + struct cpuinfo_mips *c = ¤t_cpu_data; + int cputype = current_cpu_type(); + + config1 = read_c0_config1(); + switch (cputype) { + case CPU_CAVIUM_OCTEON: + case CPU_CAVIUM_OCTEON_PLUS: + c->icache.linesz = 2 << ((config1 >> 19) & 7); + c->icache.sets = 64 << ((config1 >> 22) & 7); + c->icache.ways = 1 + ((config1 >> 16) & 7); + c->icache.flags |= MIPS_CACHE_VTAG; + icache_size = + c->icache.sets * c->icache.ways * c->icache.linesz; + c->icache.waybit = ffs(icache_size / c->icache.ways) - 1; + c->dcache.linesz = 128; + if (cputype == CPU_CAVIUM_OCTEON_PLUS) + c->dcache.sets = 2; /* CN5XXX has two Dcache sets */ + else + c->dcache.sets = 1; /* CN3XXX has one Dcache set */ + c->dcache.ways = 64; + dcache_size = + c->dcache.sets * c->dcache.ways * c->dcache.linesz; + c->dcache.waybit = ffs(dcache_size / c->dcache.ways) - 1; + c->options |= MIPS_CPU_PREFETCH; + break; + + case CPU_CAVIUM_OCTEON2: + c->icache.linesz = 2 << ((config1 >> 19) & 7); + c->icache.sets = 8; + c->icache.ways = 37; + c->icache.flags |= MIPS_CACHE_VTAG; + icache_size = c->icache.sets * c->icache.ways * c->icache.linesz; + + c->dcache.linesz = 128; + c->dcache.ways = 32; + c->dcache.sets = 8; + dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz; + c->options |= MIPS_CPU_PREFETCH; + break; + + case CPU_CAVIUM_OCTEON3: + c->icache.linesz = 128; + c->icache.sets = 16; + c->icache.ways = 39; + c->icache.flags |= MIPS_CACHE_VTAG; + icache_size = c->icache.sets * c->icache.ways * c->icache.linesz; + + c->dcache.linesz = 128; + c->dcache.ways = 32; + c->dcache.sets = 8; + dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz; + c->options |= MIPS_CPU_PREFETCH; + break; + + default: + panic("Unsupported Cavium Networks CPU type"); + break; + } + + /* compute a couple of other cache variables */ + c->icache.waysize = icache_size / c->icache.ways; + c->dcache.waysize = dcache_size / c->dcache.ways; + + c->icache.sets = icache_size / (c->icache.linesz * c->icache.ways); + c->dcache.sets = dcache_size / (c->dcache.linesz * c->dcache.ways); + + if (smp_processor_id() == 0) { + pr_notice("Primary instruction cache %ldkB, %s, %d way, " + "%d sets, linesize %d bytes.\n", + icache_size >> 10, + cpu_has_vtag_icache ? + "virtually tagged" : "physically tagged", + c->icache.ways, c->icache.sets, c->icache.linesz); + + pr_notice("Primary data cache %ldkB, %d-way, %d sets, " + "linesize %d bytes.\n", + dcache_size >> 10, c->dcache.ways, + c->dcache.sets, c->dcache.linesz); + } +} + +static void octeon_cache_error_setup(void) +{ + extern char except_vec2_octeon; + set_handler(0x100, &except_vec2_octeon, 0x80); +} + +/** + * Setup the Octeon cache flush routines + * + */ +void octeon_cache_init(void) +{ + probe_octeon(); + + shm_align_mask = PAGE_SIZE - 1; + + flush_cache_all = octeon_flush_icache_all; + __flush_cache_all = octeon_flush_icache_all; + flush_cache_mm = octeon_flush_cache_mm; + flush_cache_page = octeon_flush_cache_page; + flush_cache_range = octeon_flush_cache_range; + flush_cache_sigtramp = octeon_flush_cache_sigtramp; + flush_icache_all = octeon_flush_icache_all; + flush_data_cache_page = octeon_flush_data_cache_page; + flush_icache_range = octeon_flush_icache_range; + local_flush_icache_range = local_octeon_flush_icache_range; + + __flush_kernel_vmap_range = octeon_flush_kernel_vmap_range; + + build_clear_page(); + build_copy_page(); + + board_cache_error_setup = octeon_cache_error_setup; +} + +/* + * Handle a cache error exception + */ +static RAW_NOTIFIER_HEAD(co_cache_error_chain); + +int register_co_cache_error_notifier(struct notifier_block *nb) +{ + return raw_notifier_chain_register(&co_cache_error_chain, nb); +} +EXPORT_SYMBOL_GPL(register_co_cache_error_notifier); + +int unregister_co_cache_error_notifier(struct notifier_block *nb) +{ + return raw_notifier_chain_unregister(&co_cache_error_chain, nb); +} +EXPORT_SYMBOL_GPL(unregister_co_cache_error_notifier); + +static void co_cache_error_call_notifiers(unsigned long val) +{ + int rv = raw_notifier_call_chain(&co_cache_error_chain, val, NULL); + if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) { + u64 dcache_err; + unsigned long coreid = cvmx_get_core_num(); + u64 icache_err = read_octeon_c0_icacheerr(); + + if (val) { + dcache_err = cache_err_dcache[coreid]; + cache_err_dcache[coreid] = 0; + } else { + dcache_err = read_octeon_c0_dcacheerr(); + } + + pr_err("Core%lu: Cache error exception:\n", coreid); + pr_err("cp0_errorepc == %lx\n", read_c0_errorepc()); + if (icache_err & 1) { + pr_err("CacheErr (Icache) == %llx\n", + (unsigned long long)icache_err); + write_octeon_c0_icacheerr(0); + } + if (dcache_err & 1) { + pr_err("CacheErr (Dcache) == %llx\n", + (unsigned long long)dcache_err); + } + } +} + +/* + * Called when the the exception is recoverable + */ + +asmlinkage void cache_parity_error_octeon_recoverable(void) +{ + co_cache_error_call_notifiers(0); +} + +/** + * Called when the the exception is not recoverable + */ + +asmlinkage void cache_parity_error_octeon_non_recoverable(void) +{ + co_cache_error_call_notifiers(1); + panic("Can't handle cache error: nested exception"); +} diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c index 27f4fa25e8c..135ec313c1f 100644 --- a/arch/mips/mm/c-r3k.c +++ b/arch/mips/mm/c-r3k.c @@ -1,23 +1,22 @@ /* * r2300.c: R2000 and R3000 specific mmu/cache code. * - * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * Copyright (C) 1996 David S. Miller (davem@davemloft.net) * * with a lot of changes to make this thing work for R3000s * Tx39XX R4k style caches added. HK * Copyright (C) 1998, 1999, 2000 Harald Koerfgen * Copyright (C) 1998 Gleb Raiko & Vladimir Roganov - * Copyright (C) 2001, 2004 Maciej W. Rozycki + * Copyright (C) 2001, 2004, 2007 Maciej W. Rozycki */ -#include <linux/init.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/smp.h> #include <linux/mm.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/mmu_context.h> -#include <asm/system.h> #include <asm/isadep.h> #include <asm/io.h> #include <asm/bootinfo.h> @@ -26,9 +25,7 @@ static unsigned long icache_size, dcache_size; /* Size in bytes */ static unsigned long icache_lsize, dcache_lsize; /* Size in bytes */ -#undef DEBUG_CACHE - -unsigned long __init r3k_cache_size(unsigned long ca_flags) +unsigned long r3k_cache_size(unsigned long ca_flags) { unsigned long flags, status, dummy, size; volatile unsigned long *p; @@ -63,7 +60,7 @@ unsigned long __init r3k_cache_size(unsigned long ca_flags) return size * sizeof(*p); } -unsigned long __init r3k_cache_lsize(unsigned long ca_flags) +unsigned long r3k_cache_lsize(unsigned long ca_flags) { unsigned long flags, status, lsize, i; volatile unsigned long *p; @@ -92,7 +89,7 @@ unsigned long __init r3k_cache_lsize(unsigned long ca_flags) return lsize * sizeof(*p); } -static void __init r3k_probe_cache(void) +static void r3k_probe_cache(void) { dcache_size = r3k_cache_size(ST0_ISC); if (dcache_size) @@ -121,7 +118,7 @@ static void r3k_flush_icache_range(unsigned long start, unsigned long end) write_c0_status((ST0_ISC|ST0_SWC|flags)&~ST0_IEC); for (i = 0; i < size; i += 0x080) { - asm ( "sb\t$0, 0x000(%0)\n\t" + asm( "sb\t$0, 0x000(%0)\n\t" "sb\t$0, 0x004(%0)\n\t" "sb\t$0, 0x008(%0)\n\t" "sb\t$0, 0x00c(%0)\n\t" @@ -129,7 +126,7 @@ static void r3k_flush_icache_range(unsigned long start, unsigned long end) "sb\t$0, 0x014(%0)\n\t" "sb\t$0, 0x018(%0)\n\t" "sb\t$0, 0x01c(%0)\n\t" - "sb\t$0, 0x020(%0)\n\t" + "sb\t$0, 0x020(%0)\n\t" "sb\t$0, 0x024(%0)\n\t" "sb\t$0, 0x028(%0)\n\t" "sb\t$0, 0x02c(%0)\n\t" @@ -145,7 +142,7 @@ static void r3k_flush_icache_range(unsigned long start, unsigned long end) "sb\t$0, 0x054(%0)\n\t" "sb\t$0, 0x058(%0)\n\t" "sb\t$0, 0x05c(%0)\n\t" - "sb\t$0, 0x060(%0)\n\t" + "sb\t$0, 0x060(%0)\n\t" "sb\t$0, 0x064(%0)\n\t" "sb\t$0, 0x068(%0)\n\t" "sb\t$0, 0x06c(%0)\n\t" @@ -178,35 +175,35 @@ static void r3k_flush_dcache_range(unsigned long start, unsigned long end) write_c0_status((ST0_ISC|flags)&~ST0_IEC); for (i = 0; i < size; i += 0x080) { - asm ( "sb\t$0, 0x000(%0)\n\t" + asm( "sb\t$0, 0x000(%0)\n\t" "sb\t$0, 0x004(%0)\n\t" "sb\t$0, 0x008(%0)\n\t" "sb\t$0, 0x00c(%0)\n\t" - "sb\t$0, 0x010(%0)\n\t" + "sb\t$0, 0x010(%0)\n\t" "sb\t$0, 0x014(%0)\n\t" "sb\t$0, 0x018(%0)\n\t" "sb\t$0, 0x01c(%0)\n\t" - "sb\t$0, 0x020(%0)\n\t" + "sb\t$0, 0x020(%0)\n\t" "sb\t$0, 0x024(%0)\n\t" "sb\t$0, 0x028(%0)\n\t" "sb\t$0, 0x02c(%0)\n\t" - "sb\t$0, 0x030(%0)\n\t" + "sb\t$0, 0x030(%0)\n\t" "sb\t$0, 0x034(%0)\n\t" "sb\t$0, 0x038(%0)\n\t" "sb\t$0, 0x03c(%0)\n\t" - "sb\t$0, 0x040(%0)\n\t" + "sb\t$0, 0x040(%0)\n\t" "sb\t$0, 0x044(%0)\n\t" "sb\t$0, 0x048(%0)\n\t" "sb\t$0, 0x04c(%0)\n\t" - "sb\t$0, 0x050(%0)\n\t" + "sb\t$0, 0x050(%0)\n\t" "sb\t$0, 0x054(%0)\n\t" "sb\t$0, 0x058(%0)\n\t" "sb\t$0, 0x05c(%0)\n\t" - "sb\t$0, 0x060(%0)\n\t" + "sb\t$0, 0x060(%0)\n\t" "sb\t$0, 0x064(%0)\n\t" "sb\t$0, 0x068(%0)\n\t" "sb\t$0, 0x06c(%0)\n\t" - "sb\t$0, 0x070(%0)\n\t" + "sb\t$0, 0x070(%0)\n\t" "sb\t$0, 0x074(%0)\n\t" "sb\t$0, 0x078(%0)\n\t" "sb\t$0, 0x07c(%0)\n\t" @@ -217,26 +214,6 @@ static void r3k_flush_dcache_range(unsigned long start, unsigned long end) write_c0_status(flags); } -static inline unsigned long get_phys_page (unsigned long addr, - struct mm_struct *mm) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - unsigned long physpage; - - pgd = pgd_offset(mm, addr); - pud = pud_offset(pgd, addr); - pmd = pmd_offset(pud, addr); - pte = pte_offset(pmd, addr); - - if ((physpage = pte_val(*pte)) & _PAGE_VALID) - return KSEG0ADDR(physpage & PAGE_MASK); - - return 0; -} - static inline void r3k_flush_cache_all(void) { } @@ -252,64 +229,79 @@ static void r3k_flush_cache_mm(struct mm_struct *mm) } static void r3k_flush_cache_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end) { } -static void r3k_flush_cache_page(struct vm_area_struct *vma, unsigned long page, unsigned long pfn) -{ -} - -static void r3k_flush_data_cache_page(unsigned long addr) -{ -} - -static void r3k_flush_icache_page(struct vm_area_struct *vma, struct page *page) +static void r3k_flush_cache_page(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn) { + unsigned long kaddr = KSEG0ADDR(pfn << PAGE_SHIFT); + int exec = vma->vm_flags & VM_EXEC; struct mm_struct *mm = vma->vm_mm; - unsigned long physpage; + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + pr_debug("cpage[%08lx,%08lx]\n", + cpu_context(smp_processor_id(), mm), addr); + /* No ASID => no such page in the cache. */ if (cpu_context(smp_processor_id(), mm) == 0) return; - if (!(vma->vm_flags & VM_EXEC)) + pgdp = pgd_offset(mm, addr); + pudp = pud_offset(pgdp, addr); + pmdp = pmd_offset(pudp, addr); + ptep = pte_offset(pmdp, addr); + + /* Invalid => no such page in the cache. */ + if (!(pte_val(*ptep) & _PAGE_PRESENT)) return; -#ifdef DEBUG_CACHE - printk("cpage[%d,%08lx]", cpu_context(smp_processor_id(), mm), page); -#endif + r3k_flush_dcache_range(kaddr, kaddr + PAGE_SIZE); + if (exec) + r3k_flush_icache_range(kaddr, kaddr + PAGE_SIZE); +} - physpage = (unsigned long) page_address(page); - if (physpage) - r3k_flush_icache_range(physpage, physpage + PAGE_SIZE); +static void local_r3k_flush_data_cache_page(void *addr) +{ +} + +static void r3k_flush_data_cache_page(unsigned long addr) +{ } static void r3k_flush_cache_sigtramp(unsigned long addr) { unsigned long flags; -#ifdef DEBUG_CACHE - printk("csigtramp[%08lx]", addr); -#endif + pr_debug("csigtramp[%08lx]\n", addr); flags = read_c0_status(); write_c0_status(flags&~ST0_IEC); /* Fill the TLB to avoid an exception with caches isolated. */ - asm ( "lw\t$0, 0x000(%0)\n\t" + asm( "lw\t$0, 0x000(%0)\n\t" "lw\t$0, 0x004(%0)\n\t" : : "r" (addr) ); write_c0_status((ST0_ISC|ST0_SWC|flags)&~ST0_IEC); - asm ( "sb\t$0, 0x000(%0)\n\t" + asm( "sb\t$0, 0x000(%0)\n\t" "sb\t$0, 0x004(%0)\n\t" : : "r" (addr) ); write_c0_status(flags); } +static void r3k_flush_kernel_vmap_range(unsigned long vaddr, int size) +{ + BUG(); +} + static void r3k_dma_cache_wback_inv(unsigned long start, unsigned long size) { /* Catch bad driver code */ @@ -319,7 +311,7 @@ static void r3k_dma_cache_wback_inv(unsigned long start, unsigned long size) r3k_flush_dcache_range(start, start + size); } -void __init r3k_cache_init(void) +void r3k_cache_init(void) { extern void build_clear_page(void); extern void build_copy_page(void); @@ -331,10 +323,13 @@ void __init r3k_cache_init(void) flush_cache_mm = r3k_flush_cache_mm; flush_cache_range = r3k_flush_cache_range; flush_cache_page = r3k_flush_cache_page; - flush_icache_page = r3k_flush_icache_page; flush_icache_range = r3k_flush_icache_range; + local_flush_icache_range = r3k_flush_icache_range; + + __flush_kernel_vmap_range = r3k_flush_kernel_vmap_range; flush_cache_sigtramp = r3k_flush_cache_sigtramp; + local_flush_data_cache_page = local_r3k_flush_data_cache_page; flush_data_cache_page = r3k_flush_data_cache_page; _dma_cache_wback_inv = r3k_dma_cache_wback_inv; diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 38223b44d96..f2e8302fa70 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -3,15 +3,21 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * Copyright (C) 1996 David S. Miller (davem@davemloft.net) * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Ralf Baechle (ralf@gnu.org) * Copyright (C) 1999, 2000 Silicon Graphics, Inc. */ -#include <linux/config.h> +#include <linux/cpu_pm.h> +#include <linux/hardirq.h> #include <linux/init.h> +#include <linux/highmem.h> #include <linux/kernel.h> +#include <linux/linkage.h> +#include <linux/preempt.h> #include <linux/sched.h> +#include <linux/smp.h> #include <linux/mm.h> +#include <linux/module.h> #include <linux/bitops.h> #include <asm/bcache.h> @@ -20,14 +26,43 @@ #include <asm/cacheops.h> #include <asm/cpu.h> #include <asm/cpu-features.h> +#include <asm/cpu-type.h> #include <asm/io.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/r4kcache.h> -#include <asm/system.h> +#include <asm/sections.h> #include <asm/mmu_context.h> #include <asm/war.h> #include <asm/cacheflush.h> /* for run_uncached() */ +#include <asm/traps.h> +#include <asm/dma-coherence.h> + +/* + * Special Variant of smp_call_function for use by cache functions: + * + * o No return value + * o collapses to normal function call on UP kernels + * o collapses to normal function call on systems with a single shared + * primary cache. + * o doesn't disable interrupts on the local CPU + */ +static inline void r4k_on_each_cpu(void (*func) (void *info), void *info) +{ + preempt_disable(); + +#ifndef CONFIG_MIPS_MT_SMP + smp_call_function(func, info, 1); +#endif + func(info); + preempt_enable(); +} + +#if defined(CONFIG_MIPS_CMP) || defined(CONFIG_MIPS_CPS) +#define cpu_has_safe_index_cacheops 0 +#else +#define cpu_has_safe_index_cacheops 1 +#endif /* * Must die. @@ -39,13 +74,13 @@ static unsigned long scache_size __read_mostly; /* * Dummy cache handling routines for machines without boardcaches */ -static void no_sc_noop(void) {} +static void cache_noop(void) {} static struct bcache_ops no_sc_ops = { - .bc_enable = (void *)no_sc_noop, - .bc_disable = (void *)no_sc_noop, - .bc_wback_inv = (void *)no_sc_noop, - .bc_inv = (void *)no_sc_noop + .bc_enable = (void *)cache_noop, + .bc_disable = (void *)cache_noop, + .bc_wback_inv = (void *)cache_noop, + .bc_inv = (void *)cache_noop }; struct bcache_ops *bcops = &no_sc_ops; @@ -69,38 +104,98 @@ static inline void r4k_blast_dcache_page_dc32(unsigned long addr) blast_dcache32_page(addr); } -static inline void r4k_blast_dcache_page_setup(void) +static inline void r4k_blast_dcache_page_dc64(unsigned long addr) +{ + blast_dcache64_page(addr); +} + +static inline void r4k_blast_dcache_page_dc128(unsigned long addr) +{ + blast_dcache128_page(addr); +} + +static void r4k_blast_dcache_page_setup(void) { unsigned long dc_lsize = cpu_dcache_line_size(); - if (dc_lsize == 16) + switch (dc_lsize) { + case 0: + r4k_blast_dcache_page = (void *)cache_noop; + break; + case 16: r4k_blast_dcache_page = blast_dcache16_page; - else if (dc_lsize == 32) + break; + case 32: r4k_blast_dcache_page = r4k_blast_dcache_page_dc32; + break; + case 64: + r4k_blast_dcache_page = r4k_blast_dcache_page_dc64; + break; + case 128: + r4k_blast_dcache_page = r4k_blast_dcache_page_dc128; + break; + default: + break; + } } +#ifndef CONFIG_EVA +#define r4k_blast_dcache_user_page r4k_blast_dcache_page +#else + +static void (*r4k_blast_dcache_user_page)(unsigned long addr); + +static void r4k_blast_dcache_user_page_setup(void) +{ + unsigned long dc_lsize = cpu_dcache_line_size(); + + if (dc_lsize == 0) + r4k_blast_dcache_user_page = (void *)cache_noop; + else if (dc_lsize == 16) + r4k_blast_dcache_user_page = blast_dcache16_user_page; + else if (dc_lsize == 32) + r4k_blast_dcache_user_page = blast_dcache32_user_page; + else if (dc_lsize == 64) + r4k_blast_dcache_user_page = blast_dcache64_user_page; +} + +#endif + static void (* r4k_blast_dcache_page_indexed)(unsigned long addr); -static inline void r4k_blast_dcache_page_indexed_setup(void) +static void r4k_blast_dcache_page_indexed_setup(void) { unsigned long dc_lsize = cpu_dcache_line_size(); - if (dc_lsize == 16) + if (dc_lsize == 0) + r4k_blast_dcache_page_indexed = (void *)cache_noop; + else if (dc_lsize == 16) r4k_blast_dcache_page_indexed = blast_dcache16_page_indexed; else if (dc_lsize == 32) r4k_blast_dcache_page_indexed = blast_dcache32_page_indexed; + else if (dc_lsize == 64) + r4k_blast_dcache_page_indexed = blast_dcache64_page_indexed; + else if (dc_lsize == 128) + r4k_blast_dcache_page_indexed = blast_dcache128_page_indexed; } -static void (* r4k_blast_dcache)(void); +void (* r4k_blast_dcache)(void); +EXPORT_SYMBOL(r4k_blast_dcache); -static inline void r4k_blast_dcache_setup(void) +static void r4k_blast_dcache_setup(void) { unsigned long dc_lsize = cpu_dcache_line_size(); - if (dc_lsize == 16) + if (dc_lsize == 0) + r4k_blast_dcache = (void *)cache_noop; + else if (dc_lsize == 16) r4k_blast_dcache = blast_dcache16; else if (dc_lsize == 32) r4k_blast_dcache = blast_dcache32; + else if (dc_lsize == 64) + r4k_blast_dcache = blast_dcache64; + else if (dc_lsize == 128) + r4k_blast_dcache = blast_dcache128; } /* force code alignment (used for TX49XX_ICACHE_INDEX_INV_WAR) */ @@ -111,7 +206,7 @@ static inline void r4k_blast_dcache_setup(void) "1:\n\t" \ ) #define CACHE32_UNROLL32_ALIGN JUMP_TO_ALIGN(10) /* 32 * 32 = 1024 */ -#define CACHE32_UNROLL32_ALIGN2 JUMP_TO_ALIGN(11) +#define CACHE32_UNROLL32_ALIGN2 JUMP_TO_ALIGN(11) static inline void blast_r4600_v1_icache32(void) { @@ -128,19 +223,19 @@ static inline void tx49_blast_icache32(void) unsigned long end = start + current_cpu_data.icache.waysize; unsigned long ws_inc = 1UL << current_cpu_data.icache.waybit; unsigned long ws_end = current_cpu_data.icache.ways << - current_cpu_data.icache.waybit; + current_cpu_data.icache.waybit; unsigned long ws, addr; CACHE32_UNROLL32_ALIGN2; /* I'm in even chunk. blast odd chunks */ for (ws = 0; ws < ws_end; ws += ws_inc) for (addr = start + 0x400; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws,Index_Invalidate_I); + cache32_unroll32(addr|ws, Index_Invalidate_I); CACHE32_UNROLL32_ALIGN; /* I'm in odd chunk. blast even chunks */ for (ws = 0; ws < ws_end; ws += ws_inc) for (addr = start; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws,Index_Invalidate_I); + cache32_unroll32(addr|ws, Index_Invalidate_I); } static inline void blast_icache32_r4600_v1_page_indexed(unsigned long page) @@ -154,47 +249,77 @@ static inline void blast_icache32_r4600_v1_page_indexed(unsigned long page) static inline void tx49_blast_icache32_page_indexed(unsigned long page) { - unsigned long start = page; + unsigned long indexmask = current_cpu_data.icache.waysize - 1; + unsigned long start = INDEX_BASE + (page & indexmask); unsigned long end = start + PAGE_SIZE; unsigned long ws_inc = 1UL << current_cpu_data.icache.waybit; unsigned long ws_end = current_cpu_data.icache.ways << - current_cpu_data.icache.waybit; + current_cpu_data.icache.waybit; unsigned long ws, addr; CACHE32_UNROLL32_ALIGN2; /* I'm in even chunk. blast odd chunks */ for (ws = 0; ws < ws_end; ws += ws_inc) for (addr = start + 0x400; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws,Index_Invalidate_I); + cache32_unroll32(addr|ws, Index_Invalidate_I); CACHE32_UNROLL32_ALIGN; /* I'm in odd chunk. blast even chunks */ for (ws = 0; ws < ws_end; ws += ws_inc) for (addr = start; addr < end; addr += 0x400 * 2) - cache32_unroll32(addr|ws,Index_Invalidate_I); + cache32_unroll32(addr|ws, Index_Invalidate_I); } static void (* r4k_blast_icache_page)(unsigned long addr); -static inline void r4k_blast_icache_page_setup(void) +static void r4k_blast_icache_page_setup(void) { unsigned long ic_lsize = cpu_icache_line_size(); - if (ic_lsize == 16) + if (ic_lsize == 0) + r4k_blast_icache_page = (void *)cache_noop; + else if (ic_lsize == 16) r4k_blast_icache_page = blast_icache16_page; + else if (ic_lsize == 32 && current_cpu_type() == CPU_LOONGSON2) + r4k_blast_icache_page = loongson2_blast_icache32_page; else if (ic_lsize == 32) r4k_blast_icache_page = blast_icache32_page; else if (ic_lsize == 64) r4k_blast_icache_page = blast_icache64_page; + else if (ic_lsize == 128) + r4k_blast_icache_page = blast_icache128_page; } +#ifndef CONFIG_EVA +#define r4k_blast_icache_user_page r4k_blast_icache_page +#else + +static void (*r4k_blast_icache_user_page)(unsigned long addr); + +static void __cpuinit r4k_blast_icache_user_page_setup(void) +{ + unsigned long ic_lsize = cpu_icache_line_size(); + + if (ic_lsize == 0) + r4k_blast_icache_user_page = (void *)cache_noop; + else if (ic_lsize == 16) + r4k_blast_icache_user_page = blast_icache16_user_page; + else if (ic_lsize == 32) + r4k_blast_icache_user_page = blast_icache32_user_page; + else if (ic_lsize == 64) + r4k_blast_icache_user_page = blast_icache64_user_page; +} + +#endif static void (* r4k_blast_icache_page_indexed)(unsigned long addr); -static inline void r4k_blast_icache_page_indexed_setup(void) +static void r4k_blast_icache_page_indexed_setup(void) { unsigned long ic_lsize = cpu_icache_line_size(); - if (ic_lsize == 16) + if (ic_lsize == 0) + r4k_blast_icache_page_indexed = (void *)cache_noop; + else if (ic_lsize == 16) r4k_blast_icache_page_indexed = blast_icache16_page_indexed; else if (ic_lsize == 32) { if (R4600_V1_INDEX_ICACHEOP_WAR && cpu_is_r4600_v1_x()) @@ -203,6 +328,9 @@ static inline void r4k_blast_icache_page_indexed_setup(void) else if (TX49XX_ICACHE_INDEX_INV_WAR) r4k_blast_icache_page_indexed = tx49_blast_icache32_page_indexed; + else if (current_cpu_type() == CPU_LOONGSON2) + r4k_blast_icache_page_indexed = + loongson2_blast_icache32_page_indexed; else r4k_blast_icache_page_indexed = blast_icache32_page_indexed; @@ -210,32 +338,41 @@ static inline void r4k_blast_icache_page_indexed_setup(void) r4k_blast_icache_page_indexed = blast_icache64_page_indexed; } -static void (* r4k_blast_icache)(void); +void (* r4k_blast_icache)(void); +EXPORT_SYMBOL(r4k_blast_icache); -static inline void r4k_blast_icache_setup(void) +static void r4k_blast_icache_setup(void) { unsigned long ic_lsize = cpu_icache_line_size(); - if (ic_lsize == 16) + if (ic_lsize == 0) + r4k_blast_icache = (void *)cache_noop; + else if (ic_lsize == 16) r4k_blast_icache = blast_icache16; else if (ic_lsize == 32) { if (R4600_V1_INDEX_ICACHEOP_WAR && cpu_is_r4600_v1_x()) r4k_blast_icache = blast_r4600_v1_icache32; else if (TX49XX_ICACHE_INDEX_INV_WAR) r4k_blast_icache = tx49_blast_icache32; + else if (current_cpu_type() == CPU_LOONGSON2) + r4k_blast_icache = loongson2_blast_icache32; else r4k_blast_icache = blast_icache32; } else if (ic_lsize == 64) r4k_blast_icache = blast_icache64; + else if (ic_lsize == 128) + r4k_blast_icache = blast_icache128; } static void (* r4k_blast_scache_page)(unsigned long addr); -static inline void r4k_blast_scache_page_setup(void) +static void r4k_blast_scache_page_setup(void) { unsigned long sc_lsize = cpu_scache_line_size(); - if (sc_lsize == 16) + if (scache_size == 0) + r4k_blast_scache_page = (void *)cache_noop; + else if (sc_lsize == 16) r4k_blast_scache_page = blast_scache16_page; else if (sc_lsize == 32) r4k_blast_scache_page = blast_scache32_page; @@ -247,11 +384,13 @@ static inline void r4k_blast_scache_page_setup(void) static void (* r4k_blast_scache_page_indexed)(unsigned long addr); -static inline void r4k_blast_scache_page_indexed_setup(void) +static void r4k_blast_scache_page_indexed_setup(void) { unsigned long sc_lsize = cpu_scache_line_size(); - if (sc_lsize == 16) + if (scache_size == 0) + r4k_blast_scache_page_indexed = (void *)cache_noop; + else if (sc_lsize == 16) r4k_blast_scache_page_indexed = blast_scache16_page_indexed; else if (sc_lsize == 32) r4k_blast_scache_page_indexed = blast_scache32_page_indexed; @@ -263,11 +402,13 @@ static inline void r4k_blast_scache_page_indexed_setup(void) static void (* r4k_blast_scache)(void); -static inline void r4k_blast_scache_setup(void) +static void r4k_blast_scache_setup(void) { unsigned long sc_lsize = cpu_scache_line_size(); - if (sc_lsize == 16) + if (scache_size == 0) + r4k_blast_scache = (void *)cache_noop; + else if (sc_lsize == 16) r4k_blast_scache = blast_scache16; else if (sc_lsize == 32) r4k_blast_scache = blast_scache32; @@ -277,56 +418,72 @@ static inline void r4k_blast_scache_setup(void) r4k_blast_scache = blast_scache128; } -/* - * This is former mm's flush_cache_all() which really should be - * flush_cache_vunmap these days ... - */ -static inline void local_r4k_flush_cache_all(void * args) -{ - r4k_blast_dcache(); - r4k_blast_icache(); -} - -static void r4k_flush_cache_all(void) -{ - if (!cpu_has_dc_aliases) - return; - - on_each_cpu(local_r4k_flush_cache_all, NULL, 1, 1); -} - static inline void local_r4k___flush_cache_all(void * args) { - r4k_blast_dcache(); - r4k_blast_icache(); - - switch (current_cpu_data.cputype) { + switch (current_cpu_type()) { + case CPU_LOONGSON2: + case CPU_LOONGSON3: case CPU_R4000SC: case CPU_R4000MC: case CPU_R4400SC: case CPU_R4400MC: case CPU_R10000: case CPU_R12000: + case CPU_R14000: + /* + * These caches are inclusive caches, that is, if something + * is not cached in the S-cache, we know it also won't be + * in one of the primary caches. + */ r4k_blast_scache(); + break; + + default: + r4k_blast_dcache(); + r4k_blast_icache(); + break; } } static void r4k___flush_cache_all(void) { - on_each_cpu(local_r4k___flush_cache_all, NULL, 1, 1); + r4k_on_each_cpu(local_r4k___flush_cache_all, NULL); +} + +static inline int has_valid_asid(const struct mm_struct *mm) +{ +#ifdef CONFIG_MIPS_MT_SMP + int i; + + for_each_online_cpu(i) + if (cpu_context(i, mm)) + return 1; + + return 0; +#else + return cpu_context(smp_processor_id(), mm); +#endif +} + +static void r4k__flush_cache_vmap(void) +{ + r4k_blast_dcache(); +} + +static void r4k__flush_cache_vunmap(void) +{ + r4k_blast_dcache(); } static inline void local_r4k_flush_cache_range(void * args) { struct vm_area_struct *vma = args; - int exec; + int exec = vma->vm_flags & VM_EXEC; - if (!(cpu_context(smp_processor_id(), vma->vm_mm))) + if (!(has_valid_asid(vma->vm_mm))) return; - exec = vma->vm_flags & VM_EXEC; - if (cpu_has_dc_aliases || exec) - r4k_blast_dcache(); + r4k_blast_dcache(); if (exec) r4k_blast_icache(); } @@ -334,28 +491,34 @@ static inline void local_r4k_flush_cache_range(void * args) static void r4k_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - on_each_cpu(local_r4k_flush_cache_range, vma, 1, 1); + int exec = vma->vm_flags & VM_EXEC; + + if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) + r4k_on_each_cpu(local_r4k_flush_cache_range, vma); } static inline void local_r4k_flush_cache_mm(void * args) { struct mm_struct *mm = args; - if (!cpu_context(smp_processor_id(), mm)) + if (!has_valid_asid(mm)) return; - r4k_blast_dcache(); - r4k_blast_icache(); - /* * Kludge alert. For obscure reasons R4000SC and R4400SC go nuts if we * only flush the primary caches but R10000 and R12000 behave sane ... + * R4000SC and R4400SC indexed S-cache ops also invalidate primary + * caches, so we can bail out early. */ - if (current_cpu_data.cputype == CPU_R4000SC || - current_cpu_data.cputype == CPU_R4000MC || - current_cpu_data.cputype == CPU_R4400SC || - current_cpu_data.cputype == CPU_R4400MC) + if (current_cpu_type() == CPU_R4000SC || + current_cpu_type() == CPU_R4000MC || + current_cpu_type() == CPU_R4400SC || + current_cpu_type() == CPU_R4400MC) { r4k_blast_scache(); + return; + } + + r4k_blast_dcache(); } static void r4k_flush_cache_mm(struct mm_struct *mm) @@ -363,12 +526,13 @@ static void r4k_flush_cache_mm(struct mm_struct *mm) if (!cpu_has_dc_aliases) return; - on_each_cpu(local_r4k_flush_cache_mm, mm, 1, 1); + r4k_on_each_cpu(local_r4k_flush_cache_mm, mm); } struct flush_cache_page_args { struct vm_area_struct *vma; unsigned long addr; + unsigned long pfn; }; static inline void local_r4k_flush_cache_page(void *args) @@ -376,18 +540,21 @@ static inline void local_r4k_flush_cache_page(void *args) struct flush_cache_page_args *fcp_args = args; struct vm_area_struct *vma = fcp_args->vma; unsigned long addr = fcp_args->addr; + struct page *page = pfn_to_page(fcp_args->pfn); int exec = vma->vm_flags & VM_EXEC; struct mm_struct *mm = vma->vm_mm; + int map_coherent = 0; pgd_t *pgdp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; + void *vaddr; /* * If ownes no valid ASID yet, cannot possibly have gotten * this page into the cache. */ - if (cpu_context(smp_processor_id(), mm) == 0) + if (!has_valid_asid(mm)) return; addr &= PAGE_MASK; @@ -400,45 +567,47 @@ static inline void local_r4k_flush_cache_page(void *args) * If the page isn't marked valid, the page cannot possibly be * in the cache. */ - if (!(pte_val(*ptep) & _PAGE_PRESENT)) + if (!(pte_present(*ptep))) return; - /* - * Doing flushes for another ASID than the current one is - * too difficult since stupid R4k caches do a TLB translation - * for every cache flush operation. So we do indexed flushes - * in that case, which doesn't overly flush the cache too much. - */ - if ((mm == current->active_mm) && (pte_val(*ptep) & _PAGE_VALID)) { - if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) { - r4k_blast_dcache_page(addr); - if (exec && !cpu_icache_snoops_remote_store) - r4k_blast_scache_page(addr); - } - if (exec) - r4k_blast_icache_page(addr); - - return; + if ((mm == current->active_mm) && (pte_val(*ptep) & _PAGE_VALID)) + vaddr = NULL; + else { + /* + * Use kmap_coherent or kmap_atomic to do flushes for + * another ASID than the current one. + */ + map_coherent = (cpu_has_dc_aliases && + page_mapped(page) && !Page_dcache_dirty(page)); + if (map_coherent) + vaddr = kmap_coherent(page, addr); + else + vaddr = kmap_atomic(page); + addr = (unsigned long)vaddr; } - /* - * Do indexed flush, too much work to get the (possible) TLB refills - * to work correctly. - */ - addr = INDEX_BASE + (addr & (dcache_size - 1)); if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) { - r4k_blast_dcache_page_indexed(addr); + vaddr ? r4k_blast_dcache_page(addr) : + r4k_blast_dcache_user_page(addr); if (exec && !cpu_icache_snoops_remote_store) - r4k_blast_scache_page_indexed(addr); + r4k_blast_scache_page(addr); } if (exec) { - if (cpu_has_vtag_icache) { + if (vaddr && cpu_has_vtag_icache && mm == current->active_mm) { int cpu = smp_processor_id(); if (cpu_context(cpu, mm) != 0) drop_mmu_context(mm, cpu); } else - r4k_blast_icache_page_indexed(addr); + vaddr ? r4k_blast_icache_page(addr) : + r4k_blast_icache_user_page(addr); + } + + if (vaddr) { + if (map_coherent) + kunmap_coherent(); + else + kunmap_atomic(vaddr); } } @@ -449,8 +618,9 @@ static void r4k_flush_cache_page(struct vm_area_struct *vma, args.vma = vma; args.addr = addr; + args.pfn = pfn; - on_each_cpu(local_r4k_flush_cache_page, &args, 1, 1); + r4k_on_each_cpu(local_r4k_flush_cache_page, &args); } static inline void local_r4k_flush_data_cache_page(void * addr) @@ -460,187 +630,89 @@ static inline void local_r4k_flush_data_cache_page(void * addr) static void r4k_flush_data_cache_page(unsigned long addr) { - on_each_cpu(local_r4k_flush_data_cache_page, (void *) addr, 1, 1); + if (in_atomic()) + local_r4k_flush_data_cache_page((void *)addr); + else + r4k_on_each_cpu(local_r4k_flush_data_cache_page, (void *) addr); } struct flush_icache_range_args { - unsigned long __user start; - unsigned long __user end; + unsigned long start; + unsigned long end; }; -static inline void local_r4k_flush_icache_range(void *args) +static inline void local_r4k_flush_icache_range(unsigned long start, unsigned long end) { - struct flush_icache_range_args *fir_args = args; - unsigned long dc_lsize = cpu_dcache_line_size(); - unsigned long ic_lsize = cpu_icache_line_size(); - unsigned long sc_lsize = cpu_scache_line_size(); - unsigned long start = fir_args->start; - unsigned long end = fir_args->end; - unsigned long addr, aend; - if (!cpu_has_ic_fills_f_dc) { - if (end - start > dcache_size) { + if (end - start >= dcache_size) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; - addr = start & ~(dc_lsize - 1); - aend = (end - 1) & ~(dc_lsize - 1); - - while (1) { - /* Hit_Writeback_Inv_D */ - protected_writeback_dcache_line(addr); - if (addr == aend) - break; - addr += dc_lsize; - } - } - - if (!cpu_icache_snoops_remote_store) { - if (end - start > scache_size) { - r4k_blast_scache(); - } else { - addr = start & ~(sc_lsize - 1); - aend = (end - 1) & ~(sc_lsize - 1); - - while (1) { - /* Hit_Writeback_Inv_SD */ - protected_writeback_scache_line(addr); - if (addr == aend) - break; - addr += sc_lsize; - } - } + protected_blast_dcache_range(start, end); } } if (end - start > icache_size) r4k_blast_icache(); else { - addr = start & ~(ic_lsize - 1); - aend = (end - 1) & ~(ic_lsize - 1); - while (1) { - /* Hit_Invalidate_I */ - protected_flush_icache_line(addr); - if (addr == aend) - break; - addr += ic_lsize; + switch (boot_cpu_type()) { + case CPU_LOONGSON2: + protected_loongson2_blast_icache_range(start, end); + break; + + default: + protected_blast_icache_range(start, end); + break; } } -} - -static void r4k_flush_icache_range(unsigned long __user start, - unsigned long __user end) -{ - struct flush_icache_range_args args; - - args.start = start; - args.end = end; - - on_each_cpu(local_r4k_flush_icache_range, &args, 1, 1); - instruction_hazard(); -} - -/* - * Ok, this seriously sucks. We use them to flush a user page but don't - * know the virtual address, so we have to blast away the whole icache - * which is significantly more expensive than the real thing. Otoh we at - * least know the kernel address of the page so we can flush it - * selectivly. - */ - -struct flush_icache_page_args { - struct vm_area_struct *vma; - struct page *page; -}; - -static inline void local_r4k_flush_icache_page(void *args) -{ - struct flush_icache_page_args *fip_args = args; - struct vm_area_struct *vma = fip_args->vma; - struct page *page = fip_args->page; - +#ifdef CONFIG_EVA /* - * Tricky ... Because we don't know the virtual address we've got the - * choice of either invalidating the entire primary and secondary - * caches or invalidating the secondary caches also. With the subset - * enforcment on R4000SC, R4400SC, R10000 and R12000 invalidating the - * secondary cache will result in any entries in the primary caches - * also getting invalidated which hopefully is a bit more economical. + * Due to all possible segment mappings, there might cache aliases + * caused by the bootloader being in non-EVA mode, and the CPU switching + * to EVA during early kernel init. It's best to flush the scache + * to avoid having secondary cores fetching stale data and lead to + * kernel crashes. */ - if (cpu_has_subset_pcaches) { - unsigned long addr = (unsigned long) page_address(page); - - r4k_blast_scache_page(addr); - ClearPageDcacheDirty(page); - - return; - } - - if (!cpu_has_ic_fills_f_dc) { - unsigned long addr = (unsigned long) page_address(page); - r4k_blast_dcache_page(addr); - if (!cpu_icache_snoops_remote_store) - r4k_blast_scache_page(addr); - ClearPageDcacheDirty(page); - } + bc_wback_inv(start, (end - start)); + __sync(); +#endif +} - /* - * We're not sure of the virtual address(es) involved here, so - * we have to flush the entire I-cache. - */ - if (cpu_has_vtag_icache) { - int cpu = smp_processor_id(); +static inline void local_r4k_flush_icache_range_ipi(void *args) +{ + struct flush_icache_range_args *fir_args = args; + unsigned long start = fir_args->start; + unsigned long end = fir_args->end; - if (cpu_context(cpu, vma->vm_mm) != 0) - drop_mmu_context(vma->vm_mm, cpu); - } else - r4k_blast_icache(); + local_r4k_flush_icache_range(start, end); } -static void r4k_flush_icache_page(struct vm_area_struct *vma, - struct page *page) +static void r4k_flush_icache_range(unsigned long start, unsigned long end) { - struct flush_icache_page_args args; - - /* - * If there's no context yet, or the page isn't executable, no I-cache - * flush is needed. - */ - if (!(vma->vm_flags & VM_EXEC)) - return; + struct flush_icache_range_args args; - args.vma = vma; - args.page = page; + args.start = start; + args.end = end; - on_each_cpu(local_r4k_flush_icache_page, &args, 1, 1); + r4k_on_each_cpu(local_r4k_flush_icache_range_ipi, &args); + instruction_hazard(); } - -#ifdef CONFIG_DMA_NONCOHERENT +#if defined(CONFIG_DMA_NONCOHERENT) || defined(CONFIG_DMA_MAYBE_COHERENT) static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) { - unsigned long end, a; - /* Catch bad driver code */ BUG_ON(size == 0); - if (cpu_has_subset_pcaches) { - unsigned long sc_lsize = cpu_scache_line_size(); - - if (size >= scache_size) { + preempt_disable(); + if (cpu_has_inclusive_pcaches) { + if (size >= scache_size) r4k_blast_scache(); - return; - } - - a = addr & ~(sc_lsize - 1); - end = (addr + size - 1) & ~(sc_lsize - 1); - while (1) { - flush_scache_line(a); /* Hit_Writeback_Inv_SD */ - if (a == end) - break; - a += sc_lsize; - } + else + blast_scache_range(addr, addr + size); + preempt_enable(); + __sync(); return; } @@ -649,70 +721,55 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) * subset property so we have to flush the primary caches * explicitly */ - if (size >= dcache_size) { + if (cpu_has_safe_index_cacheops && size >= dcache_size) { r4k_blast_dcache(); } else { - unsigned long dc_lsize = cpu_dcache_line_size(); - R4600_HIT_CACHEOP_WAR_IMPL; - a = addr & ~(dc_lsize - 1); - end = (addr + size - 1) & ~(dc_lsize - 1); - while (1) { - flush_dcache_line(a); /* Hit_Writeback_Inv_D */ - if (a == end) - break; - a += dc_lsize; - } + blast_dcache_range(addr, addr + size); } + preempt_enable(); bc_wback_inv(addr, size); + __sync(); } static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) { - unsigned long end, a; - /* Catch bad driver code */ BUG_ON(size == 0); - if (cpu_has_subset_pcaches) { - unsigned long sc_lsize = cpu_scache_line_size(); - - if (size >= scache_size) { + preempt_disable(); + if (cpu_has_inclusive_pcaches) { + if (size >= scache_size) r4k_blast_scache(); - return; - } - - a = addr & ~(sc_lsize - 1); - end = (addr + size - 1) & ~(sc_lsize - 1); - while (1) { - flush_scache_line(a); /* Hit_Writeback_Inv_SD */ - if (a == end) - break; - a += sc_lsize; + else { + /* + * There is no clearly documented alignment requirement + * for the cache instruction on MIPS processors and + * some processors, among them the RM5200 and RM7000 + * QED processors will throw an address error for cache + * hit ops with insufficient alignment. Solved by + * aligning the address to cache line size. + */ + blast_inv_scache_range(addr, addr + size); } + preempt_enable(); + __sync(); return; } - if (size >= dcache_size) { + if (cpu_has_safe_index_cacheops && size >= dcache_size) { r4k_blast_dcache(); } else { - unsigned long dc_lsize = cpu_dcache_line_size(); - R4600_HIT_CACHEOP_WAR_IMPL; - a = addr & ~(dc_lsize - 1); - end = (addr + size - 1) & ~(dc_lsize - 1); - while (1) { - flush_dcache_line(a); /* Hit_Writeback_Inv_D */ - if (a == end) - break; - a += dc_lsize; - } + blast_inv_dcache_range(addr, addr + size); } + preempt_enable(); bc_inv(addr, size); + __sync(); } -#endif /* CONFIG_DMA_NONCOHERENT */ +#endif /* CONFIG_DMA_NONCOHERENT || CONFIG_DMA_MAYBE_COHERENT */ /* * While we're protected against bad userland addresses we don't care @@ -727,10 +784,12 @@ static void local_r4k_flush_cache_sigtramp(void * arg) unsigned long addr = (unsigned long) arg; R4600_HIT_CACHEOP_WAR_IMPL; - protected_writeback_dcache_line(addr & ~(dc_lsize - 1)); - if (!cpu_icache_snoops_remote_store) + if (dc_lsize) + protected_writeback_dcache_line(addr & ~(dc_lsize - 1)); + if (!cpu_icache_snoops_remote_store && scache_size) protected_writeback_scache_line(addr & ~(sc_lsize - 1)); - protected_flush_icache_line(addr & ~(ic_lsize - 1)); + if (ic_lsize) + protected_flush_icache_line(addr & ~(ic_lsize - 1)); if (MIPS4K_ICACHE_REFILL_WAR) { __asm__ __volatile__ ( ".set push\n\t" @@ -755,7 +814,7 @@ static void local_r4k_flush_cache_sigtramp(void * arg) static void r4k_flush_cache_sigtramp(unsigned long addr) { - on_each_cpu(local_r4k_flush_cache_sigtramp, (void *) addr, 1, 1); + r4k_on_each_cpu(local_r4k_flush_cache_sigtramp, (void *) addr); } static void r4k_flush_icache_all(void) @@ -764,6 +823,39 @@ static void r4k_flush_icache_all(void) r4k_blast_icache(); } +struct flush_kernel_vmap_range_args { + unsigned long vaddr; + int size; +}; + +static inline void local_r4k_flush_kernel_vmap_range(void *args) +{ + struct flush_kernel_vmap_range_args *vmra = args; + unsigned long vaddr = vmra->vaddr; + int size = vmra->size; + + /* + * Aliases only affect the primary caches so don't bother with + * S-caches or T-caches. + */ + if (cpu_has_safe_index_cacheops && size >= dcache_size) + r4k_blast_dcache(); + else { + R4600_HIT_CACHEOP_WAR_IMPL; + blast_dcache_range(vaddr, vaddr + size); + } +} + +static void r4k_flush_kernel_vmap_range(unsigned long vaddr, int size) +{ + struct flush_kernel_vmap_range_args args; + + args.vaddr = (unsigned long) vaddr; + args.size = size; + + r4k_on_each_cpu(local_r4k_flush_kernel_vmap_range, &args); +} + static inline void rm7k_erratum31(void) { const unsigned long ic_lsize = 32; @@ -796,11 +888,40 @@ static inline void rm7k_erratum31(void) } } -static char *way_string[] __initdata = { NULL, "direct mapped", "2-way", +static inline void alias_74k_erratum(struct cpuinfo_mips *c) +{ + unsigned int imp = c->processor_id & PRID_IMP_MASK; + unsigned int rev = c->processor_id & PRID_REV_MASK; + + /* + * Early versions of the 74K do not update the cache tags on a + * vtag miss/ptag hit which can occur in the case of KSEG0/KUSEG + * aliases. In this case it is better to treat the cache as always + * having aliases. + */ + switch (imp) { + case PRID_IMP_74K: + if (rev <= PRID_REV_ENCODE_332(2, 4, 0)) + c->dcache.flags |= MIPS_CACHE_VTAG; + if (rev == PRID_REV_ENCODE_332(2, 4, 0)) + write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + break; + case PRID_IMP_1074K: + if (rev <= PRID_REV_ENCODE_332(1, 1, 0)) { + c->dcache.flags |= MIPS_CACHE_VTAG; + write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + } + break; + default: + BUG(); + } +} + +static char *way_string[] = { NULL, "direct mapped", "2-way", "3-way", "4-way", "5-way", "6-way", "7-way", "8-way" }; -static void __init probe_pcache(void) +static void probe_pcache(void) { struct cpuinfo_mips *c = ¤t_cpu_data; unsigned int config = read_c0_config(); @@ -808,7 +929,7 @@ static void __init probe_pcache(void) unsigned long config1; unsigned int lsize; - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_R4600: /* QED style two way caches? */ case CPU_R4700: case CPU_R5000: @@ -816,12 +937,12 @@ static void __init probe_pcache(void) icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); c->icache.linesz = 16 << ((config & CONF_IB) >> 5); c->icache.ways = 2; - c->icache.waybit = ffs(icache_size/2) - 1; + c->icache.waybit = __ffs(icache_size/2); dcache_size = 1 << (12 + ((config & CONF_DC) >> 6)); c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); c->dcache.ways = 2; - c->dcache.waybit= ffs(dcache_size/2) - 1; + c->dcache.waybit= __ffs(dcache_size/2); c->options |= MIPS_CPU_CACHE_CDEX_P; break; @@ -838,7 +959,7 @@ static void __init probe_pcache(void) c->dcache.ways = 2; c->dcache.waybit = 0; - c->options |= MIPS_CPU_CACHE_CDEX_P; + c->options |= MIPS_CPU_CACHE_CDEX_P | MIPS_CPU_PREFETCH; break; case CPU_TX49XX: @@ -853,6 +974,7 @@ static void __init probe_pcache(void) c->dcache.waybit = 0; c->options |= MIPS_CPU_CACHE_CDEX_P; + c->options |= MIPS_CPU_PREFETCH; break; case CPU_R4000PC: @@ -865,7 +987,7 @@ static void __init probe_pcache(void) icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); c->icache.linesz = 16 << ((config & CONF_IB) >> 5); c->icache.ways = 1; - c->icache.waybit = 0; /* doesn't matter */ + c->icache.waybit = 0; /* doesn't matter */ dcache_size = 1 << (12 + ((config & CONF_DC) >> 6)); c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); @@ -877,6 +999,7 @@ static void __init probe_pcache(void) case CPU_R10000: case CPU_R12000: + case CPU_R14000: icache_size = 1 << (12 + ((config & R10K_CONF_IC) >> 29)); c->icache.linesz = 64; c->icache.ways = 2; @@ -891,26 +1014,27 @@ static void __init probe_pcache(void) break; case CPU_VR4133: - write_c0_config(config & ~CONF_EB); + write_c0_config(config & ~VR41_CONF_P4K); case CPU_VR4131: /* Workaround for cache instruction bug of VR4131 */ if (c->processor_id == 0x0c80U || c->processor_id == 0x0c81U || c->processor_id == 0x0c82U) { - config &= ~0x00000030U; - config |= 0x00410000U; + config |= 0x00400000U; + if (c->processor_id == 0x0c80U) + config |= VR41_CONF_BP; write_c0_config(config); - } + } else + c->options |= MIPS_CPU_CACHE_CDEX_P; + icache_size = 1 << (10 + ((config & CONF_IC) >> 9)); c->icache.linesz = 16 << ((config & CONF_IB) >> 5); c->icache.ways = 2; - c->icache.waybit = ffs(icache_size/2) - 1; + c->icache.waybit = __ffs(icache_size/2); dcache_size = 1 << (10 + ((config & CONF_DC) >> 6)); c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); c->dcache.ways = 2; - c->dcache.waybit = ffs(dcache_size/2) - 1; - - c->options |= MIPS_CPU_CACHE_CDEX_P; + c->dcache.waybit = __ffs(dcache_size/2); break; case CPU_VR41XX: @@ -922,7 +1046,7 @@ static void __init probe_pcache(void) icache_size = 1 << (10 + ((config & CONF_IC) >> 9)); c->icache.linesz = 16 << ((config & CONF_IB) >> 5); c->icache.ways = 1; - c->icache.waybit = 0; /* doesn't matter */ + c->icache.waybit = 0; /* doesn't matter */ dcache_size = 1 << (10 + ((config & CONF_DC) >> 6)); c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); @@ -935,20 +1059,77 @@ static void __init probe_pcache(void) case CPU_RM7000: rm7k_erratum31(); - case CPU_RM9000: icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); c->icache.linesz = 16 << ((config & CONF_IB) >> 5); c->icache.ways = 4; - c->icache.waybit = ffs(icache_size / c->icache.ways) - 1; + c->icache.waybit = __ffs(icache_size / c->icache.ways); dcache_size = 1 << (12 + ((config & CONF_DC) >> 6)); c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); c->dcache.ways = 4; - c->dcache.waybit = ffs(dcache_size / c->dcache.ways) - 1; + c->dcache.waybit = __ffs(dcache_size / c->dcache.ways); -#if !defined(CONFIG_SMP) || !defined(RM9000_CDEX_SMP_WAR) c->options |= MIPS_CPU_CACHE_CDEX_P; -#endif + c->options |= MIPS_CPU_PREFETCH; + break; + + case CPU_LOONGSON2: + icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); + c->icache.linesz = 16 << ((config & CONF_IB) >> 5); + if (prid & 0x3) + c->icache.ways = 4; + else + c->icache.ways = 2; + c->icache.waybit = 0; + + dcache_size = 1 << (12 + ((config & CONF_DC) >> 6)); + c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); + if (prid & 0x3) + c->dcache.ways = 4; + else + c->dcache.ways = 2; + c->dcache.waybit = 0; + break; + + case CPU_LOONGSON3: + config1 = read_c0_config1(); + lsize = (config1 >> 19) & 7; + if (lsize) + c->icache.linesz = 2 << lsize; + else + c->icache.linesz = 0; + c->icache.sets = 64 << ((config1 >> 22) & 7); + c->icache.ways = 1 + ((config1 >> 16) & 7); + icache_size = c->icache.sets * + c->icache.ways * + c->icache.linesz; + c->icache.waybit = 0; + + lsize = (config1 >> 10) & 7; + if (lsize) + c->dcache.linesz = 2 << lsize; + else + c->dcache.linesz = 0; + c->dcache.sets = 64 << ((config1 >> 13) & 7); + c->dcache.ways = 1 + ((config1 >> 7) & 7); + dcache_size = c->dcache.sets * + c->dcache.ways * + c->dcache.linesz; + c->dcache.waybit = 0; + break; + + case CPU_CAVIUM_OCTEON3: + /* For now lie about the number of ways. */ + c->icache.linesz = 128; + c->icache.sets = 16; + c->icache.ways = 8; + c->icache.flags |= MIPS_CACHE_VTAG; + icache_size = c->icache.sets * c->icache.ways * c->icache.linesz; + + c->dcache.linesz = 128; + c->dcache.ways = 8; + c->dcache.sets = 8; + dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz; c->options |= MIPS_CPU_PREFETCH; break; @@ -962,17 +1143,21 @@ static void __init probe_pcache(void) */ config1 = read_c0_config1(); - if ((lsize = ((config1 >> 19) & 7))) - c->icache.linesz = 2 << lsize; - else - c->icache.linesz = lsize; - c->icache.sets = 64 << ((config1 >> 22) & 7); + lsize = (config1 >> 19) & 7; + + /* IL == 7 is reserved */ + if (lsize == 7) + panic("Invalid icache line size"); + + c->icache.linesz = lsize ? 2 << lsize : 0; + + c->icache.sets = 32 << (((config1 >> 22) + 1) & 7); c->icache.ways = 1 + ((config1 >> 16) & 7); icache_size = c->icache.sets * - c->icache.ways * - c->icache.linesz; - c->icache.waybit = ffs(icache_size/c->icache.ways) - 1; + c->icache.ways * + c->icache.linesz; + c->icache.waybit = __ffs(icache_size/c->icache.ways); if (config & 0x8) /* VI bit */ c->icache.flags |= MIPS_CACHE_VTAG; @@ -982,17 +1167,21 @@ static void __init probe_pcache(void) */ c->dcache.flags = 0; - if ((lsize = ((config1 >> 10) & 7))) - c->dcache.linesz = 2 << lsize; - else - c->dcache.linesz= lsize; - c->dcache.sets = 64 << ((config1 >> 13) & 7); + lsize = (config1 >> 10) & 7; + + /* DL == 7 is reserved */ + if (lsize == 7) + panic("Invalid dcache line size"); + + c->dcache.linesz = lsize ? 2 << lsize : 0; + + c->dcache.sets = 32 << (((config1 >> 13) + 1) & 7); c->dcache.ways = 1 + ((config1 >> 7) & 7); dcache_size = c->dcache.sets * - c->dcache.ways * - c->dcache.linesz; - c->dcache.waybit = ffs(dcache_size/c->dcache.ways) - 1; + c->dcache.ways * + c->dcache.linesz; + c->dcache.waybit = __ffs(dcache_size/c->dcache.ways); c->options |= MIPS_CPU_PREFETCH; break; @@ -1000,13 +1189,14 @@ static void __init probe_pcache(void) /* * Processor configuration sanity check for the R4000SC erratum - * #5. With page sizes larger than 32kB there is no possibility + * #5. With page sizes larger than 32kB there is no possibility * to get a VCE exception anymore so we don't care about this * misconfiguration. The case is rather theoretical anyway; * presumably no vendor is shipping his hardware in the "bad" * configuration. */ - if ((prid & 0xff00) == PRID_IMP_R4000 && (prid & 0xff) < 0x40 && + if ((prid & PRID_IMP_MASK) == PRID_IMP_R4000 && + (prid & PRID_REV_MASK) < PRID_REV_R4400 && !(config & CONF_SC) && c->icache.linesz != 16 && PAGE_SIZE <= 0x8000) panic("Improper R4000SC processor configuration detected"); @@ -1015,8 +1205,10 @@ static void __init probe_pcache(void) c->icache.waysize = icache_size / c->icache.ways; c->dcache.waysize = dcache_size / c->dcache.ways; - c->icache.sets = icache_size / (c->icache.linesz * c->icache.ways); - c->dcache.sets = dcache_size / (c->dcache.linesz * c->dcache.ways); + c->icache.sets = c->icache.linesz ? + icache_size / (c->icache.linesz * c->icache.ways) : 0; + c->dcache.sets = c->dcache.linesz ? + dcache_size / (c->dcache.linesz * c->dcache.ways) : 0; /* * R10000 and R12000 P-caches are odd in a positive way. They're 32kB @@ -1024,21 +1216,50 @@ static void __init probe_pcache(void) * normally they'd suffer from aliases but magic in the hardware deals * with that for us so we don't need to take care ourselves. */ - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_20KC: case CPU_25KF: + case CPU_SB1: + case CPU_SB1A: + case CPU_XLR: + c->dcache.flags |= MIPS_CACHE_PINDEX; + break; + case CPU_R10000: case CPU_R12000: - case CPU_SB1: + case CPU_R14000: break; + + case CPU_M14KC: + case CPU_M14KEC: case CPU_24K: - if (!(read_c0_config7() & (1 << 16))) + case CPU_34K: + case CPU_74K: + case CPU_1004K: + case CPU_1074K: + case CPU_INTERAPTIV: + case CPU_P5600: + case CPU_PROAPTIV: + case CPU_M5150: + if ((c->cputype == CPU_74K) || (c->cputype == CPU_1074K)) + alias_74k_erratum(c); + if (!(read_c0_config7() & MIPS_CONF7_IAR) && + (c->icache.waysize > PAGE_SIZE)) + c->icache.flags |= MIPS_CACHE_ALIASES; + if (read_c0_config7() & MIPS_CONF7_AR) { + /* + * Effectively physically indexed dcache, + * thus no virtual aliases. + */ + c->dcache.flags |= MIPS_CACHE_PINDEX; + break; + } default: - if (c->dcache.waysize > PAGE_SIZE) - c->dcache.flags |= MIPS_CACHE_ALIASES; + if (c->dcache.waysize > PAGE_SIZE) + c->dcache.flags |= MIPS_CACHE_ALIASES; } - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_20KC: /* * Some older 20Kc chips doesn't have the 'VI' bit in @@ -1047,22 +1268,29 @@ static void __init probe_pcache(void) c->icache.flags |= MIPS_CACHE_VTAG; break; - case CPU_AU1000: - case CPU_AU1500: - case CPU_AU1100: - case CPU_AU1550: - case CPU_AU1200: + case CPU_ALCHEMY: c->icache.flags |= MIPS_CACHE_IC_F_DC; break; + + case CPU_LOONGSON2: + /* + * LOONGSON2 has 4 way icache, but when using indexed cache op, + * one op will act on all 4 ways + */ + c->icache.ways = 1; } printk("Primary instruction cache %ldkB, %s, %s, linesize %d bytes.\n", icache_size >> 10, - cpu_has_vtag_icache ? "virtually tagged" : "physically tagged", + c->icache.flags & MIPS_CACHE_VTAG ? "VIVT" : "VIPT", way_string[c->icache.ways], c->icache.linesz); - printk("Primary data cache %ldkB, %s, linesize %d bytes.\n", - dcache_size >> 10, way_string[c->dcache.ways], c->dcache.linesz); + printk("Primary data cache %ldkB, %s, %s, %s, linesize %d bytes\n", + dcache_size >> 10, way_string[c->dcache.ways], + (c->dcache.flags & MIPS_CACHE_PINDEX) ? "PIPT" : "VIPT", + (c->dcache.flags & MIPS_CACHE_ALIASES) ? + "cache aliases" : "no aliases", + c->dcache.linesz); } /* @@ -1071,18 +1299,16 @@ static void __init probe_pcache(void) * executes in KSEG1 space or else you will crash and burn badly. You have * been warned. */ -static int __init probe_scache(void) +static int probe_scache(void) { - extern unsigned long stext; unsigned long flags, addr, begin, end, pow2; unsigned int config = read_c0_config(); struct cpuinfo_mips *c = ¤t_cpu_data; - int tmp; if (config & CONF_SC) return 0; - begin = (unsigned long) &stext; + begin = (unsigned long) &_stext; begin &= ~((4 * 1024 * 1024) - 1); end = begin + (4 * 1024 * 1024); @@ -1110,7 +1336,6 @@ static int __init probe_scache(void) /* Now search for the wrap around point. */ pow2 = (128 * 1024); - tmp = 0; for (addr = begin + (128 * 1024); addr < end; addr = begin + pow2) { cache_op(Index_Load_Tag_SD, addr); __asm__ __volatile__("nop; nop; nop; nop;"); /* hazard... */ @@ -1129,10 +1354,54 @@ static int __init probe_scache(void) return 1; } +static void __init loongson2_sc_init(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + + scache_size = 512*1024; + c->scache.linesz = 32; + c->scache.ways = 4; + c->scache.waybit = 0; + c->scache.waysize = scache_size / (c->scache.ways); + c->scache.sets = scache_size / (c->scache.linesz * c->scache.ways); + pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n", + scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); + + c->options |= MIPS_CPU_INCLUSIVE_CACHES; +} + +static void __init loongson3_sc_init(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + unsigned int config2, lsize; + + config2 = read_c0_config2(); + lsize = (config2 >> 4) & 15; + if (lsize) + c->scache.linesz = 2 << lsize; + else + c->scache.linesz = 0; + c->scache.sets = 64 << ((config2 >> 8) & 15); + c->scache.ways = 1 + (config2 & 15); + + scache_size = c->scache.sets * + c->scache.ways * + c->scache.linesz; + /* Loongson-3 has 4 cores, 1MB scache for each. scaches are shared */ + scache_size *= 4; + c->scache.waybit = 0; + pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n", + scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); + if (scache_size) + c->options |= MIPS_CPU_INCLUSIVE_CACHES; + return; +} + extern int r5k_sc_init(void); extern int rm7k_sc_init(void); +extern int mips_sc_init(void); -static void __init setup_scache(void) +static void setup_scache(void) { struct cpuinfo_mips *c = ¤t_cpu_data; unsigned int config = read_c0_config(); @@ -1141,9 +1410,9 @@ static void __init setup_scache(void) /* * Do the probing thing on R4000SC and R4400SC processors. Other * processors don't have a S-cache that would be relevant to the - * Linux memory managment. + * Linux memory management. */ - switch (c->cputype) { + switch (current_cpu_type()) { case CPU_R4000SC: case CPU_R4000MC: case CPU_R4400SC: @@ -1155,6 +1424,7 @@ static void __init setup_scache(void) case CPU_R10000: case CPU_R12000: + case CPU_R14000: scache_size = 0x80000 << ((config & R10K_CONF_SS) >> 16); c->scache.linesz = 64 << ((config >> 13) & 1); c->scache.ways = 2; @@ -1167,27 +1437,49 @@ static void __init setup_scache(void) #ifdef CONFIG_R5000_CPU_SCACHE r5k_sc_init(); #endif - return; + return; case CPU_RM7000: - case CPU_RM9000: #ifdef CONFIG_RM7000_CPU_SCACHE rm7k_sc_init(); #endif return; + case CPU_LOONGSON2: + loongson2_sc_init(); + return; + + case CPU_LOONGSON3: + loongson3_sc_init(); + return; + + case CPU_CAVIUM_OCTEON3: + case CPU_XLP: + /* don't need to worry about L2, fully coherent */ + return; + default: + if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 | + MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2)) { +#ifdef CONFIG_MIPS_CPU_SCACHE + if (mips_sc_init ()) { + scache_size = c->scache.ways * c->scache.sets * c->scache.linesz; + printk("MIPS secondary cache %ldkB, %s, linesize %d bytes.\n", + scache_size >> 10, + way_string[c->scache.ways], c->scache.linesz); + } +#else + if (!(c->scache.flags & MIPS_CACHE_NOT_PRESENT)) + panic("Dunno how to handle MIPS32 / MIPS64 second level cache"); +#endif + return; + } sc_present = 0; } if (!sc_present) return; - if ((c->isa_level == MIPS_CPU_ISA_M32 || - c->isa_level == MIPS_CPU_ISA_M64) && - !(c->scache.flags & MIPS_CACHE_NOT_PRESENT)) - panic("Dunno how to handle MIPS32 / MIPS64 second level cache"); - /* compute a couple of other cache variables */ c->scache.waysize = scache_size / c->scache.ways; @@ -1196,12 +1488,75 @@ static void __init setup_scache(void) printk("Unified secondary cache %ldkB %s, linesize %d bytes.\n", scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); - c->options |= MIPS_CPU_SUBSET_CACHES; + c->options |= MIPS_CPU_INCLUSIVE_CACHES; +} + +void au1x00_fixup_config_od(void) +{ + /* + * c0_config.od (bit 19) was write only (and read as 0) + * on the early revisions of Alchemy SOCs. It disables the bus + * transaction overlapping and needs to be set to fix various errata. + */ + switch (read_c0_prid()) { + case 0x00030100: /* Au1000 DA */ + case 0x00030201: /* Au1000 HA */ + case 0x00030202: /* Au1000 HB */ + case 0x01030200: /* Au1500 AB */ + /* + * Au1100 errata actually keeps silence about this bit, so we set it + * just in case for those revisions that require it to be set according + * to the (now gone) cpu table. + */ + case 0x02030200: /* Au1100 AB */ + case 0x02030201: /* Au1100 BA */ + case 0x02030202: /* Au1100 BC */ + set_c0_config(1 << 19); + break; + } } -static inline void coherency_setup(void) +/* CP0 hazard avoidance. */ +#define NXP_BARRIER() \ + __asm__ __volatile__( \ + ".set noreorder\n\t" \ + "nop; nop; nop; nop; nop; nop;\n\t" \ + ".set reorder\n\t") + +static void nxp_pr4450_fixup_config(void) { - change_c0_config(CONF_CM_CMASK, CONF_CM_DEFAULT); + unsigned long config0; + + config0 = read_c0_config(); + + /* clear all three cache coherency fields */ + config0 &= ~(0x7 | (7 << 25) | (7 << 28)); + config0 |= (((_page_cachable_default >> _CACHE_SHIFT) << 0) | + ((_page_cachable_default >> _CACHE_SHIFT) << 25) | + ((_page_cachable_default >> _CACHE_SHIFT) << 28)); + write_c0_config(config0); + NXP_BARRIER(); +} + +static int cca = -1; + +static int __init cca_setup(char *str) +{ + get_option(&str, &cca); + + return 0; +} + +early_param("cca", cca_setup); + +static void coherency_setup(void) +{ + if (cca < 0 || cca > 7) + cca = read_c0_config() & CONF_CM_CMASK; + _page_cachable_default = cca << _CACHE_SHIFT; + + pr_debug("Using cache attribute %d\n", cca); + change_c0_config(CONF_CM_CMASK, cca); /* * c0_status.cu=0 specifies that updates by the sc instruction use @@ -1210,7 +1565,7 @@ static inline void coherency_setup(void) * this bit and; some wire it to zero, others like Toshiba had the * silly idea of putting something else there ... */ - switch (current_cpu_data.cputype) { + switch (current_cpu_type()) { case CPU_R4000PC: case CPU_R4000SC: case CPU_R4000MC: @@ -1219,19 +1574,44 @@ static inline void coherency_setup(void) case CPU_R4400MC: clear_c0_config(CONF_CU); break; + /* + * We need to catch the early Alchemy SOCs with + * the write-only co_config.od bit and set it back to one on: + * Au1000 rev DA, HA, HB; Au1100 AB, BA, BC, Au1500 AB + */ + case CPU_ALCHEMY: + au1x00_fixup_config_od(); + break; + + case PRID_IMP_PR4450: + nxp_pr4450_fixup_config(); + break; + } +} + +static void r4k_cache_error_setup(void) +{ + extern char __weak except_vec2_generic; + extern char __weak except_vec2_sb1; + + switch (current_cpu_type()) { + case CPU_SB1: + case CPU_SB1A: + set_uncached_handler(0x100, &except_vec2_sb1, 0x80); + break; + + default: + set_uncached_handler(0x100, &except_vec2_generic, 0x80); + break; } } -void __init r4k_cache_init(void) +void r4k_cache_init(void) { extern void build_clear_page(void); extern void build_copy_page(void); - extern char except_vec2_generic; struct cpuinfo_mips *c = ¤t_cpu_data; - /* Default cache error handler for R4000 and R5000 family */ - set_uncached_handler (0x100, &except_vec2_generic, 0x80); - probe_pcache(); setup_scache(); @@ -1244,36 +1624,86 @@ void __init r4k_cache_init(void) r4k_blast_scache_page_setup(); r4k_blast_scache_page_indexed_setup(); r4k_blast_scache_setup(); +#ifdef CONFIG_EVA + r4k_blast_dcache_user_page_setup(); + r4k_blast_icache_user_page_setup(); +#endif /* * Some MIPS32 and MIPS64 processors have physically indexed caches. * This code supports virtually indexed processors and will be * unnecessarily inefficient on physically indexed processors. */ - shm_align_mask = max_t( unsigned long, - c->dcache.sets * c->dcache.linesz - 1, - PAGE_SIZE - 1); + if (c->dcache.linesz) + shm_align_mask = max_t( unsigned long, + c->dcache.sets * c->dcache.linesz - 1, + PAGE_SIZE - 1); + else + shm_align_mask = PAGE_SIZE-1; - flush_cache_all = r4k_flush_cache_all; + __flush_cache_vmap = r4k__flush_cache_vmap; + __flush_cache_vunmap = r4k__flush_cache_vunmap; + + flush_cache_all = cache_noop; __flush_cache_all = r4k___flush_cache_all; flush_cache_mm = r4k_flush_cache_mm; flush_cache_page = r4k_flush_cache_page; - flush_icache_page = r4k_flush_icache_page; flush_cache_range = r4k_flush_cache_range; + __flush_kernel_vmap_range = r4k_flush_kernel_vmap_range; + flush_cache_sigtramp = r4k_flush_cache_sigtramp; flush_icache_all = r4k_flush_icache_all; + local_flush_data_cache_page = local_r4k_flush_data_cache_page; flush_data_cache_page = r4k_flush_data_cache_page; flush_icache_range = r4k_flush_icache_range; + local_flush_icache_range = local_r4k_flush_icache_range; -#ifdef CONFIG_DMA_NONCOHERENT - _dma_cache_wback_inv = r4k_dma_cache_wback_inv; - _dma_cache_wback = r4k_dma_cache_wback_inv; - _dma_cache_inv = r4k_dma_cache_inv; +#if defined(CONFIG_DMA_NONCOHERENT) || defined(CONFIG_DMA_MAYBE_COHERENT) + if (coherentio) { + _dma_cache_wback_inv = (void *)cache_noop; + _dma_cache_wback = (void *)cache_noop; + _dma_cache_inv = (void *)cache_noop; + } else { + _dma_cache_wback_inv = r4k_dma_cache_wback_inv; + _dma_cache_wback = r4k_dma_cache_wback_inv; + _dma_cache_inv = r4k_dma_cache_inv; + } #endif build_clear_page(); build_copy_page(); + + /* + * We want to run CMP kernels on core with and without coherent + * caches. Therefore, do not use CONFIG_MIPS_CMP to decide whether + * or not to flush caches. + */ local_r4k___flush_cache_all(NULL); + coherency_setup(); + board_cache_error_setup = r4k_cache_error_setup; +} + +static int r4k_cache_pm_notifier(struct notifier_block *self, unsigned long cmd, + void *v) +{ + switch (cmd) { + case CPU_PM_ENTER_FAILED: + case CPU_PM_EXIT: + coherency_setup(); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block r4k_cache_pm_notifier_block = { + .notifier_call = r4k_cache_pm_notifier, +}; + +int __init r4k_cache_init_pm(void) +{ + return cpu_pm_register_notifier(&r4k_cache_pm_notifier_block); } +arch_initcall(r4k_cache_init_pm); diff --git a/arch/mips/mm/c-sb1.c b/arch/mips/mm/c-sb1.c deleted file mode 100644 index 2f08b535f20..00000000000 --- a/arch/mips/mm/c-sb1.c +++ /dev/null @@ -1,556 +0,0 @@ -/* - * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) - * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org) - * Copyright (C) 2000, 2001, 2002, 2003 Broadcom Corporation - * Copyright (C) 2004 Maciej W. Rozycki - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#include <linux/config.h> -#include <linux/init.h> - -#include <asm/asm.h> -#include <asm/bootinfo.h> -#include <asm/cacheops.h> -#include <asm/cpu.h> -#include <asm/mipsregs.h> -#include <asm/mmu_context.h> -#include <asm/uaccess.h> - -extern void sb1_dma_init(void); - -/* These are probed at ld_mmu time */ -static unsigned long icache_size; -static unsigned long dcache_size; - -static unsigned short icache_line_size; -static unsigned short dcache_line_size; - -static unsigned int icache_index_mask; -static unsigned int dcache_index_mask; - -static unsigned short icache_assoc; -static unsigned short dcache_assoc; - -static unsigned short icache_sets; -static unsigned short dcache_sets; - -static unsigned int icache_range_cutoff; -static unsigned int dcache_range_cutoff; - -/* - * The dcache is fully coherent to the system, with one - * big caveat: the instruction stream. In other words, - * if we miss in the icache, and have dirty data in the - * L1 dcache, then we'll go out to memory (or the L2) and - * get the not-as-recent data. - * - * So the only time we have to flush the dcache is when - * we're flushing the icache. Since the L2 is fully - * coherent to everything, including I/O, we never have - * to flush it - */ - -#define cache_set_op(op, addr) \ - __asm__ __volatile__( \ - " .set noreorder \n" \ - " .set mips64\n\t \n" \ - " cache %0, (0<<13)(%1) \n" \ - " cache %0, (1<<13)(%1) \n" \ - " cache %0, (2<<13)(%1) \n" \ - " cache %0, (3<<13)(%1) \n" \ - " .set mips0 \n" \ - " .set reorder" \ - : \ - : "i" (op), "r" (addr)) - -#define sync() \ - __asm__ __volatile( \ - " .set mips64\n\t \n" \ - " sync \n" \ - " .set mips0") - -#define mispredict() \ - __asm__ __volatile__( \ - " bnezl $0, 1f \n" /* Force mispredict */ \ - "1: \n"); - -/* - * Writeback and invalidate the entire dcache - */ -static inline void __sb1_writeback_inv_dcache_all(void) -{ - unsigned long addr = 0; - - while (addr < dcache_line_size * dcache_sets) { - cache_set_op(Index_Writeback_Inv_D, addr); - addr += dcache_line_size; - } -} - -/* - * Writeback and invalidate a range of the dcache. The addresses are - * virtual, and since we're using index ops and bit 12 is part of both - * the virtual frame and physical index, we have to clear both sets - * (bit 12 set and cleared). - */ -static inline void __sb1_writeback_inv_dcache_range(unsigned long start, - unsigned long end) -{ - unsigned long index; - - start &= ~(dcache_line_size - 1); - end = (end + dcache_line_size - 1) & ~(dcache_line_size - 1); - - while (start != end) { - index = start & dcache_index_mask; - cache_set_op(Index_Writeback_Inv_D, index); - cache_set_op(Index_Writeback_Inv_D, index ^ (1<<12)); - start += dcache_line_size; - } - sync(); -} - -/* - * Writeback and invalidate a range of the dcache. With physical - * addresseses, we don't have to worry about possible bit 12 aliasing. - * XXXKW is it worth turning on KX and using hit ops with xkphys? - */ -static inline void __sb1_writeback_inv_dcache_phys_range(unsigned long start, - unsigned long end) -{ - start &= ~(dcache_line_size - 1); - end = (end + dcache_line_size - 1) & ~(dcache_line_size - 1); - - while (start != end) { - cache_set_op(Index_Writeback_Inv_D, start & dcache_index_mask); - start += dcache_line_size; - } - sync(); -} - - -/* - * Invalidate the entire icache - */ -static inline void __sb1_flush_icache_all(void) -{ - unsigned long addr = 0; - - while (addr < icache_line_size * icache_sets) { - cache_set_op(Index_Invalidate_I, addr); - addr += icache_line_size; - } -} - -/* - * Flush the icache for a given physical page. Need to writeback the - * dcache first, then invalidate the icache. If the page isn't - * executable, nothing is required. - */ -static void local_sb1_flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) -{ - int cpu = smp_processor_id(); - -#ifndef CONFIG_SMP - if (!(vma->vm_flags & VM_EXEC)) - return; -#endif - - __sb1_writeback_inv_dcache_range(addr, addr + PAGE_SIZE); - - /* - * Bumping the ASID is probably cheaper than the flush ... - */ - if (cpu_context(cpu, vma->vm_mm) != 0) - drop_mmu_context(vma->vm_mm, cpu); -} - -#ifdef CONFIG_SMP -struct flush_cache_page_args { - struct vm_area_struct *vma; - unsigned long addr; - unsigned long pfn; -}; - -static void sb1_flush_cache_page_ipi(void *info) -{ - struct flush_cache_page_args *args = info; - - local_sb1_flush_cache_page(args->vma, args->addr, args->pfn); -} - -/* Dirty dcache could be on another CPU, so do the IPIs */ -static void sb1_flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) -{ - struct flush_cache_page_args args; - - if (!(vma->vm_flags & VM_EXEC)) - return; - - addr &= PAGE_MASK; - args.vma = vma; - args.addr = addr; - args.pfn = pfn; - on_each_cpu(sb1_flush_cache_page_ipi, (void *) &args, 1, 1); -} -#else -void sb1_flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) - __attribute__((alias("local_sb1_flush_cache_page"))); -#endif - -/* - * Invalidate a range of the icache. The addresses are virtual, and - * the cache is virtually indexed and tagged. However, we don't - * necessarily have the right ASID context, so use index ops instead - * of hit ops. - */ -static inline void __sb1_flush_icache_range(unsigned long start, - unsigned long end) -{ - start &= ~(icache_line_size - 1); - end = (end + icache_line_size - 1) & ~(icache_line_size - 1); - - while (start != end) { - cache_set_op(Index_Invalidate_I, start & icache_index_mask); - start += icache_line_size; - } - mispredict(); - sync(); -} - - -/* - * Invalidate all caches on this CPU - */ -static void __attribute_used__ local_sb1___flush_cache_all(void) -{ - __sb1_writeback_inv_dcache_all(); - __sb1_flush_icache_all(); -} - -#ifdef CONFIG_SMP -void sb1___flush_cache_all_ipi(void *ignored) - __attribute__((alias("local_sb1___flush_cache_all"))); - -static void sb1___flush_cache_all(void) -{ - on_each_cpu(sb1___flush_cache_all_ipi, 0, 1, 1); -} -#else -void sb1___flush_cache_all(void) - __attribute__((alias("local_sb1___flush_cache_all"))); -#endif - -/* - * When flushing a range in the icache, we have to first writeback - * the dcache for the same range, so new ifetches will see any - * data that was dirty in the dcache. - * - * The start/end arguments are Kseg addresses (possibly mapped Kseg). - */ - -static void local_sb1_flush_icache_range(unsigned long start, - unsigned long end) -{ - /* Just wb-inv the whole dcache if the range is big enough */ - if ((end - start) > dcache_range_cutoff) - __sb1_writeback_inv_dcache_all(); - else - __sb1_writeback_inv_dcache_range(start, end); - - /* Just flush the whole icache if the range is big enough */ - if ((end - start) > icache_range_cutoff) - __sb1_flush_icache_all(); - else - __sb1_flush_icache_range(start, end); -} - -#ifdef CONFIG_SMP -struct flush_icache_range_args { - unsigned long start; - unsigned long end; -}; - -static void sb1_flush_icache_range_ipi(void *info) -{ - struct flush_icache_range_args *args = info; - - local_sb1_flush_icache_range(args->start, args->end); -} - -void sb1_flush_icache_range(unsigned long start, unsigned long end) -{ - struct flush_icache_range_args args; - - args.start = start; - args.end = end; - on_each_cpu(sb1_flush_icache_range_ipi, &args, 1, 1); -} -#else -void sb1_flush_icache_range(unsigned long start, unsigned long end) - __attribute__((alias("local_sb1_flush_icache_range"))); -#endif - -/* - * Flush the icache for a given physical page. Need to writeback the - * dcache first, then invalidate the icache. If the page isn't - * executable, nothing is required. - */ -static void local_sb1_flush_icache_page(struct vm_area_struct *vma, - struct page *page) -{ - unsigned long start; - int cpu = smp_processor_id(); - -#ifndef CONFIG_SMP - if (!(vma->vm_flags & VM_EXEC)) - return; -#endif - - /* Need to writeback any dirty data for that page, we have the PA */ - start = (unsigned long)(page-mem_map) << PAGE_SHIFT; - __sb1_writeback_inv_dcache_phys_range(start, start + PAGE_SIZE); - /* - * If there's a context, bump the ASID (cheaper than a flush, - * since we don't know VAs!) - */ - if (cpu_context(cpu, vma->vm_mm) != 0) { - drop_mmu_context(vma->vm_mm, cpu); - } -} - -#ifdef CONFIG_SMP -struct flush_icache_page_args { - struct vm_area_struct *vma; - struct page *page; -}; - -static void sb1_flush_icache_page_ipi(void *info) -{ - struct flush_icache_page_args *args = info; - local_sb1_flush_icache_page(args->vma, args->page); -} - -/* Dirty dcache could be on another CPU, so do the IPIs */ -static void sb1_flush_icache_page(struct vm_area_struct *vma, - struct page *page) -{ - struct flush_icache_page_args args; - - if (!(vma->vm_flags & VM_EXEC)) - return; - args.vma = vma; - args.page = page; - on_each_cpu(sb1_flush_icache_page_ipi, (void *) &args, 1, 1); -} -#else -void sb1_flush_icache_page(struct vm_area_struct *vma, struct page *page) - __attribute__((alias("local_sb1_flush_icache_page"))); -#endif - -/* - * A signal trampoline must fit into a single cacheline. - */ -static void local_sb1_flush_cache_sigtramp(unsigned long addr) -{ - cache_set_op(Index_Writeback_Inv_D, addr & dcache_index_mask); - cache_set_op(Index_Writeback_Inv_D, (addr ^ (1<<12)) & dcache_index_mask); - cache_set_op(Index_Invalidate_I, addr & icache_index_mask); - mispredict(); -} - -#ifdef CONFIG_SMP -static void sb1_flush_cache_sigtramp_ipi(void *info) -{ - unsigned long iaddr = (unsigned long) info; - local_sb1_flush_cache_sigtramp(iaddr); -} - -static void sb1_flush_cache_sigtramp(unsigned long addr) -{ - on_each_cpu(sb1_flush_cache_sigtramp_ipi, (void *) addr, 1, 1); -} -#else -void sb1_flush_cache_sigtramp(unsigned long addr) - __attribute__((alias("local_sb1_flush_cache_sigtramp"))); -#endif - - -/* - * Anything that just flushes dcache state can be ignored, as we're always - * coherent in dcache space. This is just a dummy function that all the - * nop'ed routines point to - */ -static void sb1_nop(void) -{ -} - -/* - * Cache set values (from the mips64 spec) - * 0 - 64 - * 1 - 128 - * 2 - 256 - * 3 - 512 - * 4 - 1024 - * 5 - 2048 - * 6 - 4096 - * 7 - Reserved - */ - -static unsigned int decode_cache_sets(unsigned int config_field) -{ - if (config_field == 7) { - /* JDCXXX - Find a graceful way to abort. */ - return 0; - } - return (1<<(config_field + 6)); -} - -/* - * Cache line size values (from the mips64 spec) - * 0 - No cache present. - * 1 - 4 bytes - * 2 - 8 bytes - * 3 - 16 bytes - * 4 - 32 bytes - * 5 - 64 bytes - * 6 - 128 bytes - * 7 - Reserved - */ - -static unsigned int decode_cache_line_size(unsigned int config_field) -{ - if (config_field == 0) { - return 0; - } else if (config_field == 7) { - /* JDCXXX - Find a graceful way to abort. */ - return 0; - } - return (1<<(config_field + 1)); -} - -/* - * Relevant bits of the config1 register format (from the MIPS32/MIPS64 specs) - * - * 24:22 Icache sets per way - * 21:19 Icache line size - * 18:16 Icache Associativity - * 15:13 Dcache sets per way - * 12:10 Dcache line size - * 9:7 Dcache Associativity - */ - -static char *way_string[] = { - "direct mapped", "2-way", "3-way", "4-way", - "5-way", "6-way", "7-way", "8-way", -}; - -static __init void probe_cache_sizes(void) -{ - u32 config1; - - config1 = read_c0_config1(); - icache_line_size = decode_cache_line_size((config1 >> 19) & 0x7); - dcache_line_size = decode_cache_line_size((config1 >> 10) & 0x7); - icache_sets = decode_cache_sets((config1 >> 22) & 0x7); - dcache_sets = decode_cache_sets((config1 >> 13) & 0x7); - icache_assoc = ((config1 >> 16) & 0x7) + 1; - dcache_assoc = ((config1 >> 7) & 0x7) + 1; - icache_size = icache_line_size * icache_sets * icache_assoc; - dcache_size = dcache_line_size * dcache_sets * dcache_assoc; - /* Need to remove non-index bits for index ops */ - icache_index_mask = (icache_sets - 1) * icache_line_size; - dcache_index_mask = (dcache_sets - 1) * dcache_line_size; - /* - * These are for choosing range (index ops) versus all. - * icache flushes all ways for each set, so drop icache_assoc. - * dcache flushes all ways and each setting of bit 12 for each - * index, so drop dcache_assoc and halve the dcache_sets. - */ - icache_range_cutoff = icache_sets * icache_line_size; - dcache_range_cutoff = (dcache_sets / 2) * icache_line_size; - - printk("Primary instruction cache %ldkB, %s, linesize %d bytes.\n", - icache_size >> 10, way_string[icache_assoc - 1], - icache_line_size); - printk("Primary data cache %ldkB, %s, linesize %d bytes.\n", - dcache_size >> 10, way_string[dcache_assoc - 1], - dcache_line_size); -} - -/* - * This is called from cache.c. We have to set up all the - * memory management function pointers, as well as initialize - * the caches and tlbs - */ -void sb1_cache_init(void) -{ - extern char except_vec2_sb1; - extern char handle_vec2_sb1; - - /* Special cache error handler for SB1 */ - set_uncached_handler (0x100, &except_vec2_sb1, 0x80); - - probe_cache_sizes(); - -#ifdef CONFIG_SIBYTE_DMA_PAGEOPS - sb1_dma_init(); -#endif - - /* - * None of these are needed for the SB1 - the Dcache is - * physically indexed and tagged, so no virtual aliasing can - * occur - */ - flush_cache_range = (void *) sb1_nop; - flush_cache_mm = (void (*)(struct mm_struct *))sb1_nop; - flush_cache_all = sb1_nop; - - /* These routines are for Icache coherence with the Dcache */ - flush_icache_range = sb1_flush_icache_range; - flush_icache_page = sb1_flush_icache_page; - flush_icache_all = __sb1_flush_icache_all; /* local only */ - - /* This implies an Icache flush too, so can't be nop'ed */ - flush_cache_page = sb1_flush_cache_page; - - flush_cache_sigtramp = sb1_flush_cache_sigtramp; - flush_data_cache_page = (void *) sb1_nop; - - /* Full flush */ - __flush_cache_all = sb1___flush_cache_all; - - change_c0_config(CONF_CM_CMASK, CONF_CM_DEFAULT); - - /* - * This is the only way to force the update of K0 to complete - * before subsequent instruction fetch. - */ - __asm__ __volatile__( - ".set push \n" - " .set noat \n" - " .set noreorder \n" - " .set mips3 \n" - " " STR(PTR_LA) " $1, 1f \n" - " " STR(MTC0) " $1, $14 \n" - " eret \n" - "1: .set pop" - : - : - : "memory"); - - flush_cache_all(); -} diff --git a/arch/mips/mm/c-tx39.c b/arch/mips/mm/c-tx39.c index 0a97a9434eb..8d909dbbf37 100644 --- a/arch/mips/mm/c-tx39.c +++ b/arch/mips/mm/c-tx39.c @@ -1,7 +1,7 @@ /* * r2300.c: R2000 and R3000 specific mmu/cache code. * - * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * Copyright (C) 1996 David S. Miller (davem@davemloft.net) * * with a lot of changes to make this thing work for R3000s * Tx39XX R4k style caches added. HK @@ -11,13 +11,13 @@ #include <linux/init.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/smp.h> #include <linux/mm.h> #include <asm/cacheops.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/mmu_context.h> -#include <asm/system.h> #include <asm/isadep.h> #include <asm/io.h> #include <asm/bootinfo.h> @@ -33,9 +33,9 @@ extern int r3k_have_wired_reg; /* in r3k-tlb.c */ /* This sequence is required to ensure icache is disabled immediately */ #define TX39_STOP_STREAMING() \ __asm__ __volatile__( \ - ".set push\n\t" \ - ".set noreorder\n\t" \ - "b 1f\n\t" \ + ".set push\n\t" \ + ".set noreorder\n\t" \ + "b 1f\n\t" \ "nop\n\t" \ "1:\n\t" \ ".set pop" \ @@ -44,8 +44,6 @@ __asm__ __volatile__( \ /* TX39H-style cache flush routines. */ static void tx39h_flush_icache_all(void) { - unsigned long start = KSEG0; - unsigned long end = (start + icache_size); unsigned long flags, config; /* disable icache (set ICE#) */ @@ -53,40 +51,25 @@ static void tx39h_flush_icache_all(void) config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); - - /* invalidate icache */ - while (start < end) { - cache16_unroll32(start, Index_Invalidate_I); - start += 0x200; - } - + blast_icache16(); write_c0_conf(config); local_irq_restore(flags); } static void tx39h_dma_cache_wback_inv(unsigned long addr, unsigned long size) { - unsigned long end, a; - unsigned long dc_lsize = current_cpu_data.dcache.linesz; - /* Catch bad driver code */ BUG_ON(size == 0); iob(); - a = addr & ~(dc_lsize - 1); - end = (addr + size - 1) & ~(dc_lsize - 1); - while (1) { - invalidate_dcache_line(a); /* Hit_Invalidate_D */ - if (a == end) break; - a += dc_lsize; - } + blast_inv_dcache_range(addr, addr + size); } /* TX39H2,TX39H3 */ static inline void tx39_blast_dcache_page(unsigned long addr) { - if (current_cpu_data.cputype != CPU_TX3912) + if (current_cpu_type() != CPU_TX3912) blast_dcache16_page(addr); } @@ -139,13 +122,22 @@ static inline void tx39_blast_icache(void) local_irq_restore(flags); } +static void tx39__flush_cache_vmap(void) +{ + tx39_blast_dcache(); +} + +static void tx39__flush_cache_vunmap(void) +{ + tx39_blast_dcache(); +} + static inline void tx39_flush_cache_all(void) { if (!cpu_has_dc_aliases) return; tx39_blast_dcache(); - tx39_blast_icache(); } static inline void tx39___flush_cache_all(void) @@ -159,24 +151,19 @@ static void tx39_flush_cache_mm(struct mm_struct *mm) if (!cpu_has_dc_aliases) return; - if (cpu_context(smp_processor_id(), mm) != 0) { - tx39_flush_cache_all(); - } + if (cpu_context(smp_processor_id(), mm) != 0) + tx39_blast_dcache(); } static void tx39_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - int exec; - + if (!cpu_has_dc_aliases) + return; if (!(cpu_context(smp_processor_id(), vma->vm_mm))) return; - exec = vma->vm_flags & VM_EXEC; - if (cpu_has_dc_aliases || exec) - tx39_blast_dcache(); - if (exec) - tx39_blast_icache(); + tx39_blast_dcache(); } static void tx39_flush_cache_page(struct vm_area_struct *vma, unsigned long page, unsigned long pfn) @@ -227,13 +214,17 @@ static void tx39_flush_cache_page(struct vm_area_struct *vma, unsigned long page * Do indexed flush, too much work to get the (possible) TLB refills * to work correctly. */ - page = (KSEG0 + (page & (dcache_size - 1))); if (cpu_has_dc_aliases || exec) tx39_blast_dcache_page_indexed(page); if (exec) tx39_blast_icache_page_indexed(page); } +static void local_tx39_flush_data_cache_page(void * addr) +{ + tx39_blast_dcache_page((unsigned long)addr); +} + static void tx39_flush_data_cache_page(unsigned long addr) { tx39_blast_dcache_page(addr); @@ -241,77 +232,34 @@ static void tx39_flush_data_cache_page(unsigned long addr) static void tx39_flush_icache_range(unsigned long start, unsigned long end) { - unsigned long dc_lsize = current_cpu_data.dcache.linesz; - unsigned long addr, aend; - if (end - start > dcache_size) tx39_blast_dcache(); - else { - addr = start & ~(dc_lsize - 1); - aend = (end - 1) & ~(dc_lsize - 1); - - while (1) { - /* Hit_Writeback_Inv_D */ - protected_writeback_dcache_line(addr); - if (addr == aend) - break; - addr += dc_lsize; - } - } + else + protected_blast_dcache_range(start, end); if (end - start > icache_size) tx39_blast_icache(); else { unsigned long flags, config; - addr = start & ~(dc_lsize - 1); - aend = (end - 1) & ~(dc_lsize - 1); /* disable icache (set ICE#) */ local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); - while (1) { - /* Hit_Invalidate_I */ - protected_flush_icache_line(addr); - if (addr == aend) - break; - addr += dc_lsize; - } + protected_blast_icache_range(start, end); write_c0_conf(config); local_irq_restore(flags); } } -/* - * Ok, this seriously sucks. We use them to flush a user page but don't - * know the virtual address, so we have to blast away the whole icache - * which is significantly more expensive than the real thing. Otoh we at - * least know the kernel address of the page so we can flush it - * selectivly. - */ -static void tx39_flush_icache_page(struct vm_area_struct *vma, struct page *page) +static void tx39_flush_kernel_vmap_range(unsigned long vaddr, int size) { - unsigned long addr; - /* - * If there's no context yet, or the page isn't executable, no icache - * flush is needed. - */ - if (!(vma->vm_flags & VM_EXEC)) - return; - - addr = (unsigned long) page_address(page); - tx39_blast_dcache_page(addr); - - /* - * We're not sure of the virtual address(es) involved here, so - * we have to flush the entire I-cache. - */ - tx39_blast_icache(); + BUG(); } static void tx39_dma_cache_wback_inv(unsigned long addr, unsigned long size) { - unsigned long end, a; + unsigned long end; if (((size | addr) & (PAGE_SIZE - 1)) == 0) { end = addr + size; @@ -322,20 +270,13 @@ static void tx39_dma_cache_wback_inv(unsigned long addr, unsigned long size) } else if (size > dcache_size) { tx39_blast_dcache(); } else { - unsigned long dc_lsize = current_cpu_data.dcache.linesz; - a = addr & ~(dc_lsize - 1); - end = (addr + size - 1) & ~(dc_lsize - 1); - while (1) { - flush_dcache_line(a); /* Hit_Writeback_Inv_D */ - if (a == end) break; - a += dc_lsize; - } + blast_dcache_range(addr, addr + size); } } static void tx39_dma_cache_inv(unsigned long addr, unsigned long size) { - unsigned long end, a; + unsigned long end; if (((size | addr) & (PAGE_SIZE - 1)) == 0) { end = addr + size; @@ -346,14 +287,7 @@ static void tx39_dma_cache_inv(unsigned long addr, unsigned long size) } else if (size > dcache_size) { tx39_blast_dcache(); } else { - unsigned long dc_lsize = current_cpu_data.dcache.linesz; - a = addr & ~(dc_lsize - 1); - end = (addr + size - 1) & ~(dc_lsize - 1); - while (1) { - invalidate_dcache_line(a); /* Hit_Invalidate_D */ - if (a == end) break; - a += dc_lsize; - } + blast_inv_dcache_range(addr, addr + size); } } @@ -388,7 +322,7 @@ static __init void tx39_probe_cache(void) TX39_CONF_DCS_SHIFT)); current_cpu_data.icache.linesz = 16; - switch (current_cpu_data.cputype) { + switch (current_cpu_type()) { case CPU_TX3912: current_cpu_data.icache.ways = 1; current_cpu_data.dcache.ways = 1; @@ -410,7 +344,7 @@ static __init void tx39_probe_cache(void) } } -void __init tx39_cache_init(void) +void tx39_cache_init(void) { extern void build_clear_page(void); extern void build_copy_page(void); @@ -422,18 +356,21 @@ void __init tx39_cache_init(void) tx39_probe_cache(); - switch (current_cpu_data.cputype) { + switch (current_cpu_type()) { case CPU_TX3912: /* TX39/H core (writethru direct-map cache) */ - flush_cache_all = tx39h_flush_icache_all; + __flush_cache_vmap = tx39__flush_cache_vmap; + __flush_cache_vunmap = tx39__flush_cache_vunmap; + flush_cache_all = tx39h_flush_icache_all; __flush_cache_all = tx39h_flush_icache_all; flush_cache_mm = (void *) tx39h_flush_icache_all; flush_cache_range = (void *) tx39h_flush_icache_all; flush_cache_page = (void *) tx39h_flush_icache_all; - flush_icache_page = (void *) tx39h_flush_icache_all; flush_icache_range = (void *) tx39h_flush_icache_all; + local_flush_icache_range = (void *) tx39h_flush_icache_all; flush_cache_sigtramp = (void *) tx39h_flush_icache_all; + local_flush_data_cache_page = (void *) tx39h_flush_icache_all; flush_data_cache_page = (void *) tx39h_flush_icache_all; _dma_cache_wback_inv = tx39h_dma_cache_wback_inv; @@ -450,15 +387,21 @@ void __init tx39_cache_init(void) write_c0_wired(0); /* set 8 on reset... */ /* board-dependent init code may set WBON */ + __flush_cache_vmap = tx39__flush_cache_vmap; + __flush_cache_vunmap = tx39__flush_cache_vunmap; + flush_cache_all = tx39_flush_cache_all; __flush_cache_all = tx39___flush_cache_all; flush_cache_mm = tx39_flush_cache_mm; flush_cache_range = tx39_flush_cache_range; flush_cache_page = tx39_flush_cache_page; - flush_icache_page = tx39_flush_icache_page; flush_icache_range = tx39_flush_icache_range; + local_flush_icache_range = tx39_flush_icache_range; + + __flush_kernel_vmap_range = tx39_flush_kernel_vmap_range; flush_cache_sigtramp = tx39_flush_cache_sigtramp; + local_flush_data_cache_page = local_tx39_flush_data_cache_page; flush_data_cache_page = tx39_flush_data_cache_page; _dma_cache_wback_inv = tx39_dma_cache_wback_inv; @@ -466,8 +409,8 @@ void __init tx39_cache_init(void) _dma_cache_inv = tx39_dma_cache_inv; shm_align_mask = max_t(unsigned long, - (dcache_size / current_cpu_data.dcache.ways) - 1, - PAGE_SIZE - 1); + (dcache_size / current_cpu_data.dcache.ways) - 1, + PAGE_SIZE - 1); break; } diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index 314701a66b1..f7b91d3a371 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -3,13 +3,16 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1994 - 2003 by Ralf Baechle + * Copyright (C) 1994 - 2003, 06, 07 by Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2007 MIPS Technologies, Inc. */ -#include <linux/config.h> -#include <linux/init.h> +#include <linux/fs.h> +#include <linux/fcntl.h> #include <linux/kernel.h> +#include <linux/linkage.h> #include <linux/module.h> #include <linux/sched.h> +#include <linux/syscalls.h> #include <linux/mm.h> #include <asm/cacheflush.h> @@ -25,18 +28,29 @@ void (*flush_cache_range)(struct vm_area_struct *vma, unsigned long start, unsigned long end); void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page, unsigned long pfn); -void (*flush_icache_range)(unsigned long __user start, - unsigned long __user end); -void (*flush_icache_page)(struct vm_area_struct *vma, struct page *page); +void (*flush_icache_range)(unsigned long start, unsigned long end); +EXPORT_SYMBOL_GPL(flush_icache_range); +void (*local_flush_icache_range)(unsigned long start, unsigned long end); +EXPORT_SYMBOL_GPL(local_flush_icache_range); + +void (*__flush_cache_vmap)(void); +void (*__flush_cache_vunmap)(void); + +void (*__flush_kernel_vmap_range)(unsigned long vaddr, int size); +EXPORT_SYMBOL_GPL(__flush_kernel_vmap_range); +void (*__invalidate_kernel_vmap_range)(unsigned long vaddr, int size); /* MIPS specific cache operations */ void (*flush_cache_sigtramp)(unsigned long addr); +void (*local_flush_data_cache_page)(void * addr); void (*flush_data_cache_page)(unsigned long addr); void (*flush_icache_all)(void); +EXPORT_SYMBOL_GPL(local_flush_data_cache_page); EXPORT_SYMBOL(flush_data_cache_page); +EXPORT_SYMBOL(flush_icache_all); -#ifdef CONFIG_DMA_NONCOHERENT +#if defined(CONFIG_DMA_NONCOHERENT) || defined(CONFIG_DMA_MAYBE_COHERENT) /* DMA cache operations. */ void (*_dma_cache_wback_inv)(unsigned long start, unsigned long size); @@ -44,17 +58,15 @@ void (*_dma_cache_wback)(unsigned long start, unsigned long size); void (*_dma_cache_inv)(unsigned long start, unsigned long size); EXPORT_SYMBOL(_dma_cache_wback_inv); -EXPORT_SYMBOL(_dma_cache_wback); -EXPORT_SYMBOL(_dma_cache_inv); -#endif /* CONFIG_DMA_NONCOHERENT */ +#endif /* CONFIG_DMA_NONCOHERENT || CONFIG_DMA_MAYBE_COHERENT */ /* * We could optimize the case where the cache argument is not BCACHE but * that seems very atypical use ... */ -asmlinkage int sys_cacheflush(unsigned long __user addr, - unsigned long bytes, unsigned int cache) +SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes, + unsigned int, cache) { if (bytes == 0) return 0; @@ -71,6 +83,8 @@ void __flush_dcache_page(struct page *page) struct address_space *mapping = page_mapping(page); unsigned long addr; + if (PageHighMem(page)) + return; if (mapping && !mapping_mapped(mapping)) { SetPageDcacheDirty(page); return; @@ -87,67 +101,128 @@ void __flush_dcache_page(struct page *page) EXPORT_SYMBOL(__flush_dcache_page); +void __flush_anon_page(struct page *page, unsigned long vmaddr) +{ + unsigned long addr = (unsigned long) page_address(page); + + if (pages_do_alias(addr, vmaddr)) { + if (page_mapped(page) && !Page_dcache_dirty(page)) { + void *kaddr; + + kaddr = kmap_coherent(page, vmaddr); + flush_data_cache_page((unsigned long)kaddr); + kunmap_coherent(); + } else + flush_data_cache_page(addr); + } +} + +EXPORT_SYMBOL(__flush_anon_page); + void __update_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte) { struct page *page; unsigned long pfn, addr; + int exec = (vma->vm_flags & VM_EXEC) && !cpu_has_ic_fills_f_dc; pfn = pte_pfn(pte); - if (pfn_valid(pfn) && (page = pfn_to_page(pfn), page_mapping(page)) && - Page_dcache_dirty(page)) { - if (pages_do_alias((unsigned long)page_address(page), - address & PAGE_MASK)) { - addr = (unsigned long) page_address(page); + if (unlikely(!pfn_valid(pfn))) + return; + page = pfn_to_page(pfn); + if (page_mapping(page) && Page_dcache_dirty(page)) { + addr = (unsigned long) page_address(page); + if (exec || pages_do_alias(addr, address & PAGE_MASK)) flush_data_cache_page(addr); - } - ClearPageDcacheDirty(page); } } -#define __weak __attribute__((weak)) +unsigned long _page_cachable_default; +EXPORT_SYMBOL(_page_cachable_default); -static char cache_panic[] __initdata = "Yeee, unsupported cache architecture."; +static inline void setup_protection_map(void) +{ + if (cpu_has_rixi) { + protection_map[0] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[1] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[2] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[3] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[4] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_READ); + protection_map[5] = __pgprot(_page_cachable_default | _PAGE_PRESENT); + protection_map[6] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_READ); + protection_map[7] = __pgprot(_page_cachable_default | _PAGE_PRESENT); + + protection_map[8] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[9] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[10] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | _PAGE_NO_READ); + protection_map[11] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE); + protection_map[12] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_READ); + protection_map[13] = __pgprot(_page_cachable_default | _PAGE_PRESENT); + protection_map[14] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_WRITE | _PAGE_NO_READ); + protection_map[15] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_WRITE); + + } else { + protection_map[0] = PAGE_NONE; + protection_map[1] = PAGE_READONLY; + protection_map[2] = PAGE_COPY; + protection_map[3] = PAGE_COPY; + protection_map[4] = PAGE_READONLY; + protection_map[5] = PAGE_READONLY; + protection_map[6] = PAGE_COPY; + protection_map[7] = PAGE_COPY; + protection_map[8] = PAGE_NONE; + protection_map[9] = PAGE_READONLY; + protection_map[10] = PAGE_SHARED; + protection_map[11] = PAGE_SHARED; + protection_map[12] = PAGE_READONLY; + protection_map[13] = PAGE_READONLY; + protection_map[14] = PAGE_SHARED; + protection_map[15] = PAGE_SHARED; + } +} -void __init cpu_cache_init(void) +void cpu_cache_init(void) { if (cpu_has_3k_cache) { extern void __weak r3k_cache_init(void); r3k_cache_init(); - return; } if (cpu_has_6k_cache) { extern void __weak r6k_cache_init(void); r6k_cache_init(); - return; } if (cpu_has_4k_cache) { extern void __weak r4k_cache_init(void); r4k_cache_init(); - return; } if (cpu_has_8k_cache) { extern void __weak r8k_cache_init(void); r8k_cache_init(); - return; } if (cpu_has_tx39_cache) { extern void __weak tx39_cache_init(void); tx39_cache_init(); - return; } - if (cpu_has_sb1_cache) { - extern void __weak sb1_cache_init(void); - sb1_cache_init(); - return; + if (cpu_has_octeon_cache) { + extern void __weak octeon_cache_init(void); + + octeon_cache_init(); } - panic(cache_panic); + setup_protection_map(); +} + +int __weak __uncached_access(struct file *file, unsigned long addr) +{ + if (file->f_flags & O_DSYNC) + return 1; + + return addr >= __pa(high_memory); } diff --git a/arch/mips/mm/cerr-sb1.c b/arch/mips/mm/cerr-sb1.c index 1cf3c6006cc..ee5c1ff861a 100644 --- a/arch/mips/mm/cerr-sb1.c +++ b/arch/mips/mm/cerr-sb1.c @@ -15,7 +15,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <linux/config.h> #include <linux/sched.h> #include <asm/mipsregs.h> #include <asm/sibyte/sb1250.h> @@ -28,7 +27,7 @@ /* * We'd like to dump the L2_ECC_TAG register on errors, but errata make - * that unsafe... So for now we don't. (BCM1250/BCM112x erratum SOC-48.) + * that unsafe... So for now we don't. (BCM1250/BCM112x erratum SOC-48.) */ #undef DUMP_L2_ECC_TAG_ON_ERROR @@ -49,7 +48,7 @@ #define CP0_CERRI_EXTERNAL (1 << 26) #define CP0_CERRI_IDX_VALID(c) (!((c) & CP0_CERRI_EXTERNAL)) -#define CP0_CERRI_DATA (CP0_CERRI_DATA_PARITY) +#define CP0_CERRI_DATA (CP0_CERRI_DATA_PARITY) #define CP0_CERRD_MULTIPLE (1 << 31) #define CP0_CERRD_TAG_STATE (1 << 30) @@ -57,8 +56,8 @@ #define CP0_CERRD_DATA_SBE (1 << 28) #define CP0_CERRD_DATA_DBE (1 << 27) #define CP0_CERRD_EXTERNAL (1 << 26) -#define CP0_CERRD_LOAD (1 << 25) -#define CP0_CERRD_STORE (1 << 24) +#define CP0_CERRD_LOAD (1 << 25) +#define CP0_CERRD_STORE (1 << 24) #define CP0_CERRD_FILLWB (1 << 23) #define CP0_CERRD_COHERENCY (1 << 22) #define CP0_CERRD_DUPTAG (1 << 21) @@ -70,74 +69,74 @@ (CP0_CERRD_LOAD | CP0_CERRD_STORE | CP0_CERRD_FILLWB | CP0_CERRD_COHERENCY | CP0_CERRD_DUPTAG) #define CP0_CERRD_TYPES \ (CP0_CERRD_TAG_STATE | CP0_CERRD_TAG_ADDRESS | CP0_CERRD_DATA_SBE | CP0_CERRD_DATA_DBE | CP0_CERRD_EXTERNAL) -#define CP0_CERRD_DATA (CP0_CERRD_DATA_SBE | CP0_CERRD_DATA_DBE) +#define CP0_CERRD_DATA (CP0_CERRD_DATA_SBE | CP0_CERRD_DATA_DBE) -static uint32_t extract_ic(unsigned short addr, int data); -static uint32_t extract_dc(unsigned short addr, int data); +static uint32_t extract_ic(unsigned short addr, int data); +static uint32_t extract_dc(unsigned short addr, int data); static inline void breakout_errctl(unsigned int val) { if (val & CP0_ERRCTL_RECOVERABLE) - prom_printf(" recoverable"); + printk(" recoverable"); if (val & CP0_ERRCTL_DCACHE) - prom_printf(" dcache"); + printk(" dcache"); if (val & CP0_ERRCTL_ICACHE) - prom_printf(" icache"); + printk(" icache"); if (val & CP0_ERRCTL_MULTIBUS) - prom_printf(" multiple-buserr"); - prom_printf("\n"); + printk(" multiple-buserr"); + printk("\n"); } static inline void breakout_cerri(unsigned int val) { if (val & CP0_CERRI_TAG_PARITY) - prom_printf(" tag-parity"); + printk(" tag-parity"); if (val & CP0_CERRI_DATA_PARITY) - prom_printf(" data-parity"); + printk(" data-parity"); if (val & CP0_CERRI_EXTERNAL) - prom_printf(" external"); - prom_printf("\n"); + printk(" external"); + printk("\n"); } static inline void breakout_cerrd(unsigned int val) { switch (val & CP0_CERRD_CAUSES) { case CP0_CERRD_LOAD: - prom_printf(" load,"); + printk(" load,"); break; case CP0_CERRD_STORE: - prom_printf(" store,"); + printk(" store,"); break; case CP0_CERRD_FILLWB: - prom_printf(" fill/wb,"); + printk(" fill/wb,"); break; case CP0_CERRD_COHERENCY: - prom_printf(" coherency,"); + printk(" coherency,"); break; case CP0_CERRD_DUPTAG: - prom_printf(" duptags,"); + printk(" duptags,"); break; default: - prom_printf(" NO CAUSE,"); + printk(" NO CAUSE,"); break; } if (!(val & CP0_CERRD_TYPES)) - prom_printf(" NO TYPE"); + printk(" NO TYPE"); else { if (val & CP0_CERRD_MULTIPLE) - prom_printf(" multi-err"); + printk(" multi-err"); if (val & CP0_CERRD_TAG_STATE) - prom_printf(" tag-state"); + printk(" tag-state"); if (val & CP0_CERRD_TAG_ADDRESS) - prom_printf(" tag-address"); + printk(" tag-address"); if (val & CP0_CERRD_DATA_SBE) - prom_printf(" data-SBE"); + printk(" data-SBE"); if (val & CP0_CERRD_DATA_DBE) - prom_printf(" data-DBE"); + printk(" data-DBE"); if (val & CP0_CERRD_EXTERNAL) - prom_printf(" external"); + printk(" external"); } - prom_printf("\n"); + printk("\n"); } #ifndef CONFIG_SIBYTE_BUS_WATCHER @@ -155,21 +154,21 @@ static void check_bus_watcher(void) if (status & ~(1UL << 31)) { l2_err = csr_in32(IOADDR(A_BUS_L2_ERRORS)); #ifdef DUMP_L2_ECC_TAG_ON_ERROR - l2_tag = in64(IO_SPACE_BASE | A_L2_ECC_TAG); + l2_tag = in64(IOADDR(A_L2_ECC_TAG)); #endif memio_err = csr_in32(IOADDR(A_BUS_MEM_IO_ERRORS)); - prom_printf("Bus watcher error counters: %08x %08x\n", l2_err, memio_err); - prom_printf("\nLast recorded signature:\n"); - prom_printf("Request %02x from %d, answered by %d with Dcode %d\n", + printk("Bus watcher error counters: %08x %08x\n", l2_err, memio_err); + printk("\nLast recorded signature:\n"); + printk("Request %02x from %d, answered by %d with Dcode %d\n", (unsigned int)(G_SCD_BERR_TID(status) & 0x3f), (int)(G_SCD_BERR_TID(status) >> 6), (int)G_SCD_BERR_RID(status), (int)G_SCD_BERR_DCODE(status)); #ifdef DUMP_L2_ECC_TAG_ON_ERROR - prom_printf("Last L2 tag w/ bad ECC: %016llx\n", l2_tag); + printk("Last L2 tag w/ bad ECC: %016llx\n", l2_tag); #endif } else { - prom_printf("Bus watcher indicates no error\n"); + printk("Bus watcher indicates no error\n"); } } #else @@ -178,21 +177,17 @@ extern void check_bus_watcher(void); asmlinkage void sb1_cache_error(void) { - uint64_t cerr_dpa; uint32_t errctl, cerr_i, cerr_d, dpalo, dpahi, eepc, res; + unsigned long long cerr_dpa; #ifdef CONFIG_SIBYTE_BW_TRACE /* Freeze the trace buffer now */ -#if defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80) - csr_out32(M_BCM1480_SCD_TRACE_CFG_FREEZE, IO_SPACE_BASE | A_SCD_TRACE_CFG); -#else - csr_out32(M_SCD_TRACE_CFG_FREEZE, IO_SPACE_BASE | A_SCD_TRACE_CFG); -#endif - prom_printf("Trace buffer frozen\n"); + csr_out32(M_SCD_TRACE_CFG_FREEZE, IOADDR(A_SCD_TRACE_CFG)); + printk("Trace buffer frozen\n"); #endif - prom_printf("Cache error exception on CPU %x:\n", - (read_c0_prid() >> 25) & 0x7); + printk("Cache error exception on CPU %x:\n", + (read_c0_prid() >> 25) & 0x7); __asm__ __volatile__ ( " .set push\n\t" @@ -210,43 +205,43 @@ asmlinkage void sb1_cache_error(void) "=r" (dpahi), "=r" (dpalo), "=r" (eepc)); cerr_dpa = (((uint64_t)dpahi) << 32) | dpalo; - prom_printf(" c0_errorepc == %08x\n", eepc); - prom_printf(" c0_errctl == %08x", errctl); + printk(" c0_errorepc == %08x\n", eepc); + printk(" c0_errctl == %08x", errctl); breakout_errctl(errctl); if (errctl & CP0_ERRCTL_ICACHE) { - prom_printf(" c0_cerr_i == %08x", cerr_i); + printk(" c0_cerr_i == %08x", cerr_i); breakout_cerri(cerr_i); if (CP0_CERRI_IDX_VALID(cerr_i)) { /* Check index of EPC, allowing for delay slot */ if (((eepc & SB1_CACHE_INDEX_MASK) != (cerr_i & SB1_CACHE_INDEX_MASK)) && ((eepc & SB1_CACHE_INDEX_MASK) != ((cerr_i & SB1_CACHE_INDEX_MASK) - 4))) - prom_printf(" cerr_i idx doesn't match eepc\n"); + printk(" cerr_i idx doesn't match eepc\n"); else { res = extract_ic(cerr_i & SB1_CACHE_INDEX_MASK, (cerr_i & CP0_CERRI_DATA) != 0); if (!(res & cerr_i)) - prom_printf("...didn't see indicated icache problem\n"); + printk("...didn't see indicated icache problem\n"); } } } if (errctl & CP0_ERRCTL_DCACHE) { - prom_printf(" c0_cerr_d == %08x", cerr_d); + printk(" c0_cerr_d == %08x", cerr_d); breakout_cerrd(cerr_d); if (CP0_CERRD_DPA_VALID(cerr_d)) { - prom_printf(" c0_cerr_dpa == %010llx\n", cerr_dpa); + printk(" c0_cerr_dpa == %010llx\n", cerr_dpa); if (!CP0_CERRD_IDX_VALID(cerr_d)) { res = extract_dc(cerr_dpa & SB1_CACHE_INDEX_MASK, (cerr_d & CP0_CERRD_DATA) != 0); if (!(res & cerr_d)) - prom_printf("...didn't see indicated dcache problem\n"); + printk("...didn't see indicated dcache problem\n"); } else { if ((cerr_dpa & SB1_CACHE_INDEX_MASK) != (cerr_d & SB1_CACHE_INDEX_MASK)) - prom_printf(" cerr_d idx doesn't match cerr_dpa\n"); + printk(" cerr_d idx doesn't match cerr_dpa\n"); else { res = extract_dc(cerr_d & SB1_CACHE_INDEX_MASK, (cerr_d & CP0_CERRD_DATA) != 0); if (!(res & cerr_d)) - prom_printf("...didn't see indicated problem\n"); + printk("...didn't see indicated problem\n"); } } } @@ -257,7 +252,7 @@ asmlinkage void sb1_cache_error(void) /* * Calling panic() when a fatal cache error occurs scrambles the * state of the system (and the cache), making it difficult to - * investigate after the fact. However, if you just stall the CPU, + * investigate after the fact. However, if you just stall the CPU, * the other CPU may keep on running, which is typically very * undesirable. */ @@ -272,14 +267,22 @@ asmlinkage void sb1_cache_error(void) /* Parity lookup table. */ static const uint8_t parity[256] = { - 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1, - 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, - 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, - 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1, - 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0, - 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1, - 0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1, - 1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0 + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0 }; /* Masks to select bits for Hamming parity, mask_72_64[i] for bit[i] */ @@ -330,12 +333,13 @@ static uint32_t extract_ic(unsigned short addr, int data) { unsigned short way; int valid; - uint64_t taglo, va, tlo_tmp; uint32_t taghi, taglolo, taglohi; + unsigned long long taglo, va; + uint64_t tlo_tmp; uint8_t lru; int res = 0; - prom_printf("Icache index 0x%04x ", addr); + printk("Icache index 0x%04x ", addr); for (way = 0; way < 4; way++) { /* Index-load-tag-I */ __asm__ __volatile__ ( @@ -355,7 +359,7 @@ static uint32_t extract_ic(unsigned short addr, int data) taglo = ((unsigned long long)taglohi << 32) | taglolo; if (way == 0) { lru = (taghi >> 14) & 0xff; - prom_printf("[Bank %d Set 0x%02x] LRU > %d %d %d %d > MRU\n", + printk("[Bank %d Set 0x%02x] LRU > %d %d %d %d > MRU\n", ((addr >> 5) & 0x3), /* bank */ ((addr >> 7) & 0x3f), /* index */ (lru & 0x3), @@ -370,19 +374,19 @@ static uint32_t extract_ic(unsigned short addr, int data) if (valid) { tlo_tmp = taglo & 0xfff3ff; if (((taglo >> 10) & 1) ^ range_parity(tlo_tmp, 23, 0)) { - prom_printf(" ** bad parity in VTag0/G/ASID\n"); + printk(" ** bad parity in VTag0/G/ASID\n"); res |= CP0_CERRI_TAG_PARITY; } if (((taglo >> 11) & 1) ^ range_parity(taglo, 63, 24)) { - prom_printf(" ** bad parity in R/VTag1\n"); + printk(" ** bad parity in R/VTag1\n"); res |= CP0_CERRI_TAG_PARITY; } } if (valid ^ ((taghi >> 27) & 1)) { - prom_printf(" ** bad parity for valid bit\n"); + printk(" ** bad parity for valid bit\n"); res |= CP0_CERRI_TAG_PARITY; } - prom_printf(" %d [VA %016llx] [Vld? %d] raw tags: %08X-%016llX\n", + printk(" %d [VA %016llx] [Vld? %d] raw tags: %08X-%016llX\n", way, va, valid, taghi, taglo); if (data) { @@ -403,26 +407,26 @@ static uint32_t extract_ic(unsigned short addr, int data) " dmfc0 $1, $28, 1\n\t" " dsrl32 %1, $1, 0 \n\t" " sll %2, $1, 0 \n\t" - " .set pop \n" + " .set pop \n" : "=r" (datahi), "=r" (insta), "=r" (instb) : "r" ((way << 13) | addr | (offset << 3))); predecode = (datahi >> 8) & 0xff; if (((datahi >> 16) & 1) != (uint32_t)range_parity(predecode, 7, 0)) { - prom_printf(" ** bad parity in predecode\n"); + printk(" ** bad parity in predecode\n"); res |= CP0_CERRI_DATA_PARITY; } /* XXXKW should/could check predecode bits themselves */ if (((datahi >> 4) & 0xf) ^ inst_parity(insta)) { - prom_printf(" ** bad parity in instruction a\n"); + printk(" ** bad parity in instruction a\n"); res |= CP0_CERRI_DATA_PARITY; } if ((datahi & 0xf) ^ inst_parity(instb)) { - prom_printf(" ** bad parity in instruction b\n"); + printk(" ** bad parity in instruction b\n"); res |= CP0_CERRI_DATA_PARITY; } - prom_printf(" %05X-%08X%08X", datahi, insta, instb); + printk(" %05X-%08X%08X", datahi, insta, instb); } - prom_printf("\n"); + printk("\n"); } } return res; @@ -433,8 +437,8 @@ static uint8_t dc_ecc(uint64_t dword) { uint64_t t; uint32_t w; - uint8_t p; - int i; + uint8_t p; + int i; p = 0; for (i = 7; i >= 0; i--) @@ -485,12 +489,12 @@ static uint32_t extract_dc(unsigned short addr, int data) { int valid, way; unsigned char state; - uint64_t taglo, pa; uint32_t taghi, taglolo, taglohi; + unsigned long long taglo, pa; uint8_t ecc, lru; int res = 0; - prom_printf("Dcache index 0x%04x ", addr); + printk("Dcache index 0x%04x ", addr); for (way = 0; way < 4; way++) { __asm__ __volatile__ ( " .set push\n\t" @@ -510,7 +514,7 @@ static uint32_t extract_dc(unsigned short addr, int data) pa = (taglo & 0xFFFFFFE000ULL) | addr; if (way == 0) { lru = (taghi >> 14) & 0xff; - prom_printf("[Bank %d Set 0x%02x] LRU > %d %d %d %d > MRU\n", + printk("[Bank %d Set 0x%02x] LRU > %d %d %d %d > MRU\n", ((addr >> 11) & 0x2) | ((addr >> 5) & 1), /* bank */ ((addr >> 6) & 0x3f), /* index */ (lru & 0x3), @@ -520,15 +524,15 @@ static uint32_t extract_dc(unsigned short addr, int data) } state = (taghi >> 25) & 0x1f; valid = DC_TAG_VALID(state); - prom_printf(" %d [PA %010llx] [state %s (%02x)] raw tags: %08X-%016llX\n", + printk(" %d [PA %010llx] [state %s (%02x)] raw tags: %08X-%016llX\n", way, pa, dc_state_str(state), state, taghi, taglo); if (valid) { if (((taglo >> 11) & 1) ^ range_parity(taglo, 39, 26)) { - prom_printf(" ** bad parity in PTag1\n"); + printk(" ** bad parity in PTag1\n"); res |= CP0_CERRD_TAG_ADDRESS; } if (((taglo >> 10) & 1) ^ range_parity(taglo, 25, 13)) { - prom_printf(" ** bad parity in PTag0\n"); + printk(" ** bad parity in PTag0\n"); res |= CP0_CERRD_TAG_ADDRESS; } } else { @@ -536,8 +540,8 @@ static uint32_t extract_dc(unsigned short addr, int data) } if (data) { - uint64_t datalo; uint32_t datalohi, datalolo, datahi; + unsigned long long datalo; int offset; char bad_ecc = 0; @@ -559,22 +563,19 @@ static uint32_t extract_dc(unsigned short addr, int data) datalo = ((unsigned long long)datalohi << 32) | datalolo; ecc = dc_ecc(datalo); if (ecc != datahi) { - int bits = 0; + int bits; bad_ecc |= 1 << (3-offset); ecc ^= datahi; - while (ecc) { - if (ecc & 1) bits++; - ecc >>= 1; - } + bits = hweight8(ecc); res |= (bits == 1) ? CP0_CERRD_DATA_SBE : CP0_CERRD_DATA_DBE; } - prom_printf(" %02X-%016llX", datahi, datalo); + printk(" %02X-%016llX", datahi, datalo); } - prom_printf("\n"); + printk("\n"); if (bad_ecc) - prom_printf(" dwords w/ bad ECC: %d %d %d %d\n", - !!(bad_ecc & 8), !!(bad_ecc & 4), - !!(bad_ecc & 2), !!(bad_ecc & 1)); + printk(" dwords w/ bad ECC: %d %d %d %d\n", + !!(bad_ecc & 8), !!(bad_ecc & 4), + !!(bad_ecc & 2), !!(bad_ecc & 1)); } } return res; diff --git a/arch/mips/mm/cex-gen.S b/arch/mips/mm/cex-gen.S index e743622fd24..45dff5cd4b8 100644 --- a/arch/mips/mm/cex-gen.S +++ b/arch/mips/mm/cex-gen.S @@ -14,17 +14,17 @@ #include <asm/stackframe.h> /* - * Game over. Go to the button. Press gently. Swear where allowed by + * Game over. Go to the button. Press gently. Swear where allowed by * legislation. */ LEAF(except_vec2_generic) .set noreorder .set noat - .set mips0 + .set mips0 /* * This is a very bad place to be. Our cache error * detection has triggered. If we have write-back data - * in the cache, we may not be able to recover. As a + * in the cache, we may not be able to recover. As a * first-order desperate measure, turn off KSEG0 cacheing. */ mfc0 k0,CP0_CONFIG diff --git a/arch/mips/mm/cex-oct.S b/arch/mips/mm/cex-oct.S new file mode 100644 index 00000000000..9029092aa74 --- /dev/null +++ b/arch/mips/mm/cex-oct.S @@ -0,0 +1,70 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2006 Cavium Networks + * Cache error handler + */ + +#include <asm/asm.h> +#include <asm/regdef.h> +#include <asm/mipsregs.h> +#include <asm/stackframe.h> + +/* + * Handle cache error. Indicate to the second level handler whether + * the exception is recoverable. + */ + LEAF(except_vec2_octeon) + + .set push + .set mips64r2 + .set noreorder + .set noat + + + /* due to an errata we need to read the COP0 CacheErr (Dcache) + * before any cache/DRAM access */ + + rdhwr k0, $0 /* get core_id */ + PTR_LA k1, cache_err_dcache + sll k0, k0, 3 + PTR_ADDU k1, k0, k1 /* k1 = &cache_err_dcache[core_id] */ + + dmfc0 k0, CP0_CACHEERR, 1 + sd k0, (k1) + dmtc0 $0, CP0_CACHEERR, 1 + + /* check whether this is a nested exception */ + mfc0 k1, CP0_STATUS + andi k1, k1, ST0_EXL + beqz k1, 1f + nop + j cache_parity_error_octeon_non_recoverable + nop + + /* exception is recoverable */ +1: j handle_cache_err + nop + + .set pop + END(except_vec2_octeon) + + /* We need to jump to handle_cache_err so that the previous handler + * can fit within 0x80 bytes. We also move from 0xFFFFFFFFAXXXXXXX + * space (uncached) to the 0xFFFFFFFF8XXXXXXX space (cached). */ + LEAF(handle_cache_err) + .set push + .set noreorder + .set noat + + SAVE_ALL + KMODE + jal cache_parity_error_octeon_recoverable + nop + j ret_from_exception + nop + + .set pop + END(handle_cache_err) diff --git a/arch/mips/mm/cex-sb1.S b/arch/mips/mm/cex-sb1.S index 0e71580774f..5d5f29681a2 100644 --- a/arch/mips/mm/cex-sb1.S +++ b/arch/mips/mm/cex-sb1.S @@ -15,7 +15,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <linux/init.h> #include <asm/asm.h> #include <asm/regdef.h> @@ -24,9 +23,9 @@ #include <asm/cacheops.h> #include <asm/sibyte/board.h> -#define C0_ERRCTL $26 /* CP0: Error info */ -#define C0_CERR_I $27 /* CP0: Icache error */ -#define C0_CERR_D $27,1 /* CP0: Dcache error */ +#define C0_ERRCTL $26 /* CP0: Error info */ +#define C0_CERR_I $27 /* CP0: Icache error */ +#define C0_CERR_D $27,1 /* CP0: Dcache error */ /* * Based on SiByte sample software cache-err/cerr.S @@ -34,8 +33,6 @@ * is changed. */ - __INIT - .set mips64 .set noreorder .set noat @@ -64,7 +61,7 @@ LEAF(except_vec2_sb1) sd k0,0x170($0) sd k1,0x178($0) -#if CONFIG_SB1_CEX_ALWAYS_FATAL +#ifdef CONFIG_SB1_CEX_ALWAYS_FATAL j handle_vec2_sb1 nop #else @@ -79,7 +76,7 @@ LEAF(except_vec2_sb1) recovered_dcache: /* * Unlock CacheErr-D (which in turn unlocks CacheErr-DPA). - * Ought to log the occurence of this recovered dcache error. + * Ought to log the occurrence of this recovered dcache error. */ b recovered mtc0 $0,C0_CERR_D @@ -88,7 +85,7 @@ attempt_recovery: /* * k0 has C0_ERRCTL << 1, which puts 'DC' at bit 31. Any * Dcache errors we can recover from will take more extensive - * processing. For now, they are considered "unrecoverable". + * processing. For now, they are considered "unrecoverable". * Note that 'DC' becoming set (outside of ERL mode) will * cause 'IC' to clear; so if there's an Icache error, we'll * only find out about it if we recover from this error and @@ -142,8 +139,6 @@ unrecoverable: END(except_vec2_sb1) - __FINIT - LEAF(handle_vec2_sb1) mfc0 k0,CP0_CONFIG li k1,~CONF_CM_CMASK diff --git a/arch/mips/mm/dma-coherent.c b/arch/mips/mm/dma-coherent.c deleted file mode 100644 index f6b3c722230..00000000000 --- a/arch/mips/mm/dma-coherent.c +++ /dev/null @@ -1,255 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2000 Ani Joshi <ajoshi@unixbox.com> - * Copyright (C) 2000, 2001 Ralf Baechle <ralf@gnu.org> - * swiped from i386, and cloned for MIPS by Geert, polished by Ralf. - */ -#include <linux/config.h> -#include <linux/types.h> -#include <linux/dma-mapping.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/string.h> - -#include <asm/cache.h> -#include <asm/io.h> - -void *dma_alloc_noncoherent(struct device *dev, size_t size, - dma_addr_t * dma_handle, gfp_t gfp) -{ - void *ret; - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); - - if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) - gfp |= GFP_DMA; - ret = (void *) __get_free_pages(gfp, get_order(size)); - - if (ret != NULL) { - memset(ret, 0, size); - *dma_handle = virt_to_phys(ret); - } - - return ret; -} - -EXPORT_SYMBOL(dma_alloc_noncoherent); - -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t * dma_handle, gfp_t gfp) - __attribute__((alias("dma_alloc_noncoherent"))); - -EXPORT_SYMBOL(dma_alloc_coherent); - -void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle) -{ - unsigned long addr = (unsigned long) vaddr; - - free_pages(addr, get_order(size)); -} - -EXPORT_SYMBOL(dma_free_noncoherent); - -void dma_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle) __attribute__((alias("dma_free_noncoherent"))); - -EXPORT_SYMBOL(dma_free_coherent); - -dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); - - return __pa(ptr); -} - -EXPORT_SYMBOL(dma_map_single); - -void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_unmap_single); - -int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction direction) -{ - int i; - - BUG_ON(direction == DMA_NONE); - - for (i = 0; i < nents; i++, sg++) { - sg->dma_address = (dma_addr_t)page_to_phys(sg->page) + sg->offset; - } - - return nents; -} - -EXPORT_SYMBOL(dma_map_sg); - -dma_addr_t dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); - - return page_to_phys(page) + offset; -} - -EXPORT_SYMBOL(dma_map_page); - -void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_unmap_page); - -void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_unmap_sg); - -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_sync_single_for_cpu); - -void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_sync_single_for_device); - -void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_sync_single_range_for_cpu); - -void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_sync_single_range_for_device); - -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_sync_sg_for_cpu); - -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_sync_sg_for_device); - -int dma_mapping_error(dma_addr_t dma_addr) -{ - return 0; -} - -EXPORT_SYMBOL(dma_mapping_error); - -int dma_supported(struct device *dev, u64 mask) -{ - /* - * we fall back to GFP_DMA when the mask isn't all 1s, - * so we can't guarantee allocations that must be - * within a tighter range than GFP_DMA.. - */ - if (mask < 0x00ffffff) - return 0; - - return 1; -} - -EXPORT_SYMBOL(dma_supported); - -int dma_is_consistent(dma_addr_t dma_addr) -{ - return 1; -} - -EXPORT_SYMBOL(dma_is_consistent); - -void dma_cache_sync(void *vaddr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_cache_sync); - -/* The DAC routines are a PCIism.. */ - -#ifdef CONFIG_PCI - -#include <linux/pci.h> - -dma64_addr_t pci_dac_page_to_dma(struct pci_dev *pdev, - struct page *page, unsigned long offset, int direction) -{ - return (dma64_addr_t)page_to_phys(page) + offset; -} - -EXPORT_SYMBOL(pci_dac_page_to_dma); - -struct page *pci_dac_dma_to_page(struct pci_dev *pdev, - dma64_addr_t dma_addr) -{ - return mem_map + (dma_addr >> PAGE_SHIFT); -} - -EXPORT_SYMBOL(pci_dac_dma_to_page); - -unsigned long pci_dac_dma_to_offset(struct pci_dev *pdev, - dma64_addr_t dma_addr) -{ - return dma_addr & ~PAGE_MASK; -} - -EXPORT_SYMBOL(pci_dac_dma_to_offset); - -void pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, - dma64_addr_t dma_addr, size_t len, int direction) -{ - BUG_ON(direction == PCI_DMA_NONE); -} - -EXPORT_SYMBOL(pci_dac_dma_sync_single_for_cpu); - -void pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, - dma64_addr_t dma_addr, size_t len, int direction) -{ - BUG_ON(direction == PCI_DMA_NONE); -} - -EXPORT_SYMBOL(pci_dac_dma_sync_single_for_device); - -#endif /* CONFIG_PCI */ diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c new file mode 100644 index 00000000000..44b6dff5aba --- /dev/null +++ b/arch/mips/mm/dma-default.c @@ -0,0 +1,376 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2000 Ani Joshi <ajoshi@unixbox.com> + * Copyright (C) 2000, 2001, 06 Ralf Baechle <ralf@linux-mips.org> + * swiped from i386, and cloned for MIPS by Geert, polished by Ralf. + */ + +#include <linux/types.h> +#include <linux/dma-mapping.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/scatterlist.h> +#include <linux/string.h> +#include <linux/gfp.h> +#include <linux/highmem.h> + +#include <asm/cache.h> +#include <asm/cpu-type.h> +#include <asm/io.h> + +#include <dma-coherence.h> + +#ifdef CONFIG_DMA_MAYBE_COHERENT +int coherentio = 0; /* User defined DMA coherency from command line. */ +EXPORT_SYMBOL_GPL(coherentio); +int hw_coherentio = 0; /* Actual hardware supported DMA coherency setting. */ + +static int __init setcoherentio(char *str) +{ + coherentio = 1; + pr_info("Hardware DMA cache coherency (command line)\n"); + return 0; +} +early_param("coherentio", setcoherentio); + +static int __init setnocoherentio(char *str) +{ + coherentio = 0; + pr_info("Software DMA cache coherency (command line)\n"); + return 0; +} +early_param("nocoherentio", setnocoherentio); +#endif + +static inline struct page *dma_addr_to_page(struct device *dev, + dma_addr_t dma_addr) +{ + return pfn_to_page( + plat_dma_addr_to_phys(dev, dma_addr) >> PAGE_SHIFT); +} + +/* + * The affected CPUs below in 'cpu_needs_post_dma_flush()' can + * speculatively fill random cachelines with stale data at any time, + * requiring an extra flush post-DMA. + * + * Warning on the terminology - Linux calls an uncached area coherent; + * MIPS terminology calls memory areas with hardware maintained coherency + * coherent. + */ +static inline int cpu_needs_post_dma_flush(struct device *dev) +{ + return !plat_device_is_coherent(dev) && + (boot_cpu_type() == CPU_R10000 || + boot_cpu_type() == CPU_R12000 || + boot_cpu_type() == CPU_BMIPS5000); +} + +static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp) +{ + gfp_t dma_flag; + + /* ignore region specifiers */ + gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); + +#ifdef CONFIG_ISA + if (dev == NULL) + dma_flag = __GFP_DMA; + else +#endif +#if defined(CONFIG_ZONE_DMA32) && defined(CONFIG_ZONE_DMA) + if (dev->coherent_dma_mask < DMA_BIT_MASK(32)) + dma_flag = __GFP_DMA; + else if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) + dma_flag = __GFP_DMA32; + else +#endif +#if defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_ZONE_DMA) + if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) + dma_flag = __GFP_DMA32; + else +#endif +#if defined(CONFIG_ZONE_DMA) && !defined(CONFIG_ZONE_DMA32) + if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) + dma_flag = __GFP_DMA; + else +#endif + dma_flag = 0; + + /* Don't invoke OOM killer */ + gfp |= __GFP_NORETRY; + + return gfp | dma_flag; +} + +void *dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t * dma_handle, gfp_t gfp) +{ + void *ret; + + gfp = massage_gfp_flags(dev, gfp); + + ret = (void *) __get_free_pages(gfp, get_order(size)); + + if (ret != NULL) { + memset(ret, 0, size); + *dma_handle = plat_map_dma_mem(dev, ret, size); + } + + return ret; +} +EXPORT_SYMBOL(dma_alloc_noncoherent); + +static void *mips_dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t * dma_handle, gfp_t gfp, struct dma_attrs *attrs) +{ + void *ret; + + if (dma_alloc_from_coherent(dev, size, dma_handle, &ret)) + return ret; + + gfp = massage_gfp_flags(dev, gfp); + + ret = (void *) __get_free_pages(gfp, get_order(size)); + + if (ret) { + memset(ret, 0, size); + *dma_handle = plat_map_dma_mem(dev, ret, size); + + if (!plat_device_is_coherent(dev)) { + dma_cache_wback_inv((unsigned long) ret, size); + if (!hw_coherentio) + ret = UNCAC_ADDR(ret); + } + } + + return ret; +} + + +void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle) +{ + plat_unmap_dma_mem(dev, dma_handle, size, DMA_BIDIRECTIONAL); + free_pages((unsigned long) vaddr, get_order(size)); +} +EXPORT_SYMBOL(dma_free_noncoherent); + +static void mips_dma_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, struct dma_attrs *attrs) +{ + unsigned long addr = (unsigned long) vaddr; + int order = get_order(size); + + if (dma_release_from_coherent(dev, order, vaddr)) + return; + + plat_unmap_dma_mem(dev, dma_handle, size, DMA_BIDIRECTIONAL); + + if (!plat_device_is_coherent(dev) && !hw_coherentio) + addr = CAC_ADDR(addr); + + free_pages(addr, get_order(size)); +} + +static inline void __dma_sync_virtual(void *addr, size_t size, + enum dma_data_direction direction) +{ + switch (direction) { + case DMA_TO_DEVICE: + dma_cache_wback((unsigned long)addr, size); + break; + + case DMA_FROM_DEVICE: + dma_cache_inv((unsigned long)addr, size); + break; + + case DMA_BIDIRECTIONAL: + dma_cache_wback_inv((unsigned long)addr, size); + break; + + default: + BUG(); + } +} + +/* + * A single sg entry may refer to multiple physically contiguous + * pages. But we still need to process highmem pages individually. + * If highmem is not configured then the bulk of this loop gets + * optimized out. + */ +static inline void __dma_sync(struct page *page, + unsigned long offset, size_t size, enum dma_data_direction direction) +{ + size_t left = size; + + do { + size_t len = left; + + if (PageHighMem(page)) { + void *addr; + + if (offset + len > PAGE_SIZE) { + if (offset >= PAGE_SIZE) { + page += offset >> PAGE_SHIFT; + offset &= ~PAGE_MASK; + } + len = PAGE_SIZE - offset; + } + + addr = kmap_atomic(page); + __dma_sync_virtual(addr + offset, len, direction); + kunmap_atomic(addr); + } else + __dma_sync_virtual(page_address(page) + offset, + size, direction); + offset = 0; + page++; + left -= len; + } while (left); +} + +static void mips_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction direction, struct dma_attrs *attrs) +{ + if (cpu_needs_post_dma_flush(dev)) + __dma_sync(dma_addr_to_page(dev, dma_addr), + dma_addr & ~PAGE_MASK, size, direction); + + plat_unmap_dma_mem(dev, dma_addr, size, direction); +} + +static int mips_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction, struct dma_attrs *attrs) +{ + int i; + + for (i = 0; i < nents; i++, sg++) { + if (!plat_device_is_coherent(dev)) + __dma_sync(sg_page(sg), sg->offset, sg->length, + direction); +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length = sg->length; +#endif + sg->dma_address = plat_map_dma_mem_page(dev, sg_page(sg)) + + sg->offset; + } + + return nents; +} + +static dma_addr_t mips_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + if (!plat_device_is_coherent(dev)) + __dma_sync(page, offset, size, direction); + + return plat_map_dma_mem_page(dev, page) + offset; +} + +static void mips_dma_unmap_sg(struct device *dev, struct scatterlist *sg, + int nhwentries, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + int i; + + for (i = 0; i < nhwentries; i++, sg++) { + if (!plat_device_is_coherent(dev) && + direction != DMA_TO_DEVICE) + __dma_sync(sg_page(sg), sg->offset, sg->length, + direction); + plat_unmap_dma_mem(dev, sg->dma_address, sg->length, direction); + } +} + +static void mips_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction direction) +{ + if (cpu_needs_post_dma_flush(dev)) + __dma_sync(dma_addr_to_page(dev, dma_handle), + dma_handle & ~PAGE_MASK, size, direction); +} + +static void mips_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction direction) +{ + if (!plat_device_is_coherent(dev)) + __dma_sync(dma_addr_to_page(dev, dma_handle), + dma_handle & ~PAGE_MASK, size, direction); +} + +static void mips_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sg, int nelems, enum dma_data_direction direction) +{ + int i; + + if (cpu_needs_post_dma_flush(dev)) + for (i = 0; i < nelems; i++, sg++) + __dma_sync(sg_page(sg), sg->offset, sg->length, + direction); +} + +static void mips_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, int nelems, enum dma_data_direction direction) +{ + int i; + + if (!plat_device_is_coherent(dev)) + for (i = 0; i < nelems; i++, sg++) + __dma_sync(sg_page(sg), sg->offset, sg->length, + direction); +} + +int mips_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return 0; +} + +int mips_dma_supported(struct device *dev, u64 mask) +{ + return plat_dma_supported(dev, mask); +} + +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(direction == DMA_NONE); + + if (!plat_device_is_coherent(dev)) + __dma_sync_virtual(vaddr, size, direction); +} + +EXPORT_SYMBOL(dma_cache_sync); + +static struct dma_map_ops mips_default_dma_map_ops = { + .alloc = mips_dma_alloc_coherent, + .free = mips_dma_free_coherent, + .map_page = mips_dma_map_page, + .unmap_page = mips_dma_unmap_page, + .map_sg = mips_dma_map_sg, + .unmap_sg = mips_dma_unmap_sg, + .sync_single_for_cpu = mips_dma_sync_single_for_cpu, + .sync_single_for_device = mips_dma_sync_single_for_device, + .sync_sg_for_cpu = mips_dma_sync_sg_for_cpu, + .sync_sg_for_device = mips_dma_sync_sg_for_device, + .mapping_error = mips_dma_mapping_error, + .dma_supported = mips_dma_supported +}; + +struct dma_map_ops *mips_dma_map_ops = &mips_default_dma_map_ops; +EXPORT_SYMBOL(mips_dma_map_ops); + +#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) + +static int __init mips_dma_init(void) +{ + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + + return 0; +} +fs_initcall(mips_dma_init); diff --git a/arch/mips/mm/dma-ip27.c b/arch/mips/mm/dma-ip27.c deleted file mode 100644 index 8da19fd22ac..00000000000 --- a/arch/mips/mm/dma-ip27.c +++ /dev/null @@ -1,257 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2000 Ani Joshi <ajoshi@unixbox.com> - * Copyright (C) 2000, 2001 Ralf Baechle <ralf@gnu.org> - * swiped from i386, and cloned for MIPS by Geert, polished by Ralf. - */ -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/pci.h> - -#include <asm/cache.h> -#include <asm/pci/bridge.h> - -#define pdev_to_baddr(pdev, addr) \ - (BRIDGE_CONTROLLER(pdev->bus)->baddr + (addr)) -#define dev_to_baddr(dev, addr) \ - pdev_to_baddr(to_pci_dev(dev), (addr)) - -void *dma_alloc_noncoherent(struct device *dev, size_t size, - dma_addr_t * dma_handle, gfp_t gfp) -{ - void *ret; - - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); - - if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) - gfp |= GFP_DMA; - ret = (void *) __get_free_pages(gfp, get_order(size)); - - if (ret != NULL) { - memset(ret, 0, size); - *dma_handle = dev_to_baddr(dev, virt_to_phys(ret)); - } - - return ret; -} - -EXPORT_SYMBOL(dma_alloc_noncoherent); - -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t * dma_handle, gfp_t gfp) - __attribute__((alias("dma_alloc_noncoherent"))); - -EXPORT_SYMBOL(dma_alloc_coherent); - -void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle) -{ - unsigned long addr = (unsigned long) vaddr; - - free_pages(addr, get_order(size)); -} - -EXPORT_SYMBOL(dma_free_noncoherent); - -void dma_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle) __attribute__((alias("dma_free_noncoherent"))); - -EXPORT_SYMBOL(dma_free_coherent); - -dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); - - return dev_to_baddr(dev, __pa(ptr)); -} - -EXPORT_SYMBOL(dma_map_single); - -void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_unmap_single); - -int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction direction) -{ - int i; - - BUG_ON(direction == DMA_NONE); - - for (i = 0; i < nents; i++, sg++) { - sg->dma_address = (dma_addr_t) dev_to_baddr(dev, - page_to_phys(sg->page) + sg->offset); - } - - return nents; -} - -EXPORT_SYMBOL(dma_map_sg); - -dma_addr_t dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); - - return dev_to_baddr(dev, page_to_phys(page) + offset); -} - -EXPORT_SYMBOL(dma_map_page); - -void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_unmap_page); - -void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_unmap_sg); - -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_sync_single_for_cpu); - -void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_sync_single_for_device); - -void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_sync_single_range_for_cpu); - -void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_sync_single_range_for_device); - -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_sync_sg_for_cpu); - -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_sync_sg_for_device); - -int dma_mapping_error(dma_addr_t dma_addr) -{ - return 0; -} - -EXPORT_SYMBOL(dma_mapping_error); - -int dma_supported(struct device *dev, u64 mask) -{ - /* - * we fall back to GFP_DMA when the mask isn't all 1s, - * so we can't guarantee allocations that must be - * within a tighter range than GFP_DMA.. - */ - if (mask < 0x00ffffff) - return 0; - - return 1; -} - -EXPORT_SYMBOL(dma_supported); - -int dma_is_consistent(dma_addr_t dma_addr) -{ - return 1; -} - -EXPORT_SYMBOL(dma_is_consistent); - -void dma_cache_sync(void *vaddr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); -} - -EXPORT_SYMBOL(dma_cache_sync); - -dma64_addr_t pci_dac_page_to_dma(struct pci_dev *pdev, - struct page *page, unsigned long offset, int direction) -{ - dma64_addr_t addr = page_to_phys(page) + offset; - - return (dma64_addr_t) pdev_to_baddr(pdev, addr); -} - -EXPORT_SYMBOL(pci_dac_page_to_dma); - -struct page *pci_dac_dma_to_page(struct pci_dev *pdev, - dma64_addr_t dma_addr) -{ - struct bridge_controller *bc = BRIDGE_CONTROLLER(pdev->bus); - - return pfn_to_page((dma_addr - bc->baddr) >> PAGE_SHIFT); -} - -EXPORT_SYMBOL(pci_dac_dma_to_page); - -unsigned long pci_dac_dma_to_offset(struct pci_dev *pdev, - dma64_addr_t dma_addr) -{ - return dma_addr & ~PAGE_MASK; -} - -EXPORT_SYMBOL(pci_dac_dma_to_offset); - -void pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, - dma64_addr_t dma_addr, size_t len, int direction) -{ - BUG_ON(direction == PCI_DMA_NONE); -} - -EXPORT_SYMBOL(pci_dac_dma_sync_single_for_cpu); - -void pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, - dma64_addr_t dma_addr, size_t len, int direction) -{ - BUG_ON(direction == PCI_DMA_NONE); -} - -EXPORT_SYMBOL(pci_dac_dma_sync_single_for_device); diff --git a/arch/mips/mm/dma-ip32.c b/arch/mips/mm/dma-ip32.c deleted file mode 100644 index a7e3072ff78..00000000000 --- a/arch/mips/mm/dma-ip32.c +++ /dev/null @@ -1,382 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2000 Ani Joshi <ajoshi@unixbox.com> - * Copyright (C) 2000, 2001 Ralf Baechle <ralf@gnu.org> - * Copyright (C) 2005 Ilya A. Volynets-Evenbakh <ilya@total-knowledge.com> - * swiped from i386, and cloned for MIPS by Geert, polished by Ralf. - * IP32 changes by Ilya. - */ -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/dma-mapping.h> - -#include <asm/cache.h> -#include <asm/io.h> -#include <asm/ip32/crime.h> - -/* - * Warning on the terminology - Linux calls an uncached area coherent; - * MIPS terminology calls memory areas with hardware maintained coherency - * coherent. - */ - -/* - * Few notes. - * 1. CPU sees memory as two chunks: 0-256M@0x0, and the rest @0x40000000+256M - * 2. PCI sees memory as one big chunk @0x0 (or we could use 0x40000000 for native-endian) - * 3. All other devices see memory as one big chunk at 0x40000000 - * 4. Non-PCI devices will pass NULL as struct device* - * Thus we translate differently, depending on device. - */ - -#define RAM_OFFSET_MASK 0x3fffffff - -void *dma_alloc_noncoherent(struct device *dev, size_t size, - dma_addr_t * dma_handle, gfp_t gfp) -{ - void *ret; - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); - - if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) - gfp |= GFP_DMA; - ret = (void *) __get_free_pages(gfp, get_order(size)); - - if (ret != NULL) { - unsigned long addr = virt_to_phys(ret)&RAM_OFFSET_MASK; - memset(ret, 0, size); - if(dev==NULL) - addr+= CRIME_HI_MEM_BASE; - *dma_handle = addr; - } - - return ret; -} - -EXPORT_SYMBOL(dma_alloc_noncoherent); - -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t * dma_handle, gfp_t gfp) -{ - void *ret; - - ret = dma_alloc_noncoherent(dev, size, dma_handle, gfp); - if (ret) { - dma_cache_wback_inv((unsigned long) ret, size); - ret = UNCAC_ADDR(ret); - } - - return ret; -} - -EXPORT_SYMBOL(dma_alloc_coherent); - -void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle) -{ - free_pages((unsigned long) vaddr, get_order(size)); -} - -EXPORT_SYMBOL(dma_free_noncoherent); - -void dma_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle) -{ - unsigned long addr = (unsigned long) vaddr; - - addr = CAC_ADDR(addr); - free_pages(addr, get_order(size)); -} - -EXPORT_SYMBOL(dma_free_coherent); - -static inline void __dma_sync(unsigned long addr, size_t size, - enum dma_data_direction direction) -{ - switch (direction) { - case DMA_TO_DEVICE: - dma_cache_wback(addr, size); - break; - - case DMA_FROM_DEVICE: - dma_cache_inv(addr, size); - break; - - case DMA_BIDIRECTIONAL: - dma_cache_wback_inv(addr, size); - break; - - default: - BUG(); - } -} - -dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, - enum dma_data_direction direction) -{ - unsigned long addr = (unsigned long) ptr; - - switch (direction) { - case DMA_TO_DEVICE: - dma_cache_wback(addr, size); - break; - - case DMA_FROM_DEVICE: - dma_cache_inv(addr, size); - break; - - case DMA_BIDIRECTIONAL: - dma_cache_wback_inv(addr, size); - break; - - default: - BUG(); - } - - addr = virt_to_phys(ptr)&RAM_OFFSET_MASK;; - if(dev == NULL) - addr+=CRIME_HI_MEM_BASE; - return (dma_addr_t)addr; -} - -EXPORT_SYMBOL(dma_map_single); - -void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction direction) -{ - switch (direction) { - case DMA_TO_DEVICE: - break; - - case DMA_FROM_DEVICE: - break; - - case DMA_BIDIRECTIONAL: - break; - - default: - BUG(); - } -} - -EXPORT_SYMBOL(dma_unmap_single); - -int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction direction) -{ - int i; - - BUG_ON(direction == DMA_NONE); - - for (i = 0; i < nents; i++, sg++) { - unsigned long addr; - - addr = (unsigned long) page_address(sg->page)+sg->offset; - if (addr) - __dma_sync(addr, sg->length, direction); - addr = __pa(addr)&RAM_OFFSET_MASK;; - if(dev == NULL) - addr += CRIME_HI_MEM_BASE; - sg->dma_address = (dma_addr_t)addr; - } - - return nents; -} - -EXPORT_SYMBOL(dma_map_sg); - -dma_addr_t dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction direction) -{ - unsigned long addr; - - BUG_ON(direction == DMA_NONE); - - addr = (unsigned long) page_address(page) + offset; - dma_cache_wback_inv(addr, size); - addr = __pa(addr)&RAM_OFFSET_MASK;; - if(dev == NULL) - addr += CRIME_HI_MEM_BASE; - - return (dma_addr_t)addr; -} - -EXPORT_SYMBOL(dma_map_page); - -void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); - - if (direction != DMA_TO_DEVICE) { - unsigned long addr; - - dma_address&=RAM_OFFSET_MASK; - addr = dma_address + PAGE_OFFSET; - if(dma_address>=256*1024*1024) - addr+=CRIME_HI_MEM_BASE; - dma_cache_wback_inv(addr, size); - } -} - -EXPORT_SYMBOL(dma_unmap_page); - -void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, - enum dma_data_direction direction) -{ - unsigned long addr; - int i; - - BUG_ON(direction == DMA_NONE); - - if (direction == DMA_TO_DEVICE) - return; - - for (i = 0; i < nhwentries; i++, sg++) { - addr = (unsigned long) page_address(sg->page); - if (!addr) - continue; - dma_cache_wback_inv(addr + sg->offset, sg->length); - } -} - -EXPORT_SYMBOL(dma_unmap_sg); - -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction direction) -{ - unsigned long addr; - - BUG_ON(direction == DMA_NONE); - - dma_handle&=RAM_OFFSET_MASK; - addr = dma_handle + PAGE_OFFSET; - if(dma_handle>=256*1024*1024) - addr+=CRIME_HI_MEM_BASE; - __dma_sync(addr, size, direction); -} - -EXPORT_SYMBOL(dma_sync_single_for_cpu); - -void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction direction) -{ - unsigned long addr; - - BUG_ON(direction == DMA_NONE); - - dma_handle&=RAM_OFFSET_MASK; - addr = dma_handle + PAGE_OFFSET; - if(dma_handle>=256*1024*1024) - addr+=CRIME_HI_MEM_BASE; - __dma_sync(addr, size, direction); -} - -EXPORT_SYMBOL(dma_sync_single_for_device); - -void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, - unsigned long offset, size_t size, enum dma_data_direction direction) -{ - unsigned long addr; - - BUG_ON(direction == DMA_NONE); - - dma_handle&=RAM_OFFSET_MASK; - addr = dma_handle + offset + PAGE_OFFSET; - if(dma_handle>=256*1024*1024) - addr+=CRIME_HI_MEM_BASE; - __dma_sync(addr, size, direction); -} - -EXPORT_SYMBOL(dma_sync_single_range_for_cpu); - -void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, - unsigned long offset, size_t size, enum dma_data_direction direction) -{ - unsigned long addr; - - BUG_ON(direction == DMA_NONE); - - dma_handle&=RAM_OFFSET_MASK; - addr = dma_handle + offset + PAGE_OFFSET; - if(dma_handle>=256*1024*1024) - addr+=CRIME_HI_MEM_BASE; - __dma_sync(addr, size, direction); -} - -EXPORT_SYMBOL(dma_sync_single_range_for_device); - -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - int i; - - BUG_ON(direction == DMA_NONE); - - /* Make sure that gcc doesn't leave the empty loop body. */ - for (i = 0; i < nelems; i++, sg++) - __dma_sync((unsigned long)page_address(sg->page), - sg->length, direction); -} - -EXPORT_SYMBOL(dma_sync_sg_for_cpu); - -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - int i; - - BUG_ON(direction == DMA_NONE); - - /* Make sure that gcc doesn't leave the empty loop body. */ - for (i = 0; i < nelems; i++, sg++) - __dma_sync((unsigned long)page_address(sg->page), - sg->length, direction); -} - -EXPORT_SYMBOL(dma_sync_sg_for_device); - -int dma_mapping_error(dma_addr_t dma_addr) -{ - return 0; -} - -EXPORT_SYMBOL(dma_mapping_error); - -int dma_supported(struct device *dev, u64 mask) -{ - /* - * we fall back to GFP_DMA when the mask isn't all 1s, - * so we can't guarantee allocations that must be - * within a tighter range than GFP_DMA.. - */ - if (mask < 0x00ffffff) - return 0; - - return 1; -} - -EXPORT_SYMBOL(dma_supported); - -int dma_is_consistent(dma_addr_t dma_addr) -{ - return 1; -} - -EXPORT_SYMBOL(dma_is_consistent); - -void dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction direction) -{ - if (direction == DMA_NONE) - return; - - dma_cache_wback_inv((unsigned long)vaddr, size); -} - -EXPORT_SYMBOL(dma_cache_sync); - diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c deleted file mode 100644 index cd4ea8474f8..00000000000 --- a/arch/mips/mm/dma-noncoherent.c +++ /dev/null @@ -1,370 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2000 Ani Joshi <ajoshi@unixbox.com> - * Copyright (C) 2000, 2001 Ralf Baechle <ralf@gnu.org> - * swiped from i386, and cloned for MIPS by Geert, polished by Ralf. - */ -#include <linux/config.h> -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/string.h> -#include <linux/dma-mapping.h> - -#include <asm/cache.h> -#include <asm/io.h> - -/* - * Warning on the terminology - Linux calls an uncached area coherent; - * MIPS terminology calls memory areas with hardware maintained coherency - * coherent. - */ - -void *dma_alloc_noncoherent(struct device *dev, size_t size, - dma_addr_t * dma_handle, gfp_t gfp) -{ - void *ret; - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); - - if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) - gfp |= GFP_DMA; - ret = (void *) __get_free_pages(gfp, get_order(size)); - - if (ret != NULL) { - memset(ret, 0, size); - *dma_handle = virt_to_phys(ret); - } - - return ret; -} - -EXPORT_SYMBOL(dma_alloc_noncoherent); - -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t * dma_handle, gfp_t gfp) -{ - void *ret; - - ret = dma_alloc_noncoherent(dev, size, dma_handle, gfp); - if (ret) { - dma_cache_wback_inv((unsigned long) ret, size); - ret = UNCAC_ADDR(ret); - } - - return ret; -} - -EXPORT_SYMBOL(dma_alloc_coherent); - -void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle) -{ - free_pages((unsigned long) vaddr, get_order(size)); -} - -EXPORT_SYMBOL(dma_free_noncoherent); - -void dma_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle) -{ - unsigned long addr = (unsigned long) vaddr; - - addr = CAC_ADDR(addr); - free_pages(addr, get_order(size)); -} - -EXPORT_SYMBOL(dma_free_coherent); - -static inline void __dma_sync(unsigned long addr, size_t size, - enum dma_data_direction direction) -{ - switch (direction) { - case DMA_TO_DEVICE: - dma_cache_wback(addr, size); - break; - - case DMA_FROM_DEVICE: - dma_cache_inv(addr, size); - break; - - case DMA_BIDIRECTIONAL: - dma_cache_wback_inv(addr, size); - break; - - default: - BUG(); - } -} - -dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, - enum dma_data_direction direction) -{ - unsigned long addr = (unsigned long) ptr; - - __dma_sync(addr, size, direction); - - return virt_to_phys(ptr); -} - -EXPORT_SYMBOL(dma_map_single); - -void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction direction) -{ - unsigned long addr; - addr = dma_addr + PAGE_OFFSET; - - //__dma_sync(addr, size, direction); -} - -EXPORT_SYMBOL(dma_unmap_single); - -int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction direction) -{ - int i; - - BUG_ON(direction == DMA_NONE); - - for (i = 0; i < nents; i++, sg++) { - unsigned long addr; - - addr = (unsigned long) page_address(sg->page); - if (addr) { - __dma_sync(addr + sg->offset, sg->length, direction); - sg->dma_address = (dma_addr_t)page_to_phys(sg->page) - + sg->offset; - } - } - - return nents; -} - -EXPORT_SYMBOL(dma_map_sg); - -dma_addr_t dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction direction) -{ - unsigned long addr; - - BUG_ON(direction == DMA_NONE); - - addr = (unsigned long) page_address(page) + offset; - dma_cache_wback_inv(addr, size); - - return page_to_phys(page) + offset; -} - -EXPORT_SYMBOL(dma_map_page); - -void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(direction == DMA_NONE); - - if (direction != DMA_TO_DEVICE) { - unsigned long addr; - - addr = dma_address + PAGE_OFFSET; - dma_cache_wback_inv(addr, size); - } -} - -EXPORT_SYMBOL(dma_unmap_page); - -void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, - enum dma_data_direction direction) -{ - unsigned long addr; - int i; - - BUG_ON(direction == DMA_NONE); - - if (direction == DMA_TO_DEVICE) - return; - - for (i = 0; i < nhwentries; i++, sg++) { - addr = (unsigned long) page_address(sg->page); - if (addr) - __dma_sync(addr + sg->offset, sg->length, direction); - } -} - -EXPORT_SYMBOL(dma_unmap_sg); - -void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction direction) -{ - unsigned long addr; - - BUG_ON(direction == DMA_NONE); - - addr = dma_handle + PAGE_OFFSET; - __dma_sync(addr, size, direction); -} - -EXPORT_SYMBOL(dma_sync_single_for_cpu); - -void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction direction) -{ - unsigned long addr; - - BUG_ON(direction == DMA_NONE); - - addr = dma_handle + PAGE_OFFSET; - __dma_sync(addr, size, direction); -} - -EXPORT_SYMBOL(dma_sync_single_for_device); - -void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, - unsigned long offset, size_t size, enum dma_data_direction direction) -{ - unsigned long addr; - - BUG_ON(direction == DMA_NONE); - - addr = dma_handle + offset + PAGE_OFFSET; - __dma_sync(addr, size, direction); -} - -EXPORT_SYMBOL(dma_sync_single_range_for_cpu); - -void dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, - unsigned long offset, size_t size, enum dma_data_direction direction) -{ - unsigned long addr; - - BUG_ON(direction == DMA_NONE); - - addr = dma_handle + offset + PAGE_OFFSET; - __dma_sync(addr, size, direction); -} - -EXPORT_SYMBOL(dma_sync_single_range_for_device); - -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - int i; - - BUG_ON(direction == DMA_NONE); - - /* Make sure that gcc doesn't leave the empty loop body. */ - for (i = 0; i < nelems; i++, sg++) - __dma_sync((unsigned long)page_address(sg->page), - sg->length, direction); -} - -EXPORT_SYMBOL(dma_sync_sg_for_cpu); - -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - int i; - - BUG_ON(direction == DMA_NONE); - - /* Make sure that gcc doesn't leave the empty loop body. */ - for (i = 0; i < nelems; i++, sg++) - __dma_sync((unsigned long)page_address(sg->page), - sg->length, direction); -} - -EXPORT_SYMBOL(dma_sync_sg_for_device); - -int dma_mapping_error(dma_addr_t dma_addr) -{ - return 0; -} - -EXPORT_SYMBOL(dma_mapping_error); - -int dma_supported(struct device *dev, u64 mask) -{ - /* - * we fall back to GFP_DMA when the mask isn't all 1s, - * so we can't guarantee allocations that must be - * within a tighter range than GFP_DMA.. - */ - if (mask < 0x00ffffff) - return 0; - - return 1; -} - -EXPORT_SYMBOL(dma_supported); - -int dma_is_consistent(dma_addr_t dma_addr) -{ - return 1; -} - -EXPORT_SYMBOL(dma_is_consistent); - -void dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction direction) -{ - if (direction == DMA_NONE) - return; - - dma_cache_wback_inv((unsigned long)vaddr, size); -} - -EXPORT_SYMBOL(dma_cache_sync); - -/* The DAC routines are a PCIism.. */ - -#ifdef CONFIG_PCI - -#include <linux/pci.h> - -dma64_addr_t pci_dac_page_to_dma(struct pci_dev *pdev, - struct page *page, unsigned long offset, int direction) -{ - return (dma64_addr_t)page_to_phys(page) + offset; -} - -EXPORT_SYMBOL(pci_dac_page_to_dma); - -struct page *pci_dac_dma_to_page(struct pci_dev *pdev, - dma64_addr_t dma_addr) -{ - return mem_map + (dma_addr >> PAGE_SHIFT); -} - -EXPORT_SYMBOL(pci_dac_dma_to_page); - -unsigned long pci_dac_dma_to_offset(struct pci_dev *pdev, - dma64_addr_t dma_addr) -{ - return dma_addr & ~PAGE_MASK; -} - -EXPORT_SYMBOL(pci_dac_dma_to_offset); - -void pci_dac_dma_sync_single_for_cpu(struct pci_dev *pdev, - dma64_addr_t dma_addr, size_t len, int direction) -{ - BUG_ON(direction == PCI_DMA_NONE); - - dma_cache_wback_inv(dma_addr + PAGE_OFFSET, len); -} - -EXPORT_SYMBOL(pci_dac_dma_sync_single_for_cpu); - -void pci_dac_dma_sync_single_for_device(struct pci_dev *pdev, - dma64_addr_t dma_addr, size_t len, int direction) -{ - BUG_ON(direction == PCI_DMA_NONE); - - dma_cache_wback_inv(dma_addr + PAGE_OFFSET, len); -} - -EXPORT_SYMBOL(pci_dac_dma_sync_single_for_device); - -#endif /* CONFIG_PCI */ diff --git a/arch/mips/mm/extable.c b/arch/mips/mm/extable.c index 297fb9f390d..9d25d2ba4b9 100644 --- a/arch/mips/mm/extable.c +++ b/arch/mips/mm/extable.c @@ -1,5 +1,9 @@ /* - * linux/arch/mips/mm/extable.c + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1997, 99, 2001 - 2004 Ralf Baechle <ralf@linux-mips.org> */ #include <linux/module.h> #include <linux/spinlock.h> diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 2d9624fd10e..becc42bb184 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -5,6 +5,7 @@ * * Copyright (C) 1995 - 2000 by Ralf Baechle */ +#include <linux/context_tracking.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/interrupt.h> @@ -16,37 +17,50 @@ #include <linux/mman.h> #include <linux/mm.h> #include <linux/smp.h> -#include <linux/smp_lock.h> -#include <linux/vt_kern.h> /* For unblank_screen() */ #include <linux/module.h> +#include <linux/kprobes.h> +#include <linux/perf_event.h> #include <asm/branch.h> #include <asm/mmu_context.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/ptrace.h> #include <asm/highmem.h> /* For VMALLOC_END */ +#include <linux/kdebug.h> /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. */ -asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, - unsigned long address) +static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, + unsigned long address) { struct vm_area_struct * vma = NULL; struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; const int field = sizeof(unsigned long) * 2; siginfo_t info; + int fault; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; #if 0 - printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", smp_processor_id(), + printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(), current->comm, current->pid, field, address, write, field, regs->cp0_epc); #endif +#ifdef CONFIG_KPROBES + /* + * This is to notify the fault handler of the kprobes. The + * exception code is redundant as it is also carried in REGS, + * but we pass it anyhow. + */ + if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1, + (regs->cp0_cause >> 2) & 0x1f, SIGSEGV) == NOTIFY_STOP) + return; +#endif + info.si_code = SEGV_MAPERR; /* @@ -58,8 +72,18 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, * only copy the information from the master page table, * nothing more. */ +#ifdef CONFIG_64BIT +# define VMALLOC_FAULT_TARGET no_context +#else +# define VMALLOC_FAULT_TARGET vmalloc_fault +#endif + if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END)) - goto vmalloc_fault; + goto VMALLOC_FAULT_TARGET; +#ifdef MODULE_START + if (unlikely(address >= MODULE_START && address < MODULE_END)) + goto VMALLOC_FAULT_TARGET; +#endif /* * If we're in an interrupt or have no user @@ -68,6 +92,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, if (in_atomic() || !mm) goto bad_area_nosemaphore; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; +retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) @@ -88,31 +115,76 @@ good_area: if (write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { - if (!(vma->vm_flags & (VM_READ | VM_EXEC))) - goto bad_area; + if (cpu_has_rixi) { + if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) { +#if 0 + pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] XI violation\n", + raw_smp_processor_id(), + current->comm, current->pid, + field, address, write, + field, regs->cp0_epc); +#endif + goto bad_area; + } + if (!(vma->vm_flags & VM_READ)) { +#if 0 + pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] RI violation\n", + raw_smp_processor_id(), + current->comm, current->pid, + field, address, write, + field, regs->cp0_epc); +#endif + goto bad_area; + } + } else { + if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) + goto bad_area; + } } -survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo * the fault. */ - switch (handle_mm_fault(mm, vma, address, write)) { - case VM_FAULT_MINOR: - tsk->min_flt++; - break; - case VM_FAULT_MAJOR: - tsk->maj_flt++; - break; - case VM_FAULT_SIGBUS: - goto do_sigbus; - case VM_FAULT_OOM: - goto out_of_memory; - default: + fault = handle_mm_fault(mm, vma, address, flags); + + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + return; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; BUG(); } + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) { + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + regs, address); + tsk->maj_flt++; + } else { + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + regs, address); + tsk->min_flt++; + } + if (fault & VM_FAULT_RETRY) { + flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; + + /* + * No need to up_read(&mm->mmap_sem) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + + goto retry; + } + } up_read(&mm->mmap_sem); return; @@ -147,7 +219,7 @@ bad_area_nosemaphore: } no_context: - /* Are we prepared to handle this kernel fault? */ + /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs)) { current->thread.cp0_baduaddr = address; return; @@ -157,30 +229,24 @@ no_context: * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ - bust_spinlocks(1); printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at " "virtual address %0*lx, epc == %0*lx, ra == %0*lx\n", - smp_processor_id(), field, address, field, regs->cp0_epc, + raw_smp_processor_id(), field, address, field, regs->cp0_epc, field, regs->regs[31]); die("Oops", regs); -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ out_of_memory: + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). + */ up_read(&mm->mmap_sem); - if (tsk->pid == 1) { - yield(); - down_read(&mm->mmap_sem); - goto survive; - } - printk("VM: killing process %s\n", tsk->comm); - if (user_mode(regs)) - do_exit(SIGKILL); - goto no_context; + if (!user_mode(regs)) + goto no_context; + pagefault_out_of_memory(); + return; do_sigbus: up_read(&mm->mmap_sem); @@ -188,11 +254,20 @@ do_sigbus: /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) goto no_context; - + else /* * Send a sigbus, regardless of whether we were in kernel * or user mode. */ +#if 0 + printk("do_page_fault() #3: sending SIGBUS to %s for " + "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n", + tsk->comm, + write ? "write access to" : "read access from", + field, address, + field, (unsigned long) regs->cp0_epc, + field, (unsigned long) regs->regs[31]); +#endif tsk->thread.cp0_badvaddr = address; info.si_signo = SIGBUS; info.si_errno = 0; @@ -201,7 +276,7 @@ do_sigbus: force_sig_info(SIGBUS, &info, tsk); return; - +#ifndef CONFIG_64BIT vmalloc_fault: { /* @@ -217,7 +292,7 @@ vmalloc_fault: pmd_t *pmd, *pmd_k; pte_t *pte_k; - pgd = (pgd_t *) pgd_current[smp_processor_id()] + offset; + pgd = (pgd_t *) pgd_current[raw_smp_processor_id()] + offset; pgd_k = init_mm.pgd + offset; if (!pgd_present(*pgd_k)) @@ -240,4 +315,15 @@ vmalloc_fault: goto no_context; return; } +#endif +} + +asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long write, unsigned long address) +{ + enum ctx_state prev_state; + + prev_state = exception_enter(); + __do_page_fault(regs, write, address); + exception_exit(prev_state); } diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c new file mode 100644 index 00000000000..06ce17c2a90 --- /dev/null +++ b/arch/mips/mm/gup.c @@ -0,0 +1,318 @@ +/* + * Lockless get_user_pages_fast for MIPS + * + * Copyright (C) 2008 Nick Piggin + * Copyright (C) 2008 Novell Inc. + * Copyright (C) 2011 Ralf Baechle + */ +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/vmstat.h> +#include <linux/highmem.h> +#include <linux/swap.h> +#include <linux/hugetlb.h> + +#include <asm/cpu-features.h> +#include <asm/pgtable.h> + +static inline pte_t gup_get_pte(pte_t *ptep) +{ +#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32) + pte_t pte; + +retry: + pte.pte_low = ptep->pte_low; + smp_rmb(); + pte.pte_high = ptep->pte_high; + smp_rmb(); + if (unlikely(pte.pte_low != ptep->pte_low)) + goto retry; + + return pte; +#else + return ACCESS_ONCE(*ptep); +#endif +} + +static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + pte_t *ptep = pte_offset_map(&pmd, addr); + do { + pte_t pte = gup_get_pte(ptep); + struct page *page; + + if (!pte_present(pte) || + pte_special(pte) || (write && !pte_write(pte))) { + pte_unmap(ptep); + return 0; + } + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); + get_page(page); + SetPageReferenced(page); + pages[*nr] = page; + (*nr)++; + + } while (ptep++, addr += PAGE_SIZE, addr != end); + + pte_unmap(ptep - 1); + return 1; +} + +static inline void get_head_page_multiple(struct page *page, int nr) +{ + VM_BUG_ON(page != compound_head(page)); + VM_BUG_ON(page_count(page) == 0); + atomic_add(nr, &page->_count); + SetPageReferenced(page); +} + +static int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + pte_t pte = *(pte_t *)&pmd; + struct page *head, *page; + int refs; + + if (write && !pte_write(pte)) + return 0; + /* hugepages are never "special" */ + VM_BUG_ON(pte_special(pte)); + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + + refs = 0; + head = pte_page(pte); + page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + if (PageTail(page)) + get_huge_page_tail(page); + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + get_head_page_multiple(head, refs); + return 1; +} + +static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pmd_t *pmdp; + + pmdp = pmd_offset(&pud, addr); + do { + pmd_t pmd = *pmdp; + + next = pmd_addr_end(addr, end); + /* + * The pmd_trans_splitting() check below explains why + * pmdp_splitting_flush has to flush the tlb, to stop + * this gup-fast code from running while we set the + * splitting bit in the pmd. Returning zero will take + * the slow path that will call wait_split_huge_page() + * if the pmd is still in splitting state. gup-fast + * can't because it has irq disabled and + * wait_split_huge_page() would never return as the + * tlb flush IPI wouldn't run. + */ + if (pmd_none(pmd) || pmd_trans_splitting(pmd)) + return 0; + if (unlikely(pmd_huge(pmd))) { + if (!gup_huge_pmd(pmd, addr, next, write, pages,nr)) + return 0; + } else { + if (!gup_pte_range(pmd, addr, next, write, pages,nr)) + return 0; + } + } while (pmdp++, addr = next, addr != end); + + return 1; +} + +static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + pte_t pte = *(pte_t *)&pud; + struct page *head, *page; + int refs; + + if (write && !pte_write(pte)) + return 0; + /* hugepages are never "special" */ + VM_BUG_ON(pte_special(pte)); + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + + refs = 0; + head = pte_page(pte); + page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + if (PageTail(page)) + get_huge_page_tail(page); + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + get_head_page_multiple(head, refs); + return 1; +} + +static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, + int write, struct page **pages, int *nr) +{ + unsigned long next; + pud_t *pudp; + + pudp = pud_offset(&pgd, addr); + do { + pud_t pud = *pudp; + + next = pud_addr_end(addr, end); + if (pud_none(pud)) + return 0; + if (unlikely(pud_huge(pud))) { + if (!gup_huge_pud(pud, addr, next, write, pages,nr)) + return 0; + } else { + if (!gup_pmd_range(pud, addr, next, write, pages,nr)) + return 0; + } + } while (pudp++, addr = next, addr != end); + + return 1; +} + +/* + * Like get_user_pages_fast() except its IRQ-safe in that it won't fall + * back to the regular GUP. + */ +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + unsigned long flags; + pgd_t *pgdp; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, + (void __user *)start, len))) + return 0; + + /* + * XXX: batch / limit 'nr', to avoid large irq off latency + * needs some instrumenting to determine the common sizes used by + * important workloads (eg. DB2), and whether limiting the batch + * size will decrease performance. + * + * It seems like we're in the clear for the moment. Direct-IO is + * the main guy that batches up lots of get_user_pages, and even + * they are limited to 64-at-a-time which is not so many. + */ + /* + * This doesn't prevent pagetable teardown, but does prevent + * the pagetables and pages from being freed. + * + * So long as we atomically load page table pointers versus teardown, + * we can follow the address down to the page and take a ref on it. + */ + local_irq_save(flags); + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + break; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + break; + } while (pgdp++, addr = next, addr != end); + local_irq_restore(flags); + + return nr; +} + +/** + * get_user_pages_fast() - pin user pages in memory + * @start: starting user address + * @nr_pages: number of pages from start to pin + * @write: whether pages will be written to + * @pages: array that receives pointers to the pages pinned. + * Should be at least nr_pages long. + * + * Attempt to pin user pages in memory without taking mm->mmap_sem. + * If not successful, it will fall back to taking the lock and + * calling get_user_pages(). + * + * Returns number of pages pinned. This may be fewer than the number + * requested. If nr_pages is 0 or negative, returns 0. If no pages + * were pinned, returns -errno. + */ +int get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + pgd_t *pgdp; + int ret, nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + + end = start + len; + if (end < start || cpu_has_dc_aliases) + goto slow_irqon; + + /* XXX: batch / limit 'nr' */ + local_irq_disable(); + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + goto slow; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + goto slow; + } while (pgdp++, addr = next, addr != end); + local_irq_enable(); + + VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); + return nr; +slow: + local_irq_enable(); + +slow_irqon: + /* Try to get the remaining pages with get_user_pages */ + start += nr << PAGE_SHIFT; + pages += nr; + + down_read(&mm->mmap_sem); + ret = get_user_pages(current, mm, start, + (end - start) >> PAGE_SHIFT, + write, 0, pages, NULL); + up_read(&mm->mmap_sem); + + /* Have to be a bit careful with return values */ + if (nr > 0) { + if (ret < 0) + ret = nr; + else + ret += nr; + } + return ret; +} diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c index 1f7b37b38f5..da815d29523 100644 --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c @@ -1,9 +1,16 @@ -#include <linux/config.h> +#include <linux/compiler.h> #include <linux/module.h> #include <linux/highmem.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <asm/fixmap.h> #include <asm/tlbflush.h> -void *__kmap(struct page *page) +static pte_t *kmap_pte; + +unsigned long highstart_pfn, highend_pfn; + +void *kmap(struct page *page) { void *addr; @@ -15,15 +22,16 @@ void *__kmap(struct page *page) return addr; } +EXPORT_SYMBOL(kmap); -void __kunmap(struct page *page) +void kunmap(struct page *page) { - if (in_interrupt()) - BUG(); + BUG_ON(in_interrupt()); if (!PageHighMem(page)) return; kunmap_high(page); } +EXPORT_SYMBOL(kunmap); /* * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because @@ -34,75 +42,80 @@ void __kunmap(struct page *page) * kmaps are appropriate for short, tight code paths only. */ -void *__kmap_atomic(struct page *page, enum km_type type) +void *kmap_atomic(struct page *page) { - enum fixed_addresses idx; unsigned long vaddr; + int idx, type; /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ - inc_preempt_count(); + pagefault_disable(); if (!PageHighMem(page)) return page_address(page); + type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); #ifdef CONFIG_DEBUG_HIGHMEM - if (!pte_none(*(kmap_pte-idx))) - BUG(); + BUG_ON(!pte_none(*(kmap_pte - idx))); #endif - set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); + set_pte(kmap_pte-idx, mk_pte(page, PAGE_KERNEL)); local_flush_tlb_one((unsigned long)vaddr); return (void*) vaddr; } +EXPORT_SYMBOL(kmap_atomic); -void __kunmap_atomic(void *kvaddr, enum km_type type) +void __kunmap_atomic(void *kvaddr) { -#ifdef CONFIG_DEBUG_HIGHMEM unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); + int type __maybe_unused; if (vaddr < FIXADDR_START) { // FIXME - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); return; } - if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) - BUG(); + type = kmap_atomic_idx(); +#ifdef CONFIG_DEBUG_HIGHMEM + { + int idx = type + KM_TYPE_NR * smp_processor_id(); - /* - * force other mappings to Oops if they'll try to access - * this pte without first remap it - */ - pte_clear(&init_mm, vaddr, kmap_pte-idx); - local_flush_tlb_one(vaddr); -#endif + BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); - dec_preempt_count(); - preempt_check_resched(); + /* + * force other mappings to Oops if they'll try to access + * this pte without first remap it + */ + pte_clear(&init_mm, vaddr, kmap_pte-idx); + local_flush_tlb_one(vaddr); + } +#endif + kmap_atomic_idx_pop(); + pagefault_enable(); } +EXPORT_SYMBOL(__kunmap_atomic); /* * This is the same as kmap_atomic() but can map memory that doesn't * have a struct page associated with it. */ -void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +void *kmap_atomic_pfn(unsigned long pfn) { - enum fixed_addresses idx; unsigned long vaddr; + int idx, type; - inc_preempt_count(); + pagefault_disable(); + type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); + set_pte(kmap_pte-idx, pfn_pte(pfn, PAGE_KERNEL)); flush_tlb_one(vaddr); return (void*) vaddr; } -struct page *__kmap_atomic_to_page(void *ptr) +struct page *kmap_atomic_to_page(void *ptr) { unsigned long idx, vaddr = (unsigned long)ptr; pte_t *pte; @@ -115,8 +128,11 @@ struct page *__kmap_atomic_to_page(void *ptr) return pte_page(*pte); } -EXPORT_SYMBOL(__kmap); -EXPORT_SYMBOL(__kunmap); -EXPORT_SYMBOL(__kmap_atomic); -EXPORT_SYMBOL(__kunmap_atomic); -EXPORT_SYMBOL(__kmap_atomic_to_page); +void __init kmap_init(void) +{ + unsigned long kmap_vstart; + + /* cache the first kmap pte */ + kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); + kmap_pte = kmap_get_fixmap_pte(kmap_vstart); +} diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c new file mode 100644 index 00000000000..4ec8ee10d37 --- /dev/null +++ b/arch/mips/mm/hugetlbpage.c @@ -0,0 +1,97 @@ +/* + * MIPS Huge TLB Page Support for Kernel. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> + * Copyright 2005, Embedded Alley Solutions, Inc. + * Matt Porter <mporter@embeddedalley.com> + * Copyright (C) 2008, 2009 Cavium Networks, Inc. + */ + +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/err.h> +#include <linux/sysctl.h> +#include <asm/mman.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> + +pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, + unsigned long sz) +{ + pgd_t *pgd; + pud_t *pud; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (pud) + pte = (pte_t *)pmd_alloc(mm, pud, addr); + + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd = NULL; + + pgd = pgd_offset(mm, addr); + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, addr); + if (pud_present(*pud)) + pmd = pmd_offset(pud, addr); + } + return (pte_t *) pmd; +} + +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + +/* + * This function checks for proper alignment of input addr and len parameters. + */ +int is_aligned_hugepage_range(unsigned long addr, unsigned long len) +{ + if (len & ~HPAGE_MASK) + return -EINVAL; + if (addr & ~HPAGE_MASK) + return -EINVAL; + return 0; +} + +struct page * +follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) +{ + return ERR_PTR(-EINVAL); +} + +int pmd_huge(pmd_t pmd) +{ + return (pmd_val(pmd) & _PAGE_HUGE) != 0; +} + +int pud_huge(pud_t pud) +{ + return (pud_val(pud) & _PAGE_HUGE) != 0; +} + +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pmd); + if (page) + page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); + return page; +} diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index f75ab748e8c..6e4413330e3 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -8,11 +8,12 @@ * Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. */ -#include <linux/config.h> +#include <linux/bug.h> #include <linux/init.h> #include <linux/module.h> #include <linux/signal.h> #include <linux/sched.h> +#include <linux/smp.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/string.h> @@ -24,36 +25,41 @@ #include <linux/bootmem.h> #include <linux/highmem.h> #include <linux/swap.h> +#include <linux/proc_fs.h> +#include <linux/pfn.h> +#include <linux/hardirq.h> +#include <linux/gfp.h> +#include <linux/kcore.h> +#include <asm/asm-offsets.h> #include <asm/bootinfo.h> #include <asm/cachectl.h> #include <asm/cpu.h> #include <asm/dma.h> +#include <asm/kmap_types.h> #include <asm/mmu_context.h> #include <asm/sections.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlb.h> - -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - -unsigned long highstart_pfn, highend_pfn; +#include <asm/fixmap.h> /* * We have up to 8 empty zeroed pages so we can map one of the right colour - * when needed. This is necessary only on R4000 / R4400 SC and MC versions + * when needed. This is necessary only on R4000 / R4400 SC and MC versions * where we have to avoid VCED / VECI exceptions for good performance at * any price. Since page is never written to after the initialization we * don't have to care about aliases on other CPUs. */ unsigned long empty_zero_page, zero_page_mask; +EXPORT_SYMBOL_GPL(empty_zero_page); /* * Not static inline because used by IP27 special magic initialization code */ -unsigned long setup_zero_pages(void) +void setup_zero_pages(void) { - unsigned long order, size; + unsigned int order, i; struct page *page; if (cpu_has_vce) @@ -65,41 +71,146 @@ unsigned long setup_zero_pages(void) if (!empty_zero_page) panic("Oh boy, that early out of memory?"); - page = virt_to_page(empty_zero_page); - while (page < virt_to_page(empty_zero_page + (PAGE_SIZE << order))) { - set_bit(PG_reserved, &page->flags); - set_page_count(page, 0); - page++; - } + page = virt_to_page((void *)empty_zero_page); + split_page(page, order); + for (i = 0; i < (1 << order); i++, page++) + mark_page_reserved(page); - size = PAGE_SIZE << order; - zero_page_mask = (size - 1) & PAGE_MASK; + zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; +} - return 1UL << order; +static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot) +{ + enum fixed_addresses idx; + unsigned long vaddr, flags, entrylo; + unsigned long old_ctx; + pte_t pte; + int tlbidx; + + BUG_ON(Page_dcache_dirty(page)); + + pagefault_disable(); + idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1); + idx += in_interrupt() ? FIX_N_COLOURS : 0; + vaddr = __fix_to_virt(FIX_CMAP_END - idx); + pte = mk_pte(page, prot); +#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32) + entrylo = pte.pte_high; +#else + entrylo = pte_to_entrylo(pte_val(pte)); +#endif + + local_irq_save(flags); + old_ctx = read_c0_entryhi(); + write_c0_entryhi(vaddr & (PAGE_MASK << 1)); + write_c0_entrylo0(entrylo); + write_c0_entrylo1(entrylo); + tlbidx = read_c0_wired(); + write_c0_wired(tlbidx + 1); + write_c0_index(tlbidx); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + tlbw_use_hazard(); + write_c0_entryhi(old_ctx); + local_irq_restore(flags); + + return (void*) vaddr; } -#ifdef CONFIG_HIGHMEM -pte_t *kmap_pte; -pgprot_t kmap_prot; +void *kmap_coherent(struct page *page, unsigned long addr) +{ + return __kmap_pgprot(page, addr, PAGE_KERNEL); +} + +void *kmap_noncoherent(struct page *page, unsigned long addr) +{ + return __kmap_pgprot(page, addr, PAGE_KERNEL_NC); +} -#define kmap_get_fixmap_pte(vaddr) \ - pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr)) +void kunmap_coherent(void) +{ + unsigned int wired; + unsigned long flags, old_ctx; + + local_irq_save(flags); + old_ctx = read_c0_entryhi(); + wired = read_c0_wired() - 1; + write_c0_wired(wired); + write_c0_index(wired); + write_c0_entryhi(UNIQUE_ENTRYHI(wired)); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + tlbw_use_hazard(); + write_c0_entryhi(old_ctx); + local_irq_restore(flags); + pagefault_enable(); +} -static void __init kmap_init(void) +void copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) { - unsigned long kmap_vstart; + void *vfrom, *vto; + + vto = kmap_atomic(to); + if (cpu_has_dc_aliases && + page_mapped(from) && !Page_dcache_dirty(from)) { + vfrom = kmap_coherent(from, vaddr); + copy_page(vto, vfrom); + kunmap_coherent(); + } else { + vfrom = kmap_atomic(from); + copy_page(vto, vfrom); + kunmap_atomic(vfrom); + } + if ((!cpu_has_ic_fills_f_dc) || + pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK)) + flush_data_cache_page((unsigned long)vto); + kunmap_atomic(vto); + /* Make sure this page is cleared on other CPU's too before using it */ + smp_wmb(); +} - /* cache the first kmap pte */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = kmap_get_fixmap_pte(kmap_vstart); +void copy_to_user_page(struct vm_area_struct *vma, + struct page *page, unsigned long vaddr, void *dst, const void *src, + unsigned long len) +{ + if (cpu_has_dc_aliases && + page_mapped(page) && !Page_dcache_dirty(page)) { + void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK); + memcpy(vto, src, len); + kunmap_coherent(); + } else { + memcpy(dst, src, len); + if (cpu_has_dc_aliases) + SetPageDcacheDirty(page); + } + if ((vma->vm_flags & VM_EXEC) && !cpu_has_ic_fills_f_dc) + flush_cache_page(vma, vaddr, page_to_pfn(page)); +} - kmap_prot = PAGE_KERNEL; +void copy_from_user_page(struct vm_area_struct *vma, + struct page *page, unsigned long vaddr, void *dst, const void *src, + unsigned long len) +{ + if (cpu_has_dc_aliases && + page_mapped(page) && !Page_dcache_dirty(page)) { + void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK); + memcpy(dst, vfrom, len); + kunmap_coherent(); + } else { + memcpy(dst, src, len); + if (cpu_has_dc_aliases) + SetPageDcacheDirty(page); + } } +EXPORT_SYMBOL_GPL(copy_from_user_page); -#ifdef CONFIG_32BIT void __init fixrange_init(unsigned long start, unsigned long end, pgd_t *pgd_base) { +#ifdef CONFIG_HIGHMEM pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -113,16 +224,15 @@ void __init fixrange_init(unsigned long start, unsigned long end, k = __pmd_offset(vaddr); pgd = pgd_base + i; - for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { + for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { pud = (pud_t *)pgd; - for ( ; (j < PTRS_PER_PUD) && (vaddr != end); pud++, j++) { + for ( ; (j < PTRS_PER_PUD) && (vaddr < end); pud++, j++) { pmd = (pmd_t *)pud; - for (; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) { + for (; (k < PTRS_PER_PMD) && (vaddr < end); pmd++, k++) { if (pmd_none(*pmd)) { pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pmd(pmd, __pmd(pte)); - if (pte != pte_offset_kernel(pmd, 0)) - BUG(); + set_pmd(pmd, __pmd((unsigned long)pte)); + BUG_ON(pte != pte_offset_kernel(pmd, 0)); } vaddr += PMD_SIZE; } @@ -130,64 +240,25 @@ void __init fixrange_init(unsigned long start, unsigned long end, } j = 0; } -} -#endif /* CONFIG_32BIT */ -#endif /* CONFIG_HIGHMEM */ - -#ifndef CONFIG_NEED_MULTIPLE_NODES -extern void pagetable_init(void); - -void __init paging_init(void) -{ - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; - unsigned long max_dma, high, low; - - pagetable_init(); - -#ifdef CONFIG_HIGHMEM - kmap_init(); #endif - - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; - low = max_low_pfn; - high = highend_pfn; - -#ifdef CONFIG_ISA - if (low < max_dma) - zones_size[ZONE_DMA] = low; - else { - zones_size[ZONE_DMA] = max_dma; - zones_size[ZONE_NORMAL] = low - max_dma; - } -#else - zones_size[ZONE_DMA] = low; -#endif -#ifdef CONFIG_HIGHMEM - if (cpu_has_dc_aliases) { - printk(KERN_WARNING "This processor doesn't support highmem."); - if (high - low) - printk(" %ldk highmem ignored", high - low); - printk("\n"); - } else - zones_size[ZONE_HIGHMEM] = high - low; -#endif - - free_area_init(zones_size); } -#define PFN_UP(x) (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT) -#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) - -static inline int page_is_ram(unsigned long pagenr) +#ifndef CONFIG_NEED_MULTIPLE_NODES +int page_is_ram(unsigned long pagenr) { int i; for (i = 0; i < boot_mem_map.nr_map; i++) { unsigned long addr, end; - if (boot_mem_map.map[i].type != BOOT_MEM_RAM) + switch (boot_mem_map.map[i].type) { + case BOOT_MEM_RAM: + case BOOT_MEM_INIT_RAM: + break; + default: /* not usable memory */ continue; + } addr = PFN_UP(boot_mem_map.map[i].addr); end = PFN_DOWN(boot_mem_map.map[i].addr + @@ -200,110 +271,142 @@ static inline int page_is_ram(unsigned long pagenr) return 0; } -void __init mem_init(void) +void __init paging_init(void) { - unsigned long codesize, reservedpages, datasize, initsize; - unsigned long tmp, ram; + unsigned long max_zone_pfns[MAX_NR_ZONES]; + unsigned long lastpfn __maybe_unused; + + pagetable_init(); #ifdef CONFIG_HIGHMEM -#ifdef CONFIG_DISCONTIGMEM -#error "CONFIG_HIGHMEM and CONFIG_DISCONTIGMEM dont work together yet" + kmap_init(); #endif - max_mapnr = num_physpages = highend_pfn; -#else - max_mapnr = num_physpages = max_low_pfn; +#ifdef CONFIG_ZONE_DMA + max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; +#endif +#ifdef CONFIG_ZONE_DMA32 + max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; +#endif + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; + lastpfn = max_low_pfn; +#ifdef CONFIG_HIGHMEM + max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; + lastpfn = highend_pfn; + + if (cpu_has_dc_aliases && max_low_pfn != highend_pfn) { + printk(KERN_WARNING "This processor doesn't support highmem." + " %ldk highmem ignored\n", + (highend_pfn - max_low_pfn) << (PAGE_SHIFT - 10)); + max_zone_pfns[ZONE_HIGHMEM] = max_low_pfn; + lastpfn = max_low_pfn; + } #endif - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); - totalram_pages += free_all_bootmem(); - totalram_pages -= setup_zero_pages(); /* Setup zeroed pages. */ + free_area_init_nodes(max_zone_pfns); +} - reservedpages = ram = 0; - for (tmp = 0; tmp < max_low_pfn; tmp++) - if (page_is_ram(tmp)) { - ram++; - if (PageReserved(mem_map+tmp)) - reservedpages++; - } +#ifdef CONFIG_64BIT +static struct kcore_list kcore_kseg0; +#endif +static inline void mem_init_free_highmem(void) +{ #ifdef CONFIG_HIGHMEM + unsigned long tmp; + for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { - struct page *page = mem_map + tmp; + struct page *page = pfn_to_page(tmp); - if (!page_is_ram(tmp)) { + if (!page_is_ram(tmp)) SetPageReserved(page); - continue; - } - ClearPageReserved(page); -#ifdef CONFIG_LIMITED_DMA - set_page_address(page, lowmem_page_address(page)); -#endif - set_page_count(page, 1); - __free_page(page); - totalhigh_pages++; + else + free_highmem_page(page); } - totalram_pages += totalhigh_pages; #endif +} + +void __init mem_init(void) +{ +#ifdef CONFIG_HIGHMEM +#ifdef CONFIG_DISCONTIGMEM +#error "CONFIG_HIGHMEM and CONFIG_DISCONTIGMEM dont work together yet" +#endif + max_mapnr = highend_pfn ? highend_pfn : max_low_pfn; +#else + max_mapnr = max_low_pfn; +#endif + high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); + + free_all_bootmem(); + setup_zero_pages(); /* Setup zeroed pages. */ + mem_init_free_highmem(); + mem_init_print_info(NULL); - codesize = (unsigned long) &_etext - (unsigned long) &_text; - datasize = (unsigned long) &_edata - (unsigned long) &_etext; - initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; - - printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " - "%ldk reserved, %ldk data, %ldk init, %ldk highmem)\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - ram << (PAGE_SHIFT-10), - codesize >> 10, - reservedpages << (PAGE_SHIFT-10), - datasize >> 10, - initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); +#ifdef CONFIG_64BIT + if ((unsigned long) &_text > (unsigned long) CKSEG0) + /* The -4 is a hack so that user tools don't have to handle + the overflow. */ + kclist_add(&kcore_kseg0, (void *) CKSEG0, + 0x80000000 - 4, KCORE_TEXT); +#endif } #endif /* !CONFIG_NEED_MULTIPLE_NODES */ +void free_init_pages(const char *what, unsigned long begin, unsigned long end) +{ + unsigned long pfn; + + for (pfn = PFN_UP(begin); pfn < PFN_DOWN(end); pfn++) { + struct page *page = pfn_to_page(pfn); + void *addr = phys_to_virt(PFN_PHYS(pfn)); + + memset(addr, POISON_FREE_INITMEM, PAGE_SIZE); + free_reserved_page(page); + } + printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); +} + #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { -#ifdef CONFIG_64BIT - /* Switch from KSEG0 to XKPHYS addresses */ - start = (unsigned long)phys_to_virt(CPHYSADDR(start)); - end = (unsigned long)phys_to_virt(CPHYSADDR(end)); -#endif - if (start < end) - printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", - (end - start) >> 10); - - for (; start < end; start += PAGE_SIZE) { - ClearPageReserved(virt_to_page(start)); - set_page_count(virt_to_page(start), 1); - free_page(start); - totalram_pages++; - } + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, + "initrd"); } #endif -extern unsigned long prom_free_prom_memory(void); +void (*free_init_pages_eva)(void *begin, void *end) = NULL; -void free_initmem(void) +void __init_refok free_initmem(void) { - unsigned long addr, page, freed; + prom_free_prom_memory(); + /* + * Let the platform define a specific function to free the + * init section since EVA may have used any possible mapping + * between virtual and physical addresses. + */ + if (free_init_pages_eva) + free_init_pages_eva((void *)&__init_begin, (void *)&__init_end); + else + free_initmem_default(POISON_FREE_INITMEM); +} - freed = prom_free_prom_memory(); +#ifndef CONFIG_MIPS_PGD_C0_CONTEXT +unsigned long pgd_current[NR_CPUS]; +#endif - addr = (unsigned long) &__init_begin; - while (addr < (unsigned long) &__init_end) { -#ifdef CONFIG_64BIT - page = PAGE_OFFSET | CPHYSADDR(addr); -#else - page = addr; +/* + * gcc 3.3 and older have trouble determining that PTRS_PER_PGD and PGD_ORDER + * are constants. So we use the variants from asm-offset.h until that gcc + * will officially be retired. + * + * Align swapper_pg_dir in to 64K, allows its address to be loaded + * with a single LUI instruction in the TLB handlers. If we used + * __aligned(64K), its size would get rounded up to the alignment + * size, and waste space. So we place it in its own section and align + * it in the linker script. + */ +pgd_t swapper_pg_dir[_PTRS_PER_PGD] __section(.bss..swapper_pg_dir); +#ifndef __PAGETABLE_PMD_FOLDED +pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss; #endif - ClearPageReserved(virt_to_page(page)); - set_page_count(virt_to_page(page), 1); - free_page(page); - totalram_pages++; - freed += PAGE_SIZE; - addr += PAGE_SIZE; - } - printk(KERN_INFO "Freeing unused kernel memory: %ldk freed\n", - freed >> 10); -} +pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss; diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c index 3101d1db559..7f840bc08ab 100644 --- a/arch/mips/mm/ioremap.c +++ b/arch/mips/mm/ioremap.c @@ -9,7 +9,8 @@ #include <linux/module.h> #include <asm/addrspace.h> #include <asm/byteorder.h> - +#include <linux/sched.h> +#include <linux/slab.h> #include <linux/vmalloc.h> #include <asm/cacheflush.h> #include <asm/io.h> @@ -21,14 +22,13 @@ static inline void remap_area_pte(pte_t * pte, unsigned long address, phys_t end; unsigned long pfn; pgprot_t pgprot = __pgprot(_PAGE_GLOBAL | _PAGE_PRESENT | __READABLE - | __WRITEABLE | flags); + | __WRITEABLE | flags); address &= ~PMD_MASK; end = address + size; if (end > PMD_SIZE) end = PMD_SIZE; - if (address >= end) - BUG(); + BUG_ON(address >= end); pfn = phys_addr >> PAGE_SHIFT; do { if (!pte_none(*pte)) { @@ -52,8 +52,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, if (end > PGDIR_SIZE) end = PGDIR_SIZE; phys_addr -= address; - if (address >= end) - BUG(); + BUG_ON(address >= end); do { pte_t * pte = pte_alloc_kernel(pmd, address); if (!pte) @@ -75,8 +74,7 @@ static int remap_area_pages(unsigned long address, phys_t phys_addr, phys_addr -= address; dir = pgd_offset(&init_mm, address); flush_cache_all(); - if (address >= end) - BUG(); + BUG_ON(address >= end); do { pud_t *pud; pmd_t *pmd; @@ -176,7 +174,7 @@ void __iomem * __ioremap(phys_t phys_addr, phys_t size, unsigned long flags) #define IS_KSEG1(addr) (((unsigned long)(addr) & ~0x1fffffffUL) == CKSEG1) -void __iounmap(volatile void __iomem *addr) +void __iounmap(const volatile void __iomem *addr) { struct vm_struct *p; @@ -187,7 +185,7 @@ void __iounmap(volatile void __iomem *addr) if (!p) printk(KERN_ERR "iounmap: bad address %p\n", addr); - kfree(p); + kfree(p); } EXPORT_SYMBOL(__ioremap); diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c new file mode 100644 index 00000000000..f1baadd56e8 --- /dev/null +++ b/arch/mips/mm/mmap.c @@ -0,0 +1,198 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2011 Wind River Systems, + * written by Ralf Baechle <ralf@linux-mips.org> + */ +#include <linux/compiler.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/module.h> +#include <linux/personality.h> +#include <linux/random.h> +#include <linux/sched.h> + +unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ +EXPORT_SYMBOL(shm_align_mask); + +/* gap between mmap and stack */ +#define MIN_GAP (128*1024*1024UL) +#define MAX_GAP ((TASK_SIZE)/6*5) + +static int mmap_is_legacy(void) +{ + if (current->personality & ADDR_COMPAT_LAYOUT) + return 1; + + if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) + return 1; + + return sysctl_legacy_va_layout; +} + +static unsigned long mmap_base(unsigned long rnd) +{ + unsigned long gap = rlimit(RLIMIT_STACK); + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + + return PAGE_ALIGN(TASK_SIZE - gap - rnd); +} + +#define COLOUR_ALIGN(addr, pgoff) \ + ((((addr) + shm_align_mask) & ~shm_align_mask) + \ + (((pgoff) << PAGE_SHIFT) & shm_align_mask)) + +enum mmap_allocation_direction {UP, DOWN}; + +static unsigned long arch_get_unmapped_area_common(struct file *filp, + unsigned long addr0, unsigned long len, unsigned long pgoff, + unsigned long flags, enum mmap_allocation_direction dir) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long addr = addr0; + int do_color_align; + struct vm_unmapped_area_info info; + + if (unlikely(len > TASK_SIZE)) + return -ENOMEM; + + if (flags & MAP_FIXED) { + /* Even MAP_FIXED mappings must reside within TASK_SIZE */ + if (TASK_SIZE - len < addr) + return -EINVAL; + + /* + * We do not accept a shared mapping if it would violate + * cache aliasing constraints. + */ + if ((flags & MAP_SHARED) && + ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask)) + return -EINVAL; + return addr; + } + + do_color_align = 0; + if (filp || (flags & MAP_SHARED)) + do_color_align = 1; + + /* requesting a specific address */ + if (addr) { + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + + info.length = len; + info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + + if (dir == DOWN) { + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.low_limit = PAGE_SIZE; + info.high_limit = mm->mmap_base; + addr = vm_unmapped_area(&info); + + if (!(addr & ~PAGE_MASK)) + return addr; + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + } + + info.flags = 0; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + return vm_unmapped_area(&info); +} + +unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr0, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + return arch_get_unmapped_area_common(filp, + addr0, len, pgoff, flags, UP); +} + +/* + * There is no need to export this but sched.h declares the function as + * extern so making it static here results in an error. + */ +unsigned long arch_get_unmapped_area_topdown(struct file *filp, + unsigned long addr0, unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + return arch_get_unmapped_area_common(filp, + addr0, len, pgoff, flags, DOWN); +} + +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + unsigned long random_factor = 0UL; + + if (current->flags & PF_RANDOMIZE) { + random_factor = get_random_int(); + random_factor = random_factor << PAGE_SHIFT; + if (TASK_IS_32BIT_ADDR) + random_factor &= 0xfffffful; + else + random_factor &= 0xffffffful; + } + + if (mmap_is_legacy()) { + mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; + mm->get_unmapped_area = arch_get_unmapped_area; + } else { + mm->mmap_base = mmap_base(random_factor); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + } +} + +static inline unsigned long brk_rnd(void) +{ + unsigned long rnd = get_random_int(); + + rnd = rnd << PAGE_SHIFT; + /* 8MB for 32bit, 256MB for 64bit */ + if (TASK_IS_32BIT_ADDR) + rnd = rnd & 0x7ffffful; + else + rnd = rnd & 0xffffffful; + + return rnd; +} + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + unsigned long base = mm->brk; + unsigned long ret; + + ret = PAGE_ALIGN(base + brk_rnd()); + + if (ret < mm->brk) + return mm->brk; + + return ret; +} + +int __virt_addr_valid(const volatile void *kaddr) +{ + return pfn_valid(PFN_DOWN(virt_to_phys(kaddr))); +} +EXPORT_SYMBOL_GPL(__virt_addr_valid); diff --git a/arch/mips/mm/page-funcs.S b/arch/mips/mm/page-funcs.S new file mode 100644 index 00000000000..48a6b38ff13 --- /dev/null +++ b/arch/mips/mm/page-funcs.S @@ -0,0 +1,50 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Micro-assembler generated clear_page/copy_page functions. + * + * Copyright (C) 2012 MIPS Technologies, Inc. + * Copyright (C) 2012 Ralf Baechle <ralf@linux-mips.org> + */ +#include <asm/asm.h> +#include <asm/regdef.h> + +#ifdef CONFIG_SIBYTE_DMA_PAGEOPS +#define cpu_clear_page_function_name clear_page_cpu +#define cpu_copy_page_function_name copy_page_cpu +#else +#define cpu_clear_page_function_name clear_page +#define cpu_copy_page_function_name copy_page +#endif + +/* + * Maximum sizes: + * + * R4000 128 bytes S-cache: 0x058 bytes + * R4600 v1.7: 0x05c bytes + * R4600 v2.0: 0x060 bytes + * With prefetching, 16 word strides 0x120 bytes + */ +EXPORT(__clear_page_start) +LEAF(cpu_clear_page_function_name) +1: j 1b /* Dummy, will be replaced. */ + .space 288 +END(cpu_clear_page_function_name) +EXPORT(__clear_page_end) + +/* + * Maximum sizes: + * + * R4000 128 bytes S-cache: 0x11c bytes + * R4600 v1.7: 0x080 bytes + * R4600 v2.0: 0x07c bytes + * With prefetching, 16 word strides 0x540 bytes + */ +EXPORT(__copy_page_start) +LEAF(cpu_copy_page_function_name) +1: j 1b /* Dummy, will be replaced. */ + .space 1344 +END(cpu_copy_page_function_name) +EXPORT(__copy_page_end) diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c new file mode 100644 index 00000000000..b611102e23b --- /dev/null +++ b/arch/mips/mm/page.c @@ -0,0 +1,660 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2007 Maciej W. Rozycki + * Copyright (C) 2008 Thiemo Seufer + * Copyright (C) 2012 MIPS Technologies, Inc. + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/proc_fs.h> + +#include <asm/bugs.h> +#include <asm/cacheops.h> +#include <asm/cpu-type.h> +#include <asm/inst.h> +#include <asm/io.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/prefetch.h> +#include <asm/bootinfo.h> +#include <asm/mipsregs.h> +#include <asm/mmu_context.h> +#include <asm/cpu.h> +#include <asm/war.h> + +#ifdef CONFIG_SIBYTE_DMA_PAGEOPS +#include <asm/sibyte/sb1250.h> +#include <asm/sibyte/sb1250_regs.h> +#include <asm/sibyte/sb1250_dma.h> +#endif + +#include <asm/uasm.h> + +/* Registers used in the assembled routines. */ +#define ZERO 0 +#define AT 2 +#define A0 4 +#define A1 5 +#define A2 6 +#define T0 8 +#define T1 9 +#define T2 10 +#define T3 11 +#define T9 25 +#define RA 31 + +/* Handle labels (which must be positive integers). */ +enum label_id { + label_clear_nopref = 1, + label_clear_pref, + label_copy_nopref, + label_copy_pref_both, + label_copy_pref_store, +}; + +UASM_L_LA(_clear_nopref) +UASM_L_LA(_clear_pref) +UASM_L_LA(_copy_nopref) +UASM_L_LA(_copy_pref_both) +UASM_L_LA(_copy_pref_store) + +/* We need one branch and therefore one relocation per target label. */ +static struct uasm_label labels[5]; +static struct uasm_reloc relocs[5]; + +#define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x00002010) +#define cpu_is_r4600_v2_x() ((read_c0_prid() & 0xfffffff0) == 0x00002020) + +static int pref_bias_clear_store; +static int pref_bias_copy_load; +static int pref_bias_copy_store; + +static u32 pref_src_mode; +static u32 pref_dst_mode; + +static int clear_word_size; +static int copy_word_size; + +static int half_clear_loop_size; +static int half_copy_loop_size; + +static int cache_line_size; +#define cache_line_mask() (cache_line_size - 1) + +static inline void +pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off) +{ + if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) { + if (off > 0x7fff) { + uasm_i_lui(buf, T9, uasm_rel_hi(off)); + uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off)); + } else + uasm_i_addiu(buf, T9, ZERO, off); + uasm_i_daddu(buf, reg1, reg2, T9); + } else { + if (off > 0x7fff) { + uasm_i_lui(buf, T9, uasm_rel_hi(off)); + uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off)); + UASM_i_ADDU(buf, reg1, reg2, T9); + } else + UASM_i_ADDIU(buf, reg1, reg2, off); + } +} + +static void set_prefetch_parameters(void) +{ + if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) + clear_word_size = 8; + else + clear_word_size = 4; + + if (cpu_has_64bit_gp_regs) + copy_word_size = 8; + else + copy_word_size = 4; + + /* + * The pref's used here are using "streaming" hints, which cause the + * copied data to be kicked out of the cache sooner. A page copy often + * ends up copying a lot more data than is commonly used, so this seems + * to make sense in terms of reducing cache pollution, but I've no real + * performance data to back this up. + */ + if (cpu_has_prefetch) { + /* + * XXX: Most prefetch bias values in here are based on + * guesswork. + */ + cache_line_size = cpu_dcache_line_size(); + switch (current_cpu_type()) { + case CPU_R5500: + case CPU_TX49XX: + /* These processors only support the Pref_Load. */ + pref_bias_copy_load = 256; + break; + + case CPU_R10000: + case CPU_R12000: + case CPU_R14000: + /* + * Those values have been experimentally tuned for an + * Origin 200. + */ + pref_bias_clear_store = 512; + pref_bias_copy_load = 256; + pref_bias_copy_store = 256; + pref_src_mode = Pref_LoadStreamed; + pref_dst_mode = Pref_StoreStreamed; + break; + + case CPU_SB1: + case CPU_SB1A: + pref_bias_clear_store = 128; + pref_bias_copy_load = 128; + pref_bias_copy_store = 128; + /* + * SB1 pass1 Pref_LoadStreamed/Pref_StoreStreamed + * hints are broken. + */ + if (current_cpu_type() == CPU_SB1 && + (current_cpu_data.processor_id & 0xff) < 0x02) { + pref_src_mode = Pref_Load; + pref_dst_mode = Pref_Store; + } else { + pref_src_mode = Pref_LoadStreamed; + pref_dst_mode = Pref_StoreStreamed; + } + break; + + default: + pref_bias_clear_store = 128; + pref_bias_copy_load = 256; + pref_bias_copy_store = 128; + pref_src_mode = Pref_LoadStreamed; + pref_dst_mode = Pref_PrepareForStore; + break; + } + } else { + if (cpu_has_cache_cdex_s) + cache_line_size = cpu_scache_line_size(); + else if (cpu_has_cache_cdex_p) + cache_line_size = cpu_dcache_line_size(); + } + /* + * Too much unrolling will overflow the available space in + * clear_space_array / copy_page_array. + */ + half_clear_loop_size = min(16 * clear_word_size, + max(cache_line_size >> 1, + 4 * clear_word_size)); + half_copy_loop_size = min(16 * copy_word_size, + max(cache_line_size >> 1, + 4 * copy_word_size)); +} + +static void build_clear_store(u32 **buf, int off) +{ + if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) { + uasm_i_sd(buf, ZERO, off, A0); + } else { + uasm_i_sw(buf, ZERO, off, A0); + } +} + +static inline void build_clear_pref(u32 **buf, int off) +{ + if (off & cache_line_mask()) + return; + + if (pref_bias_clear_store) { + uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off, + A0); + } else if (cache_line_size == (half_clear_loop_size << 1)) { + if (cpu_has_cache_cdex_s) { + uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0); + } else if (cpu_has_cache_cdex_p) { + if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) { + uasm_i_nop(buf); + uasm_i_nop(buf); + uasm_i_nop(buf); + uasm_i_nop(buf); + } + + if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) + uasm_i_lw(buf, ZERO, ZERO, AT); + + uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0); + } + } +} + +extern u32 __clear_page_start; +extern u32 __clear_page_end; +extern u32 __copy_page_start; +extern u32 __copy_page_end; + +void build_clear_page(void) +{ + int off; + u32 *buf = &__clear_page_start; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + int i; + static atomic_t run_once = ATOMIC_INIT(0); + + if (atomic_xchg(&run_once, 1)) { + return; + } + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + set_prefetch_parameters(); + + /* + * This algorithm makes the following assumptions: + * - The prefetch bias is a multiple of 2 words. + * - The prefetch bias is less than one page. + */ + BUG_ON(pref_bias_clear_store % (2 * clear_word_size)); + BUG_ON(PAGE_SIZE < pref_bias_clear_store); + + off = PAGE_SIZE - pref_bias_clear_store; + if (off > 0xffff || !pref_bias_clear_store) + pg_addiu(&buf, A2, A0, off); + else + uasm_i_ori(&buf, A2, A0, off); + + if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) + uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000)); + + off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size) + * cache_line_size : 0; + while (off) { + build_clear_pref(&buf, -off); + off -= cache_line_size; + } + uasm_l_clear_pref(&l, buf); + do { + build_clear_pref(&buf, off); + build_clear_store(&buf, off); + off += clear_word_size; + } while (off < half_clear_loop_size); + pg_addiu(&buf, A0, A0, 2 * off); + off = -off; + do { + build_clear_pref(&buf, off); + if (off == -clear_word_size) + uasm_il_bne(&buf, &r, A0, A2, label_clear_pref); + build_clear_store(&buf, off); + off += clear_word_size; + } while (off < 0); + + if (pref_bias_clear_store) { + pg_addiu(&buf, A2, A0, pref_bias_clear_store); + uasm_l_clear_nopref(&l, buf); + off = 0; + do { + build_clear_store(&buf, off); + off += clear_word_size; + } while (off < half_clear_loop_size); + pg_addiu(&buf, A0, A0, 2 * off); + off = -off; + do { + if (off == -clear_word_size) + uasm_il_bne(&buf, &r, A0, A2, + label_clear_nopref); + build_clear_store(&buf, off); + off += clear_word_size; + } while (off < 0); + } + + uasm_i_jr(&buf, RA); + uasm_i_nop(&buf); + + BUG_ON(buf > &__clear_page_end); + + uasm_resolve_relocs(relocs, labels); + + pr_debug("Synthesized clear page handler (%u instructions).\n", + (u32)(buf - &__clear_page_start)); + + pr_debug("\t.set push\n"); + pr_debug("\t.set noreorder\n"); + for (i = 0; i < (buf - &__clear_page_start); i++) + pr_debug("\t.word 0x%08x\n", (&__clear_page_start)[i]); + pr_debug("\t.set pop\n"); +} + +static void build_copy_load(u32 **buf, int reg, int off) +{ + if (cpu_has_64bit_gp_regs) { + uasm_i_ld(buf, reg, off, A1); + } else { + uasm_i_lw(buf, reg, off, A1); + } +} + +static void build_copy_store(u32 **buf, int reg, int off) +{ + if (cpu_has_64bit_gp_regs) { + uasm_i_sd(buf, reg, off, A0); + } else { + uasm_i_sw(buf, reg, off, A0); + } +} + +static inline void build_copy_load_pref(u32 **buf, int off) +{ + if (off & cache_line_mask()) + return; + + if (pref_bias_copy_load) + uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1); +} + +static inline void build_copy_store_pref(u32 **buf, int off) +{ + if (off & cache_line_mask()) + return; + + if (pref_bias_copy_store) { + uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off, + A0); + } else if (cache_line_size == (half_copy_loop_size << 1)) { + if (cpu_has_cache_cdex_s) { + uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0); + } else if (cpu_has_cache_cdex_p) { + if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) { + uasm_i_nop(buf); + uasm_i_nop(buf); + uasm_i_nop(buf); + uasm_i_nop(buf); + } + + if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) + uasm_i_lw(buf, ZERO, ZERO, AT); + + uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0); + } + } +} + +void build_copy_page(void) +{ + int off; + u32 *buf = &__copy_page_start; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + int i; + static atomic_t run_once = ATOMIC_INIT(0); + + if (atomic_xchg(&run_once, 1)) { + return; + } + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + set_prefetch_parameters(); + + /* + * This algorithm makes the following assumptions: + * - All prefetch biases are multiples of 8 words. + * - The prefetch biases are less than one page. + * - The store prefetch bias isn't greater than the load + * prefetch bias. + */ + BUG_ON(pref_bias_copy_load % (8 * copy_word_size)); + BUG_ON(pref_bias_copy_store % (8 * copy_word_size)); + BUG_ON(PAGE_SIZE < pref_bias_copy_load); + BUG_ON(pref_bias_copy_store > pref_bias_copy_load); + + off = PAGE_SIZE - pref_bias_copy_load; + if (off > 0xffff || !pref_bias_copy_load) + pg_addiu(&buf, A2, A0, off); + else + uasm_i_ori(&buf, A2, A0, off); + + if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) + uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000)); + + off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size) * + cache_line_size : 0; + while (off) { + build_copy_load_pref(&buf, -off); + off -= cache_line_size; + } + off = cache_line_size ? min(8, pref_bias_copy_store / cache_line_size) * + cache_line_size : 0; + while (off) { + build_copy_store_pref(&buf, -off); + off -= cache_line_size; + } + uasm_l_copy_pref_both(&l, buf); + do { + build_copy_load_pref(&buf, off); + build_copy_load(&buf, T0, off); + build_copy_load_pref(&buf, off + copy_word_size); + build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load_pref(&buf, off + 2 * copy_word_size); + build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load_pref(&buf, off + 3 * copy_word_size); + build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_store_pref(&buf, off); + build_copy_store(&buf, T0, off); + build_copy_store_pref(&buf, off + copy_word_size); + build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store_pref(&buf, off + 2 * copy_word_size); + build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store_pref(&buf, off + 3 * copy_word_size); + build_copy_store(&buf, T3, off + 3 * copy_word_size); + off += 4 * copy_word_size; + } while (off < half_copy_loop_size); + pg_addiu(&buf, A1, A1, 2 * off); + pg_addiu(&buf, A0, A0, 2 * off); + off = -off; + do { + build_copy_load_pref(&buf, off); + build_copy_load(&buf, T0, off); + build_copy_load_pref(&buf, off + copy_word_size); + build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load_pref(&buf, off + 2 * copy_word_size); + build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load_pref(&buf, off + 3 * copy_word_size); + build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_store_pref(&buf, off); + build_copy_store(&buf, T0, off); + build_copy_store_pref(&buf, off + copy_word_size); + build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store_pref(&buf, off + 2 * copy_word_size); + build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store_pref(&buf, off + 3 * copy_word_size); + if (off == -(4 * copy_word_size)) + uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both); + build_copy_store(&buf, T3, off + 3 * copy_word_size); + off += 4 * copy_word_size; + } while (off < 0); + + if (pref_bias_copy_load - pref_bias_copy_store) { + pg_addiu(&buf, A2, A0, + pref_bias_copy_load - pref_bias_copy_store); + uasm_l_copy_pref_store(&l, buf); + off = 0; + do { + build_copy_load(&buf, T0, off); + build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_store_pref(&buf, off); + build_copy_store(&buf, T0, off); + build_copy_store_pref(&buf, off + copy_word_size); + build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store_pref(&buf, off + 2 * copy_word_size); + build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store_pref(&buf, off + 3 * copy_word_size); + build_copy_store(&buf, T3, off + 3 * copy_word_size); + off += 4 * copy_word_size; + } while (off < half_copy_loop_size); + pg_addiu(&buf, A1, A1, 2 * off); + pg_addiu(&buf, A0, A0, 2 * off); + off = -off; + do { + build_copy_load(&buf, T0, off); + build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_store_pref(&buf, off); + build_copy_store(&buf, T0, off); + build_copy_store_pref(&buf, off + copy_word_size); + build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store_pref(&buf, off + 2 * copy_word_size); + build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store_pref(&buf, off + 3 * copy_word_size); + if (off == -(4 * copy_word_size)) + uasm_il_bne(&buf, &r, A2, A0, + label_copy_pref_store); + build_copy_store(&buf, T3, off + 3 * copy_word_size); + off += 4 * copy_word_size; + } while (off < 0); + } + + if (pref_bias_copy_store) { + pg_addiu(&buf, A2, A0, pref_bias_copy_store); + uasm_l_copy_nopref(&l, buf); + off = 0; + do { + build_copy_load(&buf, T0, off); + build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_store(&buf, T0, off); + build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store(&buf, T3, off + 3 * copy_word_size); + off += 4 * copy_word_size; + } while (off < half_copy_loop_size); + pg_addiu(&buf, A1, A1, 2 * off); + pg_addiu(&buf, A0, A0, 2 * off); + off = -off; + do { + build_copy_load(&buf, T0, off); + build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_store(&buf, T0, off); + build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store(&buf, T2, off + 2 * copy_word_size); + if (off == -(4 * copy_word_size)) + uasm_il_bne(&buf, &r, A2, A0, + label_copy_nopref); + build_copy_store(&buf, T3, off + 3 * copy_word_size); + off += 4 * copy_word_size; + } while (off < 0); + } + + uasm_i_jr(&buf, RA); + uasm_i_nop(&buf); + + BUG_ON(buf > &__copy_page_end); + + uasm_resolve_relocs(relocs, labels); + + pr_debug("Synthesized copy page handler (%u instructions).\n", + (u32)(buf - &__copy_page_start)); + + pr_debug("\t.set push\n"); + pr_debug("\t.set noreorder\n"); + for (i = 0; i < (buf - &__copy_page_start); i++) + pr_debug("\t.word 0x%08x\n", (&__copy_page_start)[i]); + pr_debug("\t.set pop\n"); +} + +#ifdef CONFIG_SIBYTE_DMA_PAGEOPS +extern void clear_page_cpu(void *page); +extern void copy_page_cpu(void *to, void *from); + +/* + * Pad descriptors to cacheline, since each is exclusively owned by a + * particular CPU. + */ +struct dmadscr { + u64 dscr_a; + u64 dscr_b; + u64 pad_a; + u64 pad_b; +} ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS]; + +void sb1_dma_init(void) +{ + int i; + + for (i = 0; i < DM_NUM_CHANNELS; i++) { + const u64 base_val = CPHYSADDR((unsigned long)&page_descr[i]) | + V_DM_DSCR_BASE_RINGSZ(1); + void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE)); + + __raw_writeq(base_val, base_reg); + __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg); + __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg); + } +} + +void clear_page(void *page) +{ + u64 to_phys = CPHYSADDR((unsigned long)page); + unsigned int cpu = smp_processor_id(); + + /* if the page is not in KSEG0, use old way */ + if ((long)KSEGX((unsigned long)page) != (long)CKSEG0) + return clear_page_cpu(page); + + page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM | + M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT; + page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); + __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); + + /* + * Don't really want to do it this way, but there's no + * reliable way to delay completion detection. + */ + while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) + & M_DM_DSCR_BASE_INTERRUPT)) + ; + __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); +} + +void copy_page(void *to, void *from) +{ + u64 from_phys = CPHYSADDR((unsigned long)from); + u64 to_phys = CPHYSADDR((unsigned long)to); + unsigned int cpu = smp_processor_id(); + + /* if any page is not in KSEG0, use old way */ + if ((long)KSEGX((unsigned long)to) != (long)CKSEG0 + || (long)KSEGX((unsigned long)from) != (long)CKSEG0) + return copy_page_cpu(to, from); + + page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST | + M_DM_DSCRA_INTERRUPT; + page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); + __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); + + /* + * Don't really want to do it this way, but there's no + * reliable way to delay completion detection. + */ + while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) + & M_DM_DSCR_BASE_INTERRUPT)) + ; + __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); +} + +#endif /* CONFIG_SIBYTE_DMA_PAGEOPS */ diff --git a/arch/mips/mm/pg-r4k.c b/arch/mips/mm/pg-r4k.c deleted file mode 100644 index f51e180072e..00000000000 --- a/arch/mips/mm/pg-r4k.c +++ /dev/null @@ -1,486 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org) - */ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/proc_fs.h> - -#include <asm/cacheops.h> -#include <asm/inst.h> -#include <asm/io.h> -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/prefetch.h> -#include <asm/system.h> -#include <asm/bootinfo.h> -#include <asm/mipsregs.h> -#include <asm/mmu_context.h> -#include <asm/cpu.h> -#include <asm/war.h> - -#define half_scache_line_size() (cpu_scache_line_size() >> 1) -#define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x00002010) -#define cpu_is_r4600_v2_x() ((read_c0_prid() & 0xfffffff0) == 0x00002020) - - -/* - * Maximum sizes: - * - * R4000 128 bytes S-cache: 0x58 bytes - * R4600 v1.7: 0x5c bytes - * R4600 v2.0: 0x60 bytes - * With prefetching, 16 byte strides 0xa0 bytes - */ - -static unsigned int clear_page_array[0x130 / 4]; - -void clear_page(void * page) __attribute__((alias("clear_page_array"))); - -EXPORT_SYMBOL(clear_page); - -/* - * Maximum sizes: - * - * R4000 128 bytes S-cache: 0x11c bytes - * R4600 v1.7: 0x080 bytes - * R4600 v2.0: 0x07c bytes - * With prefetching, 16 byte strides 0x0b8 bytes - */ -static unsigned int copy_page_array[0x148 / 4]; - -void copy_page(void *to, void *from) __attribute__((alias("copy_page_array"))); - -EXPORT_SYMBOL(copy_page); - -/* - * This is suboptimal for 32-bit kernels; we assume that R10000 is only used - * with 64-bit kernels. The prefetch offsets have been experimentally tuned - * an Origin 200. - */ -static int pref_offset_clear __initdata = 512; -static int pref_offset_copy __initdata = 256; - -static unsigned int pref_src_mode __initdata; -static unsigned int pref_dst_mode __initdata; - -static int load_offset __initdata; -static int store_offset __initdata; - -static unsigned int __initdata *dest, *epc; - -static unsigned int instruction_pending; -static union mips_instruction delayed_mi; - -static void __init emit_instruction(union mips_instruction mi) -{ - if (instruction_pending) - *epc++ = delayed_mi.word; - - instruction_pending = 1; - delayed_mi = mi; -} - -static inline void flush_delay_slot_or_nop(void) -{ - if (instruction_pending) { - *epc++ = delayed_mi.word; - instruction_pending = 0; - return; - } - - *epc++ = 0; -} - -static inline unsigned int *label(void) -{ - if (instruction_pending) { - *epc++ = delayed_mi.word; - instruction_pending = 0; - } - - return epc; -} - -static inline void build_insn_word(unsigned int word) -{ - union mips_instruction mi; - - mi.word = word; - - emit_instruction(mi); -} - -static inline void build_nop(void) -{ - build_insn_word(0); /* nop */ -} - -static inline void build_src_pref(int advance) -{ - if (!(load_offset & (cpu_dcache_line_size() - 1))) { - union mips_instruction mi; - - mi.i_format.opcode = pref_op; - mi.i_format.rs = 5; /* $a1 */ - mi.i_format.rt = pref_src_mode; - mi.i_format.simmediate = load_offset + advance; - - emit_instruction(mi); - } -} - -static inline void __build_load_reg(int reg) -{ - union mips_instruction mi; - unsigned int width; - - if (cpu_has_64bit_gp_regs) { - mi.i_format.opcode = ld_op; - width = 8; - } else { - mi.i_format.opcode = lw_op; - width = 4; - } - mi.i_format.rs = 5; /* $a1 */ - mi.i_format.rt = reg; /* $reg */ - mi.i_format.simmediate = load_offset; - - load_offset += width; - emit_instruction(mi); -} - -static inline void build_load_reg(int reg) -{ - if (cpu_has_prefetch) - build_src_pref(pref_offset_copy); - - __build_load_reg(reg); -} - -static inline void build_dst_pref(int advance) -{ - if (!(store_offset & (cpu_dcache_line_size() - 1))) { - union mips_instruction mi; - - mi.i_format.opcode = pref_op; - mi.i_format.rs = 4; /* $a0 */ - mi.i_format.rt = pref_dst_mode; - mi.i_format.simmediate = store_offset + advance; - - emit_instruction(mi); - } -} - -static inline void build_cdex_s(void) -{ - union mips_instruction mi; - - if ((store_offset & (cpu_scache_line_size() - 1))) - return; - - mi.c_format.opcode = cache_op; - mi.c_format.rs = 4; /* $a0 */ - mi.c_format.c_op = 3; /* Create Dirty Exclusive */ - mi.c_format.cache = 3; /* Secondary Data Cache */ - mi.c_format.simmediate = store_offset; - - emit_instruction(mi); -} - -static inline void build_cdex_p(void) -{ - union mips_instruction mi; - - if (store_offset & (cpu_dcache_line_size() - 1)) - return; - - if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) { - build_nop(); - build_nop(); - build_nop(); - build_nop(); - } - - if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) - build_insn_word(0x3c01a000); /* lui $at, 0xa000 */ - - mi.c_format.opcode = cache_op; - mi.c_format.rs = 4; /* $a0 */ - mi.c_format.c_op = 3; /* Create Dirty Exclusive */ - mi.c_format.cache = 1; /* Data Cache */ - mi.c_format.simmediate = store_offset; - - emit_instruction(mi); -} - -static void __init __build_store_reg(int reg) -{ - union mips_instruction mi; - unsigned int width; - - if (cpu_has_64bit_gp_regs || - (cpu_has_64bit_zero_reg && reg == 0)) { - mi.i_format.opcode = sd_op; - width = 8; - } else { - mi.i_format.opcode = sw_op; - width = 4; - } - mi.i_format.rs = 4; /* $a0 */ - mi.i_format.rt = reg; /* $reg */ - mi.i_format.simmediate = store_offset; - - store_offset += width; - emit_instruction(mi); -} - -static inline void build_store_reg(int reg) -{ - if (cpu_has_prefetch) - if (reg) - build_dst_pref(pref_offset_copy); - else - build_dst_pref(pref_offset_clear); - else if (cpu_has_cache_cdex_s) - build_cdex_s(); - else if (cpu_has_cache_cdex_p) - build_cdex_p(); - - __build_store_reg(reg); -} - -static inline void build_addiu_a2_a0(unsigned long offset) -{ - union mips_instruction mi; - - BUG_ON(offset > 0x7fff); - - mi.i_format.opcode = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op; - mi.i_format.rs = 4; /* $a0 */ - mi.i_format.rt = 6; /* $a2 */ - mi.i_format.simmediate = offset; - - emit_instruction(mi); -} - -static inline void build_addiu_a1(unsigned long offset) -{ - union mips_instruction mi; - - BUG_ON(offset > 0x7fff); - - mi.i_format.opcode = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op; - mi.i_format.rs = 5; /* $a1 */ - mi.i_format.rt = 5; /* $a1 */ - mi.i_format.simmediate = offset; - - load_offset -= offset; - - emit_instruction(mi); -} - -static inline void build_addiu_a0(unsigned long offset) -{ - union mips_instruction mi; - - BUG_ON(offset > 0x7fff); - - mi.i_format.opcode = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op; - mi.i_format.rs = 4; /* $a0 */ - mi.i_format.rt = 4; /* $a0 */ - mi.i_format.simmediate = offset; - - store_offset -= offset; - - emit_instruction(mi); -} - -static inline void build_bne(unsigned int *dest) -{ - union mips_instruction mi; - - mi.i_format.opcode = bne_op; - mi.i_format.rs = 6; /* $a2 */ - mi.i_format.rt = 4; /* $a0 */ - mi.i_format.simmediate = dest - epc - 1; - - *epc++ = mi.word; - flush_delay_slot_or_nop(); -} - -static inline void build_jr_ra(void) -{ - union mips_instruction mi; - - mi.r_format.opcode = spec_op; - mi.r_format.rs = 31; - mi.r_format.rt = 0; - mi.r_format.rd = 0; - mi.r_format.re = 0; - mi.r_format.func = jr_op; - - *epc++ = mi.word; - flush_delay_slot_or_nop(); -} - -void __init build_clear_page(void) -{ - unsigned int loop_start; - - epc = (unsigned int *) &clear_page_array; - instruction_pending = 0; - store_offset = 0; - - if (cpu_has_prefetch) { - switch (current_cpu_data.cputype) { - case CPU_RM9000: - /* - * As a workaround for erratum G105 which make the - * PrepareForStore hint unusable we fall back to - * StoreRetained on the RM9000. Once it is known which - * versions of the RM9000 we'll be able to condition- - * alize this. - */ - - case CPU_R10000: - case CPU_R12000: - pref_src_mode = Pref_LoadStreamed; - pref_dst_mode = Pref_StoreStreamed; - break; - - default: - pref_src_mode = Pref_LoadStreamed; - pref_dst_mode = Pref_PrepareForStore; - break; - } - } - - build_addiu_a2_a0(PAGE_SIZE - (cpu_has_prefetch ? pref_offset_clear : 0)); - - if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) - build_insn_word(0x3c01a000); /* lui $at, 0xa000 */ - -dest = label(); - do { - build_store_reg(0); - build_store_reg(0); - build_store_reg(0); - build_store_reg(0); - } while (store_offset < half_scache_line_size()); - build_addiu_a0(2 * store_offset); - loop_start = store_offset; - do { - build_store_reg(0); - build_store_reg(0); - build_store_reg(0); - build_store_reg(0); - } while ((store_offset - loop_start) < half_scache_line_size()); - build_bne(dest); - - if (cpu_has_prefetch && pref_offset_clear) { - build_addiu_a2_a0(pref_offset_clear); - dest = label(); - loop_start = store_offset; - do { - __build_store_reg(0); - __build_store_reg(0); - __build_store_reg(0); - __build_store_reg(0); - } while ((store_offset - loop_start) < half_scache_line_size()); - build_addiu_a0(2 * store_offset); - loop_start = store_offset; - do { - __build_store_reg(0); - __build_store_reg(0); - __build_store_reg(0); - __build_store_reg(0); - } while ((store_offset - loop_start) < half_scache_line_size()); - build_bne(dest); - } - - build_jr_ra(); - - BUG_ON(epc > clear_page_array + ARRAY_SIZE(clear_page_array)); -} - -void __init build_copy_page(void) -{ - unsigned int loop_start; - - epc = (unsigned int *) ©_page_array; - store_offset = load_offset = 0; - instruction_pending = 0; - - build_addiu_a2_a0(PAGE_SIZE - (cpu_has_prefetch ? pref_offset_copy : 0)); - - if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) - build_insn_word(0x3c01a000); /* lui $at, 0xa000 */ - -dest = label(); - loop_start = store_offset; - do { - build_load_reg( 8); - build_load_reg( 9); - build_load_reg(10); - build_load_reg(11); - build_store_reg( 8); - build_store_reg( 9); - build_store_reg(10); - build_store_reg(11); - } while ((store_offset - loop_start) < half_scache_line_size()); - build_addiu_a0(2 * store_offset); - build_addiu_a1(2 * load_offset); - loop_start = store_offset; - do { - build_load_reg( 8); - build_load_reg( 9); - build_load_reg(10); - build_load_reg(11); - build_store_reg( 8); - build_store_reg( 9); - build_store_reg(10); - build_store_reg(11); - } while ((store_offset - loop_start) < half_scache_line_size()); - build_bne(dest); - - if (cpu_has_prefetch && pref_offset_copy) { - build_addiu_a2_a0(pref_offset_copy); - dest = label(); - loop_start = store_offset; - do { - __build_load_reg( 8); - __build_load_reg( 9); - __build_load_reg(10); - __build_load_reg(11); - __build_store_reg( 8); - __build_store_reg( 9); - __build_store_reg(10); - __build_store_reg(11); - } while ((store_offset - loop_start) < half_scache_line_size()); - build_addiu_a0(2 * store_offset); - build_addiu_a1(2 * load_offset); - loop_start = store_offset; - do { - __build_load_reg( 8); - __build_load_reg( 9); - __build_load_reg(10); - __build_load_reg(11); - __build_store_reg( 8); - __build_store_reg( 9); - __build_store_reg(10); - __build_store_reg(11); - } while ((store_offset - loop_start) < half_scache_line_size()); - build_bne(dest); - } - - build_jr_ra(); - - BUG_ON(epc > copy_page_array + ARRAY_SIZE(copy_page_array)); -} diff --git a/arch/mips/mm/pg-sb1.c b/arch/mips/mm/pg-sb1.c deleted file mode 100644 index 148c65b9cd8..00000000000 --- a/arch/mips/mm/pg-sb1.c +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) - * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org) - * Copyright (C) 2000 SiByte, Inc. - * Copyright (C) 2005 Thiemo Seufer - * - * Written by Justin Carlson of SiByte, Inc. - * and Kip Walker of Broadcom Corp. - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#include <linux/config.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/smp.h> - -#include <asm/io.h> -#include <asm/sibyte/sb1250.h> -#include <asm/sibyte/sb1250_regs.h> -#include <asm/sibyte/sb1250_dma.h> - -#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS -#define SB1_PREF_LOAD_STREAMED_HINT "0" -#define SB1_PREF_STORE_STREAMED_HINT "1" -#else -#define SB1_PREF_LOAD_STREAMED_HINT "4" -#define SB1_PREF_STORE_STREAMED_HINT "5" -#endif - -static inline void clear_page_cpu(void *page) -{ - unsigned char *addr = (unsigned char *) page; - unsigned char *end = addr + PAGE_SIZE; - - /* - * JDCXXX - This should be bottlenecked by the write buffer, but these - * things tend to be mildly unpredictable...should check this on the - * performance model - * - * We prefetch 4 lines ahead. We're also "cheating" slightly here... - * since we know we're on an SB1, we force the assembler to take - * 64-bit operands to speed things up - */ - __asm__ __volatile__( - " .set push \n" - " .set mips4 \n" - " .set noreorder \n" -#ifdef CONFIG_CPU_HAS_PREFETCH - " daddiu %0, %0, 128 \n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", -128(%0) \n" - /* Prefetch the first 4 lines */ - " pref " SB1_PREF_STORE_STREAMED_HINT ", -96(%0) \n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", -64(%0) \n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%0) \n" - "1: sd $0, -128(%0) \n" /* Throw out a cacheline of 0's */ - " sd $0, -120(%0) \n" - " sd $0, -112(%0) \n" - " sd $0, -104(%0) \n" - " daddiu %0, %0, 32 \n" - " bnel %0, %1, 1b \n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%0) \n" - " daddiu %0, %0, -128 \n" -#endif - " sd $0, 0(%0) \n" /* Throw out a cacheline of 0's */ - "1: sd $0, 8(%0) \n" - " sd $0, 16(%0) \n" - " sd $0, 24(%0) \n" - " daddiu %0, %0, 32 \n" - " bnel %0, %1, 1b \n" - " sd $0, 0(%0) \n" - " .set pop \n" - : "+r" (addr) - : "r" (end) - : "memory"); -} - -static inline void copy_page_cpu(void *to, void *from) -{ - unsigned char *src = (unsigned char *)from; - unsigned char *dst = (unsigned char *)to; - unsigned char *end = src + PAGE_SIZE; - - /* - * The pref's used here are using "streaming" hints, which cause the - * copied data to be kicked out of the cache sooner. A page copy often - * ends up copying a lot more data than is commonly used, so this seems - * to make sense in terms of reducing cache pollution, but I've no real - * performance data to back this up - */ - __asm__ __volatile__( - " .set push \n" - " .set mips4 \n" - " .set noreorder \n" -#ifdef CONFIG_CPU_HAS_PREFETCH - " daddiu %0, %0, 128 \n" - " daddiu %1, %1, 128 \n" - " pref " SB1_PREF_LOAD_STREAMED_HINT ", -128(%0)\n" - /* Prefetch the first 4 lines */ - " pref " SB1_PREF_STORE_STREAMED_HINT ", -128(%1)\n" - " pref " SB1_PREF_LOAD_STREAMED_HINT ", -96(%0)\n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", -96(%1)\n" - " pref " SB1_PREF_LOAD_STREAMED_HINT ", -64(%0)\n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", -64(%1)\n" - " pref " SB1_PREF_LOAD_STREAMED_HINT ", -32(%0)\n" - "1: pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%1)\n" -# ifdef CONFIG_64BIT - " ld $8, -128(%0) \n" /* Block copy a cacheline */ - " ld $9, -120(%0) \n" - " ld $10, -112(%0) \n" - " ld $11, -104(%0) \n" - " sd $8, -128(%1) \n" - " sd $9, -120(%1) \n" - " sd $10, -112(%1) \n" - " sd $11, -104(%1) \n" -# else - " lw $2, -128(%0) \n" /* Block copy a cacheline */ - " lw $3, -124(%0) \n" - " lw $6, -120(%0) \n" - " lw $7, -116(%0) \n" - " lw $8, -112(%0) \n" - " lw $9, -108(%0) \n" - " lw $10, -104(%0) \n" - " lw $11, -100(%0) \n" - " sw $2, -128(%1) \n" - " sw $3, -124(%1) \n" - " sw $6, -120(%1) \n" - " sw $7, -116(%1) \n" - " sw $8, -112(%1) \n" - " sw $9, -108(%1) \n" - " sw $10, -104(%1) \n" - " sw $11, -100(%1) \n" -# endif - " daddiu %0, %0, 32 \n" - " daddiu %1, %1, 32 \n" - " bnel %0, %2, 1b \n" - " pref " SB1_PREF_LOAD_STREAMED_HINT ", -32(%0)\n" - " daddiu %0, %0, -128 \n" - " daddiu %1, %1, -128 \n" -#endif -#ifdef CONFIG_64BIT - " ld $8, 0(%0) \n" /* Block copy a cacheline */ - "1: ld $9, 8(%0) \n" - " ld $10, 16(%0) \n" - " ld $11, 24(%0) \n" - " sd $8, 0(%1) \n" - " sd $9, 8(%1) \n" - " sd $10, 16(%1) \n" - " sd $11, 24(%1) \n" -#else - " lw $2, 0(%0) \n" /* Block copy a cacheline */ - "1: lw $3, 4(%0) \n" - " lw $6, 8(%0) \n" - " lw $7, 12(%0) \n" - " lw $8, 16(%0) \n" - " lw $9, 20(%0) \n" - " lw $10, 24(%0) \n" - " lw $11, 28(%0) \n" - " sw $2, 0(%1) \n" - " sw $3, 4(%1) \n" - " sw $6, 8(%1) \n" - " sw $7, 12(%1) \n" - " sw $8, 16(%1) \n" - " sw $9, 20(%1) \n" - " sw $10, 24(%1) \n" - " sw $11, 28(%1) \n" -#endif - " daddiu %0, %0, 32 \n" - " daddiu %1, %1, 32 \n" - " bnel %0, %2, 1b \n" -#ifdef CONFIG_64BIT - " ld $8, 0(%0) \n" -#else - " lw $2, 0(%0) \n" -#endif - " .set pop \n" - : "+r" (src), "+r" (dst) - : "r" (end) -#ifdef CONFIG_64BIT - : "$8","$9","$10","$11","memory"); -#else - : "$2","$3","$6","$7","$8","$9","$10","$11","memory"); -#endif -} - - -#ifdef CONFIG_SIBYTE_DMA_PAGEOPS - -/* - * Pad descriptors to cacheline, since each is exclusively owned by a - * particular CPU. - */ -typedef struct dmadscr_s { - u64 dscr_a; - u64 dscr_b; - u64 pad_a; - u64 pad_b; -} dmadscr_t; - -static dmadscr_t page_descr[DM_NUM_CHANNELS] - __attribute__((aligned(SMP_CACHE_BYTES))); - -void sb1_dma_init(void) -{ - int i; - - for (i = 0; i < DM_NUM_CHANNELS; i++) { - const u64 base_val = CPHYSADDR(&page_descr[i]) | - V_DM_DSCR_BASE_RINGSZ(1); - volatile void *base_reg = - IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE)); - - __raw_writeq(base_val, base_reg); - __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg); - __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg); - } -} - -void clear_page(void *page) -{ - u64 to_phys = CPHYSADDR(page); - unsigned int cpu = smp_processor_id(); - - /* if the page is not in KSEG0, use old way */ - if ((long)KSEGX(page) != (long)CKSEG0) - return clear_page_cpu(page); - - page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM | - M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT; - page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); - __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); - - /* - * Don't really want to do it this way, but there's no - * reliable way to delay completion detection. - */ - while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) - & M_DM_DSCR_BASE_INTERRUPT)) - ; - __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); -} - -void copy_page(void *to, void *from) -{ - u64 from_phys = CPHYSADDR(from); - u64 to_phys = CPHYSADDR(to); - unsigned int cpu = smp_processor_id(); - - /* if any page is not in KSEG0, use old way */ - if ((long)KSEGX(to) != (long)CKSEG0 - || (long)KSEGX(from) != (long)CKSEG0) - return copy_page_cpu(to, from); - - page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST | - M_DM_DSCRA_INTERRUPT; - page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); - __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); - - /* - * Don't really want to do it this way, but there's no - * reliable way to delay completion detection. - */ - while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) - & M_DM_DSCR_BASE_INTERRUPT)) - ; - __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); -} - -#else /* !CONFIG_SIBYTE_DMA_PAGEOPS */ - -void clear_page(void *page) -{ - return clear_page_cpu(page); -} - -void copy_page(void *to, void *from) -{ - return copy_page_cpu(to, from); -} - -#endif /* !CONFIG_SIBYTE_DMA_PAGEOPS */ - -EXPORT_SYMBOL(clear_page); -EXPORT_SYMBOL(copy_page); diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c index 4a3c4919e31..adc6911ba74 100644 --- a/arch/mips/mm/pgtable-32.c +++ b/arch/mips/mm/pgtable-32.c @@ -5,13 +5,13 @@ * * Copyright (C) 2003 by Ralf Baechle */ -#include <linux/config.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/bootmem.h> #include <linux/highmem.h> #include <asm/fixmap.h> #include <asm/pgtable.h> +#include <asm/pgalloc.h> void pgd_init(unsigned long page) { @@ -32,9 +32,10 @@ void pgd_init(unsigned long page) void __init pagetable_init(void) { -#ifdef CONFIG_HIGHMEM unsigned long vaddr; - pgd_t *pgd, *pgd_base; + pgd_t *pgd_base; +#ifdef CONFIG_HIGHMEM + pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -45,15 +46,15 @@ void __init pagetable_init(void) pgd_init((unsigned long)swapper_pg_dir + sizeof(pgd_t) * USER_PTRS_PER_PGD); -#ifdef CONFIG_HIGHMEM pgd_base = swapper_pg_dir; /* * Fixed mappings: */ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - fixrange_init(vaddr, 0, pgd_base); + fixrange_init(vaddr, vaddr + FIXADDR_SIZE, pgd_base); +#ifdef CONFIG_HIGHMEM /* * Permanent kmaps: */ diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c index 44b5e97fff6..e8adc0069d6 100644 --- a/arch/mips/mm/pgtable-64.c +++ b/arch/mips/mm/pgtable-64.c @@ -8,51 +8,104 @@ */ #include <linux/init.h> #include <linux/mm.h> +#include <asm/fixmap.h> #include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> void pgd_init(unsigned long page) { unsigned long *p, *end; + unsigned long entry; - p = (unsigned long *) page; +#ifdef __PAGETABLE_PMD_FOLDED + entry = (unsigned long)invalid_pte_table; +#else + entry = (unsigned long)invalid_pmd_table; +#endif + + p = (unsigned long *) page; end = p + PTRS_PER_PGD; - while (p < end) { - p[0] = (unsigned long) invalid_pmd_table; - p[1] = (unsigned long) invalid_pmd_table; - p[2] = (unsigned long) invalid_pmd_table; - p[3] = (unsigned long) invalid_pmd_table; - p[4] = (unsigned long) invalid_pmd_table; - p[5] = (unsigned long) invalid_pmd_table; - p[6] = (unsigned long) invalid_pmd_table; - p[7] = (unsigned long) invalid_pmd_table; + do { + p[0] = entry; + p[1] = entry; + p[2] = entry; + p[3] = entry; + p[4] = entry; p += 8; - } + p[-3] = entry; + p[-2] = entry; + p[-1] = entry; + } while (p != end); } +#ifndef __PAGETABLE_PMD_FOLDED void pmd_init(unsigned long addr, unsigned long pagetable) { unsigned long *p, *end; - p = (unsigned long *) addr; + p = (unsigned long *) addr; end = p + PTRS_PER_PMD; - while (p < end) { - p[0] = (unsigned long)pagetable; - p[1] = (unsigned long)pagetable; - p[2] = (unsigned long)pagetable; - p[3] = (unsigned long)pagetable; - p[4] = (unsigned long)pagetable; - p[5] = (unsigned long)pagetable; - p[6] = (unsigned long)pagetable; - p[7] = (unsigned long)pagetable; + do { + p[0] = pagetable; + p[1] = pagetable; + p[2] = pagetable; + p[3] = pagetable; + p[4] = pagetable; p += 8; + p[-3] = pagetable; + p[-2] = pagetable; + p[-1] = pagetable; + } while (p != end); +} +#endif + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +void pmdp_splitting_flush(struct vm_area_struct *vma, + unsigned long address, + pmd_t *pmdp) +{ + if (!pmd_trans_splitting(*pmdp)) { + pmd_t pmd = pmd_mksplitting(*pmdp); + set_pmd_at(vma->vm_mm, address, pmdp, pmd); } } +#endif + +pmd_t mk_pmd(struct page *page, pgprot_t prot) +{ + pmd_t pmd; + + pmd_val(pmd) = (page_to_pfn(page) << _PFN_SHIFT) | pgprot_val(prot); + + return pmd; +} + +void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; + flush_tlb_all(); +} + void __init pagetable_init(void) { + unsigned long vaddr; + pgd_t *pgd_base; + /* Initialize the entire pgd. */ pgd_init((unsigned long)swapper_pg_dir); +#ifndef __PAGETABLE_PMD_FOLDED pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table); +#endif + pgd_base = swapper_pg_dir; + /* + * Fixed mappings: + */ + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; + fixrange_init(vaddr, vaddr + FIXADDR_SIZE, pgd_base); } diff --git a/arch/mips/mm/pgtable.c b/arch/mips/mm/pgtable.c deleted file mode 100644 index 3fe94202da8..00000000000 --- a/arch/mips/mm/pgtable.c +++ /dev/null @@ -1,36 +0,0 @@ -#include <linux/config.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/swap.h> - -void show_mem(void) -{ -#ifndef CONFIG_NEED_MULTIPLE_NODES /* XXX(hch): later.. */ - int pfn, total = 0, reserved = 0; - int shared = 0, cached = 0; - int highmem = 0; - struct page *page; - - printk("Mem-info:\n"); - show_free_areas(); - printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - pfn = max_mapnr; - while (pfn-- > 0) { - page = pfn_to_page(pfn); - total++; - if (PageHighMem(page)) - highmem++; - if (PageReserved(page)) - reserved++; - else if (PageSwapCache(page)) - cached++; - else if (page_count(page)) - shared += page_count(page) - 1; - } - printk("%d pages of RAM\n", total); - printk("%d pages of HIGHMEM\n",highmem); - printk("%d reserved pages\n",reserved); - printk("%d pages shared\n",shared); - printk("%d pages swap cached\n",cached); -#endif -} diff --git a/arch/mips/mm/sc-ip22.c b/arch/mips/mm/sc-ip22.c index d236cf8b737..dc7c5a5214a 100644 --- a/arch/mips/mm/sc-ip22.c +++ b/arch/mips/mm/sc-ip22.c @@ -2,7 +2,7 @@ * sc-ip22.c: Indy cache management functions. * * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org), - * derived from r4xx0.c by David S. Miller (dm@engr.sgi.com). + * derived from r4xx0.c by David S. Miller (davem@davemloft.net). */ #include <linux/init.h> #include <linux/kernel.h> @@ -12,7 +12,6 @@ #include <asm/bcache.h> #include <asm/page.h> #include <asm/pgtable.h> -#include <asm/system.h> #include <asm/bootinfo.h> #include <asm/sgi/ip22.h> #include <asm/sgi/mc.h> @@ -160,15 +159,15 @@ static inline int __init indy_sc_probe(void) } /* XXX Check with wje if the Indy caches can differenciate between - writeback + invalidate and just invalidate. */ -struct bcache_ops indy_sc_ops = { + writeback + invalidate and just invalidate. */ +static struct bcache_ops indy_sc_ops = { .bc_enable = indy_sc_enable, .bc_disable = indy_sc_disable, .bc_wback_inv = indy_sc_wback_invalidate, .bc_inv = indy_sc_wback_invalidate }; -void __init indy_sc_init(void) +void indy_sc_init(void) { if (indy_sc_probe()) { indy_sc_enable(); diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c new file mode 100644 index 00000000000..99eb8fabab6 --- /dev/null +++ b/arch/mips/mm/sc-mips.c @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2006 Chris Dearman (chris@mips.com), + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> + +#include <asm/cpu-type.h> +#include <asm/mipsregs.h> +#include <asm/bcache.h> +#include <asm/cacheops.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/mmu_context.h> +#include <asm/r4kcache.h> + +/* + * MIPS32/MIPS64 L2 cache handling + */ + +/* + * Writeback and invalidate the secondary cache before DMA. + */ +static void mips_sc_wback_inv(unsigned long addr, unsigned long size) +{ + blast_scache_range(addr, addr + size); +} + +/* + * Invalidate the secondary cache before DMA. + */ +static void mips_sc_inv(unsigned long addr, unsigned long size) +{ + unsigned long lsize = cpu_scache_line_size(); + unsigned long almask = ~(lsize - 1); + + cache_op(Hit_Writeback_Inv_SD, addr & almask); + cache_op(Hit_Writeback_Inv_SD, (addr + size - 1) & almask); + blast_inv_scache_range(addr, addr + size); +} + +static void mips_sc_enable(void) +{ + /* L2 cache is permanently enabled */ +} + +static void mips_sc_disable(void) +{ + /* L2 cache is permanently enabled */ +} + +static struct bcache_ops mips_sc_ops = { + .bc_enable = mips_sc_enable, + .bc_disable = mips_sc_disable, + .bc_wback_inv = mips_sc_wback_inv, + .bc_inv = mips_sc_inv +}; + +/* + * Check if the L2 cache controller is activated on a particular platform. + * MTI's L2 controller and the L2 cache controller of Broadcom's BMIPS + * cores both use c0_config2's bit 12 as "L2 Bypass" bit, that is the + * cache being disabled. However there is no guarantee for this to be + * true on all platforms. In an act of stupidity the spec defined bits + * 12..15 as implementation defined so below function will eventually have + * to be replaced by a platform specific probe. + */ +static inline int mips_sc_is_activated(struct cpuinfo_mips *c) +{ + unsigned int config2 = read_c0_config2(); + unsigned int tmp; + + /* Check the bypass bit (L2B) */ + switch (current_cpu_type()) { + case CPU_34K: + case CPU_74K: + case CPU_1004K: + case CPU_1074K: + case CPU_INTERAPTIV: + case CPU_PROAPTIV: + case CPU_P5600: + case CPU_BMIPS5000: + if (config2 & (1 << 12)) + return 0; + } + + tmp = (config2 >> 4) & 0x0f; + if (0 < tmp && tmp <= 7) + c->scache.linesz = 2 << tmp; + else + return 0; + return 1; +} + +static inline int __init mips_sc_probe(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + unsigned int config1, config2; + unsigned int tmp; + + /* Mark as not present until probe completed */ + c->scache.flags |= MIPS_CACHE_NOT_PRESENT; + + /* Ignore anything but MIPSxx processors */ + if (!(c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 | + MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2))) + return 0; + + /* Does this MIPS32/MIPS64 CPU have a config2 register? */ + config1 = read_c0_config1(); + if (!(config1 & MIPS_CONF_M)) + return 0; + + config2 = read_c0_config2(); + + if (!mips_sc_is_activated(c)) + return 0; + + tmp = (config2 >> 8) & 0x0f; + if (0 <= tmp && tmp <= 7) + c->scache.sets = 64 << tmp; + else + return 0; + + tmp = (config2 >> 0) & 0x0f; + if (0 <= tmp && tmp <= 7) + c->scache.ways = tmp + 1; + else + return 0; + + c->scache.waysize = c->scache.sets * c->scache.linesz; + c->scache.waybit = __ffs(c->scache.waysize); + + c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT; + + return 1; +} + +int mips_sc_init(void) +{ + int found = mips_sc_probe(); + if (found) { + mips_sc_enable(); + bcops = &mips_sc_ops; + } + return found; +} diff --git a/arch/mips/mm/sc-r5k.c b/arch/mips/mm/sc-r5k.c index d35b6c1103a..0216ed6eaa2 100644 --- a/arch/mips/mm/sc-r5k.c +++ b/arch/mips/mm/sc-r5k.c @@ -1,6 +1,6 @@ /* * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org), - * derived from r4xx0.c by David S. Miller (dm@engr.sgi.com). + * derived from r4xx0.c by David S. Miller (davem@davemloft.net). */ #include <linux/init.h> #include <linux/kernel.h> @@ -12,7 +12,6 @@ #include <asm/cacheops.h> #include <asm/page.h> #include <asm/pgtable.h> -#include <asm/system.h> #include <asm/mmu_context.h> #include <asm/r4kcache.h> @@ -59,7 +58,7 @@ static void r5k_dma_cache_inv_sc(unsigned long addr, unsigned long size) static void r5k_sc_enable(void) { - unsigned long flags; + unsigned long flags; local_irq_save(flags); set_c0_config(R5K_CONF_SE); @@ -69,7 +68,7 @@ static void r5k_sc_enable(void) static void r5k_sc_disable(void) { - unsigned long flags; + unsigned long flags; local_irq_save(flags); blast_r5000_scache(); @@ -99,7 +98,7 @@ static struct bcache_ops r5k_sc_ops = { .bc_inv = r5k_dma_cache_inv_sc }; -void __init r5k_sc_init(void) +void r5k_sc_init(void) { if (r5k_sc_probe()) { r5k_sc_enable(); diff --git a/arch/mips/mm/sc-rm7k.c b/arch/mips/mm/sc-rm7k.c index 9e8ff8badb1..9ac1efcfbcc 100644 --- a/arch/mips/mm/sc-rm7k.c +++ b/arch/mips/mm/sc-rm7k.c @@ -6,15 +6,16 @@ #undef DEBUG -#include <linux/init.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/bitops.h> #include <asm/addrspace.h> #include <asm/bcache.h> #include <asm/cacheops.h> #include <asm/mipsregs.h> #include <asm/processor.h> +#include <asm/sections.h> #include <asm/cacheflush.h> /* for run_uncached() */ /* Primary cache parameters. */ @@ -24,11 +25,15 @@ /* Secondary cache parameters. */ #define scache_size (256*1024) /* Fixed to 256KiB on RM7000 */ +/* Tertiary cache parameters */ +#define tc_lsize 32 + extern unsigned long icache_way_size, dcache_way_size; +static unsigned long tcache_size; #include <asm/r4kcache.h> -int rm7k_tcache_enabled; +static int rm7k_tcache_init; /* * Writeback and invalidate the primary cache dcache before DMA. @@ -43,16 +48,9 @@ static void rm7k_sc_wback_inv(unsigned long addr, unsigned long size) /* Catch bad driver code */ BUG_ON(size == 0); - a = addr & ~(sc_lsize - 1); - end = (addr + size - 1) & ~(sc_lsize - 1); - while (1) { - flush_scache_line(a); /* Hit_Writeback_Inv_SD */ - if (a == end) - break; - a += sc_lsize; - } + blast_scache_range(addr, addr + size); - if (!rm7k_tcache_enabled) + if (!rm7k_tcache_init) return; a = addr & ~(tc_pagesize - 1); @@ -74,16 +72,9 @@ static void rm7k_sc_inv(unsigned long addr, unsigned long size) /* Catch bad driver code */ BUG_ON(size == 0); - a = addr & ~(sc_lsize - 1); - end = (addr + size - 1) & ~(sc_lsize - 1); - while (1) { - invalidate_scache_line(a); /* Hit_Invalidate_SD */ - if (a == end) - break; - a += sc_lsize; - } + blast_inv_scache_range(addr, addr + size); - if (!rm7k_tcache_enabled) + if (!rm7k_tcache_init) return; a = addr & ~(tc_pagesize - 1); @@ -96,10 +87,49 @@ static void rm7k_sc_inv(unsigned long addr, unsigned long size) } } +static void blast_rm7k_tcache(void) +{ + unsigned long start = CKSEG0ADDR(0); + unsigned long end = start + tcache_size; + + write_c0_taglo(0); + + while (start < end) { + cache_op(Page_Invalidate_T, start); + start += tc_pagesize; + } +} + +/* + * This function is executed in uncached address space. + */ +static void __rm7k_tc_enable(void) +{ + int i; + + set_c0_config(RM7K_CONF_TE); + + write_c0_taglo(0); + write_c0_taghi(0); + + for (i = 0; i < tcache_size; i += tc_lsize) + cache_op(Index_Store_Tag_T, CKSEG0ADDR(i)); +} + +static void rm7k_tc_enable(void) +{ + if (read_c0_config() & RM7K_CONF_TE) + return; + + BUG_ON(tcache_size == 0); + + run_uncached(__rm7k_tc_enable); +} + /* * This function is executed in uncached address space. */ -static __init void __rm7k_sc_enable(void) +static void __rm7k_sc_enable(void) { int i; @@ -108,73 +138,132 @@ static __init void __rm7k_sc_enable(void) write_c0_taglo(0); write_c0_taghi(0); - for (i = 0; i < scache_size; i += sc_lsize) { - __asm__ __volatile__ ( - ".set noreorder\n\t" - ".set mips3\n\t" - "cache %1, (%0)\n\t" - ".set mips0\n\t" - ".set reorder" - : - : "r" (CKSEG0ADDR(i)), "i" (Index_Store_Tag_SD)); - } + for (i = 0; i < scache_size; i += sc_lsize) + cache_op(Index_Store_Tag_SD, CKSEG0ADDR(i)); } -static __init void rm7k_sc_enable(void) +static void rm7k_sc_enable(void) { if (read_c0_config() & RM7K_CONF_SE) return; - printk(KERN_INFO "Enabling secondary cache...\n"); + pr_info("Enabling secondary cache...\n"); run_uncached(__rm7k_sc_enable); + + if (rm7k_tcache_init) + rm7k_tc_enable(); +} + +static void rm7k_tc_disable(void) +{ + unsigned long flags; + + local_irq_save(flags); + blast_rm7k_tcache(); + clear_c0_config(RM7K_CONF_TE); + local_irq_save(flags); } static void rm7k_sc_disable(void) { clear_c0_config(RM7K_CONF_SE); + + if (rm7k_tcache_init) + rm7k_tc_disable(); } -struct bcache_ops rm7k_sc_ops = { +static struct bcache_ops rm7k_sc_ops = { .bc_enable = rm7k_sc_enable, .bc_disable = rm7k_sc_disable, .bc_wback_inv = rm7k_sc_wback_inv, .bc_inv = rm7k_sc_inv }; -void __init rm7k_sc_init(void) +/* + * This is a probing function like the one found in c-r4k.c, we look for the + * wrap around point with different addresses. + */ +static void __probe_tcache(void) { + unsigned long flags, addr, begin, end, pow2; + + begin = (unsigned long) &_stext; + begin &= ~((8 * 1024 * 1024) - 1); + end = begin + (8 * 1024 * 1024); + + local_irq_save(flags); + + set_c0_config(RM7K_CONF_TE); + + /* Fill size-multiple lines with a valid tag */ + pow2 = (256 * 1024); + for (addr = begin; addr <= end; addr = (begin + pow2)) { + unsigned long *p = (unsigned long *) addr; + __asm__ __volatile__("nop" : : "r" (*p)); + pow2 <<= 1; + } + + /* Load first line with a 0 tag, to check after */ + write_c0_taglo(0); + write_c0_taghi(0); + cache_op(Index_Store_Tag_T, begin); + + /* Look for the wrap-around */ + pow2 = (512 * 1024); + for (addr = begin + (512 * 1024); addr <= end; addr = begin + pow2) { + cache_op(Index_Load_Tag_T, addr); + if (!read_c0_taglo()) + break; + pow2 <<= 1; + } + + addr -= begin; + tcache_size = addr; + + clear_c0_config(RM7K_CONF_TE); + + local_irq_restore(flags); +} + +void rm7k_sc_init(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; unsigned int config = read_c0_config(); if ((config & RM7K_CONF_SC)) return; + c->scache.linesz = sc_lsize; + c->scache.ways = 4; + c->scache.waybit= __ffs(scache_size / c->scache.ways); + c->scache.waysize = scache_size / c->scache.ways; + c->scache.sets = scache_size / (c->scache.linesz * c->scache.ways); printk(KERN_INFO "Secondary cache size %dK, linesize %d bytes.\n", (scache_size >> 10), sc_lsize); if (!(config & RM7K_CONF_SE)) rm7k_sc_enable(); + bcops = &rm7k_sc_ops; + /* * While we're at it let's deal with the tertiary cache. */ - if (!(config & RM7K_CONF_TC)) { - - /* - * We can't enable the L3 cache yet. There may be board-specific - * magic necessary to turn it on, and blindly asking the CPU to - * start using it would may give cache errors. - * - * Also, board-specific knowledge may allow us to use the - * CACHE Flash_Invalidate_T instruction if the tag RAM supports - * it, and may specify the size of the L3 cache so we don't have - * to probe it. - */ - printk(KERN_INFO "Tertiary cache present, %s enabled\n", - (config & RM7K_CONF_TE) ? "already" : "not (yet)"); - - if ((config & RM7K_CONF_TE)) - rm7k_tcache_enabled = 1; - } - bcops = &rm7k_sc_ops; + rm7k_tcache_init = 0; + tcache_size = 0; + + if (config & RM7K_CONF_TC) + return; + + /* + * No efficient way to ask the hardware for the size of the tcache, + * so must probe for it. + */ + run_uncached(__probe_tcache); + rm7k_tc_enable(); + rm7k_tcache_init = 1; + c->tcache.linesz = tc_lsize; + c->tcache.ways = 1; + pr_info("Tertiary cache size %ldK.\n", (tcache_size >> 10)); } diff --git a/arch/mips/mm/tlb-andes.c b/arch/mips/mm/tlb-andes.c deleted file mode 100644 index 3f422a849c4..00000000000 --- a/arch/mips/mm/tlb-andes.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1997, 1998, 1999 Ralf Baechle (ralf@gnu.org) - * Copyright (C) 1999 Silicon Graphics, Inc. - * Copyright (C) 2000 Kanoj Sarcar (kanoj@sgi.com) - */ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/system.h> -#include <asm/mmu_context.h> - -extern void build_tlb_refill_handler(void); - -#define NTLB_ENTRIES 64 -#define NTLB_ENTRIES_HALF 32 - -void local_flush_tlb_all(void) -{ - unsigned long flags; - unsigned long old_ctx; - unsigned long entry; - - local_irq_save(flags); - /* Save old context and create impossible VPN2 value */ - old_ctx = read_c0_entryhi() & ASID_MASK; - write_c0_entryhi(CKSEG0); - write_c0_entrylo0(0); - write_c0_entrylo1(0); - - entry = read_c0_wired(); - - /* Blast 'em all away. */ - while (entry < NTLB_ENTRIES) { - write_c0_index(entry); - tlb_write_indexed(); - entry++; - } - write_c0_entryhi(old_ctx); - local_irq_restore(flags); -} - -void local_flush_tlb_mm(struct mm_struct *mm) -{ - int cpu = smp_processor_id(); - if (cpu_context(cpu, mm) != 0) { - drop_mmu_context(mm,cpu); - } -} - -void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - struct mm_struct *mm = vma->vm_mm; - int cpu = smp_processor_id(); - - if (cpu_context(cpu, mm) != 0) { - unsigned long flags; - int size; - - local_irq_save(flags); - size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - size = (size + 1) >> 1; - if (size <= NTLB_ENTRIES_HALF) { - int oldpid = (read_c0_entryhi() & ASID_MASK); - int newpid = (cpu_context(smp_processor_id(), mm) - & ASID_MASK); - - start &= (PAGE_MASK << 1); - end += ((PAGE_SIZE << 1) - 1); - end &= (PAGE_MASK << 1); - while(start < end) { - int idx; - - write_c0_entryhi(start | newpid); - start += (PAGE_SIZE << 1); - tlb_probe(); - idx = read_c0_index(); - write_c0_entrylo0(0); - write_c0_entrylo1(0); - write_c0_entryhi(CKSEG0); - if(idx < 0) - continue; - tlb_write_indexed(); - } - write_c0_entryhi(oldpid); - } else { - drop_mmu_context(mm, cpu); - } - local_irq_restore(flags); - } -} - -void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - unsigned long flags; - int size; - - size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - size = (size + 1) >> 1; - - local_irq_save(flags); - if (size <= NTLB_ENTRIES_HALF) { - int pid = read_c0_entryhi(); - - start &= (PAGE_MASK << 1); - end += ((PAGE_SIZE << 1) - 1); - end &= (PAGE_MASK << 1); - - while (start < end) { - int idx; - - write_c0_entryhi(start); - start += (PAGE_SIZE << 1); - tlb_probe(); - idx = read_c0_index(); - write_c0_entrylo0(0); - write_c0_entrylo1(0); - write_c0_entryhi(CKSEG0 + (idx << (PAGE_SHIFT+1))); - if (idx < 0) - continue; - tlb_write_indexed(); - } - write_c0_entryhi(pid); - } else { - local_flush_tlb_all(); - } - local_irq_restore(flags); -} - -void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) -{ - if (cpu_context(smp_processor_id(), vma->vm_mm) != 0) { - unsigned long flags; - int oldpid, newpid, idx; - - newpid = (cpu_context(smp_processor_id(), vma->vm_mm) & - ASID_MASK); - page &= (PAGE_MASK << 1); - local_irq_save(flags); - oldpid = (read_c0_entryhi() & ASID_MASK); - write_c0_entryhi(page | newpid); - tlb_probe(); - idx = read_c0_index(); - write_c0_entrylo0(0); - write_c0_entrylo1(0); - write_c0_entryhi(CKSEG0); - if (idx < 0) - goto finish; - tlb_write_indexed(); - - finish: - write_c0_entryhi(oldpid); - local_irq_restore(flags); - } -} - -/* - * This one is only used for pages with the global bit set so we don't care - * much about the ASID. - */ -void local_flush_tlb_one(unsigned long page) -{ - unsigned long flags; - int oldpid, idx; - - local_irq_save(flags); - page &= (PAGE_MASK << 1); - oldpid = read_c0_entryhi() & 0xff; - write_c0_entryhi(page); - tlb_probe(); - idx = read_c0_index(); - write_c0_entrylo0(0); - write_c0_entrylo1(0); - if (idx >= 0) { - /* Make sure all entries differ. */ - write_c0_entryhi(CKSEG0+(idx<<(PAGE_SHIFT+1))); - tlb_write_indexed(); - } - write_c0_entryhi(oldpid); - - local_irq_restore(flags); -} - -/* XXX Simplify this. On the R10000 writing a TLB entry for an virtual - address that already exists will overwrite the old entry and not result - in TLB malfunction or TLB shutdown. */ -void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) -{ - unsigned long flags; - pgd_t *pgdp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - int idx, pid; - - /* - * Handle debugger faulting in for debugee. - */ - if (current->active_mm != vma->vm_mm) - return; - - pid = read_c0_entryhi() & ASID_MASK; - - if ((pid != (cpu_context(smp_processor_id(), vma->vm_mm) & ASID_MASK)) - || (cpu_context(smp_processor_id(), vma->vm_mm) == 0)) { - printk(KERN_WARNING - "%s: Wheee, bogus tlbpid mmpid=%d tlbpid=%d\n", - __FUNCTION__, (int) (cpu_context(smp_processor_id(), - vma->vm_mm) & ASID_MASK), pid); - } - - local_irq_save(flags); - address &= (PAGE_MASK << 1); - write_c0_entryhi(address | (pid)); - pgdp = pgd_offset(vma->vm_mm, address); - tlb_probe(); - pudp = pud_offset(pgdp, address); - pmdp = pmd_offset(pudp, address); - idx = read_c0_index(); - ptep = pte_offset_map(pmdp, address); - write_c0_entrylo0(pte_val(*ptep++) >> 6); - write_c0_entrylo1(pte_val(*ptep) >> 6); - write_c0_entryhi(address | pid); - if (idx < 0) { - tlb_write_random(); - } else { - tlb_write_indexed(); - } - write_c0_entryhi(pid); - local_irq_restore(flags); -} - -void __init tlb_init(void) -{ - /* - * You should never change this register: - * - On R4600 1.7 the tlbp never hits for pages smaller than - * the value in the c0_pagemask register. - * - The entire mm handling assumes the c0_pagemask register to - * be set for 4kb pages. - */ - write_c0_pagemask(PM_4K); - write_c0_wired(0); - write_c0_framemask(0); - - /* From this point on the ARC firmware is dead. */ - local_flush_tlb_all(); - - /* Did I tell you that ARC SUCKS? */ - - build_tlb_refill_handler(); -} diff --git a/arch/mips/mm/tlb-funcs.S b/arch/mips/mm/tlb-funcs.S new file mode 100644 index 00000000000..a5427c6e975 --- /dev/null +++ b/arch/mips/mm/tlb-funcs.S @@ -0,0 +1,39 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Micro-assembler generated tlb handler functions. + * + * Copyright (C) 2013 Broadcom Corporation. + * + * Based on mm/page-funcs.c + * Copyright (C) 2012 MIPS Technologies, Inc. + * Copyright (C) 2012 Ralf Baechle <ralf@linux-mips.org> + */ +#include <asm/asm.h> +#include <asm/regdef.h> + +#define FASTPATH_SIZE 128 + +EXPORT(tlbmiss_handler_setup_pgd_start) +LEAF(tlbmiss_handler_setup_pgd) +1: j 1b /* Dummy, will be replaced. */ + .space 64 +END(tlbmiss_handler_setup_pgd) +EXPORT(tlbmiss_handler_setup_pgd_end) + +LEAF(handle_tlbm) + .space FASTPATH_SIZE * 4 +END(handle_tlbm) +EXPORT(handle_tlbm_end) + +LEAF(handle_tlbs) + .space FASTPATH_SIZE * 4 +END(handle_tlbs) +EXPORT(handle_tlbs_end) + +LEAF(handle_tlbl) + .space FASTPATH_SIZE * 4 +END(handle_tlbl) +EXPORT(handle_tlbl_end) diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c index 7948e9a5e37..d657493ef56 100644 --- a/arch/mips/mm/tlb-r3k.c +++ b/arch/mips/mm/tlb-r3k.c @@ -1,7 +1,7 @@ /* * r2300.c: R2000 and R3000 specific mmu/cache code. * - * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * Copyright (C) 1996 David S. Miller (davem@davemloft.net) * * with a lot of changes to make this thing work for R3000s * Tx39XX R4k style caches added. HK @@ -10,15 +10,15 @@ * Copyright (C) 2002 Ralf Baechle * Copyright (C) 2002 Maciej W. Rozycki */ -#include <linux/init.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/smp.h> #include <linux/mm.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/mmu_context.h> -#include <asm/system.h> +#include <asm/tlbmisc.h> #include <asm/isadep.h> #include <asm/io.h> #include <asm/bootinfo.h> @@ -82,8 +82,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, int cpu = smp_processor_id(); if (cpu_context(cpu, mm) != 0) { - unsigned long flags; - int size; + unsigned long size, flags; #ifdef DEBUG_TLB printk("[tlbrange<%lu,0x%08lx,0x%08lx>]", @@ -121,8 +120,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) { - unsigned long flags; - int size; + unsigned long size, flags; #ifdef DEBUG_TLB printk("[tlbrange<%lu,0x%08lx,0x%08lx>]", start, end); @@ -224,8 +222,8 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) local_irq_restore(flags); } -void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, - unsigned long entryhi, unsigned long pagemask) +void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, + unsigned long entryhi, unsigned long pagemask) { unsigned long flags; unsigned long old_ctx; @@ -246,10 +244,6 @@ void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, old_pagemask = read_c0_pagemask(); w = read_c0_wired(); write_c0_wired(w + 1); - if (read_c0_wired() != w + 1) { - printk("[tlbwired] No WIRED reg?\n"); - return; - } write_c0_index(w << 8); write_c0_pagemask(pagemask); write_c0_entryhi(entryhi); @@ -281,7 +275,7 @@ void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, } } -void __init tlb_init(void) +void tlb_init(void) { local_flush_tlb_all(); diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 8297970f0bb..3914e27456f 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -3,40 +3,56 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * Copyright (C) 1996 David S. Miller (davem@davemloft.net) * Copyright (C) 1997, 1998, 1999, 2000 Ralf Baechle ralf@gnu.org * Carsten Langgaard, carstenl@mips.com * Copyright (C) 2002 MIPS Technologies, Inc. All rights reserved. */ -#include <linux/config.h> +#include <linux/cpu_pm.h> #include <linux/init.h> #include <linux/sched.h> +#include <linux/smp.h> #include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/module.h> #include <asm/cpu.h> +#include <asm/cpu-type.h> #include <asm/bootinfo.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> -#include <asm/system.h> +#include <asm/tlb.h> +#include <asm/tlbmisc.h> extern void build_tlb_refill_handler(void); /* - * Make sure all entries differ. If they're not different - * MIPS32 will take revenge ... + * LOONGSON2/3 has a 4 entry itlb which is a subset of dtlb, + * unfortunately, itlb is not totally transparent to software. */ -#define UNIQUE_ENTRYHI(idx) (CKSEG0 + ((idx) << (PAGE_SHIFT + 1))) +static inline void flush_itlb(void) +{ + switch (current_cpu_type()) { + case CPU_LOONGSON2: + case CPU_LOONGSON3: + write_c0_diag(4); + break; + default: + break; + } +} -/* CP0 hazard avoidance. */ -#define BARRIER __asm__ __volatile__(".set noreorder\n\t" \ - "nop; nop; nop; nop; nop; nop;\n\t" \ - ".set reorder\n\t") +static inline void flush_itlb_vm(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_EXEC) + flush_itlb(); +} void local_flush_tlb_all(void) { unsigned long flags; unsigned long old_ctx; - int entry; + int entry, ftlbhighset; local_irq_save(flags); /* Save old context and create impossible VPN2 value */ @@ -47,18 +63,37 @@ void local_flush_tlb_all(void) entry = read_c0_wired(); /* Blast 'em all away. */ - while (entry < current_cpu_data.tlbsize) { - /* Make sure all entries differ. */ - write_c0_entryhi(UNIQUE_ENTRYHI(entry)); - write_c0_index(entry); - mtc0_tlbw_hazard(); - tlb_write_indexed(); - entry++; + if (cpu_has_tlbinv) { + if (current_cpu_data.tlbsizevtlb) { + write_c0_index(0); + mtc0_tlbw_hazard(); + tlbinvf(); /* invalidate VTLB */ + } + ftlbhighset = current_cpu_data.tlbsizevtlb + + current_cpu_data.tlbsizeftlbsets; + for (entry = current_cpu_data.tlbsizevtlb; + entry < ftlbhighset; + entry++) { + write_c0_index(entry); + mtc0_tlbw_hazard(); + tlbinvf(); /* invalidate one FTLB set */ + } + } else { + while (entry < current_cpu_data.tlbsize) { + /* Make sure all entries differ. */ + write_c0_entryhi(UNIQUE_ENTRYHI(entry)); + write_c0_index(entry); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + entry++; + } } tlbw_use_hazard(); write_c0_entryhi(old_ctx); + flush_itlb(); local_irq_restore(flags); } +EXPORT_SYMBOL(local_flush_tlb_all); /* All entries common to a mm share an asid. To effectively flush these entries, we just bump the asid. */ @@ -84,19 +119,18 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, int cpu = smp_processor_id(); if (cpu_context(cpu, mm) != 0) { - unsigned long flags; - int size; + unsigned long size, flags; - size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - size = (size + 1) >> 1; local_irq_save(flags); - if (size <= current_cpu_data.tlbsize/2) { + start = round_down(start, PAGE_SIZE << 1); + end = round_up(end, PAGE_SIZE << 1); + size = (end - start) >> (PAGE_SHIFT + 1); + if (size <= (current_cpu_data.tlbsizeftlbsets ? + current_cpu_data.tlbsize / 8 : + current_cpu_data.tlbsize / 2)) { int oldpid = read_c0_entryhi(); int newpid = cpu_asid(cpu, mm); - start &= (PAGE_MASK << 1); - end += ((PAGE_SIZE << 1) - 1); - end &= (PAGE_MASK << 1); while (start < end) { int idx; @@ -104,7 +138,7 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, start += (PAGE_SIZE << 1); mtc0_tlbw_hazard(); tlb_probe(); - BARRIER; + tlb_probe_hazard(); idx = read_c0_index(); write_c0_entrylo0(0); write_c0_entrylo1(0); @@ -120,19 +154,21 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, } else { drop_mmu_context(mm, cpu); } + flush_itlb(); local_irq_restore(flags); } } void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) { - unsigned long flags; - int size; + unsigned long size, flags; + local_irq_save(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; - local_irq_save(flags); - if (size <= current_cpu_data.tlbsize / 2) { + if (size <= (current_cpu_data.tlbsizeftlbsets ? + current_cpu_data.tlbsize / 8 : + current_cpu_data.tlbsize / 2)) { int pid = read_c0_entryhi(); start &= (PAGE_MASK << 1); @@ -146,7 +182,7 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) start += (PAGE_SIZE << 1); mtc0_tlbw_hazard(); tlb_probe(); - BARRIER; + tlb_probe_hazard(); idx = read_c0_index(); write_c0_entrylo0(0); write_c0_entrylo1(0); @@ -162,6 +198,7 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) } else { local_flush_tlb_all(); } + flush_itlb(); local_irq_restore(flags); } @@ -180,7 +217,7 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) write_c0_entryhi(page | newpid); mtc0_tlbw_hazard(); tlb_probe(); - BARRIER; + tlb_probe_hazard(); idx = read_c0_index(); write_c0_entrylo0(0); write_c0_entrylo1(0); @@ -194,6 +231,7 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) finish: write_c0_entryhi(oldpid); + flush_itlb_vm(vma); local_irq_restore(flags); } } @@ -213,7 +251,7 @@ void local_flush_tlb_one(unsigned long page) write_c0_entryhi(page); mtc0_tlbw_hazard(); tlb_probe(); - BARRIER; + tlb_probe_hazard(); idx = read_c0_index(); write_c0_entrylo0(0); write_c0_entrylo1(0); @@ -225,7 +263,7 @@ void local_flush_tlb_one(unsigned long page) tlbw_use_hazard(); } write_c0_entryhi(oldpid); - + flush_itlb(); local_irq_restore(flags); } @@ -257,65 +295,53 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) pgdp = pgd_offset(vma->vm_mm, address); mtc0_tlbw_hazard(); tlb_probe(); - BARRIER; + tlb_probe_hazard(); pudp = pud_offset(pgdp, address); pmdp = pmd_offset(pudp, address); idx = read_c0_index(); - ptep = pte_offset_map(pmdp, address); +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + /* this could be a huge page */ + if (pmd_huge(*pmdp)) { + unsigned long lo; + write_c0_pagemask(PM_HUGE_MASK); + ptep = (pte_t *)pmdp; + lo = pte_to_entrylo(pte_val(*ptep)); + write_c0_entrylo0(lo); + write_c0_entrylo1(lo + (HPAGE_SIZE >> 7)); -#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32_R1) - write_c0_entrylo0(ptep->pte_high); - ptep++; - write_c0_entrylo1(ptep->pte_high); -#else - write_c0_entrylo0(pte_val(*ptep++) >> 6); - write_c0_entrylo1(pte_val(*ptep) >> 6); + mtc0_tlbw_hazard(); + if (idx < 0) + tlb_write_random(); + else + tlb_write_indexed(); + tlbw_use_hazard(); + write_c0_pagemask(PM_DEFAULT_MASK); + } else #endif - mtc0_tlbw_hazard(); - if (idx < 0) - tlb_write_random(); - else - tlb_write_indexed(); - tlbw_use_hazard(); - local_irq_restore(flags); -} + { + ptep = pte_offset_map(pmdp, address); -#if 0 -static void r4k_update_mmu_cache_hwbug(struct vm_area_struct * vma, - unsigned long address, pte_t pte) -{ - unsigned long flags; - unsigned int asid; - pgd_t *pgdp; - pmd_t *pmdp; - pte_t *ptep; - int idx; - - local_irq_save(flags); - address &= (PAGE_MASK << 1); - asid = read_c0_entryhi() & ASID_MASK; - write_c0_entryhi(address | asid); - pgdp = pgd_offset(vma->vm_mm, address); - mtc0_tlbw_hazard(); - tlb_probe(); - BARRIER; - pmdp = pmd_offset(pgdp, address); - idx = read_c0_index(); - ptep = pte_offset_map(pmdp, address); - write_c0_entrylo0(pte_val(*ptep++) >> 6); - write_c0_entrylo1(pte_val(*ptep) >> 6); - mtc0_tlbw_hazard(); - if (idx < 0) - tlb_write_random(); - else - tlb_write_indexed(); +#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32) + write_c0_entrylo0(ptep->pte_high); + ptep++; + write_c0_entrylo1(ptep->pte_high); +#else + write_c0_entrylo0(pte_to_entrylo(pte_val(*ptep++))); + write_c0_entrylo1(pte_to_entrylo(pte_val(*ptep))); +#endif + mtc0_tlbw_hazard(); + if (idx < 0) + tlb_write_random(); + else + tlb_write_indexed(); + } tlbw_use_hazard(); + flush_itlb_vm(vma); local_irq_restore(flags); } -#endif -void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, - unsigned long entryhi, unsigned long pagemask) +void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, + unsigned long entryhi, unsigned long pagemask) { unsigned long flags; unsigned long wired; @@ -329,7 +355,7 @@ void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, wired = read_c0_wired(); write_c0_wired(wired + 1); write_c0_index(wired); - BARRIER; + tlbw_use_hazard(); /* What is the hazard here? */ write_c0_pagemask(pagemask); write_c0_entryhi(entryhi); write_c0_entrylo0(entrylo0); @@ -339,93 +365,114 @@ void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, tlbw_use_hazard(); write_c0_entryhi(old_ctx); - BARRIER; + tlbw_use_hazard(); /* What is the hazard here? */ write_c0_pagemask(old_pagemask); local_flush_tlb_all(); local_irq_restore(flags); } -/* - * Used for loading TLB entries before trap_init() has started, when we - * don't actually want to add a wired entry which remains throughout the - * lifetime of the system - */ - -static int temp_tlb_entry __initdata; +#ifdef CONFIG_TRANSPARENT_HUGEPAGE -__init int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1, - unsigned long entryhi, unsigned long pagemask) +int __init has_transparent_hugepage(void) { - int ret = 0; + unsigned int mask; unsigned long flags; - unsigned long wired; - unsigned long old_pagemask; - unsigned long old_ctx; local_irq_save(flags); - /* Save old context and create impossible VPN2 value */ - old_ctx = read_c0_entryhi(); - old_pagemask = read_c0_pagemask(); - wired = read_c0_wired(); - if (--temp_tlb_entry < wired) { - printk(KERN_WARNING - "No TLB space left for add_temporary_entry\n"); - ret = -ENOSPC; - goto out; - } - - write_c0_index(temp_tlb_entry); - write_c0_pagemask(pagemask); - write_c0_entryhi(entryhi); - write_c0_entrylo0(entrylo0); - write_c0_entrylo1(entrylo1); - mtc0_tlbw_hazard(); - tlb_write_indexed(); - tlbw_use_hazard(); + write_c0_pagemask(PM_HUGE_MASK); + back_to_back_c0_hazard(); + mask = read_c0_pagemask(); + write_c0_pagemask(PM_DEFAULT_MASK); - write_c0_entryhi(old_ctx); - write_c0_pagemask(old_pagemask); -out: local_irq_restore(flags); - return ret; -} - -static void __init probe_tlb(unsigned long config) -{ - struct cpuinfo_mips *c = ¤t_cpu_data; - unsigned int reg; - /* - * If this isn't a MIPS32 / MIPS64 compliant CPU. Config 1 register - * is not supported, we assume R4k style. Cpu probing already figured - * out the number of tlb entries. - */ - if ((c->processor_id & 0xff0000) == PRID_COMP_LEGACY) - return; + return mask == PM_HUGE_MASK; +} - reg = read_c0_config1(); - if (!((config >> 7) & 3)) - panic("No TLB present"); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - c->tlbsize = ((reg >> 25) & 0x3f) + 1; +static int ntlb; +static int __init set_ntlb(char *str) +{ + get_option(&str, &ntlb); + return 1; } -void __init tlb_init(void) -{ - unsigned int config = read_c0_config(); +__setup("ntlb=", set_ntlb); +/* + * Configure TLB (for init or after a CPU has been powered off). + */ +static void r4k_tlb_configure(void) +{ /* * You should never change this register: * - On R4600 1.7 the tlbp never hits for pages smaller than * the value in the c0_pagemask register. * - The entire mm handling assumes the c0_pagemask register to - * be set for 4kb pages. + * be set to fixed-size pages. */ - probe_tlb(config); write_c0_pagemask(PM_DEFAULT_MASK); write_c0_wired(0); - temp_tlb_entry = current_cpu_data.tlbsize - 1; + if (current_cpu_type() == CPU_R10000 || + current_cpu_type() == CPU_R12000 || + current_cpu_type() == CPU_R14000) + write_c0_framemask(0); + + if (cpu_has_rixi) { + /* + * Enable the no read, no exec bits, and enable large virtual + * address. + */ + u32 pg = PG_RIE | PG_XIE; +#ifdef CONFIG_64BIT + pg |= PG_ELPA; +#endif + write_c0_pagegrain(pg); + } + + /* From this point on the ARC firmware is dead. */ local_flush_tlb_all(); + /* Did I tell you that ARC SUCKS? */ +} + +void tlb_init(void) +{ + r4k_tlb_configure(); + + if (ntlb) { + if (ntlb > 1 && ntlb <= current_cpu_data.tlbsize) { + int wired = current_cpu_data.tlbsize - ntlb; + write_c0_wired(wired); + write_c0_index(wired-1); + printk("Restricting TLB to %d entries\n", ntlb); + } else + printk("Ignoring invalid argument ntlb=%d\n", ntlb); + } + build_tlb_refill_handler(); } + +static int r4k_tlb_pm_notifier(struct notifier_block *self, unsigned long cmd, + void *v) +{ + switch (cmd) { + case CPU_PM_ENTER_FAILED: + case CPU_PM_EXIT: + r4k_tlb_configure(); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block r4k_tlb_pm_notifier_block = { + .notifier_call = r4k_tlb_pm_notifier, +}; + +static int __init r4k_tlb_init_pm(void) +{ + return cpu_pm_register_notifier(&r4k_tlb_pm_notifier_block); +} +arch_initcall(r4k_tlb_init_pm); diff --git a/arch/mips/mm/tlb-r8k.c b/arch/mips/mm/tlb-r8k.c index 1bfb09198ce..138a2ec7cc6 100644 --- a/arch/mips/mm/tlb-r8k.c +++ b/arch/mips/mm/tlb-r8k.c @@ -3,21 +3,19 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) + * Copyright (C) 1996 David S. Miller (davem@davemloft.net) * Copyright (C) 1997, 1998, 1999, 2000 Ralf Baechle ralf@gnu.org * Carsten Langgaard, carstenl@mips.com * Copyright (C) 2002 MIPS Technologies, Inc. All rights reserved. */ -#include <linux/config.h> -#include <linux/init.h> #include <linux/sched.h> +#include <linux/smp.h> #include <linux/mm.h> #include <asm/cpu.h> #include <asm/bootinfo.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> -#include <asm/system.h> extern void build_tlb_refill_handler(void); @@ -57,7 +55,7 @@ void local_flush_tlb_mm(struct mm_struct *mm) int cpu = smp_processor_id(); if (cpu_context(cpu, mm) != 0) - drop_mmu_context(mm,cpu); + drop_mmu_context(mm, cpu); } void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, @@ -112,8 +110,7 @@ out_restore: /* Usable for KV1 addresses only! */ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) { - unsigned long flags; - int size; + unsigned long size, flags; size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; @@ -215,14 +212,14 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) local_irq_restore(flags); } -static void __init probe_tlb(unsigned long config) +static void probe_tlb(unsigned long config) { struct cpuinfo_mips *c = ¤t_cpu_data; c->tlbsize = 3 * 128; /* 3 sets each 128 entries */ } -void __init tlb_init(void) +void tlb_init(void) { unsigned int config = read_c0_config(); unsigned long status; diff --git a/arch/mips/mm/tlbex-fault.S b/arch/mips/mm/tlbex-fault.S index 9e7f4175b49..318855eb5f8 100644 --- a/arch/mips/mm/tlbex-fault.S +++ b/arch/mips/mm/tlbex-fault.S @@ -7,7 +7,6 @@ * Copyright (C) 1999 Silicon Graphics, Inc. */ #include <asm/mipsregs.h> -#include <asm/page.h> #include <asm/regdef.h> #include <asm/stackframe.h> @@ -19,8 +18,8 @@ move a0, sp REG_S a2, PT_BVADDR(sp) li a1, \write - jal do_page_fault - j ret_from_exception + PTR_LA ra, ret_from_exception + j do_page_fault END(tlb_do_page_fault_\write) .endm diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 0f9485806ba..e80e10bafc8 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -5,704 +5,398 @@ * * Synthesize TLB refill handlers at runtime. * - * Copyright (C) 2004,2005 by Thiemo Seufer - * Copyright (C) 2005 Maciej W. Rozycki + * Copyright (C) 2004, 2005, 2006, 2008 Thiemo Seufer + * Copyright (C) 2005, 2007, 2008, 2009 Maciej W. Rozycki + * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2008, 2009 Cavium Networks, Inc. + * Copyright (C) 2011 MIPS Technologies, Inc. + * + * ... and the days got worse and worse and now you see + * I've gone completly out of my mind. + * + * They're coming to take me a away haha + * they're coming to take me a away hoho hihi haha + * to the funny farm where code is beautiful all the time ... + * + * (Condolences to Napoleon XIV) */ -#include <stdarg.h> - -#include <linux/config.h> -#include <linux/mm.h> +#include <linux/bug.h> #include <linux/kernel.h> #include <linux/types.h> +#include <linux/smp.h> #include <linux/string.h> -#include <linux/init.h> +#include <linux/cache.h> -#include <asm/pgtable.h> #include <asm/cacheflush.h> -#include <asm/mmu_context.h> -#include <asm/inst.h> -#include <asm/elf.h> -#include <asm/smp.h> +#include <asm/cpu-type.h> +#include <asm/pgtable.h> #include <asm/war.h> +#include <asm/uasm.h> +#include <asm/setup.h> + +/* + * TLB load/store/modify handlers. + * + * Only the fastpath gets synthesized at runtime, the slowpath for + * do_page_fault remains normal asm. + */ +extern void tlb_do_page_fault_0(void); +extern void tlb_do_page_fault_1(void); + +struct work_registers { + int r1; + int r2; + int r3; +}; -/* #define DEBUG_TLB */ +struct tlb_reg_save { + unsigned long a; + unsigned long b; +} ____cacheline_aligned_in_smp; -static __init int __attribute__((unused)) r45k_bvahwbug(void) +static struct tlb_reg_save handler_reg_save[NR_CPUS]; + +static inline int r45k_bvahwbug(void) { /* XXX: We should probe for the presence of this bug, but we don't. */ return 0; } -static __init int __attribute__((unused)) r4k_250MHZhwbug(void) +static inline int r4k_250MHZhwbug(void) { /* XXX: We should probe for the presence of this bug, but we don't. */ return 0; } -static __init int __attribute__((unused)) bcm1250_m3_war(void) +static inline int __maybe_unused bcm1250_m3_war(void) { return BCM1250_M3_WAR; } -static __init int __attribute__((unused)) r10000_llsc_war(void) +static inline int __maybe_unused r10000_llsc_war(void) { return R10000_LLSC_WAR; } -/* - * A little micro-assembler, intended for TLB refill handler - * synthesizing. It is intentionally kept simple, does only support - * a subset of instructions, and does not try to hide pipeline effects - * like branch delay slots. - */ - -enum fields -{ - RS = 0x001, - RT = 0x002, - RD = 0x004, - RE = 0x008, - SIMM = 0x010, - UIMM = 0x020, - BIMM = 0x040, - JIMM = 0x080, - FUNC = 0x100, -}; - -#define OP_MASK 0x2f -#define OP_SH 26 -#define RS_MASK 0x1f -#define RS_SH 21 -#define RT_MASK 0x1f -#define RT_SH 16 -#define RD_MASK 0x1f -#define RD_SH 11 -#define RE_MASK 0x1f -#define RE_SH 6 -#define IMM_MASK 0xffff -#define IMM_SH 0 -#define JIMM_MASK 0x3ffffff -#define JIMM_SH 0 -#define FUNC_MASK 0x2f -#define FUNC_SH 0 - -enum opcode { - insn_invalid, - insn_addu, insn_addiu, insn_and, insn_andi, insn_beq, - insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl, - insn_bne, insn_daddu, insn_daddiu, insn_dmfc0, insn_dmtc0, - insn_dsll, insn_dsll32, insn_dsra, insn_dsrl, - insn_dsubu, insn_eret, insn_j, insn_jal, insn_jr, insn_ld, - insn_ll, insn_lld, insn_lui, insn_lw, insn_mfc0, insn_mtc0, - insn_ori, insn_rfe, insn_sc, insn_scd, insn_sd, insn_sll, - insn_sra, insn_srl, insn_subu, insn_sw, insn_tlbp, insn_tlbwi, - insn_tlbwr, insn_xor, insn_xori -}; - -struct insn { - enum opcode opcode; - u32 match; - enum fields fields; -}; - -/* This macro sets the non-variable bits of an instruction. */ -#define M(a, b, c, d, e, f) \ - ((a) << OP_SH \ - | (b) << RS_SH \ - | (c) << RT_SH \ - | (d) << RD_SH \ - | (e) << RE_SH \ - | (f) << FUNC_SH) - -static __initdata struct insn insn_table[] = { - { insn_addiu, M(addiu_op,0,0,0,0,0), RS | RT | SIMM }, - { insn_addu, M(spec_op,0,0,0,0,addu_op), RS | RT | RD }, - { insn_and, M(spec_op,0,0,0,0,and_op), RS | RT | RD }, - { insn_andi, M(andi_op,0,0,0,0,0), RS | RT | UIMM }, - { insn_beq, M(beq_op,0,0,0,0,0), RS | RT | BIMM }, - { insn_beql, M(beql_op,0,0,0,0,0), RS | RT | BIMM }, - { insn_bgez, M(bcond_op,0,bgez_op,0,0,0), RS | BIMM }, - { insn_bgezl, M(bcond_op,0,bgezl_op,0,0,0), RS | BIMM }, - { insn_bltz, M(bcond_op,0,bltz_op,0,0,0), RS | BIMM }, - { insn_bltzl, M(bcond_op,0,bltzl_op,0,0,0), RS | BIMM }, - { insn_bne, M(bne_op,0,0,0,0,0), RS | RT | BIMM }, - { insn_daddiu, M(daddiu_op,0,0,0,0,0), RS | RT | SIMM }, - { insn_daddu, M(spec_op,0,0,0,0,daddu_op), RS | RT | RD }, - { insn_dmfc0, M(cop0_op,dmfc_op,0,0,0,0), RT | RD }, - { insn_dmtc0, M(cop0_op,dmtc_op,0,0,0,0), RT | RD }, - { insn_dsll, M(spec_op,0,0,0,0,dsll_op), RT | RD | RE }, - { insn_dsll32, M(spec_op,0,0,0,0,dsll32_op), RT | RD | RE }, - { insn_dsra, M(spec_op,0,0,0,0,dsra_op), RT | RD | RE }, - { insn_dsrl, M(spec_op,0,0,0,0,dsrl_op), RT | RD | RE }, - { insn_dsubu, M(spec_op,0,0,0,0,dsubu_op), RS | RT | RD }, - { insn_eret, M(cop0_op,cop_op,0,0,0,eret_op), 0 }, - { insn_j, M(j_op,0,0,0,0,0), JIMM }, - { insn_jal, M(jal_op,0,0,0,0,0), JIMM }, - { insn_jr, M(spec_op,0,0,0,0,jr_op), RS }, - { insn_ld, M(ld_op,0,0,0,0,0), RS | RT | SIMM }, - { insn_ll, M(ll_op,0,0,0,0,0), RS | RT | SIMM }, - { insn_lld, M(lld_op,0,0,0,0,0), RS | RT | SIMM }, - { insn_lui, M(lui_op,0,0,0,0,0), RT | SIMM }, - { insn_lw, M(lw_op,0,0,0,0,0), RS | RT | SIMM }, - { insn_mfc0, M(cop0_op,mfc_op,0,0,0,0), RT | RD }, - { insn_mtc0, M(cop0_op,mtc_op,0,0,0,0), RT | RD }, - { insn_ori, M(ori_op,0,0,0,0,0), RS | RT | UIMM }, - { insn_rfe, M(cop0_op,cop_op,0,0,0,rfe_op), 0 }, - { insn_sc, M(sc_op,0,0,0,0,0), RS | RT | SIMM }, - { insn_scd, M(scd_op,0,0,0,0,0), RS | RT | SIMM }, - { insn_sd, M(sd_op,0,0,0,0,0), RS | RT | SIMM }, - { insn_sll, M(spec_op,0,0,0,0,sll_op), RT | RD | RE }, - { insn_sra, M(spec_op,0,0,0,0,sra_op), RT | RD | RE }, - { insn_srl, M(spec_op,0,0,0,0,srl_op), RT | RD | RE }, - { insn_subu, M(spec_op,0,0,0,0,subu_op), RS | RT | RD }, - { insn_sw, M(sw_op,0,0,0,0,0), RS | RT | SIMM }, - { insn_tlbp, M(cop0_op,cop_op,0,0,0,tlbp_op), 0 }, - { insn_tlbwi, M(cop0_op,cop_op,0,0,0,tlbwi_op), 0 }, - { insn_tlbwr, M(cop0_op,cop_op,0,0,0,tlbwr_op), 0 }, - { insn_xor, M(spec_op,0,0,0,0,xor_op), RS | RT | RD }, - { insn_xori, M(xori_op,0,0,0,0,0), RS | RT | UIMM }, - { insn_invalid, 0, 0 } -}; - -#undef M - -static __init u32 build_rs(u32 arg) -{ - if (arg & ~RS_MASK) - printk(KERN_WARNING "TLB synthesizer field overflow\n"); - - return (arg & RS_MASK) << RS_SH; -} - -static __init u32 build_rt(u32 arg) +static int use_bbit_insns(void) { - if (arg & ~RT_MASK) - printk(KERN_WARNING "TLB synthesizer field overflow\n"); - - return (arg & RT_MASK) << RT_SH; -} - -static __init u32 build_rd(u32 arg) -{ - if (arg & ~RD_MASK) - printk(KERN_WARNING "TLB synthesizer field overflow\n"); - - return (arg & RD_MASK) << RD_SH; + switch (current_cpu_type()) { + case CPU_CAVIUM_OCTEON: + case CPU_CAVIUM_OCTEON_PLUS: + case CPU_CAVIUM_OCTEON2: + case CPU_CAVIUM_OCTEON3: + return 1; + default: + return 0; + } } -static __init u32 build_re(u32 arg) +static int use_lwx_insns(void) { - if (arg & ~RE_MASK) - printk(KERN_WARNING "TLB synthesizer field overflow\n"); - - return (arg & RE_MASK) << RE_SH; + switch (current_cpu_type()) { + case CPU_CAVIUM_OCTEON2: + case CPU_CAVIUM_OCTEON3: + return 1; + default: + return 0; + } } - -static __init u32 build_simm(s32 arg) +#if defined(CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE) && \ + CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0 +static bool scratchpad_available(void) { - if (arg > 0x7fff || arg < -0x8000) - printk(KERN_WARNING "TLB synthesizer field overflow\n"); - - return arg & 0xffff; + return true; } - -static __init u32 build_uimm(u32 arg) +static int scratchpad_offset(int i) { - if (arg & ~IMM_MASK) - printk(KERN_WARNING "TLB synthesizer field overflow\n"); - - return arg & IMM_MASK; -} - -static __init u32 build_bimm(s32 arg) -{ - if (arg > 0x1ffff || arg < -0x20000) - printk(KERN_WARNING "TLB synthesizer field overflow\n"); - - if (arg & 0x3) - printk(KERN_WARNING "Invalid TLB synthesizer branch target\n"); - - return ((arg < 0) ? (1 << 15) : 0) | ((arg >> 2) & 0x7fff); + /* + * CVMSEG starts at address -32768 and extends for + * CAVIUM_OCTEON_CVMSEG_SIZE 128 byte cache lines. + */ + i += 1; /* Kernel use starts at the top and works down. */ + return CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128 - (8 * i) - 32768; } - -static __init u32 build_jimm(u32 arg) +#else +static bool scratchpad_available(void) { - if (arg & ~((JIMM_MASK) << 2)) - printk(KERN_WARNING "TLB synthesizer field overflow\n"); - - return (arg >> 2) & JIMM_MASK; + return false; } - -static __init u32 build_func(u32 arg) +static int scratchpad_offset(int i) { - if (arg & ~FUNC_MASK) - printk(KERN_WARNING "TLB synthesizer field overflow\n"); - - return arg & FUNC_MASK; + BUG(); + /* Really unreachable, but evidently some GCC want this. */ + return 0; } - +#endif /* - * The order of opcode arguments is implicitly left to right, - * starting with RS and ending with FUNC or IMM. + * Found by experiment: At least some revisions of the 4kc throw under + * some circumstances a machine check exception, triggered by invalid + * values in the index register. Delaying the tlbp instruction until + * after the next branch, plus adding an additional nop in front of + * tlbwi/tlbwr avoids the invalid index register values. Nobody knows + * why; it's not an issue caused by the core RTL. + * */ -static void __init build_insn(u32 **buf, enum opcode opc, ...) +static int m4kc_tlbp_war(void) { - struct insn *ip = NULL; - unsigned int i; - va_list ap; - u32 op; - - for (i = 0; insn_table[i].opcode != insn_invalid; i++) - if (insn_table[i].opcode == opc) { - ip = &insn_table[i]; - break; - } - - if (!ip) - panic("Unsupported TLB synthesizer instruction %d", opc); - - op = ip->match; - va_start(ap, opc); - if (ip->fields & RS) op |= build_rs(va_arg(ap, u32)); - if (ip->fields & RT) op |= build_rt(va_arg(ap, u32)); - if (ip->fields & RD) op |= build_rd(va_arg(ap, u32)); - if (ip->fields & RE) op |= build_re(va_arg(ap, u32)); - if (ip->fields & SIMM) op |= build_simm(va_arg(ap, s32)); - if (ip->fields & UIMM) op |= build_uimm(va_arg(ap, u32)); - if (ip->fields & BIMM) op |= build_bimm(va_arg(ap, s32)); - if (ip->fields & JIMM) op |= build_jimm(va_arg(ap, u32)); - if (ip->fields & FUNC) op |= build_func(va_arg(ap, u32)); - va_end(ap); - - **buf = op; - (*buf)++; + return (current_cpu_data.processor_id & 0xffff00) == + (PRID_COMP_MIPS | PRID_IMP_4KC); } -#define I_u1u2u3(op) \ - static inline void i##op(u32 **buf, unsigned int a, \ - unsigned int b, unsigned int c) \ - { \ - build_insn(buf, insn##op, a, b, c); \ - } - -#define I_u2u1u3(op) \ - static inline void i##op(u32 **buf, unsigned int a, \ - unsigned int b, unsigned int c) \ - { \ - build_insn(buf, insn##op, b, a, c); \ - } - -#define I_u3u1u2(op) \ - static inline void i##op(u32 **buf, unsigned int a, \ - unsigned int b, unsigned int c) \ - { \ - build_insn(buf, insn##op, b, c, a); \ - } - -#define I_u1u2s3(op) \ - static inline void i##op(u32 **buf, unsigned int a, \ - unsigned int b, signed int c) \ - { \ - build_insn(buf, insn##op, a, b, c); \ - } - -#define I_u2s3u1(op) \ - static inline void i##op(u32 **buf, unsigned int a, \ - signed int b, unsigned int c) \ - { \ - build_insn(buf, insn##op, c, a, b); \ - } - -#define I_u2u1s3(op) \ - static inline void i##op(u32 **buf, unsigned int a, \ - unsigned int b, signed int c) \ - { \ - build_insn(buf, insn##op, b, a, c); \ - } - -#define I_u1u2(op) \ - static inline void i##op(u32 **buf, unsigned int a, \ - unsigned int b) \ - { \ - build_insn(buf, insn##op, a, b); \ - } - -#define I_u1s2(op) \ - static inline void i##op(u32 **buf, unsigned int a, \ - signed int b) \ - { \ - build_insn(buf, insn##op, a, b); \ - } - -#define I_u1(op) \ - static inline void i##op(u32 **buf, unsigned int a) \ - { \ - build_insn(buf, insn##op, a); \ - } - -#define I_0(op) \ - static inline void i##op(u32 **buf) \ - { \ - build_insn(buf, insn##op); \ - } - -I_u2u1s3(_addiu); -I_u3u1u2(_addu); -I_u2u1u3(_andi); -I_u3u1u2(_and); -I_u1u2s3(_beq); -I_u1u2s3(_beql); -I_u1s2(_bgez); -I_u1s2(_bgezl); -I_u1s2(_bltz); -I_u1s2(_bltzl); -I_u1u2s3(_bne); -I_u1u2(_dmfc0); -I_u1u2(_dmtc0); -I_u2u1s3(_daddiu); -I_u3u1u2(_daddu); -I_u2u1u3(_dsll); -I_u2u1u3(_dsll32); -I_u2u1u3(_dsra); -I_u2u1u3(_dsrl); -I_u3u1u2(_dsubu); -I_0(_eret); -I_u1(_j); -I_u1(_jal); -I_u1(_jr); -I_u2s3u1(_ld); -I_u2s3u1(_ll); -I_u2s3u1(_lld); -I_u1s2(_lui); -I_u2s3u1(_lw); -I_u1u2(_mfc0); -I_u1u2(_mtc0); -I_u2u1u3(_ori); -I_0(_rfe); -I_u2s3u1(_sc); -I_u2s3u1(_scd); -I_u2s3u1(_sd); -I_u2u1u3(_sll); -I_u2u1u3(_sra); -I_u2u1u3(_srl); -I_u3u1u2(_subu); -I_u2s3u1(_sw); -I_0(_tlbp); -I_0(_tlbwi); -I_0(_tlbwr); -I_u3u1u2(_xor) -I_u2u1u3(_xori); - -/* - * handling labels - */ - +/* Handle labels (which must be positive integers). */ enum label_id { - label_invalid, - label_second_part, + label_second_part = 1, label_leave, label_vmalloc, label_vmalloc_done, - label_tlbw_hazard, - label_split, + label_tlbw_hazard_0, + label_split = label_tlbw_hazard_0 + 8, + label_tlbl_goaround1, + label_tlbl_goaround2, label_nopage_tlbl, label_nopage_tlbs, label_nopage_tlbm, label_smp_pgtable_change, label_r3000_write_probe_fail, + label_large_segbits_fault, +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + label_tlb_huge_update, +#endif }; -struct label { - u32 *addr; - enum label_id lab; -}; - -static __init void build_label(struct label **lab, u32 *addr, - enum label_id l) -{ - (*lab)->addr = addr; - (*lab)->lab = l; - (*lab)++; -} - -#define L_LA(lb) \ - static inline void l##lb(struct label **lab, u32 *addr) \ - { \ - build_label(lab, addr, label##lb); \ - } - -L_LA(_second_part) -L_LA(_leave) -L_LA(_vmalloc) -L_LA(_vmalloc_done) -L_LA(_tlbw_hazard) -L_LA(_split) -L_LA(_nopage_tlbl) -L_LA(_nopage_tlbs) -L_LA(_nopage_tlbm) -L_LA(_smp_pgtable_change) -L_LA(_r3000_write_probe_fail) - -/* convenience macros for instructions */ -#ifdef CONFIG_64BIT -# define i_LW(buf, rs, rt, off) i_ld(buf, rs, rt, off) -# define i_SW(buf, rs, rt, off) i_sd(buf, rs, rt, off) -# define i_SLL(buf, rs, rt, sh) i_dsll(buf, rs, rt, sh) -# define i_SRA(buf, rs, rt, sh) i_dsra(buf, rs, rt, sh) -# define i_SRL(buf, rs, rt, sh) i_dsrl(buf, rs, rt, sh) -# define i_MFC0(buf, rt, rd) i_dmfc0(buf, rt, rd) -# define i_MTC0(buf, rt, rd) i_dmtc0(buf, rt, rd) -# define i_ADDIU(buf, rs, rt, val) i_daddiu(buf, rs, rt, val) -# define i_ADDU(buf, rs, rt, rd) i_daddu(buf, rs, rt, rd) -# define i_SUBU(buf, rs, rt, rd) i_dsubu(buf, rs, rt, rd) -# define i_LL(buf, rs, rt, off) i_lld(buf, rs, rt, off) -# define i_SC(buf, rs, rt, off) i_scd(buf, rs, rt, off) -#else -# define i_LW(buf, rs, rt, off) i_lw(buf, rs, rt, off) -# define i_SW(buf, rs, rt, off) i_sw(buf, rs, rt, off) -# define i_SLL(buf, rs, rt, sh) i_sll(buf, rs, rt, sh) -# define i_SRA(buf, rs, rt, sh) i_sra(buf, rs, rt, sh) -# define i_SRL(buf, rs, rt, sh) i_srl(buf, rs, rt, sh) -# define i_MFC0(buf, rt, rd) i_mfc0(buf, rt, rd) -# define i_MTC0(buf, rt, rd) i_mtc0(buf, rt, rd) -# define i_ADDIU(buf, rs, rt, val) i_addiu(buf, rs, rt, val) -# define i_ADDU(buf, rs, rt, rd) i_addu(buf, rs, rt, rd) -# define i_SUBU(buf, rs, rt, rd) i_subu(buf, rs, rt, rd) -# define i_LL(buf, rs, rt, off) i_ll(buf, rs, rt, off) -# define i_SC(buf, rs, rt, off) i_sc(buf, rs, rt, off) +UASM_L_LA(_second_part) +UASM_L_LA(_leave) +UASM_L_LA(_vmalloc) +UASM_L_LA(_vmalloc_done) +/* _tlbw_hazard_x is handled differently. */ +UASM_L_LA(_split) +UASM_L_LA(_tlbl_goaround1) +UASM_L_LA(_tlbl_goaround2) +UASM_L_LA(_nopage_tlbl) +UASM_L_LA(_nopage_tlbs) +UASM_L_LA(_nopage_tlbm) +UASM_L_LA(_smp_pgtable_change) +UASM_L_LA(_r3000_write_probe_fail) +UASM_L_LA(_large_segbits_fault) +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT +UASM_L_LA(_tlb_huge_update) #endif -#define i_b(buf, off) i_beq(buf, 0, 0, off) -#define i_beqz(buf, rs, off) i_beq(buf, rs, 0, off) -#define i_beqzl(buf, rs, off) i_beql(buf, rs, 0, off) -#define i_bnez(buf, rs, off) i_bne(buf, rs, 0, off) -#define i_bnezl(buf, rs, off) i_bnel(buf, rs, 0, off) -#define i_move(buf, a, b) i_ADDU(buf, a, 0, b) -#define i_nop(buf) i_sll(buf, 0, 0, 0) -#define i_ssnop(buf) i_sll(buf, 0, 0, 1) -#define i_ehb(buf) i_sll(buf, 0, 0, 3) +static int hazard_instance; -#ifdef CONFIG_64BIT -static __init int __attribute__((unused)) in_compat_space_p(long addr) +static void uasm_bgezl_hazard(u32 **p, struct uasm_reloc **r, int instance) { - /* Is this address in 32bit compat space? */ - return (((addr) & 0xffffffff00000000L) == 0xffffffff00000000L); + switch (instance) { + case 0 ... 7: + uasm_il_bgezl(p, r, 0, label_tlbw_hazard_0 + instance); + return; + default: + BUG(); + } } -static __init int __attribute__((unused)) rel_highest(long val) +static void uasm_bgezl_label(struct uasm_label **l, u32 **p, int instance) { - return ((((val + 0x800080008000L) >> 48) & 0xffff) ^ 0x8000) - 0x8000; + switch (instance) { + case 0 ... 7: + uasm_build_label(l, *p, label_tlbw_hazard_0 + instance); + break; + default: + BUG(); + } } -static __init int __attribute__((unused)) rel_higher(long val) -{ - return ((((val + 0x80008000L) >> 32) & 0xffff) ^ 0x8000) - 0x8000; -} +/* + * pgtable bits are assigned dynamically depending on processor feature + * and statically based on kernel configuration. This spits out the actual + * values the kernel is using. Required to make sense from disassembled + * TLB exception handlers. + */ +static void output_pgtable_bits_defines(void) +{ +#define pr_define(fmt, ...) \ + pr_debug("#define " fmt, ##__VA_ARGS__) + + pr_debug("#include <asm/asm.h>\n"); + pr_debug("#include <asm/regdef.h>\n"); + pr_debug("\n"); + + pr_define("_PAGE_PRESENT_SHIFT %d\n", _PAGE_PRESENT_SHIFT); + pr_define("_PAGE_READ_SHIFT %d\n", _PAGE_READ_SHIFT); + pr_define("_PAGE_WRITE_SHIFT %d\n", _PAGE_WRITE_SHIFT); + pr_define("_PAGE_ACCESSED_SHIFT %d\n", _PAGE_ACCESSED_SHIFT); + pr_define("_PAGE_MODIFIED_SHIFT %d\n", _PAGE_MODIFIED_SHIFT); +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT); + pr_define("_PAGE_SPLITTING_SHIFT %d\n", _PAGE_SPLITTING_SHIFT); #endif - -static __init int rel_hi(long val) -{ - return ((((val + 0x8000L) >> 16) & 0xffff) ^ 0x8000) - 0x8000; + if (cpu_has_rixi) { +#ifdef _PAGE_NO_EXEC_SHIFT + pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT); +#endif +#ifdef _PAGE_NO_READ_SHIFT + pr_define("_PAGE_NO_READ_SHIFT %d\n", _PAGE_NO_READ_SHIFT); +#endif + } + pr_define("_PAGE_GLOBAL_SHIFT %d\n", _PAGE_GLOBAL_SHIFT); + pr_define("_PAGE_VALID_SHIFT %d\n", _PAGE_VALID_SHIFT); + pr_define("_PAGE_DIRTY_SHIFT %d\n", _PAGE_DIRTY_SHIFT); + pr_define("_PFN_SHIFT %d\n", _PFN_SHIFT); + pr_debug("\n"); } -static __init int rel_lo(long val) +static inline void dump_handler(const char *symbol, const u32 *handler, int count) { - return ((val & 0xffff) ^ 0x8000) - 0x8000; -} + int i; -static __init void i_LA_mostly(u32 **buf, unsigned int rs, long addr) -{ -#ifdef CONFIG_64BIT - if (!in_compat_space_p(addr)) { - i_lui(buf, rs, rel_highest(addr)); - if (rel_higher(addr)) - i_daddiu(buf, rs, rs, rel_higher(addr)); - if (rel_hi(addr)) { - i_dsll(buf, rs, rs, 16); - i_daddiu(buf, rs, rs, rel_hi(addr)); - i_dsll(buf, rs, rs, 16); - } else - i_dsll32(buf, rs, rs, 0); - } else -#endif - i_lui(buf, rs, rel_hi(addr)); -} + pr_debug("LEAF(%s)\n", symbol); -static __init void __attribute__((unused)) i_LA(u32 **buf, unsigned int rs, - long addr) -{ - i_LA_mostly(buf, rs, addr); - if (rel_lo(addr)) - i_ADDIU(buf, rs, rs, rel_lo(addr)); -} + pr_debug("\t.set push\n"); + pr_debug("\t.set noreorder\n"); -/* - * handle relocations - */ + for (i = 0; i < count; i++) + pr_debug("\t.word\t0x%08x\t\t# %p\n", handler[i], &handler[i]); -struct reloc { - u32 *addr; - unsigned int type; - enum label_id lab; -}; + pr_debug("\t.set\tpop\n"); -static __init void r_mips_pc16(struct reloc **rel, u32 *addr, - enum label_id l) -{ - (*rel)->addr = addr; - (*rel)->type = R_MIPS_PC16; - (*rel)->lab = l; - (*rel)++; + pr_debug("\tEND(%s)\n", symbol); } -static inline void __resolve_relocs(struct reloc *rel, struct label *lab) -{ - long laddr = (long)lab->addr; - long raddr = (long)rel->addr; +/* The only general purpose registers allowed in TLB handlers. */ +#define K0 26 +#define K1 27 - switch (rel->type) { - case R_MIPS_PC16: - *rel->addr |= build_bimm(laddr - (raddr + 4)); - break; +/* Some CP0 registers */ +#define C0_INDEX 0, 0 +#define C0_ENTRYLO0 2, 0 +#define C0_TCBIND 2, 2 +#define C0_ENTRYLO1 3, 0 +#define C0_CONTEXT 4, 0 +#define C0_PAGEMASK 5, 0 +#define C0_BADVADDR 8, 0 +#define C0_ENTRYHI 10, 0 +#define C0_EPC 14, 0 +#define C0_XCONTEXT 20, 0 - default: - panic("Unsupported TLB synthesizer relocation %d", - rel->type); - } -} +#ifdef CONFIG_64BIT +# define GET_CONTEXT(buf, reg) UASM_i_MFC0(buf, reg, C0_XCONTEXT) +#else +# define GET_CONTEXT(buf, reg) UASM_i_MFC0(buf, reg, C0_CONTEXT) +#endif -static __init void resolve_relocs(struct reloc *rel, struct label *lab) -{ - struct label *l; +/* The worst case length of the handler is around 18 instructions for + * R3000-style TLBs and up to 63 instructions for R4000-style TLBs. + * Maximum space available is 32 instructions for R3000 and 64 + * instructions for R4000. + * + * We deliberately chose a buffer size of 128, so we won't scribble + * over anything important on overflow before we panic. + */ +static u32 tlb_handler[128]; - for (; rel->lab != label_invalid; rel++) - for (l = lab; l->lab != label_invalid; l++) - if (rel->lab == l->lab) - __resolve_relocs(rel, l); -} +/* simply assume worst case size for labels and relocs */ +static struct uasm_label labels[128]; +static struct uasm_reloc relocs[128]; -static __init void move_relocs(struct reloc *rel, u32 *first, u32 *end, - long off) -{ - for (; rel->lab != label_invalid; rel++) - if (rel->addr >= first && rel->addr < end) - rel->addr += off; -} +static int check_for_high_segbits; -static __init void move_labels(struct label *lab, u32 *first, u32 *end, - long off) +static unsigned int kscratch_used_mask; + +static inline int __maybe_unused c0_kscratch(void) { - for (; lab->lab != label_invalid; lab++) - if (lab->addr >= first && lab->addr < end) - lab->addr += off; + switch (current_cpu_type()) { + case CPU_XLP: + case CPU_XLR: + return 22; + default: + return 31; + } } -static __init void copy_handler(struct reloc *rel, struct label *lab, - u32 *first, u32 *end, u32 *target) +static int allocate_kscratch(void) { - long off = (long)(target - first); + int r; + unsigned int a = cpu_data[0].kscratch_mask & ~kscratch_used_mask; - memcpy(target, first, (end - first) * sizeof(u32)); + r = ffs(a); - move_relocs(rel, first, end, off); - move_labels(lab, first, end, off); -} + if (r == 0) + return -1; -static __init int __attribute__((unused)) insn_has_bdelay(struct reloc *rel, - u32 *addr) -{ - for (; rel->lab != label_invalid; rel++) { - if (rel->addr == addr - && (rel->type == R_MIPS_PC16 - || rel->type == R_MIPS_26)) - return 1; - } + r--; /* make it zero based */ - return 0; -} + kscratch_used_mask |= (1 << r); -/* convenience functions for labeled branches */ -static void __attribute__((unused)) il_bltz(u32 **p, struct reloc **r, - unsigned int reg, enum label_id l) -{ - r_mips_pc16(r, *p, l); - i_bltz(p, reg, 0); + return r; } -static void __attribute__((unused)) il_b(u32 **p, struct reloc **r, - enum label_id l) -{ - r_mips_pc16(r, *p, l); - i_b(p, 0); -} +static int scratch_reg; +static int pgd_reg; +enum vmalloc64_mode {not_refill, refill_scratch, refill_noscratch}; -static void il_beqz(u32 **p, struct reloc **r, unsigned int reg, - enum label_id l) +static struct work_registers build_get_work_registers(u32 **p) { - r_mips_pc16(r, *p, l); - i_beqz(p, reg, 0); -} + struct work_registers r; -static void __attribute__((unused)) -il_beqzl(u32 **p, struct reloc **r, unsigned int reg, enum label_id l) -{ - r_mips_pc16(r, *p, l); - i_beqzl(p, reg, 0); -} + if (scratch_reg >= 0) { + /* Save in CPU local C0_KScratch? */ + UASM_i_MTC0(p, 1, c0_kscratch(), scratch_reg); + r.r1 = K0; + r.r2 = K1; + r.r3 = 1; + return r; + } -static void il_bnez(u32 **p, struct reloc **r, unsigned int reg, - enum label_id l) -{ - r_mips_pc16(r, *p, l); - i_bnez(p, reg, 0); + if (num_possible_cpus() > 1) { + /* Get smp_processor_id */ + UASM_i_CPUID_MFC0(p, K0, SMP_CPUID_REG); + UASM_i_SRL_SAFE(p, K0, K0, SMP_CPUID_REGSHIFT); + + /* handler_reg_save index in K0 */ + UASM_i_SLL(p, K0, K0, ilog2(sizeof(struct tlb_reg_save))); + + UASM_i_LA(p, K1, (long)&handler_reg_save); + UASM_i_ADDU(p, K0, K0, K1); + } else { + UASM_i_LA(p, K0, (long)&handler_reg_save); + } + /* K0 now points to save area, save $1 and $2 */ + UASM_i_SW(p, 1, offsetof(struct tlb_reg_save, a), K0); + UASM_i_SW(p, 2, offsetof(struct tlb_reg_save, b), K0); + + r.r1 = K1; + r.r2 = 1; + r.r3 = 2; + return r; } -static void il_bgezl(u32 **p, struct reloc **r, unsigned int reg, - enum label_id l) +static void build_restore_work_registers(u32 **p) { - r_mips_pc16(r, *p, l); - i_bgezl(p, reg, 0); + if (scratch_reg >= 0) { + UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); + return; + } + /* K0 already points to save area, restore $1 and $2 */ + UASM_i_LW(p, 1, offsetof(struct tlb_reg_save, a), K0); + UASM_i_LW(p, 2, offsetof(struct tlb_reg_save, b), K0); } -/* The only general purpose registers allowed in TLB handlers. */ -#define K0 26 -#define K1 27 - -/* Some CP0 registers */ -#define C0_INDEX 0 -#define C0_ENTRYLO0 2 -#define C0_ENTRYLO1 3 -#define C0_CONTEXT 4 -#define C0_BADVADDR 8 -#define C0_ENTRYHI 10 -#define C0_EPC 14 -#define C0_XCONTEXT 20 +#ifndef CONFIG_MIPS_PGD_C0_CONTEXT -#ifdef CONFIG_64BIT -# define GET_CONTEXT(buf, reg) i_MFC0(buf, reg, C0_XCONTEXT) -#else -# define GET_CONTEXT(buf, reg) i_MFC0(buf, reg, C0_CONTEXT) -#endif - -/* The worst case length of the handler is around 18 instructions for - * R3000-style TLBs and up to 63 instructions for R4000-style TLBs. - * Maximum space available is 32 instructions for R3000 and 64 - * instructions for R4000. +/* + * CONFIG_MIPS_PGD_C0_CONTEXT implies 64 bit and lack of pgd_current, + * we cannot do r3000 under these circumstances. * - * We deliberately chose a buffer size of 128, so we won't scribble - * over anything important on overflow before we panic. + * Declare pgd_current here instead of including mmu_context.h to avoid type + * conflicts for tlbmiss_handler_setup_pgd */ -static __initdata u32 tlb_handler[128]; - -/* simply assume worst case size for labels and relocs */ -static __initdata struct label labels[128]; -static __initdata struct reloc relocs[128]; +extern unsigned long pgd_current[]; /* * The R3000 TLB handler is simple. */ -static void __init build_r3000_tlb_refill_handler(void) +static void build_r3000_tlb_refill_handler(void) { long pgdc = (long)pgd_current; u32 *p; @@ -710,40 +404,35 @@ static void __init build_r3000_tlb_refill_handler(void) memset(tlb_handler, 0, sizeof(tlb_handler)); p = tlb_handler; - i_mfc0(&p, K0, C0_BADVADDR); - i_lui(&p, K1, rel_hi(pgdc)); /* cp0 delay */ - i_lw(&p, K1, rel_lo(pgdc), K1); - i_srl(&p, K0, K0, 22); /* load delay */ - i_sll(&p, K0, K0, 2); - i_addu(&p, K1, K1, K0); - i_mfc0(&p, K0, C0_CONTEXT); - i_lw(&p, K1, 0, K1); /* cp0 delay */ - i_andi(&p, K0, K0, 0xffc); /* load delay */ - i_addu(&p, K1, K1, K0); - i_lw(&p, K0, 0, K1); - i_nop(&p); /* load delay */ - i_mtc0(&p, K0, C0_ENTRYLO0); - i_mfc0(&p, K1, C0_EPC); /* cp0 delay */ - i_tlbwr(&p); /* cp0 delay */ - i_jr(&p, K1); - i_rfe(&p); /* branch delay */ + uasm_i_mfc0(&p, K0, C0_BADVADDR); + uasm_i_lui(&p, K1, uasm_rel_hi(pgdc)); /* cp0 delay */ + uasm_i_lw(&p, K1, uasm_rel_lo(pgdc), K1); + uasm_i_srl(&p, K0, K0, 22); /* load delay */ + uasm_i_sll(&p, K0, K0, 2); + uasm_i_addu(&p, K1, K1, K0); + uasm_i_mfc0(&p, K0, C0_CONTEXT); + uasm_i_lw(&p, K1, 0, K1); /* cp0 delay */ + uasm_i_andi(&p, K0, K0, 0xffc); /* load delay */ + uasm_i_addu(&p, K1, K1, K0); + uasm_i_lw(&p, K0, 0, K1); + uasm_i_nop(&p); /* load delay */ + uasm_i_mtc0(&p, K0, C0_ENTRYLO0); + uasm_i_mfc0(&p, K1, C0_EPC); /* cp0 delay */ + uasm_i_tlbwr(&p); /* cp0 delay */ + uasm_i_jr(&p, K1); + uasm_i_rfe(&p); /* branch delay */ if (p > tlb_handler + 32) panic("TLB refill handler space exceeded"); - printk("Synthesized TLB refill handler (%u instructions).\n", - (unsigned int)(p - tlb_handler)); -#ifdef DEBUG_TLB - { - int i; + pr_debug("Wrote TLB refill handler (%u instructions).\n", + (unsigned int)(p - tlb_handler)); - for (i = 0; i < (p - tlb_handler); i++) - printk("%08x\n", tlb_handler[i]); - } -#endif + memcpy((void *)ebase, tlb_handler, 0x80); - memcpy((void *)CAC_BASE, tlb_handler, 0x80); + dump_handler("r3000_tlb_refill", (u32 *)ebase, 32); } +#endif /* CONFIG_MIPS_PGD_C0_CONTEXT */ /* * The R4000 TLB handler is much more complicated. We have two @@ -752,7 +441,7 @@ static void __init build_r3000_tlb_refill_handler(void) * other one.To keep things simple, we first assume linear space, * then we relocate it to the final handler layout as needed. */ -static __initdata u32 final_handler[64]; +static u32 final_handler[64]; /* * Hazards @@ -760,8 +449,8 @@ static __initdata u32 final_handler[64]; * From the IDT errata for the QED RM5230 (Nevada), processor revision 1.0: * 2. A timing hazard exists for the TLBP instruction. * - * stalling_instruction - * TLBP + * stalling_instruction + * TLBP * * The JTLB is being read for the TLBP throughout the stall generated by the * previous instruction. This is not really correct as the stalling instruction @@ -772,46 +461,69 @@ static __initdata u32 final_handler[64]; * The software work-around is to not allow the instruction preceding the TLBP * to stall - make it an NOP or some other instruction guaranteed not to stall. * - * Errata 2 will not be fixed. This errata is also on the R5000. + * Errata 2 will not be fixed. This errata is also on the R5000. * * As if we MIPS hackers wouldn't know how to nop pipelines happy ... */ -static __init void __attribute__((unused)) build_tlb_probe_entry(u32 **p) +static void __maybe_unused build_tlb_probe_entry(u32 **p) { - switch (current_cpu_data.cputype) { - /* Found by experiment: R4600 v2.0 needs this, too. */ + switch (current_cpu_type()) { + /* Found by experiment: R4600 v2.0/R4700 needs this, too. */ case CPU_R4600: + case CPU_R4700: case CPU_R5000: - case CPU_R5000A: case CPU_NEVADA: - i_nop(p); - i_tlbp(p); + uasm_i_nop(p); + uasm_i_tlbp(p); break; default: - i_tlbp(p); + uasm_i_tlbp(p); break; } } /* * Write random or indexed TLB entry, and care about the hazards from - * the preceeding mtc0 and for the following eret. + * the preceding mtc0 and for the following eret. */ enum tlb_write_entry { tlb_random, tlb_indexed }; -static __init void build_tlb_write_entry(u32 **p, struct label **l, - struct reloc **r, - enum tlb_write_entry wmode) +static void build_tlb_write_entry(u32 **p, struct uasm_label **l, + struct uasm_reloc **r, + enum tlb_write_entry wmode) { void(*tlbw)(u32 **) = NULL; switch (wmode) { - case tlb_random: tlbw = i_tlbwr; break; - case tlb_indexed: tlbw = i_tlbwi; break; + case tlb_random: tlbw = uasm_i_tlbwr; break; + case tlb_indexed: tlbw = uasm_i_tlbwi; break; } - switch (current_cpu_data.cputype) { + if (cpu_has_mips_r2) { + /* + * The architecture spec says an ehb is required here, + * but a number of cores do not have the hazard and + * using an ehb causes an expensive pipeline stall. + */ + switch (current_cpu_type()) { + case CPU_M14KC: + case CPU_74K: + case CPU_1074K: + case CPU_PROAPTIV: + case CPU_P5600: + case CPU_M5150: + break; + + default: + uasm_i_ehb(p); + break; + } + tlbw(p); + return; + } + + switch (current_cpu_type()) { case CPU_R4000PC: case CPU_R4000SC: case CPU_R4000MC: @@ -822,195 +534,385 @@ static __init void build_tlb_write_entry(u32 **p, struct label **l, * This branch uses up a mtc0 hazard nop slot and saves * two nops after the tlbw instruction. */ - il_bgezl(p, r, 0, label_tlbw_hazard); + uasm_bgezl_hazard(p, r, hazard_instance); tlbw(p); - l_tlbw_hazard(l, *p); - i_nop(p); + uasm_bgezl_label(l, p, hazard_instance); + hazard_instance++; + uasm_i_nop(p); break; case CPU_R4600: case CPU_R4700: + uasm_i_nop(p); + tlbw(p); + uasm_i_nop(p); + break; + case CPU_R5000: - case CPU_R5000A: - i_nop(p); + case CPU_NEVADA: + uasm_i_nop(p); /* QED specifies 2 nops hazard */ + uasm_i_nop(p); /* QED specifies 2 nops hazard */ tlbw(p); - i_nop(p); break; case CPU_R4300: case CPU_5KC: case CPU_TX49XX: - case CPU_AU1000: - case CPU_AU1100: - case CPU_AU1500: - case CPU_AU1550: - case CPU_AU1200: case CPU_PR4450: - i_nop(p); + case CPU_XLR: + uasm_i_nop(p); tlbw(p); break; case CPU_R10000: case CPU_R12000: + case CPU_R14000: case CPU_4KC: + case CPU_4KEC: + case CPU_M14KC: + case CPU_M14KEC: case CPU_SB1: case CPU_SB1A: case CPU_4KSC: case CPU_20KC: case CPU_25KF: + case CPU_BMIPS32: + case CPU_BMIPS3300: + case CPU_BMIPS4350: + case CPU_BMIPS4380: + case CPU_BMIPS5000: + case CPU_LOONGSON2: + case CPU_LOONGSON3: + case CPU_R5500: + if (m4kc_tlbp_war()) + uasm_i_nop(p); + case CPU_ALCHEMY: tlbw(p); break; - case CPU_NEVADA: - i_nop(p); /* QED specifies 2 nops hazard */ - /* - * This branch uses up a mtc0 hazard nop slot and saves - * a nop after the tlbw instruction. - */ - il_bgezl(p, r, 0, label_tlbw_hazard); - tlbw(p); - l_tlbw_hazard(l, *p); - break; - case CPU_RM7000: - i_nop(p); - i_nop(p); - i_nop(p); - i_nop(p); - tlbw(p); - break; - - case CPU_4KEC: - case CPU_24K: - case CPU_34K: - i_ehb(p); + uasm_i_nop(p); + uasm_i_nop(p); + uasm_i_nop(p); + uasm_i_nop(p); tlbw(p); break; - case CPU_RM9000: - /* - * When the JTLB is updated by tlbwi or tlbwr, a subsequent - * use of the JTLB for instructions should not occur for 4 - * cpu cycles and use for data translations should not occur - * for 3 cpu cycles. - */ - i_ssnop(p); - i_ssnop(p); - i_ssnop(p); - i_ssnop(p); - tlbw(p); - i_ssnop(p); - i_ssnop(p); - i_ssnop(p); - i_ssnop(p); - break; - case CPU_VR4111: case CPU_VR4121: case CPU_VR4122: case CPU_VR4181: case CPU_VR4181A: - i_nop(p); - i_nop(p); + uasm_i_nop(p); + uasm_i_nop(p); tlbw(p); - i_nop(p); - i_nop(p); + uasm_i_nop(p); + uasm_i_nop(p); break; case CPU_VR4131: case CPU_VR4133: case CPU_R5432: - i_nop(p); - i_nop(p); + uasm_i_nop(p); + uasm_i_nop(p); + tlbw(p); + break; + + case CPU_JZRISC: tlbw(p); + uasm_i_nop(p); break; default: panic("No TLB refill handler yet (CPU type: %d)", - current_cpu_data.cputype); + current_cpu_type()); break; } } +static __maybe_unused void build_convert_pte_to_entrylo(u32 **p, + unsigned int reg) +{ + if (cpu_has_rixi) { + UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL)); + } else { +#ifdef CONFIG_64BIT_PHYS_ADDR + uasm_i_dsrl_safe(p, reg, reg, ilog2(_PAGE_GLOBAL)); +#else + UASM_i_SRL(p, reg, reg, ilog2(_PAGE_GLOBAL)); +#endif + } +} + +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + +static void build_restore_pagemask(u32 **p, struct uasm_reloc **r, + unsigned int tmp, enum label_id lid, + int restore_scratch) +{ + if (restore_scratch) { + /* Reset default page size */ + if (PM_DEFAULT_MASK >> 16) { + uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16); + uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff); + uasm_i_mtc0(p, tmp, C0_PAGEMASK); + uasm_il_b(p, r, lid); + } else if (PM_DEFAULT_MASK) { + uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK); + uasm_i_mtc0(p, tmp, C0_PAGEMASK); + uasm_il_b(p, r, lid); + } else { + uasm_i_mtc0(p, 0, C0_PAGEMASK); + uasm_il_b(p, r, lid); + } + if (scratch_reg >= 0) + UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); + else + UASM_i_LW(p, 1, scratchpad_offset(0), 0); + } else { + /* Reset default page size */ + if (PM_DEFAULT_MASK >> 16) { + uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16); + uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff); + uasm_il_b(p, r, lid); + uasm_i_mtc0(p, tmp, C0_PAGEMASK); + } else if (PM_DEFAULT_MASK) { + uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK); + uasm_il_b(p, r, lid); + uasm_i_mtc0(p, tmp, C0_PAGEMASK); + } else { + uasm_il_b(p, r, lid); + uasm_i_mtc0(p, 0, C0_PAGEMASK); + } + } +} + +static void build_huge_tlb_write_entry(u32 **p, struct uasm_label **l, + struct uasm_reloc **r, + unsigned int tmp, + enum tlb_write_entry wmode, + int restore_scratch) +{ + /* Set huge page tlb entry size */ + uasm_i_lui(p, tmp, PM_HUGE_MASK >> 16); + uasm_i_ori(p, tmp, tmp, PM_HUGE_MASK & 0xffff); + uasm_i_mtc0(p, tmp, C0_PAGEMASK); + + build_tlb_write_entry(p, l, r, wmode); + + build_restore_pagemask(p, r, tmp, label_leave, restore_scratch); +} + +/* + * Check if Huge PTE is present, if so then jump to LABEL. + */ +static void +build_is_huge_pte(u32 **p, struct uasm_reloc **r, unsigned int tmp, + unsigned int pmd, int lid) +{ + UASM_i_LW(p, tmp, 0, pmd); + if (use_bbit_insns()) { + uasm_il_bbit1(p, r, tmp, ilog2(_PAGE_HUGE), lid); + } else { + uasm_i_andi(p, tmp, tmp, _PAGE_HUGE); + uasm_il_bnez(p, r, tmp, lid); + } +} + +static void build_huge_update_entries(u32 **p, unsigned int pte, + unsigned int tmp) +{ + int small_sequence; + + /* + * A huge PTE describes an area the size of the + * configured huge page size. This is twice the + * of the large TLB entry size we intend to use. + * A TLB entry half the size of the configured + * huge page size is configured into entrylo0 + * and entrylo1 to cover the contiguous huge PTE + * address space. + */ + small_sequence = (HPAGE_SIZE >> 7) < 0x10000; + + /* We can clobber tmp. It isn't used after this.*/ + if (!small_sequence) + uasm_i_lui(p, tmp, HPAGE_SIZE >> (7 + 16)); + + build_convert_pte_to_entrylo(p, pte); + UASM_i_MTC0(p, pte, C0_ENTRYLO0); /* load it */ + /* convert to entrylo1 */ + if (small_sequence) + UASM_i_ADDIU(p, pte, pte, HPAGE_SIZE >> 7); + else + UASM_i_ADDU(p, pte, pte, tmp); + + UASM_i_MTC0(p, pte, C0_ENTRYLO1); /* load it */ +} + +static void build_huge_handler_tail(u32 **p, struct uasm_reloc **r, + struct uasm_label **l, + unsigned int pte, + unsigned int ptr) +{ +#ifdef CONFIG_SMP + UASM_i_SC(p, pte, 0, ptr); + uasm_il_beqz(p, r, pte, label_tlb_huge_update); + UASM_i_LW(p, pte, 0, ptr); /* Needed because SC killed our PTE */ +#else + UASM_i_SW(p, pte, 0, ptr); +#endif + build_huge_update_entries(p, pte, ptr); + build_huge_tlb_write_entry(p, l, r, pte, tlb_indexed, 0); +} +#endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */ + #ifdef CONFIG_64BIT /* * TMP and PTR are scratch. * TMP will be clobbered, PTR will hold the pmd entry. */ -static __init void -build_get_pmde64(u32 **p, struct label **l, struct reloc **r, +static void +build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, unsigned int tmp, unsigned int ptr) { +#ifndef CONFIG_MIPS_PGD_C0_CONTEXT long pgdc = (long)pgd_current; - +#endif /* * The vmalloc handling is not in the hotpath. */ - i_dmfc0(p, tmp, C0_BADVADDR); - il_bltz(p, r, tmp, label_vmalloc); - /* No i_nop needed here, since the next insn doesn't touch TMP. */ + uasm_i_dmfc0(p, tmp, C0_BADVADDR); -#ifdef CONFIG_SMP -# ifdef CONFIG_BUILD_ELF64 - /* - * 64 bit SMP running in XKPHYS has smp_processor_id() << 3 - * stored in CONTEXT. - */ - i_dmfc0(p, ptr, C0_CONTEXT); - i_dsrl(p, ptr, ptr, 23); - i_LA_mostly(p, tmp, pgdc); - i_daddu(p, ptr, ptr, tmp); - i_dmfc0(p, tmp, C0_BADVADDR); - i_ld(p, ptr, rel_lo(pgdc), ptr); -# else - /* - * 64 bit SMP running in compat space has the lower part of - * &pgd_current[smp_processor_id()] stored in CONTEXT. - */ - if (!in_compat_space_p(pgdc)) - panic("Invalid page directory address!"); + if (check_for_high_segbits) { + /* + * The kernel currently implicitely assumes that the + * MIPS SEGBITS parameter for the processor is + * (PGDIR_SHIFT+PGDIR_BITS) or less, and will never + * allocate virtual addresses outside the maximum + * range for SEGBITS = (PGDIR_SHIFT+PGDIR_BITS). But + * that doesn't prevent user code from accessing the + * higher xuseg addresses. Here, we make sure that + * everything but the lower xuseg addresses goes down + * the module_alloc/vmalloc path. + */ + uasm_i_dsrl_safe(p, ptr, tmp, PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3); + uasm_il_bnez(p, r, ptr, label_vmalloc); + } else { + uasm_il_bltz(p, r, tmp, label_vmalloc); + } + /* No uasm_i_nop needed here, since the next insn doesn't touch TMP. */ - i_dmfc0(p, ptr, C0_CONTEXT); - i_dsra(p, ptr, ptr, 23); - i_ld(p, ptr, 0, ptr); -# endif + if (pgd_reg != -1) { + /* pgd is in pgd_reg */ + UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg); + } else { +#if defined(CONFIG_MIPS_PGD_C0_CONTEXT) + /* + * &pgd << 11 stored in CONTEXT [23..63]. + */ + UASM_i_MFC0(p, ptr, C0_CONTEXT); + + /* Clear lower 23 bits of context. */ + uasm_i_dins(p, ptr, 0, 0, 23); + + /* 1 0 1 0 1 << 6 xkphys cached */ + uasm_i_ori(p, ptr, ptr, 0x540); + uasm_i_drotr(p, ptr, ptr, 11); +#elif defined(CONFIG_SMP) + UASM_i_CPUID_MFC0(p, ptr, SMP_CPUID_REG); + uasm_i_dsrl_safe(p, ptr, ptr, SMP_CPUID_PTRSHIFT); + UASM_i_LA_mostly(p, tmp, pgdc); + uasm_i_daddu(p, ptr, ptr, tmp); + uasm_i_dmfc0(p, tmp, C0_BADVADDR); + uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr); #else - i_LA_mostly(p, ptr, pgdc); - i_ld(p, ptr, rel_lo(pgdc), ptr); + UASM_i_LA_mostly(p, ptr, pgdc); + uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr); #endif + } + + uasm_l_vmalloc_done(l, *p); - l_vmalloc_done(l, *p); - i_dsrl(p, tmp, tmp, PGDIR_SHIFT-3); /* get pgd offset in bytes */ - i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3); - i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */ - i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */ - i_ld(p, ptr, 0, ptr); /* get pmd pointer */ - i_dsrl(p, tmp, tmp, PMD_SHIFT-3); /* get pmd offset in bytes */ - i_andi(p, tmp, tmp, (PTRS_PER_PMD - 1)<<3); - i_daddu(p, ptr, ptr, tmp); /* add in pmd offset */ + /* get pgd offset in bytes */ + uasm_i_dsrl_safe(p, tmp, tmp, PGDIR_SHIFT - 3); + + uasm_i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3); + uasm_i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */ +#ifndef __PAGETABLE_PMD_FOLDED + uasm_i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */ + uasm_i_ld(p, ptr, 0, ptr); /* get pmd pointer */ + uasm_i_dsrl_safe(p, tmp, tmp, PMD_SHIFT-3); /* get pmd offset in bytes */ + uasm_i_andi(p, tmp, tmp, (PTRS_PER_PMD - 1)<<3); + uasm_i_daddu(p, ptr, ptr, tmp); /* add in pmd offset */ +#endif } /* * BVADDR is the faulting address, PTR is scratch. * PTR will hold the pgd for vmalloc. */ -static __init void -build_get_pgd_vmalloc64(u32 **p, struct label **l, struct reloc **r, - unsigned int bvaddr, unsigned int ptr) +static void +build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, + unsigned int bvaddr, unsigned int ptr, + enum vmalloc64_mode mode) { long swpd = (long)swapper_pg_dir; + int single_insn_swpd; + int did_vmalloc_branch = 0; - l_vmalloc(l, *p); - i_LA(p, ptr, VMALLOC_START); - i_dsubu(p, bvaddr, bvaddr, ptr); + single_insn_swpd = uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd); - if (in_compat_space_p(swpd) && !rel_lo(swpd)) { - il_b(p, r, label_vmalloc_done); - i_lui(p, ptr, rel_hi(swpd)); - } else { - i_LA_mostly(p, ptr, swpd); - il_b(p, r, label_vmalloc_done); - i_daddiu(p, ptr, ptr, rel_lo(swpd)); + uasm_l_vmalloc(l, *p); + + if (mode != not_refill && check_for_high_segbits) { + if (single_insn_swpd) { + uasm_il_bltz(p, r, bvaddr, label_vmalloc_done); + uasm_i_lui(p, ptr, uasm_rel_hi(swpd)); + did_vmalloc_branch = 1; + /* fall through */ + } else { + uasm_il_bgez(p, r, bvaddr, label_large_segbits_fault); + } + } + if (!did_vmalloc_branch) { + if (uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd)) { + uasm_il_b(p, r, label_vmalloc_done); + uasm_i_lui(p, ptr, uasm_rel_hi(swpd)); + } else { + UASM_i_LA_mostly(p, ptr, swpd); + uasm_il_b(p, r, label_vmalloc_done); + if (uasm_in_compat_space_p(swpd)) + uasm_i_addiu(p, ptr, ptr, uasm_rel_lo(swpd)); + else + uasm_i_daddiu(p, ptr, ptr, uasm_rel_lo(swpd)); + } + } + if (mode != not_refill && check_for_high_segbits) { + uasm_l_large_segbits_fault(l, *p); + /* + * We get here if we are an xsseg address, or if we are + * an xuseg address above (PGDIR_SHIFT+PGDIR_BITS) boundary. + * + * Ignoring xsseg (assume disabled so would generate + * (address errors?), the only remaining possibility + * is the upper xuseg addresses. On processors with + * TLB_SEGBITS <= PGDIR_SHIFT+PGDIR_BITS, these + * addresses would have taken an address error. We try + * to mimic that here by taking a load/istream page + * fault. + */ + UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0); + uasm_i_jr(p, ptr); + + if (mode == refill_scratch) { + if (scratch_reg >= 0) + UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); + else + UASM_i_LW(p, 1, scratchpad_offset(0), 0); + } else { + uasm_i_nop(p); + } } } @@ -1020,35 +922,41 @@ build_get_pgd_vmalloc64(u32 **p, struct label **l, struct reloc **r, * TMP and PTR are scratch. * TMP will be clobbered, PTR will hold the pgd entry. */ -static __init void __attribute__((unused)) +static void __maybe_unused build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr) { - long pgdc = (long)pgd_current; + if (pgd_reg != -1) { + /* pgd is in pgd_reg */ + uasm_i_mfc0(p, ptr, c0_kscratch(), pgd_reg); + uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ + } else { + long pgdc = (long)pgd_current; - /* 32 bit SMP has smp_processor_id() stored in CONTEXT. */ + /* 32 bit SMP has smp_processor_id() stored in CONTEXT. */ #ifdef CONFIG_SMP - i_mfc0(p, ptr, C0_CONTEXT); - i_LA_mostly(p, tmp, pgdc); - i_srl(p, ptr, ptr, 23); - i_addu(p, ptr, tmp, ptr); + uasm_i_mfc0(p, ptr, SMP_CPUID_REG); + UASM_i_LA_mostly(p, tmp, pgdc); + uasm_i_srl(p, ptr, ptr, SMP_CPUID_PTRSHIFT); + uasm_i_addu(p, ptr, tmp, ptr); #else - i_LA_mostly(p, ptr, pgdc); + UASM_i_LA_mostly(p, ptr, pgdc); #endif - i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ - i_lw(p, ptr, rel_lo(pgdc), ptr); - i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */ - i_sll(p, tmp, tmp, PGD_T_LOG2); - i_addu(p, ptr, ptr, tmp); /* add in pgd offset */ + uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ + uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr); + } + uasm_i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */ + uasm_i_sll(p, tmp, tmp, PGD_T_LOG2); + uasm_i_addu(p, ptr, ptr, tmp); /* add in pgd offset */ } #endif /* !CONFIG_64BIT */ -static __init void build_adjust_context(u32 **p, unsigned int ctx) +static void build_adjust_context(u32 **p, unsigned int ctx) { - unsigned int shift = 4 - (PTE_T_LOG2 + 1); + unsigned int shift = 4 - (PTE_T_LOG2 + 1) + PAGE_SHIFT - 12; unsigned int mask = (PTRS_PER_PTE / 2 - 1) << (PTE_T_LOG2 + 1); - switch (current_cpu_data.cputype) { + switch (current_cpu_type()) { case CPU_VR41XX: case CPU_VR4111: case CPU_VR4121: @@ -1065,11 +973,11 @@ static __init void build_adjust_context(u32 **p, unsigned int ctx) } if (shift) - i_SRL(p, ctx, ctx, shift); - i_andi(p, ctx, ctx, mask); + UASM_i_SRL(p, ctx, ctx, shift); + uasm_i_andi(p, ctx, ctx, mask); } -static __init void build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr) +static void build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr) { /* * Bug workaround for the Nevada. It seems as if under certain @@ -1078,24 +986,23 @@ static __init void build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr) * in a different cacheline or a load instruction, probably any * memory reference, is between them. */ - switch (current_cpu_data.cputype) { + switch (current_cpu_type()) { case CPU_NEVADA: - i_LW(p, ptr, 0, ptr); + UASM_i_LW(p, ptr, 0, ptr); GET_CONTEXT(p, tmp); /* get context reg */ break; default: GET_CONTEXT(p, tmp); /* get context reg */ - i_LW(p, ptr, 0, ptr); + UASM_i_LW(p, ptr, 0, ptr); break; } build_adjust_context(p, tmp); - i_ADDU(p, ptr, ptr, tmp); /* add in offset */ + UASM_i_ADDU(p, ptr, ptr, tmp); /* add in offset */ } -static __init void build_update_entries(u32 **p, unsigned int tmp, - unsigned int ptep) +static void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep) { /* * 64bit address support (36bit on a 32bit CPU) in a 32bit @@ -1103,258 +1010,565 @@ static __init void build_update_entries(u32 **p, unsigned int tmp, */ #ifdef CONFIG_64BIT_PHYS_ADDR if (cpu_has_64bits) { - i_ld(p, tmp, 0, ptep); /* get even pte */ - i_ld(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ - i_dsrl(p, tmp, tmp, 6); /* convert to entrylo0 */ - i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */ - i_dsrl(p, ptep, ptep, 6); /* convert to entrylo1 */ - i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */ + uasm_i_ld(p, tmp, 0, ptep); /* get even pte */ + uasm_i_ld(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ + if (cpu_has_rixi) { + UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); + UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ + UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); + } else { + uasm_i_dsrl_safe(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); /* convert to entrylo0 */ + UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ + uasm_i_dsrl_safe(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); /* convert to entrylo1 */ + } + UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ } else { int pte_off_even = sizeof(pte_t) / 2; int pte_off_odd = pte_off_even + sizeof(pte_t); /* The pte entries are pre-shifted */ - i_lw(p, tmp, pte_off_even, ptep); /* get even pte */ - i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */ - i_lw(p, ptep, pte_off_odd, ptep); /* get odd pte */ - i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */ + uasm_i_lw(p, tmp, pte_off_even, ptep); /* get even pte */ + UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ + uasm_i_lw(p, ptep, pte_off_odd, ptep); /* get odd pte */ + UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ } #else - i_LW(p, tmp, 0, ptep); /* get even pte */ - i_LW(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ + UASM_i_LW(p, tmp, 0, ptep); /* get even pte */ + UASM_i_LW(p, ptep, sizeof(pte_t), ptep); /* get odd pte */ if (r45k_bvahwbug()) build_tlb_probe_entry(p); - i_SRL(p, tmp, tmp, 6); /* convert to entrylo0 */ - if (r4k_250MHZhwbug()) - i_mtc0(p, 0, C0_ENTRYLO0); - i_mtc0(p, tmp, C0_ENTRYLO0); /* load it */ - i_SRL(p, ptep, ptep, 6); /* convert to entrylo1 */ - if (r45k_bvahwbug()) - i_mfc0(p, tmp, C0_INDEX); + if (cpu_has_rixi) { + UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); + if (r4k_250MHZhwbug()) + UASM_i_MTC0(p, 0, C0_ENTRYLO0); + UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ + UASM_i_ROTR(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); + } else { + UASM_i_SRL(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); /* convert to entrylo0 */ + if (r4k_250MHZhwbug()) + UASM_i_MTC0(p, 0, C0_ENTRYLO0); + UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ + UASM_i_SRL(p, ptep, ptep, ilog2(_PAGE_GLOBAL)); /* convert to entrylo1 */ + if (r45k_bvahwbug()) + uasm_i_mfc0(p, tmp, C0_INDEX); + } if (r4k_250MHZhwbug()) - i_mtc0(p, 0, C0_ENTRYLO1); - i_mtc0(p, ptep, C0_ENTRYLO1); /* load it */ + UASM_i_MTC0(p, 0, C0_ENTRYLO1); + UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ #endif } -static void __init build_r4000_tlb_refill_handler(void) +struct mips_huge_tlb_info { + int huge_pte; + int restore_scratch; +}; + +static struct mips_huge_tlb_info +build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, + struct uasm_reloc **r, unsigned int tmp, + unsigned int ptr, int c0_scratch_reg) +{ + struct mips_huge_tlb_info rv; + unsigned int even, odd; + int vmalloc_branch_delay_filled = 0; + const int scratch = 1; /* Our extra working register */ + + rv.huge_pte = scratch; + rv.restore_scratch = 0; + + if (check_for_high_segbits) { + UASM_i_MFC0(p, tmp, C0_BADVADDR); + + if (pgd_reg != -1) + UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg); + else + UASM_i_MFC0(p, ptr, C0_CONTEXT); + + if (c0_scratch_reg >= 0) + UASM_i_MTC0(p, scratch, c0_kscratch(), c0_scratch_reg); + else + UASM_i_SW(p, scratch, scratchpad_offset(0), 0); + + uasm_i_dsrl_safe(p, scratch, tmp, + PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3); + uasm_il_bnez(p, r, scratch, label_vmalloc); + + if (pgd_reg == -1) { + vmalloc_branch_delay_filled = 1; + /* Clear lower 23 bits of context. */ + uasm_i_dins(p, ptr, 0, 0, 23); + } + } else { + if (pgd_reg != -1) + UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg); + else + UASM_i_MFC0(p, ptr, C0_CONTEXT); + + UASM_i_MFC0(p, tmp, C0_BADVADDR); + + if (c0_scratch_reg >= 0) + UASM_i_MTC0(p, scratch, c0_kscratch(), c0_scratch_reg); + else + UASM_i_SW(p, scratch, scratchpad_offset(0), 0); + + if (pgd_reg == -1) + /* Clear lower 23 bits of context. */ + uasm_i_dins(p, ptr, 0, 0, 23); + + uasm_il_bltz(p, r, tmp, label_vmalloc); + } + + if (pgd_reg == -1) { + vmalloc_branch_delay_filled = 1; + /* 1 0 1 0 1 << 6 xkphys cached */ + uasm_i_ori(p, ptr, ptr, 0x540); + uasm_i_drotr(p, ptr, ptr, 11); + } + +#ifdef __PAGETABLE_PMD_FOLDED +#define LOC_PTEP scratch +#else +#define LOC_PTEP ptr +#endif + + if (!vmalloc_branch_delay_filled) + /* get pgd offset in bytes */ + uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3); + + uasm_l_vmalloc_done(l, *p); + + /* + * tmp ptr + * fall-through case = badvaddr *pgd_current + * vmalloc case = badvaddr swapper_pg_dir + */ + + if (vmalloc_branch_delay_filled) + /* get pgd offset in bytes */ + uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3); + +#ifdef __PAGETABLE_PMD_FOLDED + GET_CONTEXT(p, tmp); /* get context reg */ +#endif + uasm_i_andi(p, scratch, scratch, (PTRS_PER_PGD - 1) << 3); + + if (use_lwx_insns()) { + UASM_i_LWX(p, LOC_PTEP, scratch, ptr); + } else { + uasm_i_daddu(p, ptr, ptr, scratch); /* add in pgd offset */ + uasm_i_ld(p, LOC_PTEP, 0, ptr); /* get pmd pointer */ + } + +#ifndef __PAGETABLE_PMD_FOLDED + /* get pmd offset in bytes */ + uasm_i_dsrl_safe(p, scratch, tmp, PMD_SHIFT - 3); + uasm_i_andi(p, scratch, scratch, (PTRS_PER_PMD - 1) << 3); + GET_CONTEXT(p, tmp); /* get context reg */ + + if (use_lwx_insns()) { + UASM_i_LWX(p, scratch, scratch, ptr); + } else { + uasm_i_daddu(p, ptr, ptr, scratch); /* add in pmd offset */ + UASM_i_LW(p, scratch, 0, ptr); + } +#endif + /* Adjust the context during the load latency. */ + build_adjust_context(p, tmp); + +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + uasm_il_bbit1(p, r, scratch, ilog2(_PAGE_HUGE), label_tlb_huge_update); + /* + * The in the LWX case we don't want to do the load in the + * delay slot. It cannot issue in the same cycle and may be + * speculative and unneeded. + */ + if (use_lwx_insns()) + uasm_i_nop(p); +#endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */ + + + /* build_update_entries */ + if (use_lwx_insns()) { + even = ptr; + odd = tmp; + UASM_i_LWX(p, even, scratch, tmp); + UASM_i_ADDIU(p, tmp, tmp, sizeof(pte_t)); + UASM_i_LWX(p, odd, scratch, tmp); + } else { + UASM_i_ADDU(p, ptr, scratch, tmp); /* add in offset */ + even = tmp; + odd = ptr; + UASM_i_LW(p, even, 0, ptr); /* get even pte */ + UASM_i_LW(p, odd, sizeof(pte_t), ptr); /* get odd pte */ + } + if (cpu_has_rixi) { + uasm_i_drotr(p, even, even, ilog2(_PAGE_GLOBAL)); + UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */ + uasm_i_drotr(p, odd, odd, ilog2(_PAGE_GLOBAL)); + } else { + uasm_i_dsrl_safe(p, even, even, ilog2(_PAGE_GLOBAL)); + UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */ + uasm_i_dsrl_safe(p, odd, odd, ilog2(_PAGE_GLOBAL)); + } + UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */ + + if (c0_scratch_reg >= 0) { + UASM_i_MFC0(p, scratch, c0_kscratch(), c0_scratch_reg); + build_tlb_write_entry(p, l, r, tlb_random); + uasm_l_leave(l, *p); + rv.restore_scratch = 1; + } else if (PAGE_SHIFT == 14 || PAGE_SHIFT == 13) { + build_tlb_write_entry(p, l, r, tlb_random); + uasm_l_leave(l, *p); + UASM_i_LW(p, scratch, scratchpad_offset(0), 0); + } else { + UASM_i_LW(p, scratch, scratchpad_offset(0), 0); + build_tlb_write_entry(p, l, r, tlb_random); + uasm_l_leave(l, *p); + rv.restore_scratch = 1; + } + + uasm_i_eret(p); /* return from trap */ + + return rv; +} + +/* + * For a 64-bit kernel, we are using the 64-bit XTLB refill exception + * because EXL == 0. If we wrap, we can also use the 32 instruction + * slots before the XTLB refill exception handler which belong to the + * unused TLB refill exception. + */ +#define MIPS64_REFILL_INSNS 32 + +static void build_r4000_tlb_refill_handler(void) { u32 *p = tlb_handler; - struct label *l = labels; - struct reloc *r = relocs; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; u32 *f; unsigned int final_len; + struct mips_huge_tlb_info htlb_info __maybe_unused; + enum vmalloc64_mode vmalloc_mode __maybe_unused; memset(tlb_handler, 0, sizeof(tlb_handler)); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); memset(final_handler, 0, sizeof(final_handler)); - /* - * create the plain linear handler - */ - if (bcm1250_m3_war()) { - i_MFC0(&p, K0, C0_BADVADDR); - i_MFC0(&p, K1, C0_ENTRYHI); - i_xor(&p, K0, K0, K1); - i_SRL(&p, K0, K0, PAGE_SHIFT + 1); - il_bnez(&p, &r, K0, label_leave); - /* No need for i_nop */ - } + if (IS_ENABLED(CONFIG_64BIT) && (scratch_reg >= 0 || scratchpad_available()) && use_bbit_insns()) { + htlb_info = build_fast_tlb_refill_handler(&p, &l, &r, K0, K1, + scratch_reg); + vmalloc_mode = refill_scratch; + } else { + htlb_info.huge_pte = K0; + htlb_info.restore_scratch = 0; + vmalloc_mode = refill_noscratch; + /* + * create the plain linear handler + */ + if (bcm1250_m3_war()) { + unsigned int segbits = 44; + + uasm_i_dmfc0(&p, K0, C0_BADVADDR); + uasm_i_dmfc0(&p, K1, C0_ENTRYHI); + uasm_i_xor(&p, K0, K0, K1); + uasm_i_dsrl_safe(&p, K1, K0, 62); + uasm_i_dsrl_safe(&p, K0, K0, 12 + 1); + uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits); + uasm_i_or(&p, K0, K0, K1); + uasm_il_bnez(&p, &r, K0, label_leave); + /* No need for uasm_i_nop */ + } #ifdef CONFIG_64BIT - build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */ + build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */ #else - build_get_pgde32(&p, K0, K1); /* get pgd in K1 */ + build_get_pgde32(&p, K0, K1); /* get pgd in K1 */ +#endif + +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + build_is_huge_pte(&p, &r, K0, K1, label_tlb_huge_update); #endif - build_get_ptep(&p, K0, K1); - build_update_entries(&p, K0, K1); - build_tlb_write_entry(&p, &l, &r, tlb_random); - l_leave(&l, p); - i_eret(&p); /* return from trap */ + build_get_ptep(&p, K0, K1); + build_update_entries(&p, K0, K1); + build_tlb_write_entry(&p, &l, &r, tlb_random); + uasm_l_leave(&l, p); + uasm_i_eret(&p); /* return from trap */ + } +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + uasm_l_tlb_huge_update(&l, p); + build_huge_update_entries(&p, htlb_info.huge_pte, K1); + build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random, + htlb_info.restore_scratch); +#endif #ifdef CONFIG_64BIT - build_get_pgd_vmalloc64(&p, &l, &r, K0, K1); + build_get_pgd_vmalloc64(&p, &l, &r, K0, K1, vmalloc_mode); #endif /* * Overflow check: For the 64bit handler, we need at least one * free instruction slot for the wrap-around branch. In worst * case, if the intended insertion point is a delay slot, we - * need three, with the the second nop'ed and the third being + * need three, with the second nop'ed and the third being * unused. */ -#ifdef CONFIG_32BIT - if ((p - tlb_handler) > 64) - panic("TLB refill handler space exceeded"); + switch (boot_cpu_type()) { + default: + if (sizeof(long) == 4) { + case CPU_LOONGSON2: + /* Loongson2 ebase is different than r4k, we have more space */ + if ((p - tlb_handler) > 64) + panic("TLB refill handler space exceeded"); + /* + * Now fold the handler in the TLB refill handler space. + */ + f = final_handler; + /* Simplest case, just copy the handler. */ + uasm_copy_handler(relocs, labels, tlb_handler, p, f); + final_len = p - tlb_handler; + break; + } else { + if (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 1) + || (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 3) + && uasm_insn_has_bdelay(relocs, + tlb_handler + MIPS64_REFILL_INSNS - 3))) + panic("TLB refill handler space exceeded"); + /* + * Now fold the handler in the TLB refill handler space. + */ + f = final_handler + MIPS64_REFILL_INSNS; + if ((p - tlb_handler) <= MIPS64_REFILL_INSNS) { + /* Just copy the handler. */ + uasm_copy_handler(relocs, labels, tlb_handler, p, f); + final_len = p - tlb_handler; + } else { +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + const enum label_id ls = label_tlb_huge_update; #else - if (((p - tlb_handler) > 63) - || (((p - tlb_handler) > 61) - && insn_has_bdelay(relocs, tlb_handler + 29))) - panic("TLB refill handler space exceeded"); + const enum label_id ls = label_vmalloc; #endif - - /* - * Now fold the handler in the TLB refill handler space. - */ -#ifdef CONFIG_32BIT - f = final_handler; - /* Simplest case, just copy the handler. */ - copy_handler(relocs, labels, tlb_handler, p, f); - final_len = p - tlb_handler; -#else /* CONFIG_64BIT */ - f = final_handler + 32; - if ((p - tlb_handler) <= 32) { - /* Just copy the handler. */ - copy_handler(relocs, labels, tlb_handler, p, f); - final_len = p - tlb_handler; - } else { - u32 *split = tlb_handler + 30; - - /* - * Find the split point. - */ - if (insn_has_bdelay(relocs, split - 1)) - split--; - - /* Copy first part of the handler. */ - copy_handler(relocs, labels, tlb_handler, split, f); - f += split - tlb_handler; - - /* Insert branch. */ - l_split(&l, final_handler); - il_b(&f, &r, label_split); - if (insn_has_bdelay(relocs, split)) - i_nop(&f); - else { - copy_handler(relocs, labels, split, split + 1, f); - move_labels(labels, f, f + 1, -1); - f++; - split++; + u32 *split; + int ov = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(labels) && labels[i].lab != ls; i++) + ; + BUG_ON(i == ARRAY_SIZE(labels)); + split = labels[i].addr; + + /* + * See if we have overflown one way or the other. + */ + if (split > tlb_handler + MIPS64_REFILL_INSNS || + split < p - MIPS64_REFILL_INSNS) + ov = 1; + + if (ov) { + /* + * Split two instructions before the end. One + * for the branch and one for the instruction + * in the delay slot. + */ + split = tlb_handler + MIPS64_REFILL_INSNS - 2; + + /* + * If the branch would fall in a delay slot, + * we must back up an additional instruction + * so that it is no longer in a delay slot. + */ + if (uasm_insn_has_bdelay(relocs, split - 1)) + split--; + } + /* Copy first part of the handler. */ + uasm_copy_handler(relocs, labels, tlb_handler, split, f); + f += split - tlb_handler; + + if (ov) { + /* Insert branch. */ + uasm_l_split(&l, final_handler); + uasm_il_b(&f, &r, label_split); + if (uasm_insn_has_bdelay(relocs, split)) + uasm_i_nop(&f); + else { + uasm_copy_handler(relocs, labels, + split, split + 1, f); + uasm_move_labels(labels, f, f + 1, -1); + f++; + split++; + } + } + + /* Copy the rest of the handler. */ + uasm_copy_handler(relocs, labels, split, p, final_handler); + final_len = (f - (final_handler + MIPS64_REFILL_INSNS)) + + (p - split); + } } - - /* Copy the rest of the handler. */ - copy_handler(relocs, labels, split, p, final_handler); - final_len = (f - (final_handler + 32)) + (p - split); + break; } -#endif /* CONFIG_64BIT */ - resolve_relocs(relocs, labels); - printk("Synthesized TLB refill handler (%u instructions).\n", - final_len); + uasm_resolve_relocs(relocs, labels); + pr_debug("Wrote TLB refill handler (%u instructions).\n", + final_len); -#ifdef DEBUG_TLB - { - int i; + memcpy((void *)ebase, final_handler, 0x100); - f = final_handler; -#ifdef CONFIG_64BIT - if (final_len > 32) - final_len = 64; - else - f = final_handler + 32; -#endif /* CONFIG_64BIT */ - for (i = 0; i < final_len; i++) - printk("%08x\n", f[i]); - } -#endif - - memcpy((void *)CAC_BASE, final_handler, 0x100); + dump_handler("r4000_tlb_refill", (u32 *)ebase, 64); } -/* - * TLB load/store/modify handlers. - * - * Only the fastpath gets synthesized at runtime, the slowpath for - * do_page_fault remains normal asm. - */ -extern void tlb_do_page_fault_0(void); -extern void tlb_do_page_fault_1(void); +extern u32 handle_tlbl[], handle_tlbl_end[]; +extern u32 handle_tlbs[], handle_tlbs_end[]; +extern u32 handle_tlbm[], handle_tlbm_end[]; +extern u32 tlbmiss_handler_setup_pgd_start[], tlbmiss_handler_setup_pgd[]; +extern u32 tlbmiss_handler_setup_pgd_end[]; -#define __tlb_handler_align \ - __attribute__((__aligned__(1 << CONFIG_MIPS_L1_CACHE_SHIFT))) +static void build_setup_pgd(void) +{ + const int a0 = 4; + const int __maybe_unused a1 = 5; + const int __maybe_unused a2 = 6; + u32 *p = tlbmiss_handler_setup_pgd_start; + const int tlbmiss_handler_setup_pgd_size = + tlbmiss_handler_setup_pgd_end - tlbmiss_handler_setup_pgd_start; +#ifndef CONFIG_MIPS_PGD_C0_CONTEXT + long pgdc = (long)pgd_current; +#endif -/* - * 128 instructions for the fastpath handler is generous and should - * never be exceeded. - */ -#define FASTPATH_SIZE 128 + memset(tlbmiss_handler_setup_pgd, 0, tlbmiss_handler_setup_pgd_size * + sizeof(tlbmiss_handler_setup_pgd[0])); + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + pgd_reg = allocate_kscratch(); +#ifdef CONFIG_MIPS_PGD_C0_CONTEXT + if (pgd_reg == -1) { + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + + /* PGD << 11 in c0_Context */ + /* + * If it is a ckseg0 address, convert to a physical + * address. Shifting right by 29 and adding 4 will + * result in zero for these addresses. + * + */ + UASM_i_SRA(&p, a1, a0, 29); + UASM_i_ADDIU(&p, a1, a1, 4); + uasm_il_bnez(&p, &r, a1, label_tlbl_goaround1); + uasm_i_nop(&p); + uasm_i_dinsm(&p, a0, 0, 29, 64 - 29); + uasm_l_tlbl_goaround1(&l, p); + UASM_i_SLL(&p, a0, a0, 11); + uasm_i_jr(&p, 31); + UASM_i_MTC0(&p, a0, C0_CONTEXT); + } else { + /* PGD in c0_KScratch */ + uasm_i_jr(&p, 31); + UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); + } +#else +#ifdef CONFIG_SMP + /* Save PGD to pgd_current[smp_processor_id()] */ + UASM_i_CPUID_MFC0(&p, a1, SMP_CPUID_REG); + UASM_i_SRL_SAFE(&p, a1, a1, SMP_CPUID_PTRSHIFT); + UASM_i_LA_mostly(&p, a2, pgdc); + UASM_i_ADDU(&p, a2, a2, a1); + UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2); +#else + UASM_i_LA_mostly(&p, a2, pgdc); + UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2); +#endif /* SMP */ + uasm_i_jr(&p, 31); + + /* if pgd_reg is allocated, save PGD also to scratch register */ + if (pgd_reg != -1) + UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); + else + uasm_i_nop(&p); +#endif + if (p >= tlbmiss_handler_setup_pgd_end) + panic("tlbmiss_handler_setup_pgd space exceeded"); + + uasm_resolve_relocs(relocs, labels); + pr_debug("Wrote tlbmiss_handler_setup_pgd (%u instructions).\n", + (unsigned int)(p - tlbmiss_handler_setup_pgd)); -u32 __tlb_handler_align handle_tlbl[FASTPATH_SIZE]; -u32 __tlb_handler_align handle_tlbs[FASTPATH_SIZE]; -u32 __tlb_handler_align handle_tlbm[FASTPATH_SIZE]; + dump_handler("tlbmiss_handler", tlbmiss_handler_setup_pgd, + tlbmiss_handler_setup_pgd_size); +} -static void __init -iPTE_LW(u32 **p, struct label **l, unsigned int pte, unsigned int ptr) +static void +iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr) { #ifdef CONFIG_SMP # ifdef CONFIG_64BIT_PHYS_ADDR if (cpu_has_64bits) - i_lld(p, pte, 0, ptr); + uasm_i_lld(p, pte, 0, ptr); else # endif - i_LL(p, pte, 0, ptr); + UASM_i_LL(p, pte, 0, ptr); #else # ifdef CONFIG_64BIT_PHYS_ADDR if (cpu_has_64bits) - i_ld(p, pte, 0, ptr); + uasm_i_ld(p, pte, 0, ptr); else # endif - i_LW(p, pte, 0, ptr); + UASM_i_LW(p, pte, 0, ptr); #endif } -static void __init -iPTE_SW(u32 **p, struct reloc **r, unsigned int pte, unsigned int ptr, +static void +iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr, unsigned int mode) { #ifdef CONFIG_64BIT_PHYS_ADDR unsigned int hwmode = mode & (_PAGE_VALID | _PAGE_DIRTY); #endif - i_ori(p, pte, pte, mode); + uasm_i_ori(p, pte, pte, mode); #ifdef CONFIG_SMP # ifdef CONFIG_64BIT_PHYS_ADDR if (cpu_has_64bits) - i_scd(p, pte, 0, ptr); + uasm_i_scd(p, pte, 0, ptr); else # endif - i_SC(p, pte, 0, ptr); + UASM_i_SC(p, pte, 0, ptr); if (r10000_llsc_war()) - il_beqzl(p, r, pte, label_smp_pgtable_change); + uasm_il_beqzl(p, r, pte, label_smp_pgtable_change); else - il_beqz(p, r, pte, label_smp_pgtable_change); + uasm_il_beqz(p, r, pte, label_smp_pgtable_change); # ifdef CONFIG_64BIT_PHYS_ADDR if (!cpu_has_64bits) { - /* no i_nop needed */ - i_ll(p, pte, sizeof(pte_t) / 2, ptr); - i_ori(p, pte, pte, hwmode); - i_sc(p, pte, sizeof(pte_t) / 2, ptr); - il_beqz(p, r, pte, label_smp_pgtable_change); - /* no i_nop needed */ - i_lw(p, pte, 0, ptr); + /* no uasm_i_nop needed */ + uasm_i_ll(p, pte, sizeof(pte_t) / 2, ptr); + uasm_i_ori(p, pte, pte, hwmode); + uasm_i_sc(p, pte, sizeof(pte_t) / 2, ptr); + uasm_il_beqz(p, r, pte, label_smp_pgtable_change); + /* no uasm_i_nop needed */ + uasm_i_lw(p, pte, 0, ptr); } else - i_nop(p); + uasm_i_nop(p); # else - i_nop(p); + uasm_i_nop(p); # endif #else # ifdef CONFIG_64BIT_PHYS_ADDR if (cpu_has_64bits) - i_sd(p, pte, 0, ptr); + uasm_i_sd(p, pte, 0, ptr); else # endif - i_SW(p, pte, 0, ptr); + UASM_i_SW(p, pte, 0, ptr); # ifdef CONFIG_64BIT_PHYS_ADDR if (!cpu_has_64bits) { - i_lw(p, pte, sizeof(pte_t) / 2, ptr); - i_ori(p, pte, pte, hwmode); - i_sw(p, pte, sizeof(pte_t) / 2, ptr); - i_lw(p, pte, 0, ptr); + uasm_i_lw(p, pte, sizeof(pte_t) / 2, ptr); + uasm_i_ori(p, pte, pte, hwmode); + uasm_i_sw(p, pte, sizeof(pte_t) / 2, ptr); + uasm_i_lw(p, pte, 0, ptr); } # endif #endif @@ -1365,19 +1579,36 @@ iPTE_SW(u32 **p, struct reloc **r, unsigned int pte, unsigned int ptr, * the page table where this PTE is located, PTE will be re-loaded * with it's original value. */ -static void __init -build_pte_present(u32 **p, struct label **l, struct reloc **r, - unsigned int pte, unsigned int ptr, enum label_id lid) -{ - i_andi(p, pte, pte, _PAGE_PRESENT | _PAGE_READ); - i_xori(p, pte, pte, _PAGE_PRESENT | _PAGE_READ); - il_bnez(p, r, pte, lid); - iPTE_LW(p, l, pte, ptr); +static void +build_pte_present(u32 **p, struct uasm_reloc **r, + int pte, int ptr, int scratch, enum label_id lid) +{ + int t = scratch >= 0 ? scratch : pte; + + if (cpu_has_rixi) { + if (use_bbit_insns()) { + uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid); + uasm_i_nop(p); + } else { + uasm_i_andi(p, t, pte, _PAGE_PRESENT); + uasm_il_beqz(p, r, t, lid); + if (pte == t) + /* You lose the SMP race :-(*/ + iPTE_LW(p, pte, ptr); + } + } else { + uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_READ); + uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_READ); + uasm_il_bnez(p, r, t, lid); + if (pte == t) + /* You lose the SMP race :-(*/ + iPTE_LW(p, pte, ptr); + } } /* Make PTE valid, store result in PTR. */ -static void __init -build_make_valid(u32 **p, struct reloc **r, unsigned int pte, +static void +build_make_valid(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr) { unsigned int mode = _PAGE_VALID | _PAGE_ACCESSED; @@ -1389,21 +1620,28 @@ build_make_valid(u32 **p, struct reloc **r, unsigned int pte, * Check if PTE can be written to, if not branch to LABEL. Regardless * restore PTE with value from PTR when done. */ -static void __init -build_pte_writable(u32 **p, struct label **l, struct reloc **r, - unsigned int pte, unsigned int ptr, enum label_id lid) -{ - i_andi(p, pte, pte, _PAGE_PRESENT | _PAGE_WRITE); - i_xori(p, pte, pte, _PAGE_PRESENT | _PAGE_WRITE); - il_bnez(p, r, pte, lid); - iPTE_LW(p, l, pte, ptr); +static void +build_pte_writable(u32 **p, struct uasm_reloc **r, + unsigned int pte, unsigned int ptr, int scratch, + enum label_id lid) +{ + int t = scratch >= 0 ? scratch : pte; + + uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_WRITE); + uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_WRITE); + uasm_il_bnez(p, r, t, lid); + if (pte == t) + /* You lose the SMP race :-(*/ + iPTE_LW(p, pte, ptr); + else + uasm_i_nop(p); } /* Make PTE writable, update software status bits as well, then store * at PTR. */ -static void __init -build_make_write(u32 **p, struct reloc **r, unsigned int pte, +static void +build_make_write(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr) { unsigned int mode = (_PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID @@ -1416,15 +1654,27 @@ build_make_write(u32 **p, struct reloc **r, unsigned int pte, * Check if PTE can be modified, if not branch to LABEL. Regardless * restore PTE with value from PTR when done. */ -static void __init -build_pte_modifiable(u32 **p, struct label **l, struct reloc **r, - unsigned int pte, unsigned int ptr, enum label_id lid) -{ - i_andi(p, pte, pte, _PAGE_WRITE); - il_beqz(p, r, pte, lid); - iPTE_LW(p, l, pte, ptr); +static void +build_pte_modifiable(u32 **p, struct uasm_reloc **r, + unsigned int pte, unsigned int ptr, int scratch, + enum label_id lid) +{ + if (use_bbit_insns()) { + uasm_il_bbit0(p, r, pte, ilog2(_PAGE_WRITE), lid); + uasm_i_nop(p); + } else { + int t = scratch >= 0 ? scratch : pte; + uasm_i_andi(p, t, pte, _PAGE_WRITE); + uasm_il_beqz(p, r, t, lid); + if (pte == t) + /* You lose the SMP race :-(*/ + iPTE_LW(p, pte, ptr); + } } +#ifndef CONFIG_MIPS_PGD_C0_CONTEXT + + /* * R3000 style TLB load/store/modify handlers. */ @@ -1433,14 +1683,14 @@ build_pte_modifiable(u32 **p, struct label **l, struct reloc **r, * This places the pte into ENTRYLO0 and writes it with tlbwi. * Then it returns. */ -static void __init +static void build_r3000_pte_reload_tlbwi(u32 **p, unsigned int pte, unsigned int tmp) { - i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */ - i_mfc0(p, tmp, C0_EPC); /* cp0 delay */ - i_tlbwi(p); - i_jr(p, tmp); - i_rfe(p); /* branch delay */ + uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */ + uasm_i_mfc0(p, tmp, C0_EPC); /* cp0 delay */ + uasm_i_tlbwi(p); + uasm_i_jr(p, tmp); + uasm_i_rfe(p); /* branch delay */ } /* @@ -1449,317 +1699,502 @@ build_r3000_pte_reload_tlbwi(u32 **p, unsigned int pte, unsigned int tmp) * may have the probe fail bit set as a result of a trap on a * kseg2 access, i.e. without refill. Then it returns. */ -static void __init -build_r3000_tlb_reload_write(u32 **p, struct label **l, struct reloc **r, - unsigned int pte, unsigned int tmp) -{ - i_mfc0(p, tmp, C0_INDEX); - i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */ - il_bltz(p, r, tmp, label_r3000_write_probe_fail); /* cp0 delay */ - i_mfc0(p, tmp, C0_EPC); /* branch delay */ - i_tlbwi(p); /* cp0 delay */ - i_jr(p, tmp); - i_rfe(p); /* branch delay */ - l_r3000_write_probe_fail(l, *p); - i_tlbwr(p); /* cp0 delay */ - i_jr(p, tmp); - i_rfe(p); /* branch delay */ -} - -static void __init +static void +build_r3000_tlb_reload_write(u32 **p, struct uasm_label **l, + struct uasm_reloc **r, unsigned int pte, + unsigned int tmp) +{ + uasm_i_mfc0(p, tmp, C0_INDEX); + uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */ + uasm_il_bltz(p, r, tmp, label_r3000_write_probe_fail); /* cp0 delay */ + uasm_i_mfc0(p, tmp, C0_EPC); /* branch delay */ + uasm_i_tlbwi(p); /* cp0 delay */ + uasm_i_jr(p, tmp); + uasm_i_rfe(p); /* branch delay */ + uasm_l_r3000_write_probe_fail(l, *p); + uasm_i_tlbwr(p); /* cp0 delay */ + uasm_i_jr(p, tmp); + uasm_i_rfe(p); /* branch delay */ +} + +static void build_r3000_tlbchange_handler_head(u32 **p, unsigned int pte, unsigned int ptr) { long pgdc = (long)pgd_current; - i_mfc0(p, pte, C0_BADVADDR); - i_lui(p, ptr, rel_hi(pgdc)); /* cp0 delay */ - i_lw(p, ptr, rel_lo(pgdc), ptr); - i_srl(p, pte, pte, 22); /* load delay */ - i_sll(p, pte, pte, 2); - i_addu(p, ptr, ptr, pte); - i_mfc0(p, pte, C0_CONTEXT); - i_lw(p, ptr, 0, ptr); /* cp0 delay */ - i_andi(p, pte, pte, 0xffc); /* load delay */ - i_addu(p, ptr, ptr, pte); - i_lw(p, pte, 0, ptr); - i_tlbp(p); /* load delay */ + uasm_i_mfc0(p, pte, C0_BADVADDR); + uasm_i_lui(p, ptr, uasm_rel_hi(pgdc)); /* cp0 delay */ + uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr); + uasm_i_srl(p, pte, pte, 22); /* load delay */ + uasm_i_sll(p, pte, pte, 2); + uasm_i_addu(p, ptr, ptr, pte); + uasm_i_mfc0(p, pte, C0_CONTEXT); + uasm_i_lw(p, ptr, 0, ptr); /* cp0 delay */ + uasm_i_andi(p, pte, pte, 0xffc); /* load delay */ + uasm_i_addu(p, ptr, ptr, pte); + uasm_i_lw(p, pte, 0, ptr); + uasm_i_tlbp(p); /* load delay */ } -static void __init build_r3000_tlb_load_handler(void) +static void build_r3000_tlb_load_handler(void) { u32 *p = handle_tlbl; - struct label *l = labels; - struct reloc *r = relocs; + const int handle_tlbl_size = handle_tlbl_end - handle_tlbl; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; - memset(handle_tlbl, 0, sizeof(handle_tlbl)); + memset(handle_tlbl, 0, handle_tlbl_size * sizeof(handle_tlbl[0])); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); build_r3000_tlbchange_handler_head(&p, K0, K1); - build_pte_present(&p, &l, &r, K0, K1, label_nopage_tlbl); - i_nop(&p); /* load delay */ + build_pte_present(&p, &r, K0, K1, -1, label_nopage_tlbl); + uasm_i_nop(&p); /* load delay */ build_make_valid(&p, &r, K0, K1); build_r3000_tlb_reload_write(&p, &l, &r, K0, K1); - l_nopage_tlbl(&l, p); - i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); - i_nop(&p); + uasm_l_nopage_tlbl(&l, p); + uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); + uasm_i_nop(&p); - if ((p - handle_tlbl) > FASTPATH_SIZE) + if (p >= handle_tlbl_end) panic("TLB load handler fastpath space exceeded"); - resolve_relocs(relocs, labels); - printk("Synthesized TLB load handler fastpath (%u instructions).\n", - (unsigned int)(p - handle_tlbl)); + uasm_resolve_relocs(relocs, labels); + pr_debug("Wrote TLB load handler fastpath (%u instructions).\n", + (unsigned int)(p - handle_tlbl)); -#ifdef DEBUG_TLB - { - int i; - - for (i = 0; i < (p - handle_tlbl); i++) - printk("%08x\n", handle_tlbl[i]); - } -#endif + dump_handler("r3000_tlb_load", handle_tlbl, handle_tlbl_size); } -static void __init build_r3000_tlb_store_handler(void) +static void build_r3000_tlb_store_handler(void) { u32 *p = handle_tlbs; - struct label *l = labels; - struct reloc *r = relocs; + const int handle_tlbs_size = handle_tlbs_end - handle_tlbs; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; - memset(handle_tlbs, 0, sizeof(handle_tlbs)); + memset(handle_tlbs, 0, handle_tlbs_size * sizeof(handle_tlbs[0])); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); build_r3000_tlbchange_handler_head(&p, K0, K1); - build_pte_writable(&p, &l, &r, K0, K1, label_nopage_tlbs); - i_nop(&p); /* load delay */ + build_pte_writable(&p, &r, K0, K1, -1, label_nopage_tlbs); + uasm_i_nop(&p); /* load delay */ build_make_write(&p, &r, K0, K1); build_r3000_tlb_reload_write(&p, &l, &r, K0, K1); - l_nopage_tlbs(&l, p); - i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); - i_nop(&p); + uasm_l_nopage_tlbs(&l, p); + uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); + uasm_i_nop(&p); - if ((p - handle_tlbs) > FASTPATH_SIZE) + if (p >= handle_tlbs_end) panic("TLB store handler fastpath space exceeded"); - resolve_relocs(relocs, labels); - printk("Synthesized TLB store handler fastpath (%u instructions).\n", - (unsigned int)(p - handle_tlbs)); - -#ifdef DEBUG_TLB - { - int i; + uasm_resolve_relocs(relocs, labels); + pr_debug("Wrote TLB store handler fastpath (%u instructions).\n", + (unsigned int)(p - handle_tlbs)); - for (i = 0; i < (p - handle_tlbs); i++) - printk("%08x\n", handle_tlbs[i]); - } -#endif + dump_handler("r3000_tlb_store", handle_tlbs, handle_tlbs_size); } -static void __init build_r3000_tlb_modify_handler(void) +static void build_r3000_tlb_modify_handler(void) { u32 *p = handle_tlbm; - struct label *l = labels; - struct reloc *r = relocs; + const int handle_tlbm_size = handle_tlbm_end - handle_tlbm; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; - memset(handle_tlbm, 0, sizeof(handle_tlbm)); + memset(handle_tlbm, 0, handle_tlbm_size * sizeof(handle_tlbm[0])); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); build_r3000_tlbchange_handler_head(&p, K0, K1); - build_pte_modifiable(&p, &l, &r, K0, K1, label_nopage_tlbm); - i_nop(&p); /* load delay */ + build_pte_modifiable(&p, &r, K0, K1, -1, label_nopage_tlbm); + uasm_i_nop(&p); /* load delay */ build_make_write(&p, &r, K0, K1); build_r3000_pte_reload_tlbwi(&p, K0, K1); - l_nopage_tlbm(&l, p); - i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); - i_nop(&p); + uasm_l_nopage_tlbm(&l, p); + uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); + uasm_i_nop(&p); - if ((p - handle_tlbm) > FASTPATH_SIZE) + if (p >= handle_tlbm_end) panic("TLB modify handler fastpath space exceeded"); - resolve_relocs(relocs, labels); - printk("Synthesized TLB modify handler fastpath (%u instructions).\n", - (unsigned int)(p - handle_tlbm)); + uasm_resolve_relocs(relocs, labels); + pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n", + (unsigned int)(p - handle_tlbm)); -#ifdef DEBUG_TLB - { - int i; - - for (i = 0; i < (p - handle_tlbm); i++) - printk("%08x\n", handle_tlbm[i]); - } -#endif + dump_handler("r3000_tlb_modify", handle_tlbm, handle_tlbm_size); } +#endif /* CONFIG_MIPS_PGD_C0_CONTEXT */ /* * R4000 style TLB load/store/modify handlers. */ -static void __init -build_r4000_tlbchange_handler_head(u32 **p, struct label **l, - struct reloc **r, unsigned int pte, - unsigned int ptr) +static struct work_registers +build_r4000_tlbchange_handler_head(u32 **p, struct uasm_label **l, + struct uasm_reloc **r) { + struct work_registers wr = build_get_work_registers(p); + #ifdef CONFIG_64BIT - build_get_pmde64(p, l, r, pte, ptr); /* get pmd in ptr */ + build_get_pmde64(p, l, r, wr.r1, wr.r2); /* get pmd in ptr */ #else - build_get_pgde32(p, pte, ptr); /* get pgd in ptr */ + build_get_pgde32(p, wr.r1, wr.r2); /* get pgd in ptr */ +#endif + +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + /* + * For huge tlb entries, pmd doesn't contain an address but + * instead contains the tlb pte. Check the PAGE_HUGE bit and + * see if we need to jump to huge tlb processing. + */ + build_is_huge_pte(p, r, wr.r1, wr.r2, label_tlb_huge_update); #endif - i_MFC0(p, pte, C0_BADVADDR); - i_LW(p, ptr, 0, ptr); - i_SRL(p, pte, pte, PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2); - i_andi(p, pte, pte, (PTRS_PER_PTE - 1) << PTE_T_LOG2); - i_ADDU(p, ptr, ptr, pte); + UASM_i_MFC0(p, wr.r1, C0_BADVADDR); + UASM_i_LW(p, wr.r2, 0, wr.r2); + UASM_i_SRL(p, wr.r1, wr.r1, PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2); + uasm_i_andi(p, wr.r1, wr.r1, (PTRS_PER_PTE - 1) << PTE_T_LOG2); + UASM_i_ADDU(p, wr.r2, wr.r2, wr.r1); #ifdef CONFIG_SMP - l_smp_pgtable_change(l, *p); -# endif - iPTE_LW(p, l, pte, ptr); /* get even pte */ - build_tlb_probe_entry(p); + uasm_l_smp_pgtable_change(l, *p); +#endif + iPTE_LW(p, wr.r1, wr.r2); /* get even pte */ + if (!m4kc_tlbp_war()) + build_tlb_probe_entry(p); + return wr; } -static void __init -build_r4000_tlbchange_handler_tail(u32 **p, struct label **l, - struct reloc **r, unsigned int tmp, +static void +build_r4000_tlbchange_handler_tail(u32 **p, struct uasm_label **l, + struct uasm_reloc **r, unsigned int tmp, unsigned int ptr) { - i_ori(p, ptr, ptr, sizeof(pte_t)); - i_xori(p, ptr, ptr, sizeof(pte_t)); + uasm_i_ori(p, ptr, ptr, sizeof(pte_t)); + uasm_i_xori(p, ptr, ptr, sizeof(pte_t)); build_update_entries(p, tmp, ptr); build_tlb_write_entry(p, l, r, tlb_indexed); - l_leave(l, *p); - i_eret(p); /* return from trap */ + uasm_l_leave(l, *p); + build_restore_work_registers(p); + uasm_i_eret(p); /* return from trap */ #ifdef CONFIG_64BIT - build_get_pgd_vmalloc64(p, l, r, tmp, ptr); + build_get_pgd_vmalloc64(p, l, r, tmp, ptr, not_refill); #endif } -static void __init build_r4000_tlb_load_handler(void) +static void build_r4000_tlb_load_handler(void) { u32 *p = handle_tlbl; - struct label *l = labels; - struct reloc *r = relocs; + const int handle_tlbl_size = handle_tlbl_end - handle_tlbl; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + struct work_registers wr; - memset(handle_tlbl, 0, sizeof(handle_tlbl)); + memset(handle_tlbl, 0, handle_tlbl_size * sizeof(handle_tlbl[0])); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); if (bcm1250_m3_war()) { - i_MFC0(&p, K0, C0_BADVADDR); - i_MFC0(&p, K1, C0_ENTRYHI); - i_xor(&p, K0, K0, K1); - i_SRL(&p, K0, K0, PAGE_SHIFT + 1); - il_bnez(&p, &r, K0, label_leave); - /* No need for i_nop */ + unsigned int segbits = 44; + + uasm_i_dmfc0(&p, K0, C0_BADVADDR); + uasm_i_dmfc0(&p, K1, C0_ENTRYHI); + uasm_i_xor(&p, K0, K0, K1); + uasm_i_dsrl_safe(&p, K1, K0, 62); + uasm_i_dsrl_safe(&p, K0, K0, 12 + 1); + uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits); + uasm_i_or(&p, K0, K0, K1); + uasm_il_bnez(&p, &r, K0, label_leave); + /* No need for uasm_i_nop */ } - build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1); - build_pte_present(&p, &l, &r, K0, K1, label_nopage_tlbl); - build_make_valid(&p, &r, K0, K1); - build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1); + wr = build_r4000_tlbchange_handler_head(&p, &l, &r); + build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl); + if (m4kc_tlbp_war()) + build_tlb_probe_entry(&p); - l_nopage_tlbl(&l, p); - i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); - i_nop(&p); + if (cpu_has_rixi) { + /* + * If the page is not _PAGE_VALID, RI or XI could not + * have triggered it. Skip the expensive test.. + */ + if (use_bbit_insns()) { + uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID), + label_tlbl_goaround1); + } else { + uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID); + uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround1); + } + uasm_i_nop(&p); - if ((p - handle_tlbl) > FASTPATH_SIZE) - panic("TLB load handler fastpath space exceeded"); + uasm_i_tlbr(&p); - resolve_relocs(relocs, labels); - printk("Synthesized TLB load handler fastpath (%u instructions).\n", - (unsigned int)(p - handle_tlbl)); + switch (current_cpu_type()) { + default: + if (cpu_has_mips_r2) { + uasm_i_ehb(&p); -#ifdef DEBUG_TLB - { - int i; + case CPU_CAVIUM_OCTEON: + case CPU_CAVIUM_OCTEON_PLUS: + case CPU_CAVIUM_OCTEON2: + break; + } + } - for (i = 0; i < (p - handle_tlbl); i++) - printk("%08x\n", handle_tlbl[i]); + /* Examine entrylo 0 or 1 based on ptr. */ + if (use_bbit_insns()) { + uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8); + } else { + uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t)); + uasm_i_beqz(&p, wr.r3, 8); + } + /* load it in the delay slot*/ + UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0); + /* load it if ptr is odd */ + UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1); + /* + * If the entryLo (now in wr.r3) is valid (bit 1), RI or + * XI must have triggered it. + */ + if (use_bbit_insns()) { + uasm_il_bbit1(&p, &r, wr.r3, 1, label_nopage_tlbl); + uasm_i_nop(&p); + uasm_l_tlbl_goaround1(&l, p); + } else { + uasm_i_andi(&p, wr.r3, wr.r3, 2); + uasm_il_bnez(&p, &r, wr.r3, label_nopage_tlbl); + uasm_i_nop(&p); + } + uasm_l_tlbl_goaround1(&l, p); } + build_make_valid(&p, &r, wr.r1, wr.r2); + build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2); + +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + /* + * This is the entry point when build_r4000_tlbchange_handler_head + * spots a huge page. + */ + uasm_l_tlb_huge_update(&l, p); + iPTE_LW(&p, wr.r1, wr.r2); + build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl); + build_tlb_probe_entry(&p); + + if (cpu_has_rixi) { + /* + * If the page is not _PAGE_VALID, RI or XI could not + * have triggered it. Skip the expensive test.. + */ + if (use_bbit_insns()) { + uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID), + label_tlbl_goaround2); + } else { + uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID); + uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2); + } + uasm_i_nop(&p); + + uasm_i_tlbr(&p); + + switch (current_cpu_type()) { + default: + if (cpu_has_mips_r2) { + uasm_i_ehb(&p); + + case CPU_CAVIUM_OCTEON: + case CPU_CAVIUM_OCTEON_PLUS: + case CPU_CAVIUM_OCTEON2: + break; + } + } + + /* Examine entrylo 0 or 1 based on ptr. */ + if (use_bbit_insns()) { + uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8); + } else { + uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t)); + uasm_i_beqz(&p, wr.r3, 8); + } + /* load it in the delay slot*/ + UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0); + /* load it if ptr is odd */ + UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1); + /* + * If the entryLo (now in wr.r3) is valid (bit 1), RI or + * XI must have triggered it. + */ + if (use_bbit_insns()) { + uasm_il_bbit0(&p, &r, wr.r3, 1, label_tlbl_goaround2); + } else { + uasm_i_andi(&p, wr.r3, wr.r3, 2); + uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2); + } + if (PM_DEFAULT_MASK == 0) + uasm_i_nop(&p); + /* + * We clobbered C0_PAGEMASK, restore it. On the other branch + * it is restored in build_huge_tlb_write_entry. + */ + build_restore_pagemask(&p, &r, wr.r3, label_nopage_tlbl, 0); + + uasm_l_tlbl_goaround2(&l, p); + } + uasm_i_ori(&p, wr.r1, wr.r1, (_PAGE_ACCESSED | _PAGE_VALID)); + build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2); +#endif + + uasm_l_nopage_tlbl(&l, p); + build_restore_work_registers(&p); +#ifdef CONFIG_CPU_MICROMIPS + if ((unsigned long)tlb_do_page_fault_0 & 1) { + uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_0)); + uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_0)); + uasm_i_jr(&p, K0); + } else #endif + uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); + uasm_i_nop(&p); + + if (p >= handle_tlbl_end) + panic("TLB load handler fastpath space exceeded"); + + uasm_resolve_relocs(relocs, labels); + pr_debug("Wrote TLB load handler fastpath (%u instructions).\n", + (unsigned int)(p - handle_tlbl)); + + dump_handler("r4000_tlb_load", handle_tlbl, handle_tlbl_size); } -static void __init build_r4000_tlb_store_handler(void) +static void build_r4000_tlb_store_handler(void) { u32 *p = handle_tlbs; - struct label *l = labels; - struct reloc *r = relocs; + const int handle_tlbs_size = handle_tlbs_end - handle_tlbs; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + struct work_registers wr; - memset(handle_tlbs, 0, sizeof(handle_tlbs)); + memset(handle_tlbs, 0, handle_tlbs_size * sizeof(handle_tlbs[0])); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); - build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1); - build_pte_writable(&p, &l, &r, K0, K1, label_nopage_tlbs); - build_make_write(&p, &r, K0, K1); - build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1); + wr = build_r4000_tlbchange_handler_head(&p, &l, &r); + build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs); + if (m4kc_tlbp_war()) + build_tlb_probe_entry(&p); + build_make_write(&p, &r, wr.r1, wr.r2); + build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2); - l_nopage_tlbs(&l, p); - i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); - i_nop(&p); +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + /* + * This is the entry point when + * build_r4000_tlbchange_handler_head spots a huge page. + */ + uasm_l_tlb_huge_update(&l, p); + iPTE_LW(&p, wr.r1, wr.r2); + build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs); + build_tlb_probe_entry(&p); + uasm_i_ori(&p, wr.r1, wr.r1, + _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY); + build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2); +#endif - if ((p - handle_tlbs) > FASTPATH_SIZE) - panic("TLB store handler fastpath space exceeded"); + uasm_l_nopage_tlbs(&l, p); + build_restore_work_registers(&p); +#ifdef CONFIG_CPU_MICROMIPS + if ((unsigned long)tlb_do_page_fault_1 & 1) { + uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_1)); + uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_1)); + uasm_i_jr(&p, K0); + } else +#endif + uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); + uasm_i_nop(&p); - resolve_relocs(relocs, labels); - printk("Synthesized TLB store handler fastpath (%u instructions).\n", - (unsigned int)(p - handle_tlbs)); + if (p >= handle_tlbs_end) + panic("TLB store handler fastpath space exceeded"); -#ifdef DEBUG_TLB - { - int i; + uasm_resolve_relocs(relocs, labels); + pr_debug("Wrote TLB store handler fastpath (%u instructions).\n", + (unsigned int)(p - handle_tlbs)); - for (i = 0; i < (p - handle_tlbs); i++) - printk("%08x\n", handle_tlbs[i]); - } -#endif + dump_handler("r4000_tlb_store", handle_tlbs, handle_tlbs_size); } -static void __init build_r4000_tlb_modify_handler(void) +static void build_r4000_tlb_modify_handler(void) { u32 *p = handle_tlbm; - struct label *l = labels; - struct reloc *r = relocs; + const int handle_tlbm_size = handle_tlbm_end - handle_tlbm; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + struct work_registers wr; - memset(handle_tlbm, 0, sizeof(handle_tlbm)); + memset(handle_tlbm, 0, handle_tlbm_size * sizeof(handle_tlbm[0])); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); - build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1); - build_pte_modifiable(&p, &l, &r, K0, K1, label_nopage_tlbm); + wr = build_r4000_tlbchange_handler_head(&p, &l, &r); + build_pte_modifiable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbm); + if (m4kc_tlbp_war()) + build_tlb_probe_entry(&p); /* Present and writable bits set, set accessed and dirty bits. */ - build_make_write(&p, &r, K0, K1); - build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1); + build_make_write(&p, &r, wr.r1, wr.r2); + build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2); + +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + /* + * This is the entry point when + * build_r4000_tlbchange_handler_head spots a huge page. + */ + uasm_l_tlb_huge_update(&l, p); + iPTE_LW(&p, wr.r1, wr.r2); + build_pte_modifiable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbm); + build_tlb_probe_entry(&p); + uasm_i_ori(&p, wr.r1, wr.r1, + _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY); + build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2); +#endif - l_nopage_tlbm(&l, p); - i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); - i_nop(&p); + uasm_l_nopage_tlbm(&l, p); + build_restore_work_registers(&p); +#ifdef CONFIG_CPU_MICROMIPS + if ((unsigned long)tlb_do_page_fault_1 & 1) { + uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_1)); + uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_1)); + uasm_i_jr(&p, K0); + } else +#endif + uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); + uasm_i_nop(&p); - if ((p - handle_tlbm) > FASTPATH_SIZE) + if (p >= handle_tlbm_end) panic("TLB modify handler fastpath space exceeded"); - resolve_relocs(relocs, labels); - printk("Synthesized TLB modify handler fastpath (%u instructions).\n", - (unsigned int)(p - handle_tlbm)); + uasm_resolve_relocs(relocs, labels); + pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n", + (unsigned int)(p - handle_tlbm)); -#ifdef DEBUG_TLB - { - int i; + dump_handler("r4000_tlb_modify", handle_tlbm, handle_tlbm_size); +} - for (i = 0; i < (p - handle_tlbm); i++) - printk("%08x\n", handle_tlbm[i]); - } -#endif +static void flush_tlb_handlers(void) +{ + local_flush_icache_range((unsigned long)handle_tlbl, + (unsigned long)handle_tlbl_end); + local_flush_icache_range((unsigned long)handle_tlbs, + (unsigned long)handle_tlbs_end); + local_flush_icache_range((unsigned long)handle_tlbm, + (unsigned long)handle_tlbm_end); + local_flush_icache_range((unsigned long)tlbmiss_handler_setup_pgd, + (unsigned long)tlbmiss_handler_setup_pgd_end); } -void __init build_tlb_refill_handler(void) +void build_tlb_refill_handler(void) { /* * The refill handler is generated per-CPU, multi-node systems @@ -1768,7 +2203,13 @@ void __init build_tlb_refill_handler(void) */ static int run_once = 0; - switch (current_cpu_data.cputype) { + output_pgtable_bits_defines(); + +#ifdef CONFIG_64BIT + check_for_high_segbits = current_cpu_data.vmbits > (PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3); +#endif + + switch (current_cpu_type()) { case CPU_R2000: case CPU_R3000: case CPU_R3000A: @@ -1776,13 +2217,22 @@ void __init build_tlb_refill_handler(void) case CPU_TX3912: case CPU_TX3922: case CPU_TX3927: - build_r3000_tlb_refill_handler(); +#ifndef CONFIG_MIPS_PGD_C0_CONTEXT + if (cpu_has_local_ebase) + build_r3000_tlb_refill_handler(); if (!run_once) { + if (!cpu_has_local_ebase) + build_r3000_tlb_refill_handler(); + build_setup_pgd(); build_r3000_tlb_load_handler(); build_r3000_tlb_store_handler(); build_r3000_tlb_modify_handler(); + flush_tlb_handlers(); run_once++; } +#else + panic("No R3000 TLB refill handler"); +#endif break; case CPU_R6000: @@ -1795,22 +2245,18 @@ void __init build_tlb_refill_handler(void) break; default: - build_r4000_tlb_refill_handler(); if (!run_once) { + scratch_reg = allocate_kscratch(); + build_setup_pgd(); build_r4000_tlb_load_handler(); build_r4000_tlb_store_handler(); build_r4000_tlb_modify_handler(); + if (!cpu_has_local_ebase) + build_r4000_tlb_refill_handler(); + flush_tlb_handlers(); run_once++; } + if (cpu_has_local_ebase) + build_r4000_tlb_refill_handler(); } } - -void __init flush_tlb_handlers(void) -{ - flush_icache_range((unsigned long)handle_tlbl, - (unsigned long)handle_tlbl + sizeof(handle_tlbl)); - flush_icache_range((unsigned long)handle_tlbs, - (unsigned long)handle_tlbs + sizeof(handle_tlbs)); - flush_icache_range((unsigned long)handle_tlbm, - (unsigned long)handle_tlbm + sizeof(handle_tlbm)); -} diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c new file mode 100644 index 00000000000..8399ddf03a0 --- /dev/null +++ b/arch/mips/mm/uasm-micromips.c @@ -0,0 +1,235 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * A small micro-assembler. It is intentionally kept simple, does only + * support a subset of instructions, and does not try to hide pipeline + * effects like branch delay slots. + * + * Copyright (C) 2004, 2005, 2006, 2008 Thiemo Seufer + * Copyright (C) 2005, 2007 Maciej W. Rozycki + * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2012, 2013 MIPS Technologies, Inc. All rights reserved. + */ + +#include <linux/kernel.h> +#include <linux/types.h> + +#include <asm/inst.h> +#include <asm/elf.h> +#include <asm/bugs.h> +#define UASM_ISA _UASM_ISA_MICROMIPS +#include <asm/uasm.h> + +#define RS_MASK 0x1f +#define RS_SH 16 +#define RT_MASK 0x1f +#define RT_SH 21 +#define SCIMM_MASK 0x3ff +#define SCIMM_SH 16 + +/* This macro sets the non-variable bits of an instruction. */ +#define M(a, b, c, d, e, f) \ + ((a) << OP_SH \ + | (b) << RT_SH \ + | (c) << RS_SH \ + | (d) << RD_SH \ + | (e) << RE_SH \ + | (f) << FUNC_SH) + +/* Define these when we are not the ISA the kernel is being compiled with. */ +#ifndef CONFIG_CPU_MICROMIPS +#define MM_uasm_i_b(buf, off) ISAOPC(_beq)(buf, 0, 0, off) +#define MM_uasm_i_beqz(buf, rs, off) ISAOPC(_beq)(buf, rs, 0, off) +#define MM_uasm_i_beqzl(buf, rs, off) ISAOPC(_beql)(buf, rs, 0, off) +#define MM_uasm_i_bnez(buf, rs, off) ISAOPC(_bne)(buf, rs, 0, off) +#endif + +#include "uasm.c" + +static struct insn insn_table_MM[] = { + { insn_addu, M(mm_pool32a_op, 0, 0, 0, 0, mm_addu32_op), RT | RS | RD }, + { insn_addiu, M(mm_addiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM }, + { insn_and, M(mm_pool32a_op, 0, 0, 0, 0, mm_and_op), RT | RS | RD }, + { insn_andi, M(mm_andi32_op, 0, 0, 0, 0, 0), RT | RS | UIMM }, + { insn_beq, M(mm_beq32_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, + { insn_beql, 0, 0 }, + { insn_bgez, M(mm_pool32i_op, mm_bgez_op, 0, 0, 0, 0), RS | BIMM }, + { insn_bgezl, 0, 0 }, + { insn_bltz, M(mm_pool32i_op, mm_bltz_op, 0, 0, 0, 0), RS | BIMM }, + { insn_bltzl, 0, 0 }, + { insn_bne, M(mm_bne32_op, 0, 0, 0, 0, 0), RT | RS | BIMM }, + { insn_cache, M(mm_pool32b_op, 0, 0, mm_cache_func, 0, 0), RT | RS | SIMM }, + { insn_daddu, 0, 0 }, + { insn_daddiu, 0, 0 }, + { insn_divu, M(mm_pool32a_op, 0, 0, 0, mm_divu_op, mm_pool32axf_op), RT | RS }, + { insn_dmfc0, 0, 0 }, + { insn_dmtc0, 0, 0 }, + { insn_dsll, 0, 0 }, + { insn_dsll32, 0, 0 }, + { insn_dsra, 0, 0 }, + { insn_dsrl, 0, 0 }, + { insn_dsrl32, 0, 0 }, + { insn_drotr, 0, 0 }, + { insn_drotr32, 0, 0 }, + { insn_dsubu, 0, 0 }, + { insn_eret, M(mm_pool32a_op, 0, 0, 0, mm_eret_op, mm_pool32axf_op), 0 }, + { insn_ins, M(mm_pool32a_op, 0, 0, 0, 0, mm_ins_op), RT | RS | RD | RE }, + { insn_ext, M(mm_pool32a_op, 0, 0, 0, 0, mm_ext_op), RT | RS | RD | RE }, + { insn_j, M(mm_j32_op, 0, 0, 0, 0, 0), JIMM }, + { insn_jal, M(mm_jal32_op, 0, 0, 0, 0, 0), JIMM }, + { insn_jalr, M(mm_pool32a_op, 0, 0, 0, mm_jalr_op, mm_pool32axf_op), RT | RS }, + { insn_jr, M(mm_pool32a_op, 0, 0, 0, mm_jalr_op, mm_pool32axf_op), RS }, + { insn_lb, M(mm_lb32_op, 0, 0, 0, 0, 0), RT | RS | SIMM }, + { insn_ld, 0, 0 }, + { insn_lh, M(mm_lh32_op, 0, 0, 0, 0, 0), RS | RS | SIMM }, + { insn_ll, M(mm_pool32c_op, 0, 0, (mm_ll_func << 1), 0, 0), RS | RT | SIMM }, + { insn_lld, 0, 0 }, + { insn_lui, M(mm_pool32i_op, mm_lui_op, 0, 0, 0, 0), RS | SIMM }, + { insn_lw, M(mm_lw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM }, + { insn_mfc0, M(mm_pool32a_op, 0, 0, 0, mm_mfc0_op, mm_pool32axf_op), RT | RS | RD }, + { insn_mfhi, M(mm_pool32a_op, 0, 0, 0, mm_mfhi32_op, mm_pool32axf_op), RS }, + { insn_mflo, M(mm_pool32a_op, 0, 0, 0, mm_mflo32_op, mm_pool32axf_op), RS }, + { insn_mtc0, M(mm_pool32a_op, 0, 0, 0, mm_mtc0_op, mm_pool32axf_op), RT | RS | RD }, + { insn_mul, M(mm_pool32a_op, 0, 0, 0, 0, mm_mul_op), RT | RS | RD }, + { insn_or, M(mm_pool32a_op, 0, 0, 0, 0, mm_or32_op), RT | RS | RD }, + { insn_ori, M(mm_ori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM }, + { insn_pref, M(mm_pool32c_op, 0, 0, (mm_pref_func << 1), 0, 0), RT | RS | SIMM }, + { insn_rfe, 0, 0 }, + { insn_sc, M(mm_pool32c_op, 0, 0, (mm_sc_func << 1), 0, 0), RT | RS | SIMM }, + { insn_scd, 0, 0 }, + { insn_sd, 0, 0 }, + { insn_sll, M(mm_pool32a_op, 0, 0, 0, 0, mm_sll32_op), RT | RS | RD }, + { insn_sllv, M(mm_pool32a_op, 0, 0, 0, 0, mm_sllv32_op), RT | RS | RD }, + { insn_slt, M(mm_pool32a_op, 0, 0, 0, 0, mm_slt_op), RT | RS | RD }, + { insn_sltiu, M(mm_sltiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM }, + { insn_sltu, M(mm_pool32a_op, 0, 0, 0, 0, mm_sltu_op), RT | RS | RD }, + { insn_sra, M(mm_pool32a_op, 0, 0, 0, 0, mm_sra_op), RT | RS | RD }, + { insn_srl, M(mm_pool32a_op, 0, 0, 0, 0, mm_srl32_op), RT | RS | RD }, + { insn_srlv, M(mm_pool32a_op, 0, 0, 0, 0, mm_srlv32_op), RT | RS | RD }, + { insn_rotr, M(mm_pool32a_op, 0, 0, 0, 0, mm_rotr_op), RT | RS | RD }, + { insn_subu, M(mm_pool32a_op, 0, 0, 0, 0, mm_subu32_op), RT | RS | RD }, + { insn_sw, M(mm_sw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM }, + { insn_sync, M(mm_pool32a_op, 0, 0, 0, mm_sync_op, mm_pool32axf_op), RS }, + { insn_tlbp, M(mm_pool32a_op, 0, 0, 0, mm_tlbp_op, mm_pool32axf_op), 0 }, + { insn_tlbr, M(mm_pool32a_op, 0, 0, 0, mm_tlbr_op, mm_pool32axf_op), 0 }, + { insn_tlbwi, M(mm_pool32a_op, 0, 0, 0, mm_tlbwi_op, mm_pool32axf_op), 0 }, + { insn_tlbwr, M(mm_pool32a_op, 0, 0, 0, mm_tlbwr_op, mm_pool32axf_op), 0 }, + { insn_wait, M(mm_pool32a_op, 0, 0, 0, mm_wait_op, mm_pool32axf_op), SCIMM }, + { insn_wsbh, M(mm_pool32a_op, 0, 0, 0, mm_wsbh_op, mm_pool32axf_op), RT | RS }, + { insn_xor, M(mm_pool32a_op, 0, 0, 0, 0, mm_xor32_op), RT | RS | RD }, + { insn_xori, M(mm_xori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM }, + { insn_dins, 0, 0 }, + { insn_dinsm, 0, 0 }, + { insn_syscall, M(mm_pool32a_op, 0, 0, 0, mm_syscall_op, mm_pool32axf_op), SCIMM}, + { insn_bbit0, 0, 0 }, + { insn_bbit1, 0, 0 }, + { insn_lwx, 0, 0 }, + { insn_ldx, 0, 0 }, + { insn_invalid, 0, 0 } +}; + +#undef M + +static inline u32 build_bimm(s32 arg) +{ + WARN(arg > 0xffff || arg < -0x10000, + KERN_WARNING "Micro-assembler field overflow\n"); + + WARN(arg & 0x3, KERN_WARNING "Invalid micro-assembler branch target\n"); + + return ((arg < 0) ? (1 << 15) : 0) | ((arg >> 1) & 0x7fff); +} + +static inline u32 build_jimm(u32 arg) +{ + + WARN(arg & ~((JIMM_MASK << 2) | 1), + KERN_WARNING "Micro-assembler field overflow\n"); + + return (arg >> 1) & JIMM_MASK; +} + +/* + * The order of opcode arguments is implicitly left to right, + * starting with RS and ending with FUNC or IMM. + */ +static void build_insn(u32 **buf, enum opcode opc, ...) +{ + struct insn *ip = NULL; + unsigned int i; + va_list ap; + u32 op; + + for (i = 0; insn_table_MM[i].opcode != insn_invalid; i++) + if (insn_table_MM[i].opcode == opc) { + ip = &insn_table_MM[i]; + break; + } + + if (!ip || (opc == insn_daddiu && r4k_daddiu_bug())) + panic("Unsupported Micro-assembler instruction %d", opc); + + op = ip->match; + va_start(ap, opc); + if (ip->fields & RS) { + if (opc == insn_mfc0 || opc == insn_mtc0) + op |= build_rt(va_arg(ap, u32)); + else + op |= build_rs(va_arg(ap, u32)); + } + if (ip->fields & RT) { + if (opc == insn_mfc0 || opc == insn_mtc0) + op |= build_rs(va_arg(ap, u32)); + else + op |= build_rt(va_arg(ap, u32)); + } + if (ip->fields & RD) + op |= build_rd(va_arg(ap, u32)); + if (ip->fields & RE) + op |= build_re(va_arg(ap, u32)); + if (ip->fields & SIMM) + op |= build_simm(va_arg(ap, s32)); + if (ip->fields & UIMM) + op |= build_uimm(va_arg(ap, u32)); + if (ip->fields & BIMM) + op |= build_bimm(va_arg(ap, s32)); + if (ip->fields & JIMM) + op |= build_jimm(va_arg(ap, u32)); + if (ip->fields & FUNC) + op |= build_func(va_arg(ap, u32)); + if (ip->fields & SET) + op |= build_set(va_arg(ap, u32)); + if (ip->fields & SCIMM) + op |= build_scimm(va_arg(ap, u32)); + va_end(ap); + +#ifdef CONFIG_CPU_LITTLE_ENDIAN + **buf = ((op & 0xffff) << 16) | (op >> 16); +#else + **buf = op; +#endif + (*buf)++; +} + +static inline void +__resolve_relocs(struct uasm_reloc *rel, struct uasm_label *lab) +{ + long laddr = (long)lab->addr; + long raddr = (long)rel->addr; + + switch (rel->type) { + case R_MIPS_PC16: +#ifdef CONFIG_CPU_LITTLE_ENDIAN + *rel->addr |= (build_bimm(laddr - (raddr + 4)) << 16); +#else + *rel->addr |= build_bimm(laddr - (raddr + 4)); +#endif + break; + + default: + panic("Unsupported Micro-assembler relocation %d", + rel->type); + } +} diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c new file mode 100644 index 00000000000..6708a2dbf93 --- /dev/null +++ b/arch/mips/mm/uasm-mips.c @@ -0,0 +1,220 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * A small micro-assembler. It is intentionally kept simple, does only + * support a subset of instructions, and does not try to hide pipeline + * effects like branch delay slots. + * + * Copyright (C) 2004, 2005, 2006, 2008 Thiemo Seufer + * Copyright (C) 2005, 2007 Maciej W. Rozycki + * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2012, 2013 MIPS Technologies, Inc. All rights reserved. + */ + +#include <linux/kernel.h> +#include <linux/types.h> + +#include <asm/inst.h> +#include <asm/elf.h> +#include <asm/bugs.h> +#define UASM_ISA _UASM_ISA_CLASSIC +#include <asm/uasm.h> + +#define RS_MASK 0x1f +#define RS_SH 21 +#define RT_MASK 0x1f +#define RT_SH 16 +#define SCIMM_MASK 0xfffff +#define SCIMM_SH 6 + +/* This macro sets the non-variable bits of an instruction. */ +#define M(a, b, c, d, e, f) \ + ((a) << OP_SH \ + | (b) << RS_SH \ + | (c) << RT_SH \ + | (d) << RD_SH \ + | (e) << RE_SH \ + | (f) << FUNC_SH) + +/* Define these when we are not the ISA the kernel is being compiled with. */ +#ifdef CONFIG_CPU_MICROMIPS +#define CL_uasm_i_b(buf, off) ISAOPC(_beq)(buf, 0, 0, off) +#define CL_uasm_i_beqz(buf, rs, off) ISAOPC(_beq)(buf, rs, 0, off) +#define CL_uasm_i_beqzl(buf, rs, off) ISAOPC(_beql)(buf, rs, 0, off) +#define CL_uasm_i_bnez(buf, rs, off) ISAOPC(_bne)(buf, rs, 0, off) +#endif + +#include "uasm.c" + +static struct insn insn_table[] = { + { insn_addiu, M(addiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, + { insn_addu, M(spec_op, 0, 0, 0, 0, addu_op), RS | RT | RD }, + { insn_andi, M(andi_op, 0, 0, 0, 0, 0), RS | RT | UIMM }, + { insn_and, M(spec_op, 0, 0, 0, 0, and_op), RS | RT | RD }, + { insn_bbit0, M(lwc2_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, + { insn_bbit1, M(swc2_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, + { insn_beql, M(beql_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, + { insn_beq, M(beq_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, + { insn_bgezl, M(bcond_op, 0, bgezl_op, 0, 0, 0), RS | BIMM }, + { insn_bgez, M(bcond_op, 0, bgez_op, 0, 0, 0), RS | BIMM }, + { insn_bltzl, M(bcond_op, 0, bltzl_op, 0, 0, 0), RS | BIMM }, + { insn_bltz, M(bcond_op, 0, bltz_op, 0, 0, 0), RS | BIMM }, + { insn_bne, M(bne_op, 0, 0, 0, 0, 0), RS | RT | BIMM }, + { insn_cache, M(cache_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, + { insn_daddiu, M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, + { insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD }, + { insn_dinsm, M(spec3_op, 0, 0, 0, 0, dinsm_op), RS | RT | RD | RE }, + { insn_dins, M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE }, + { insn_divu, M(spec_op, 0, 0, 0, 0, divu_op), RS | RT }, + { insn_dmfc0, M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET}, + { insn_dmtc0, M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET}, + { insn_drotr32, M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE }, + { insn_drotr, M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE }, + { insn_dsll32, M(spec_op, 0, 0, 0, 0, dsll32_op), RT | RD | RE }, + { insn_dsll, M(spec_op, 0, 0, 0, 0, dsll_op), RT | RD | RE }, + { insn_dsra, M(spec_op, 0, 0, 0, 0, dsra_op), RT | RD | RE }, + { insn_dsrl32, M(spec_op, 0, 0, 0, 0, dsrl32_op), RT | RD | RE }, + { insn_dsrl, M(spec_op, 0, 0, 0, 0, dsrl_op), RT | RD | RE }, + { insn_dsubu, M(spec_op, 0, 0, 0, 0, dsubu_op), RS | RT | RD }, + { insn_eret, M(cop0_op, cop_op, 0, 0, 0, eret_op), 0 }, + { insn_ext, M(spec3_op, 0, 0, 0, 0, ext_op), RS | RT | RD | RE }, + { insn_ins, M(spec3_op, 0, 0, 0, 0, ins_op), RS | RT | RD | RE }, + { insn_j, M(j_op, 0, 0, 0, 0, 0), JIMM }, + { insn_jal, M(jal_op, 0, 0, 0, 0, 0), JIMM }, + { insn_jalr, M(spec_op, 0, 0, 0, 0, jalr_op), RS | RD }, + { insn_j, M(j_op, 0, 0, 0, 0, 0), JIMM }, + { insn_jr, M(spec_op, 0, 0, 0, 0, jr_op), RS }, + { insn_lb, M(lb_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, + { insn_ld, M(ld_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, + { insn_ldx, M(spec3_op, 0, 0, 0, ldx_op, lx_op), RS | RT | RD }, + { insn_lh, M(lh_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, + { insn_lld, M(lld_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, + { insn_ll, M(ll_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, + { insn_lui, M(lui_op, 0, 0, 0, 0, 0), RT | SIMM }, + { insn_lw, M(lw_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, + { insn_lwx, M(spec3_op, 0, 0, 0, lwx_op, lx_op), RS | RT | RD }, + { insn_mfc0, M(cop0_op, mfc_op, 0, 0, 0, 0), RT | RD | SET}, + { insn_mfhi, M(spec_op, 0, 0, 0, 0, mfhi_op), RD }, + { insn_mflo, M(spec_op, 0, 0, 0, 0, mflo_op), RD }, + { insn_mtc0, M(cop0_op, mtc_op, 0, 0, 0, 0), RT | RD | SET}, + { insn_mul, M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD}, + { insn_ori, M(ori_op, 0, 0, 0, 0, 0), RS | RT | UIMM }, + { insn_or, M(spec_op, 0, 0, 0, 0, or_op), RS | RT | RD }, + { insn_pref, M(pref_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, + { insn_rfe, M(cop0_op, cop_op, 0, 0, 0, rfe_op), 0 }, + { insn_rotr, M(spec_op, 1, 0, 0, 0, srl_op), RT | RD | RE }, + { insn_scd, M(scd_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, + { insn_sc, M(sc_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, + { insn_sd, M(sd_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, + { insn_sll, M(spec_op, 0, 0, 0, 0, sll_op), RT | RD | RE }, + { insn_sllv, M(spec_op, 0, 0, 0, 0, sllv_op), RS | RT | RD }, + { insn_slt, M(spec_op, 0, 0, 0, 0, slt_op), RS | RT | RD }, + { insn_sltiu, M(sltiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, + { insn_sltu, M(spec_op, 0, 0, 0, 0, sltu_op), RS | RT | RD }, + { insn_sra, M(spec_op, 0, 0, 0, 0, sra_op), RT | RD | RE }, + { insn_srl, M(spec_op, 0, 0, 0, 0, srl_op), RT | RD | RE }, + { insn_srlv, M(spec_op, 0, 0, 0, 0, srlv_op), RS | RT | RD }, + { insn_subu, M(spec_op, 0, 0, 0, 0, subu_op), RS | RT | RD }, + { insn_sw, M(sw_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, + { insn_sync, M(spec_op, 0, 0, 0, 0, sync_op), RE }, + { insn_syscall, M(spec_op, 0, 0, 0, 0, syscall_op), SCIMM}, + { insn_tlbp, M(cop0_op, cop_op, 0, 0, 0, tlbp_op), 0 }, + { insn_tlbr, M(cop0_op, cop_op, 0, 0, 0, tlbr_op), 0 }, + { insn_tlbwi, M(cop0_op, cop_op, 0, 0, 0, tlbwi_op), 0 }, + { insn_tlbwr, M(cop0_op, cop_op, 0, 0, 0, tlbwr_op), 0 }, + { insn_wait, M(cop0_op, cop_op, 0, 0, 0, wait_op), SCIMM }, + { insn_wsbh, M(spec3_op, 0, 0, 0, wsbh_op, bshfl_op), RT | RD }, + { insn_xori, M(xori_op, 0, 0, 0, 0, 0), RS | RT | UIMM }, + { insn_xor, M(spec_op, 0, 0, 0, 0, xor_op), RS | RT | RD }, + { insn_yield, M(spec3_op, 0, 0, 0, 0, yield_op), RS | RD }, + { insn_invalid, 0, 0 } +}; + +#undef M + +static inline u32 build_bimm(s32 arg) +{ + WARN(arg > 0x1ffff || arg < -0x20000, + KERN_WARNING "Micro-assembler field overflow\n"); + + WARN(arg & 0x3, KERN_WARNING "Invalid micro-assembler branch target\n"); + + return ((arg < 0) ? (1 << 15) : 0) | ((arg >> 2) & 0x7fff); +} + +static inline u32 build_jimm(u32 arg) +{ + WARN(arg & ~(JIMM_MASK << 2), + KERN_WARNING "Micro-assembler field overflow\n"); + + return (arg >> 2) & JIMM_MASK; +} + +/* + * The order of opcode arguments is implicitly left to right, + * starting with RS and ending with FUNC or IMM. + */ +static void build_insn(u32 **buf, enum opcode opc, ...) +{ + struct insn *ip = NULL; + unsigned int i; + va_list ap; + u32 op; + + for (i = 0; insn_table[i].opcode != insn_invalid; i++) + if (insn_table[i].opcode == opc) { + ip = &insn_table[i]; + break; + } + + if (!ip || (opc == insn_daddiu && r4k_daddiu_bug())) + panic("Unsupported Micro-assembler instruction %d", opc); + + op = ip->match; + va_start(ap, opc); + if (ip->fields & RS) + op |= build_rs(va_arg(ap, u32)); + if (ip->fields & RT) + op |= build_rt(va_arg(ap, u32)); + if (ip->fields & RD) + op |= build_rd(va_arg(ap, u32)); + if (ip->fields & RE) + op |= build_re(va_arg(ap, u32)); + if (ip->fields & SIMM) + op |= build_simm(va_arg(ap, s32)); + if (ip->fields & UIMM) + op |= build_uimm(va_arg(ap, u32)); + if (ip->fields & BIMM) + op |= build_bimm(va_arg(ap, s32)); + if (ip->fields & JIMM) + op |= build_jimm(va_arg(ap, u32)); + if (ip->fields & FUNC) + op |= build_func(va_arg(ap, u32)); + if (ip->fields & SET) + op |= build_set(va_arg(ap, u32)); + if (ip->fields & SCIMM) + op |= build_scimm(va_arg(ap, u32)); + va_end(ap); + + **buf = op; + (*buf)++; +} + +static inline void +__resolve_relocs(struct uasm_reloc *rel, struct uasm_label *lab) +{ + long laddr = (long)lab->addr; + long raddr = (long)rel->addr; + + switch (rel->type) { + case R_MIPS_PC16: + *rel->addr |= build_bimm(laddr - (raddr + 4)); + break; + + default: + panic("Unsupported Micro-assembler relocation %d", + rel->type); + } +} diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c new file mode 100644 index 00000000000..a01b0d6cedd --- /dev/null +++ b/arch/mips/mm/uasm.c @@ -0,0 +1,581 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * A small micro-assembler. It is intentionally kept simple, does only + * support a subset of instructions, and does not try to hide pipeline + * effects like branch delay slots. + * + * Copyright (C) 2004, 2005, 2006, 2008 Thiemo Seufer + * Copyright (C) 2005, 2007 Maciej W. Rozycki + * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2012, 2013 MIPS Technologies, Inc. All rights reserved. + */ + +enum fields { + RS = 0x001, + RT = 0x002, + RD = 0x004, + RE = 0x008, + SIMM = 0x010, + UIMM = 0x020, + BIMM = 0x040, + JIMM = 0x080, + FUNC = 0x100, + SET = 0x200, + SCIMM = 0x400 +}; + +#define OP_MASK 0x3f +#define OP_SH 26 +#define RD_MASK 0x1f +#define RD_SH 11 +#define RE_MASK 0x1f +#define RE_SH 6 +#define IMM_MASK 0xffff +#define IMM_SH 0 +#define JIMM_MASK 0x3ffffff +#define JIMM_SH 0 +#define FUNC_MASK 0x3f +#define FUNC_SH 0 +#define SET_MASK 0x7 +#define SET_SH 0 + +enum opcode { + insn_invalid, + insn_addiu, insn_addu, insn_and, insn_andi, insn_bbit0, insn_bbit1, + insn_beq, insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl, + insn_bne, insn_cache, insn_daddiu, insn_daddu, insn_dins, insn_dinsm, + insn_divu, insn_dmfc0, insn_dmtc0, insn_drotr, insn_drotr32, insn_dsll, + insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32, insn_dsubu, insn_eret, + insn_ext, insn_ins, insn_j, insn_jal, insn_jalr, insn_jr, insn_lb, + insn_ld, insn_ldx, insn_lh, insn_ll, insn_lld, insn_lui, insn_lw, + insn_lwx, insn_mfc0, insn_mfhi, insn_mflo, insn_mtc0, insn_mul, + insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sc, insn_scd, + insn_sd, insn_sll, insn_sllv, insn_slt, insn_sltiu, insn_sltu, insn_sra, + insn_srl, insn_srlv, insn_subu, insn_sw, insn_sync, insn_syscall, + insn_tlbp, insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait, insn_wsbh, + insn_xor, insn_xori, insn_yield, +}; + +struct insn { + enum opcode opcode; + u32 match; + enum fields fields; +}; + +static inline u32 build_rs(u32 arg) +{ + WARN(arg & ~RS_MASK, KERN_WARNING "Micro-assembler field overflow\n"); + + return (arg & RS_MASK) << RS_SH; +} + +static inline u32 build_rt(u32 arg) +{ + WARN(arg & ~RT_MASK, KERN_WARNING "Micro-assembler field overflow\n"); + + return (arg & RT_MASK) << RT_SH; +} + +static inline u32 build_rd(u32 arg) +{ + WARN(arg & ~RD_MASK, KERN_WARNING "Micro-assembler field overflow\n"); + + return (arg & RD_MASK) << RD_SH; +} + +static inline u32 build_re(u32 arg) +{ + WARN(arg & ~RE_MASK, KERN_WARNING "Micro-assembler field overflow\n"); + + return (arg & RE_MASK) << RE_SH; +} + +static inline u32 build_simm(s32 arg) +{ + WARN(arg > 0x7fff || arg < -0x8000, + KERN_WARNING "Micro-assembler field overflow\n"); + + return arg & 0xffff; +} + +static inline u32 build_uimm(u32 arg) +{ + WARN(arg & ~IMM_MASK, KERN_WARNING "Micro-assembler field overflow\n"); + + return arg & IMM_MASK; +} + +static inline u32 build_scimm(u32 arg) +{ + WARN(arg & ~SCIMM_MASK, + KERN_WARNING "Micro-assembler field overflow\n"); + + return (arg & SCIMM_MASK) << SCIMM_SH; +} + +static inline u32 build_func(u32 arg) +{ + WARN(arg & ~FUNC_MASK, KERN_WARNING "Micro-assembler field overflow\n"); + + return arg & FUNC_MASK; +} + +static inline u32 build_set(u32 arg) +{ + WARN(arg & ~SET_MASK, KERN_WARNING "Micro-assembler field overflow\n"); + + return arg & SET_MASK; +} + +static void build_insn(u32 **buf, enum opcode opc, ...); + +#define I_u1u2u3(op) \ +Ip_u1u2u3(op) \ +{ \ + build_insn(buf, insn##op, a, b, c); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_s3s1s2(op) \ +Ip_s3s1s2(op) \ +{ \ + build_insn(buf, insn##op, b, c, a); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u2u1u3(op) \ +Ip_u2u1u3(op) \ +{ \ + build_insn(buf, insn##op, b, a, c); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u3u2u1(op) \ +Ip_u3u2u1(op) \ +{ \ + build_insn(buf, insn##op, c, b, a); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u3u1u2(op) \ +Ip_u3u1u2(op) \ +{ \ + build_insn(buf, insn##op, b, c, a); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u1u2s3(op) \ +Ip_u1u2s3(op) \ +{ \ + build_insn(buf, insn##op, a, b, c); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u2s3u1(op) \ +Ip_u2s3u1(op) \ +{ \ + build_insn(buf, insn##op, c, a, b); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u2u1s3(op) \ +Ip_u2u1s3(op) \ +{ \ + build_insn(buf, insn##op, b, a, c); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u2u1msbu3(op) \ +Ip_u2u1msbu3(op) \ +{ \ + build_insn(buf, insn##op, b, a, c+d-1, c); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u2u1msb32u3(op) \ +Ip_u2u1msbu3(op) \ +{ \ + build_insn(buf, insn##op, b, a, c+d-33, c); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u2u1msbdu3(op) \ +Ip_u2u1msbu3(op) \ +{ \ + build_insn(buf, insn##op, b, a, d-1, c); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u1u2(op) \ +Ip_u1u2(op) \ +{ \ + build_insn(buf, insn##op, a, b); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u2u1(op) \ +Ip_u1u2(op) \ +{ \ + build_insn(buf, insn##op, b, a); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u1s2(op) \ +Ip_u1s2(op) \ +{ \ + build_insn(buf, insn##op, a, b); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u1(op) \ +Ip_u1(op) \ +{ \ + build_insn(buf, insn##op, a); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_0(op) \ +Ip_0(op) \ +{ \ + build_insn(buf, insn##op); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +I_u2u1s3(_addiu) +I_u3u1u2(_addu) +I_u2u1u3(_andi) +I_u3u1u2(_and) +I_u1u2s3(_beq) +I_u1u2s3(_beql) +I_u1s2(_bgez) +I_u1s2(_bgezl) +I_u1s2(_bltz) +I_u1s2(_bltzl) +I_u1u2s3(_bne) +I_u2s3u1(_cache) +I_u1u2u3(_dmfc0) +I_u1u2u3(_dmtc0) +I_u2u1s3(_daddiu) +I_u3u1u2(_daddu) +I_u1u2(_divu) +I_u2u1u3(_dsll) +I_u2u1u3(_dsll32) +I_u2u1u3(_dsra) +I_u2u1u3(_dsrl) +I_u2u1u3(_dsrl32) +I_u2u1u3(_drotr) +I_u2u1u3(_drotr32) +I_u3u1u2(_dsubu) +I_0(_eret) +I_u2u1msbdu3(_ext) +I_u2u1msbu3(_ins) +I_u1(_j) +I_u1(_jal) +I_u2u1(_jalr) +I_u1(_jr) +I_u2s3u1(_lb) +I_u2s3u1(_ld) +I_u2s3u1(_lh) +I_u2s3u1(_ll) +I_u2s3u1(_lld) +I_u1s2(_lui) +I_u2s3u1(_lw) +I_u1u2u3(_mfc0) +I_u1(_mfhi) +I_u1(_mflo) +I_u1u2u3(_mtc0) +I_u3u1u2(_mul) +I_u2u1u3(_ori) +I_u3u1u2(_or) +I_0(_rfe) +I_u2s3u1(_sc) +I_u2s3u1(_scd) +I_u2s3u1(_sd) +I_u2u1u3(_sll) +I_u3u2u1(_sllv) +I_s3s1s2(_slt) +I_u2u1s3(_sltiu) +I_u3u1u2(_sltu) +I_u2u1u3(_sra) +I_u2u1u3(_srl) +I_u3u2u1(_srlv) +I_u2u1u3(_rotr) +I_u3u1u2(_subu) +I_u2s3u1(_sw) +I_u1(_sync) +I_0(_tlbp) +I_0(_tlbr) +I_0(_tlbwi) +I_0(_tlbwr) +I_u1(_wait); +I_u2u1(_wsbh) +I_u3u1u2(_xor) +I_u2u1u3(_xori) +I_u2u1(_yield) +I_u2u1msbu3(_dins); +I_u2u1msb32u3(_dinsm); +I_u1(_syscall); +I_u1u2s3(_bbit0); +I_u1u2s3(_bbit1); +I_u3u1u2(_lwx) +I_u3u1u2(_ldx) + +#ifdef CONFIG_CPU_CAVIUM_OCTEON +#include <asm/octeon/octeon.h> +void ISAFUNC(uasm_i_pref)(u32 **buf, unsigned int a, signed int b, + unsigned int c) +{ + if (OCTEON_IS_MODEL(OCTEON_CN63XX_PASS1_X) && a <= 24 && a != 5) + /* + * As per erratum Core-14449, replace prefetches 0-4, + * 6-24 with 'pref 28'. + */ + build_insn(buf, insn_pref, c, 28, b); + else + build_insn(buf, insn_pref, c, a, b); +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_i_pref)); +#else +I_u2s3u1(_pref) +#endif + +/* Handle labels. */ +void ISAFUNC(uasm_build_label)(struct uasm_label **lab, u32 *addr, int lid) +{ + (*lab)->addr = addr; + (*lab)->lab = lid; + (*lab)++; +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_build_label)); + +int ISAFUNC(uasm_in_compat_space_p)(long addr) +{ + /* Is this address in 32bit compat space? */ +#ifdef CONFIG_64BIT + return (((addr) & 0xffffffff00000000L) == 0xffffffff00000000L); +#else + return 1; +#endif +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_in_compat_space_p)); + +static int uasm_rel_highest(long val) +{ +#ifdef CONFIG_64BIT + return ((((val + 0x800080008000L) >> 48) & 0xffff) ^ 0x8000) - 0x8000; +#else + return 0; +#endif +} + +static int uasm_rel_higher(long val) +{ +#ifdef CONFIG_64BIT + return ((((val + 0x80008000L) >> 32) & 0xffff) ^ 0x8000) - 0x8000; +#else + return 0; +#endif +} + +int ISAFUNC(uasm_rel_hi)(long val) +{ + return ((((val + 0x8000L) >> 16) & 0xffff) ^ 0x8000) - 0x8000; +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_rel_hi)); + +int ISAFUNC(uasm_rel_lo)(long val) +{ + return ((val & 0xffff) ^ 0x8000) - 0x8000; +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_rel_lo)); + +void ISAFUNC(UASM_i_LA_mostly)(u32 **buf, unsigned int rs, long addr) +{ + if (!ISAFUNC(uasm_in_compat_space_p)(addr)) { + ISAFUNC(uasm_i_lui)(buf, rs, uasm_rel_highest(addr)); + if (uasm_rel_higher(addr)) + ISAFUNC(uasm_i_daddiu)(buf, rs, rs, uasm_rel_higher(addr)); + if (ISAFUNC(uasm_rel_hi(addr))) { + ISAFUNC(uasm_i_dsll)(buf, rs, rs, 16); + ISAFUNC(uasm_i_daddiu)(buf, rs, rs, + ISAFUNC(uasm_rel_hi)(addr)); + ISAFUNC(uasm_i_dsll)(buf, rs, rs, 16); + } else + ISAFUNC(uasm_i_dsll32)(buf, rs, rs, 0); + } else + ISAFUNC(uasm_i_lui)(buf, rs, ISAFUNC(uasm_rel_hi(addr))); +} +UASM_EXPORT_SYMBOL(ISAFUNC(UASM_i_LA_mostly)); + +void ISAFUNC(UASM_i_LA)(u32 **buf, unsigned int rs, long addr) +{ + ISAFUNC(UASM_i_LA_mostly)(buf, rs, addr); + if (ISAFUNC(uasm_rel_lo(addr))) { + if (!ISAFUNC(uasm_in_compat_space_p)(addr)) + ISAFUNC(uasm_i_daddiu)(buf, rs, rs, + ISAFUNC(uasm_rel_lo(addr))); + else + ISAFUNC(uasm_i_addiu)(buf, rs, rs, + ISAFUNC(uasm_rel_lo(addr))); + } +} +UASM_EXPORT_SYMBOL(ISAFUNC(UASM_i_LA)); + +/* Handle relocations. */ +void ISAFUNC(uasm_r_mips_pc16)(struct uasm_reloc **rel, u32 *addr, int lid) +{ + (*rel)->addr = addr; + (*rel)->type = R_MIPS_PC16; + (*rel)->lab = lid; + (*rel)++; +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_r_mips_pc16)); + +static inline void __resolve_relocs(struct uasm_reloc *rel, + struct uasm_label *lab); + +void ISAFUNC(uasm_resolve_relocs)(struct uasm_reloc *rel, + struct uasm_label *lab) +{ + struct uasm_label *l; + + for (; rel->lab != UASM_LABEL_INVALID; rel++) + for (l = lab; l->lab != UASM_LABEL_INVALID; l++) + if (rel->lab == l->lab) + __resolve_relocs(rel, l); +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_resolve_relocs)); + +void ISAFUNC(uasm_move_relocs)(struct uasm_reloc *rel, u32 *first, u32 *end, + long off) +{ + for (; rel->lab != UASM_LABEL_INVALID; rel++) + if (rel->addr >= first && rel->addr < end) + rel->addr += off; +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_move_relocs)); + +void ISAFUNC(uasm_move_labels)(struct uasm_label *lab, u32 *first, u32 *end, + long off) +{ + for (; lab->lab != UASM_LABEL_INVALID; lab++) + if (lab->addr >= first && lab->addr < end) + lab->addr += off; +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_move_labels)); + +void ISAFUNC(uasm_copy_handler)(struct uasm_reloc *rel, struct uasm_label *lab, + u32 *first, u32 *end, u32 *target) +{ + long off = (long)(target - first); + + memcpy(target, first, (end - first) * sizeof(u32)); + + ISAFUNC(uasm_move_relocs(rel, first, end, off)); + ISAFUNC(uasm_move_labels(lab, first, end, off)); +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_copy_handler)); + +int ISAFUNC(uasm_insn_has_bdelay)(struct uasm_reloc *rel, u32 *addr) +{ + for (; rel->lab != UASM_LABEL_INVALID; rel++) { + if (rel->addr == addr + && (rel->type == R_MIPS_PC16 + || rel->type == R_MIPS_26)) + return 1; + } + + return 0; +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_insn_has_bdelay)); + +/* Convenience functions for labeled branches. */ +void ISAFUNC(uasm_il_bltz)(u32 **p, struct uasm_reloc **r, unsigned int reg, + int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + ISAFUNC(uasm_i_bltz)(p, reg, 0); +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bltz)); + +void ISAFUNC(uasm_il_b)(u32 **p, struct uasm_reloc **r, int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + ISAFUNC(uasm_i_b)(p, 0); +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_b)); + +void ISAFUNC(uasm_il_beq)(u32 **p, struct uasm_reloc **r, unsigned int r1, + unsigned int r2, int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + ISAFUNC(uasm_i_beq)(p, r1, r2, 0); +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_beq)); + +void ISAFUNC(uasm_il_beqz)(u32 **p, struct uasm_reloc **r, unsigned int reg, + int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + ISAFUNC(uasm_i_beqz)(p, reg, 0); +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_beqz)); + +void ISAFUNC(uasm_il_beqzl)(u32 **p, struct uasm_reloc **r, unsigned int reg, + int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + ISAFUNC(uasm_i_beqzl)(p, reg, 0); +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_beqzl)); + +void ISAFUNC(uasm_il_bne)(u32 **p, struct uasm_reloc **r, unsigned int reg1, + unsigned int reg2, int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + ISAFUNC(uasm_i_bne)(p, reg1, reg2, 0); +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bne)); + +void ISAFUNC(uasm_il_bnez)(u32 **p, struct uasm_reloc **r, unsigned int reg, + int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + ISAFUNC(uasm_i_bnez)(p, reg, 0); +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bnez)); + +void ISAFUNC(uasm_il_bgezl)(u32 **p, struct uasm_reloc **r, unsigned int reg, + int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + ISAFUNC(uasm_i_bgezl)(p, reg, 0); +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bgezl)); + +void ISAFUNC(uasm_il_bgez)(u32 **p, struct uasm_reloc **r, unsigned int reg, + int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + ISAFUNC(uasm_i_bgez)(p, reg, 0); +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bgez)); + +void ISAFUNC(uasm_il_bbit0)(u32 **p, struct uasm_reloc **r, unsigned int reg, + unsigned int bit, int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + ISAFUNC(uasm_i_bbit0)(p, reg, bit, 0); +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bbit0)); + +void ISAFUNC(uasm_il_bbit1)(u32 **p, struct uasm_reloc **r, unsigned int reg, + unsigned int bit, int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + ISAFUNC(uasm_i_bbit1)(p, reg, bit, 0); +} +UASM_EXPORT_SYMBOL(ISAFUNC(uasm_il_bbit1)); |
