diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-07-26 14:17:28 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-07-26 14:17:28 -0700 |
commit | 6fd4ce886440c7a1b08b6642e606ee19097829df (patch) | |
tree | cd12dc0ec7e40e9d9a26e942999de93a5e673b4d /arch/mips/mm | |
parent | ba5b56cb3e3d2cab73d4fee9a022bb69462a8cd9 (diff) | |
parent | bf28607fbe529e20180080c4a0295b0a47834fde (diff) |
Merge branch 'upstream' of git://git.linux-mips.org/pub/scm/upstream-linus
* 'upstream' of git://git.linux-mips.org/pub/scm/upstream-linus: (31 commits)
MIPS: Close races in TLB modify handlers.
MIPS: Add uasm UASM_i_SRL_SAFE macro.
MIPS: RB532: Use hex_to_bin()
MIPS: Enable cpu_has_clo_clz for MIPS Technologies' platforms
MIPS: PowerTV: Provide cpu-feature-overrides.h
MIPS: Remove pointless return statement from empty void functions.
MIPS: Limit fixrange_init() to the FIXMAP region
MIPS: Install handlers for software IRQs
MIPS: Move FIXADDR_TOP into spaces.h
MIPS: Add SYNC after cacheflush
MIPS: pfn_valid() is broken on low memory HIGHMEM systems
MIPS: HIGHMEM DMA on noncoherent MIPS32 processors
MIPS: topdown mmap support
MIPS: Remove redundant addr_limit assignment on exec.
MIPS: AR7: Replace __attribute__((__packed__)) with __packed
MIPS: AR7: Remove 'space before tabs' in platform.c
MIPS: Lantiq: Add missing clk_enable and clk_disable functions.
MIPS: AR7: Fix trailing semicolon bug in clock.c
MAINTAINERS: Update MIPS entry.
MIPS: BCM63xx: Remove duplicate PERF_IRQSTAT_REG definition
...
Diffstat (limited to 'arch/mips/mm')
-rw-r--r-- | arch/mips/mm/c-r4k.c | 4 | ||||
-rw-r--r-- | arch/mips/mm/dma-default.c | 114 | ||||
-rw-r--r-- | arch/mips/mm/init.c | 8 | ||||
-rw-r--r-- | arch/mips/mm/mmap.c | 193 | ||||
-rw-r--r-- | arch/mips/mm/pgtable-32.c | 2 | ||||
-rw-r--r-- | arch/mips/mm/pgtable-64.c | 2 | ||||
-rw-r--r-- | arch/mips/mm/tlbex.c | 292 |
7 files changed, 442 insertions, 173 deletions
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index eeb642e4066..b9aabb998a3 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -604,6 +604,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) r4k_blast_scache(); else blast_scache_range(addr, addr + size); + __sync(); return; } @@ -620,6 +621,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) } bc_wback_inv(addr, size); + __sync(); } static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) @@ -647,6 +649,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) (addr + size - 1) & almask); blast_inv_scache_range(addr, addr + size); } + __sync(); return; } @@ -663,6 +666,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) } bc_inv(addr, size); + __sync(); } #endif /* CONFIG_DMA_NONCOHERENT */ diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index 21ea14efb83..46084912e58 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -15,18 +15,18 @@ #include <linux/scatterlist.h> #include <linux/string.h> #include <linux/gfp.h> +#include <linux/highmem.h> #include <asm/cache.h> #include <asm/io.h> #include <dma-coherence.h> -static inline unsigned long dma_addr_to_virt(struct device *dev, +static inline struct page *dma_addr_to_page(struct device *dev, dma_addr_t dma_addr) { - unsigned long addr = plat_dma_addr_to_phys(dev, dma_addr); - - return (unsigned long)phys_to_virt(addr); + return pfn_to_page( + plat_dma_addr_to_phys(dev, dma_addr) >> PAGE_SHIFT); } /* @@ -148,20 +148,20 @@ static void mips_dma_free_coherent(struct device *dev, size_t size, void *vaddr, free_pages(addr, get_order(size)); } -static inline void __dma_sync(unsigned long addr, size_t size, +static inline void __dma_sync_virtual(void *addr, size_t size, enum dma_data_direction direction) { switch (direction) { case DMA_TO_DEVICE: - dma_cache_wback(addr, size); + dma_cache_wback((unsigned long)addr, size); break; case DMA_FROM_DEVICE: - dma_cache_inv(addr, size); + dma_cache_inv((unsigned long)addr, size); break; case DMA_BIDIRECTIONAL: - dma_cache_wback_inv(addr, size); + dma_cache_wback_inv((unsigned long)addr, size); break; default: @@ -169,12 +169,49 @@ static inline void __dma_sync(unsigned long addr, size_t size, } } +/* + * A single sg entry may refer to multiple physically contiguous + * pages. But we still need to process highmem pages individually. + * If highmem is not configured then the bulk of this loop gets + * optimized out. + */ +static inline void __dma_sync(struct page *page, + unsigned long offset, size_t size, enum dma_data_direction direction) +{ + size_t left = size; + + do { + size_t len = left; + + if (PageHighMem(page)) { + void *addr; + + if (offset + len > PAGE_SIZE) { + if (offset >= PAGE_SIZE) { + page += offset >> PAGE_SHIFT; + offset &= ~PAGE_MASK; + } + len = PAGE_SIZE - offset; + } + + addr = kmap_atomic(page); + __dma_sync_virtual(addr + offset, len, direction); + kunmap_atomic(addr); + } else + __dma_sync_virtual(page_address(page) + offset, + size, direction); + offset = 0; + page++; + left -= len; + } while (left); +} + static void mips_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction direction, struct dma_attrs *attrs) { if (cpu_is_noncoherent_r10000(dev)) - __dma_sync(dma_addr_to_virt(dev, dma_addr), size, - direction); + __dma_sync(dma_addr_to_page(dev, dma_addr), + dma_addr & ~PAGE_MASK, size, direction); plat_unmap_dma_mem(dev, dma_addr, size, direction); } @@ -185,13 +222,11 @@ static int mips_dma_map_sg(struct device *dev, struct scatterlist *sg, int i; for (i = 0; i < nents; i++, sg++) { - unsigned long addr; - - addr = (unsigned long) sg_virt(sg); - if (!plat_device_is_coherent(dev) && addr) - __dma_sync(addr, sg->length, direction); - sg->dma_address = plat_map_dma_mem(dev, - (void *)addr, sg->length); + if (!plat_device_is_coherent(dev)) + __dma_sync(sg_page(sg), sg->offset, sg->length, + direction); + sg->dma_address = plat_map_dma_mem_page(dev, sg_page(sg)) + + sg->offset; } return nents; @@ -201,30 +236,23 @@ static dma_addr_t mips_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction direction, struct dma_attrs *attrs) { - unsigned long addr; - - addr = (unsigned long) page_address(page) + offset; - if (!plat_device_is_coherent(dev)) - __dma_sync(addr, size, direction); + __dma_sync(page, offset, size, direction); - return plat_map_dma_mem(dev, (void *)addr, size); + return plat_map_dma_mem_page(dev, page) + offset; } static void mips_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, enum dma_data_direction direction, struct dma_attrs *attrs) { - unsigned long addr; int i; for (i = 0; i < nhwentries; i++, sg++) { if (!plat_device_is_coherent(dev) && - direction != DMA_TO_DEVICE) { - addr = (unsigned long) sg_virt(sg); - if (addr) - __dma_sync(addr, sg->length, direction); - } + direction != DMA_TO_DEVICE) + __dma_sync(sg_page(sg), sg->offset, sg->length, + direction); plat_unmap_dma_mem(dev, sg->dma_address, sg->length, direction); } } @@ -232,24 +260,18 @@ static void mips_dma_unmap_sg(struct device *dev, struct scatterlist *sg, static void mips_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction) { - if (cpu_is_noncoherent_r10000(dev)) { - unsigned long addr; - - addr = dma_addr_to_virt(dev, dma_handle); - __dma_sync(addr, size, direction); - } + if (cpu_is_noncoherent_r10000(dev)) + __dma_sync(dma_addr_to_page(dev, dma_handle), + dma_handle & ~PAGE_MASK, size, direction); } static void mips_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction) { plat_extra_sync_for_device(dev); - if (!plat_device_is_coherent(dev)) { - unsigned long addr; - - addr = dma_addr_to_virt(dev, dma_handle); - __dma_sync(addr, size, direction); - } + if (!plat_device_is_coherent(dev)) + __dma_sync(dma_addr_to_page(dev, dma_handle), + dma_handle & ~PAGE_MASK, size, direction); } static void mips_dma_sync_sg_for_cpu(struct device *dev, @@ -260,8 +282,8 @@ static void mips_dma_sync_sg_for_cpu(struct device *dev, /* Make sure that gcc doesn't leave the empty loop body. */ for (i = 0; i < nelems; i++, sg++) { if (cpu_is_noncoherent_r10000(dev)) - __dma_sync((unsigned long)page_address(sg_page(sg)), - sg->length, direction); + __dma_sync(sg_page(sg), sg->offset, sg->length, + direction); } } @@ -273,8 +295,8 @@ static void mips_dma_sync_sg_for_device(struct device *dev, /* Make sure that gcc doesn't leave the empty loop body. */ for (i = 0; i < nelems; i++, sg++) { if (!plat_device_is_coherent(dev)) - __dma_sync((unsigned long)page_address(sg_page(sg)), - sg->length, direction); + __dma_sync(sg_page(sg), sg->offset, sg->length, + direction); } } @@ -295,7 +317,7 @@ void dma_cache_sync(struct device *dev, void *vaddr, size_t size, plat_extra_sync_for_device(dev); if (!plat_device_is_coherent(dev)) - __dma_sync((unsigned long)vaddr, size, direction); + __dma_sync_virtual(vaddr, size, direction); } EXPORT_SYMBOL(dma_cache_sync); diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 1aadeb42c5a..b7ebc4fa89b 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -277,11 +277,11 @@ void __init fixrange_init(unsigned long start, unsigned long end, k = __pmd_offset(vaddr); pgd = pgd_base + i; - for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { + for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { pud = (pud_t *)pgd; - for ( ; (j < PTRS_PER_PUD) && (vaddr != end); pud++, j++) { + for ( ; (j < PTRS_PER_PUD) && (vaddr < end); pud++, j++) { pmd = (pmd_t *)pud; - for (; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) { + for (; (k < PTRS_PER_PMD) && (vaddr < end); pmd++, k++) { if (pmd_none(*pmd)) { pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); set_pmd(pmd, __pmd((unsigned long)pte)); @@ -368,7 +368,7 @@ void __init mem_init(void) #ifdef CONFIG_DISCONTIGMEM #error "CONFIG_HIGHMEM and CONFIG_DISCONTIGMEM dont work together yet" #endif - max_mapnr = highend_pfn; + max_mapnr = highend_pfn ? highend_pfn : max_low_pfn; #else max_mapnr = max_low_pfn; #endif diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index ae3c20a9556..9ff5d0fac55 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -10,6 +10,7 @@ #include <linux/mm.h> #include <linux/mman.h> #include <linux/module.h> +#include <linux/personality.h> #include <linux/random.h> #include <linux/sched.h> @@ -17,21 +18,65 @@ unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ EXPORT_SYMBOL(shm_align_mask); +/* gap between mmap and stack */ +#define MIN_GAP (128*1024*1024UL) +#define MAX_GAP ((TASK_SIZE)/6*5) + +static int mmap_is_legacy(void) +{ + if (current->personality & ADDR_COMPAT_LAYOUT) + return 1; + + if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) + return 1; + + return sysctl_legacy_va_layout; +} + +static unsigned long mmap_base(unsigned long rnd) +{ + unsigned long gap = rlimit(RLIMIT_STACK); + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + + return PAGE_ALIGN(TASK_SIZE - gap - rnd); +} + +static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr, + unsigned long pgoff) +{ + unsigned long base = addr & ~shm_align_mask; + unsigned long off = (pgoff << PAGE_SHIFT) & shm_align_mask; + + if (base + off <= addr) + return base + off; + + return base - off; +} + #define COLOUR_ALIGN(addr,pgoff) \ ((((addr) + shm_align_mask) & ~shm_align_mask) + \ (((pgoff) << PAGE_SHIFT) & shm_align_mask)) -unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, unsigned long flags) +enum mmap_allocation_direction {UP, DOWN}; + +static unsigned long arch_get_unmapped_area_foo(struct file *filp, + unsigned long addr0, unsigned long len, unsigned long pgoff, + unsigned long flags, enum mmap_allocation_direction dir) { - struct vm_area_struct * vmm; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long addr = addr0; int do_color_align; - if (len > TASK_SIZE) + if (unlikely(len > TASK_SIZE)) return -ENOMEM; if (flags & MAP_FIXED) { - /* Even MAP_FIXED mappings must reside within TASK_SIZE. */ + /* Even MAP_FIXED mappings must reside within TASK_SIZE */ if (TASK_SIZE - len < addr) return -EINVAL; @@ -48,34 +93,130 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, do_color_align = 0; if (filp || (flags & MAP_SHARED)) do_color_align = 1; + + /* requesting a specific address */ if (addr) { if (do_color_align) addr = COLOUR_ALIGN(addr, pgoff); else addr = PAGE_ALIGN(addr); - vmm = find_vma(current->mm, addr); + + vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vmm || addr + len <= vmm->vm_start)) + (!vma || addr + len <= vma->vm_start)) return addr; } - addr = current->mm->mmap_base; - if (do_color_align) - addr = COLOUR_ALIGN(addr, pgoff); - else - addr = PAGE_ALIGN(addr); - for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) { - /* At this point: (!vmm || addr < vmm->vm_end). */ - if (TASK_SIZE - len < addr) - return -ENOMEM; - if (!vmm || addr + len <= vmm->vm_start) - return addr; - addr = vmm->vm_end; + if (dir == UP) { + addr = mm->mmap_base; + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (TASK_SIZE - len < addr) + return -ENOMEM; + if (!vma || addr + len <= vma->vm_start) + return addr; + addr = vma->vm_end; + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); + } + } else { + /* check if free_area_cache is useful for us */ + if (len <= mm->cached_hole_size) { + mm->cached_hole_size = 0; + mm->free_area_cache = mm->mmap_base; + } + + /* either no address requested or can't fit in requested address hole */ + addr = mm->free_area_cache; + if (do_color_align) { + unsigned long base = + COLOUR_ALIGN_DOWN(addr - len, pgoff); + + addr = base + len; + } + + /* make sure it can fit in the remaining address space */ + if (likely(addr > len)) { + vma = find_vma(mm, addr - len); + if (!vma || addr <= vma->vm_start) { + /* remember the address as a hint for next time */ + return mm->free_area_cache = addr-len; + } + } + + if (unlikely(mm->mmap_base < len)) + goto bottomup; + + addr = mm->mmap_base-len; if (do_color_align) - addr = COLOUR_ALIGN(addr, pgoff); + addr = COLOUR_ALIGN_DOWN(addr, pgoff); + + do { + /* + * Lookup failure means no vma is above this address, + * else if new region fits below vma->vm_start, + * return with success: + */ + vma = find_vma(mm, addr); + if (likely(!vma || addr+len <= vma->vm_start)) { + /* remember the address as a hint for next time */ + return mm->free_area_cache = addr; + } + + /* remember the largest hole we saw so far */ + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = vma->vm_start-len; + if (do_color_align) + addr = COLOUR_ALIGN_DOWN(addr, pgoff); + } while (likely(len < vma->vm_start)); + +bottomup: + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + mm->cached_hole_size = ~0UL; + mm->free_area_cache = TASK_UNMAPPED_BASE; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + + return addr; } } +unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr0, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + return arch_get_unmapped_area_foo(filp, + addr0, len, pgoff, flags, UP); +} + +/* + * There is no need to export this but sched.h declares the function as + * extern so making it static here results in an error. + */ +unsigned long arch_get_unmapped_area_topdown(struct file *filp, + unsigned long addr0, unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + return arch_get_unmapped_area_foo(filp, + addr0, len, pgoff, flags, DOWN); +} + void arch_pick_mmap_layout(struct mm_struct *mm) { unsigned long random_factor = 0UL; @@ -89,9 +230,15 @@ void arch_pick_mmap_layout(struct mm_struct *mm) random_factor &= 0xffffffful; } - mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; - mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; + if (mmap_is_legacy()) { + mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + mm->mmap_base = mmap_base(random_factor); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } } static inline unsigned long brk_rnd(void) diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c index 575e4019227..adc6911ba74 100644 --- a/arch/mips/mm/pgtable-32.c +++ b/arch/mips/mm/pgtable-32.c @@ -52,7 +52,7 @@ void __init pagetable_init(void) * Fixed mappings: */ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - fixrange_init(vaddr, 0, pgd_base); + fixrange_init(vaddr, vaddr + FIXADDR_SIZE, pgd_base); #ifdef CONFIG_HIGHMEM /* diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c index 78eaa4f0b0e..cda4e300eb0 100644 --- a/arch/mips/mm/pgtable-64.c +++ b/arch/mips/mm/pgtable-64.c @@ -76,5 +76,5 @@ void __init pagetable_init(void) * Fixed mappings: */ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - fixrange_init(vaddr, 0, pgd_base); + fixrange_init(vaddr, vaddr + FIXADDR_SIZE, pgd_base); } diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 424ed4b92e6..b6e1cff5066 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -42,6 +42,18 @@ extern void tlb_do_page_fault_0(void); extern void tlb_do_page_fault_1(void); +struct work_registers { + int r1; + int r2; + int r3; +}; + +struct tlb_reg_save { + unsigned long a; + unsigned long b; +} ____cacheline_aligned_in_smp; + +static struct tlb_reg_save handler_reg_save[NR_CPUS]; static inline int r45k_bvahwbug(void) { @@ -248,6 +260,73 @@ static int scratch_reg __cpuinitdata; static int pgd_reg __cpuinitdata; enum vmalloc64_mode {not_refill, refill_scratch, refill_noscratch}; +static struct work_registers __cpuinit build_get_work_registers(u32 **p) +{ + struct work_registers r; + + int smp_processor_id_reg; + int smp_processor_id_sel; + int smp_processor_id_shift; + + if (scratch_reg > 0) { + /* Save in CPU local C0_KScratch? */ + UASM_i_MTC0(p, 1, 31, scratch_reg); + r.r1 = K0; + r.r2 = K1; + r.r3 = 1; + return r; + } + + if (num_possible_cpus() > 1) { +#ifdef CONFIG_MIPS_PGD_C0_CONTEXT + smp_processor_id_shift = 51; + smp_processor_id_reg = 20; /* XContext */ + smp_processor_id_sel = 0; +#else +# ifdef CONFIG_32BIT + smp_processor_id_shift = 25; + smp_processor_id_reg = 4; /* Context */ + smp_processor_id_sel = 0; +# endif +# ifdef CONFIG_64BIT + smp_processor_id_shift = 26; + smp_processor_id_reg = 4; /* Context */ + smp_processor_id_sel = 0; +# endif +#endif + /* Get smp_processor_id */ + UASM_i_MFC0(p, K0, smp_processor_id_reg, smp_processor_id_sel); + UASM_i_SRL_SAFE(p, K0, K0, smp_processor_id_shift); + + /* handler_reg_save index in K0 */ + UASM_i_SLL(p, K0, K0, ilog2(sizeof(struct tlb_reg_save))); + + UASM_i_LA(p, K1, (long)&handler_reg_save); + UASM_i_ADDU(p, K0, K0, K1); + } else { + UASM_i_LA(p, K0, (long)&handler_reg_save); + } + /* K0 now points to save area, save $1 and $2 */ + UASM_i_SW(p, 1, offsetof(struct tlb_reg_save, a), K0); + UASM_i_SW(p, 2, offsetof(struct tlb_reg_save, b), K0); + + r.r1 = K1; + r.r2 = 1; + r.r3 = 2; + return r; +} + +static void __cpuinit build_restore_work_registers(u32 **p) +{ + if (scratch_reg > 0) { + UASM_i_MFC0(p, 1, 31, scratch_reg); + return; + } + /* K0 already points to save area, restore $1 and $2 */ + UASM_i_LW(p, 1, offsetof(struct tlb_reg_save, a), K0); + UASM_i_LW(p, 2, offsetof(struct tlb_reg_save, b), K0); +} + #ifndef CONFIG_MIPS_PGD_C0_CONTEXT /* @@ -1160,9 +1239,6 @@ static void __cpuinit build_r4000_tlb_refill_handler(void) memset(relocs, 0, sizeof(relocs)); memset(final_handler, 0, sizeof(final_handler)); - if (scratch_reg == 0) - scratch_reg = allocate_kscratch(); - if ((scratch_reg > 0 || scratchpad_available()) && use_bbit_insns()) { htlb_info = build_fast_tlb_refill_handler(&p, &l, &r, K0, K1, scratch_reg); @@ -1462,22 +1538,28 @@ iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr, */ static void __cpuinit build_pte_present(u32 **p, struct uasm_reloc **r, - unsigned int pte, unsigned int ptr, enum label_id lid) + int pte, int ptr, int scratch, enum label_id lid) { + int t = scratch >= 0 ? scratch : pte; + if (kernel_uses_smartmips_rixi) { if (use_bbit_insns()) { uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid); uasm_i_nop(p); } else { - uasm_i_andi(p, pte, pte, _PAGE_PRESENT); - uasm_il_beqz(p, r, pte, lid); - iPTE_LW(p, pte, ptr); + uasm_i_andi(p, t, pte, _PAGE_PRESENT); + uasm_il_beqz(p, r, t, lid); + if (pte == t) + /* You lose the SMP race :-(*/ + iPTE_LW(p, pte, ptr); } } else { - uasm_i_andi(p, pte, pte, _PAGE_PRESENT | _PAGE_READ); - uasm_i_xori(p, pte, pte, _PAGE_PRESENT | _PAGE_READ); - uasm_il_bnez(p, r, pte, lid); - iPTE_LW(p, pte, ptr); + uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_READ); + uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_READ); + uasm_il_bnez(p, r, t, lid); + if (pte == t) + /* You lose the SMP race :-(*/ + iPTE_LW(p, pte, ptr); } } @@ -1497,19 +1579,19 @@ build_make_valid(u32 **p, struct uasm_reloc **r, unsigned int pte, */ static void __cpuinit build_pte_writable(u32 **p, struct uasm_reloc **r, - unsigned int pte, unsigned int ptr, enum label_id lid) + unsigned int pte, unsigned int ptr, int scratch, + enum label_id lid) { - if (use_bbit_insns()) { - uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid); - uasm_i_nop(p); - uasm_il_bbit0(p, r, pte, ilog2(_PAGE_WRITE), lid); - uasm_i_nop(p); - } else { - uasm_i_andi(p, pte, pte, _PAGE_PRESENT | _PAGE_WRITE); - uasm_i_xori(p, pte, pte, _PAGE_PRESENT | _PAGE_WRITE); - uasm_il_bnez(p, r, pte, lid); + int t = scratch >= 0 ? scratch : pte; + + uasm_i_andi(p, t, pte, _PAGE_PRESENT | _PAGE_WRITE); + uasm_i_xori(p, t, t, _PAGE_PRESENT | _PAGE_WRITE); + uasm_il_bnez(p, r, t, lid); + if (pte == t) + /* You lose the SMP race :-(*/ iPTE_LW(p, pte, ptr); - } + else + uasm_i_nop(p); } /* Make PTE writable, update software status bits as well, then store @@ -1531,15 +1613,19 @@ build_make_write(u32 **p, struct uasm_reloc **r, unsigned int pte, */ static void __cpuinit build_pte_modifiable(u32 **p, struct uasm_reloc **r, - unsigned int pte, unsigned int ptr, enum label_id lid) + unsigned int pte, unsigned int ptr, int scratch, + enum label_id lid) { if (use_bbit_insns()) { uasm_il_bbit0(p, r, pte, ilog2(_PAGE_WRITE), lid); uasm_i_nop(p); } else { - uasm_i_andi(p, pte, pte, _PAGE_WRITE); - uasm_il_beqz(p, r, pte, lid); - iPTE_LW(p, pte, ptr); + int t = scratch >= 0 ? scratch : pte; + uasm_i_andi(p, t, pte, _PAGE_WRITE); + uasm_il_beqz(p, r, t, lid); + if (pte == t) + /* You lose the SMP race :-(*/ + iPTE_LW(p, pte, ptr); } } @@ -1619,7 +1705,7 @@ static void __cpuinit build_r3000_tlb_load_handler(void) memset(relocs, 0, sizeof(relocs)); build_r3000_tlbchange_handler_head(&p, K0, K1); - build_pte_present(&p, &r, K0, K1, label_nopage_tlbl); + build_pte_present(&p, &r, K0, K1, -1, label_nopage_tlbl); uasm_i_nop(&p); /* load delay */ build_make_valid(&p, &r, K0, K1); build_r3000_tlb_reload_write(&p, &l, &r, K0, K1); @@ -1649,7 +1735,7 @@ static void __cpuinit build_r3000_tlb_store_handler(void) memset(relocs, 0, sizeof(relocs)); build_r3000_tlbchange_handler_head(&p, K0, K1); - build_pte_writable(&p, &r, K0, K1, label_nopage_tlbs); + build_pte_writable(&p, &r, K0, K1, -1, label_nopage_tlbs); uasm_i_nop(&p); /* load delay */ build_make_write(&p, &r, K0, K1); build_r3000_tlb_reload_write(&p, &l, &r, K0, K1); @@ -1673,13 +1759,14 @@ static void __cpuinit build_r3000_tlb_modify_handler(void) u32 *p = handle_tlbm; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; + struct work_registers wr; memset(handle_tlbm, 0, sizeof(handle_tlbm)); memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); build_r3000_tlbchange_handler_head(&p, K0, K1); - build_pte_modifiable(&p, &r, K0, K1, label_nopage_tlbm); + build_pte_modifiable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbm); uasm_i_nop(&p); /* load delay */ build_make_write(&p, &r, K0, K1); build_r3000_pte_reload_tlbwi(&p, K0, K1); @@ -1702,15 +1789,16 @@ static void __cpuinit build_r3000_tlb_modify_handler(void) /* * R4000 style TLB load/store/modify handlers. */ -static void __cpuinit +static struct work_registers __cpuinit build_r4000_tlbchange_handler_head(u32 **p, struct uasm_label **l, - struct uasm_reloc **r, unsigned int pte, - unsigned int ptr) + struct uasm_reloc **r) { + struct work_registers wr = build_get_work_registers(p); + #ifdef CONFIG_64BIT - build_get_pmde64(p, l, r, pte, ptr); /* get pmd in ptr */ + build_get_pmde64(p, l, r, wr.r1, wr.r2); /* get pmd in ptr */ #else - build_get_pgde32(p, pte, ptr); /* get pgd in ptr */ + build_get_pgde32(p, wr.r1, wr.r2); /* get pgd in ptr */ #endif #ifdef CONFIG_HUGETLB_PAGE @@ -1719,21 +1807,22 @@ build_r4000_tlbchange_handler_head(u32 **p, struct uasm_label **l, * instead contains the tlb pte. Check the PAGE_HUGE bit and * see if we need to jump to huge tlb processing. */ - build_is_huge_pte(p, r, pte, ptr, label_tlb_huge_update); + build_is_huge_pte(p, r, wr.r1, wr.r2, label_tlb_huge_update); #endif - UASM_i_MFC0(p, pte, C0_BADVADDR); - UASM_i_LW(p, ptr, 0, ptr); - UASM_i_SRL(p, pte, pte, PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2); - uasm_i_andi(p, pte, pte, (PTRS_PER_PTE - 1) << PTE_T_LOG2); - UASM_i_ADDU(p, ptr, ptr, pte); + UASM_i_MFC0(p, wr.r1, C0_BADVADDR); + UASM_i_LW(p, wr.r2, 0, wr.r2); + UASM_i_SRL(p, wr.r1, wr.r1, PAGE_SHIFT + PTE_ORDER - PTE_T_LOG2); + uasm_i_andi(p, wr.r1, wr.r1, (PTRS_PER_PTE - 1) << PTE_T_LOG2); + UASM_i_ADDU(p, wr.r2, wr.r2, wr.r1); #ifdef CONFIG_SMP uasm_l_smp_pgtable_change(l, *p); #endif - iPTE_LW(p, pte, ptr); /* get even pte */ + iPTE_LW(p, wr.r1, wr.r2); /* get even pte */ if (!m4kc_tlbp_war()) build_tlb_probe_entry(p); + return wr; } static void __cpuinit @@ -1746,6 +1835,7 @@ build_r4000_tlbchange_handler_tail(u32 **p, struct uasm_label **l, build_update_entries(p, tmp, ptr); build_tlb_write_entry(p, l, r, tlb_indexed); uasm_l_leave(l, *p); + build_restore_work_registers(p); uasm_i_eret(p); /* return from trap */ #ifdef CONFIG_64BIT @@ -1758,6 +1848,7 @@ static void __cpuinit build_r4000_tlb_load_handler(void) u32 *p = handle_tlbl; struct uasm_label *l = labels; struct uasm_reloc *r = relocs; + struct work_registers wr; memset(handle_tlbl, 0, sizeof(handle_tlbl)); memset(labels, 0, sizeof(labels)); @@ -1777,8 +1868,8 @@ static void __cpuinit build_r4000_tlb_load_handler(void) /* No need for uasm_i_nop */ } - build_r4000_tlbchange_handler_head(&p, &l, &r, K0, K1); - build_pte_present(&p, &r, K0, K1, label_nopage_tlbl); + wr = build_r4000_tlbchange_handler_head(&p, &l, &r); + build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl); if (m4kc_tlbp_war()) build_tlb_probe_entry(&p); @@ -1788,44 +1879,43 @@ static void __cpuinit build_r4000_tlb_load_handler(void) * have triggered it. Skip the expensive test.. */ if (use_bbit_insns()) { - uasm_il_bbit0(&p, &r, K0, ilog2(_PAGE_VALID), + uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID), label_tlbl_goaround1); } else { - uasm_i_andi(&p, K0, K0, _PAGE_VALID); - uasm_il_beqz(&p, &r, K0, label_tlbl_goaround1); + uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID); + uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround1); } uasm_i_nop(&p); uasm_i_tlbr(&p); /* Examine entrylo 0 or 1 based on ptr. */ if (use_bbit_insns()) { - uasm_i_bbit0(&p, K1, ilog2(sizeof(pte_t)), 8); + uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8); } else { - uasm_i_andi(&p, K0, K1, sizeof(pte_t)); - uasm_i_beqz(&p, K0, 8); + uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t)); + uasm_i_beqz(&p, wr.r3, 8); } - - UASM_i_MFC0(&p, K0, C0_ENTRYLO0); /* load it in the delay slot*/ - UASM_i_MFC0(&p, K0, C0_ENTRYLO1); /* load it if ptr is odd */ + /* load it in the delay slot*/ + UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0); + /* load it if ptr is odd */ + UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1); /* - * If the entryLo (now in K0) is valid (bit 1), RI or + * If the entryLo (now in wr.r3) is valid (bit 1), RI or * XI must have triggered it. */ if (use_bbit_insns()) { - uasm_il_bbit1(&p, &r, K0, 1, label_nopage_tlbl); - /* Reload the PTE value */ - iPTE_LW(&p, K0, K1); + uasm_il_bbit1(&p, &r, wr.r3, 1, label_nopage_tlbl); + uasm_i_nop(&p); uasm_l_tlbl_goaround1(&l, p); } else { - uasm_i_andi(&p, K0, K0, 2); - uasm_il_bnez(&p, &r, K0, label_nopage_tlbl); - uasm_l_tlbl_goaround1(&l, p); - /* Reload the PTE value */ - iPTE_LW(&p, K0, K1); + uasm_i_andi(&p, wr.r3, wr.r3, 2); + uasm_il_bnez(&p, &r, wr.r3, label_nopage_tlbl); + uasm_i_nop(&p); } + uasm_l_tlbl_goaround1(&l, p); } - build_make_valid(&p, &r, K0, K1); - build_r4000_tlbchange_handler_tail(&p, &l, &r, K0, K1); + build_make_valid(&p, &r, wr.r1, wr.r2); + build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2); #ifdef CONFIG_HUGETLB_PAGE /* @@ -1833,8 +1923,8 @@ static void __cpuinit build_r4000_tlb_load_handler(void) * spots a huge page. */ uasm_l_tlb_huge_update(&l, p); - iPTE_LW(&p, K0, K1); - build_pte_present(&p, &r, K0, K1, label_nopage_tlbl); + iPTE_LW(&p, wr.r1, wr.r2); + build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl); build_tlb_probe_entry(&p); if (kernel_uses_smartmips_rixi) { @@ -1843,50 +1933,51 @@ static void __cpuinit build_r4000_tlb_load_handler(void) * have triggered it. Skip the expensive test.. */ if (use_bbit_insns()) { - uasm_il_bbit0(&p, &r, K0, ilog2(_PAGE_VALID), + uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID), label_tlbl_goaround2); } else { - uasm_i_andi(&p, K0, K0, _PAGE_VALID); - uasm_il_beqz(&p, &r, K0, label_tlbl_goaround2); + uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID); + uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2); } uasm_i_nop(&p); uasm_i_tlbr(&p); /* Examine entrylo 0 or 1 based on ptr. */ if (use_bbit_insns()) { - uasm_i_bbit0(&p, K1, ilog2(sizeof(pte_t)), 8); + uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8); } else { - uasm_i_andi(&p, K0, K1, sizeof(pte_t)); - uasm_i_beqz(&p, K0, 8); + uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t)); + uasm_i_beqz(&p, wr.r3, 8); } - UASM_i_MFC0(&p, K0, C0_ENTRYLO0); /* load it in the delay slot*/ - UASM_i_MFC0(&p, K0, C0_ENTRYLO1); /* load it if ptr is odd */ + /* load it in the delay slot*/ + UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0); + /* load it if ptr is odd */ + UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1); /* - * If the entryLo (now in K0) is valid (bit 1), RI or + * If the entryLo (now in wr.r3) is valid (bit 1), RI or * XI must have triggered it. */ if (use_bbit_insns()) { - uasm_il_bbit0(&p, &r, K0, 1, label_tlbl_goaround2); + uasm_il_bbit0(&p, &r, wr.r3, 1, label_tlbl_goaround2); } else { - uasm_i_andi(&p, K0, K0, 2); - uasm_il_beqz(&p, &r, K0, label_tlbl_goaround2); + uasm_i_andi(&p, wr.r3, wr.r3, 2); + uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2); } - /* Reload the PTE value */ - iPTE_LW(&p, K0, K1); |