diff options
Diffstat (limited to 'arch/tile/mm')
| -rw-r--r-- | arch/tile/mm/elf.c | 109 | ||||
| -rw-r--r-- | arch/tile/mm/fault.c | 164 | ||||
| -rw-r--r-- | arch/tile/mm/highmem.c | 4 | ||||
| -rw-r--r-- | arch/tile/mm/homecache.c | 194 | ||||
| -rw-r--r-- | arch/tile/mm/hugetlbpage.c | 177 | ||||
| -rw-r--r-- | arch/tile/mm/init.c | 210 | ||||
| -rw-r--r-- | arch/tile/mm/migrate_32.S | 4 | ||||
| -rw-r--r-- | arch/tile/mm/migrate_64.S | 4 | ||||
| -rw-r--r-- | arch/tile/mm/mmap.c | 26 | ||||
| -rw-r--r-- | arch/tile/mm/pgtable.c | 101 |
10 files changed, 368 insertions, 625 deletions
diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c index 758b6038c2b..23f044e8a7a 100644 --- a/arch/tile/mm/elf.c +++ b/arch/tile/mm/elf.c @@ -21,7 +21,8 @@ #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/sections.h> -#include <arch/sim_def.h> +#include <asm/vdso.h> +#include <arch/sim.h> /* Notify a running simulator, if any, that an exec just occurred. */ static void sim_notify_exec(const char *binary_name) @@ -36,28 +37,57 @@ static void sim_notify_exec(const char *binary_name) } while (c); } -static int notify_exec(void) +static int notify_exec(struct mm_struct *mm) { - int retval = 0; /* failure */ - struct vm_area_struct *vma = current->mm->mmap; - while (vma) { - if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) + char *buf, *path; + struct vm_area_struct *vma; + + if (!sim_is_simulator()) + return 1; + + if (mm->exe_file == NULL) + return 0; + + for (vma = current->mm->mmap; ; vma = vma->vm_next) { + if (vma == NULL) + return 0; + if (vma->vm_file == mm->exe_file) break; - vma = vma->vm_next; } - if (vma) { - char *buf = (char *) __get_free_page(GFP_KERNEL); - if (buf) { - char *path = d_path(&vma->vm_file->f_path, - buf, PAGE_SIZE); - if (!IS_ERR(path)) { - sim_notify_exec(path); - retval = 1; - } - free_page((unsigned long)buf); + + buf = (char *) __get_free_page(GFP_KERNEL); + if (buf == NULL) + return 0; + + path = d_path(&mm->exe_file->f_path, buf, PAGE_SIZE); + if (IS_ERR(path)) { + free_page((unsigned long)buf); + return 0; + } + + /* + * Notify simulator of an ET_DYN object so we know the load address. + * The somewhat cryptic overuse of SIM_CONTROL_DLOPEN allows us + * to be backward-compatible with older simulator releases. + */ + if (vma->vm_start == (ELF_ET_DYN_BASE & PAGE_MASK)) { + char buf[64]; + int i; + + snprintf(buf, sizeof(buf), "0x%lx:@", vma->vm_start); + for (i = 0; ; ++i) { + char c = buf[i]; + __insn_mtspr(SPR_SIM_CONTROL, + (SIM_CONTROL_DLOPEN + | (c << _SIM_CONTROL_OPERATOR_BITS))); + if (c == '\0') + break; } } - return retval; + + sim_notify_exec(path); + free_page((unsigned long)buf); + return 1; } /* Notify a running simulator, if any, that we loaded an interpreter. */ @@ -73,57 +103,23 @@ static void sim_notify_interp(unsigned long load_addr) } -/* Kernel address of page used to map read-only kernel data into userspace. */ -static void *vdso_page; - -/* One-entry array used for install_special_mapping. */ -static struct page *vdso_pages[1]; - -static int __init vdso_setup(void) -{ - vdso_page = (void *)get_zeroed_page(GFP_ATOMIC); - memcpy(vdso_page, __rt_sigreturn, __rt_sigreturn_end - __rt_sigreturn); - vdso_pages[0] = virt_to_page(vdso_page); - return 0; -} -device_initcall(vdso_setup); - -const char *arch_vma_name(struct vm_area_struct *vma) -{ - if (vma->vm_private_data == vdso_pages) - return "[vdso]"; -#ifndef __tilegx__ - if (vma->vm_start == MEM_USER_INTRPT) - return "[intrpt]"; -#endif - return NULL; -} - int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack) { struct mm_struct *mm = current->mm; - unsigned long vdso_base; int retval = 0; + down_write(&mm->mmap_sem); + /* * Notify the simulator that an exec just occurred. * If we can't find the filename of the mapping, just use * whatever was passed as the linux_binprm filename. */ - if (!notify_exec()) + if (!notify_exec(mm)) sim_notify_exec(bprm->filename); - down_write(&mm->mmap_sem); - - /* - * MAYWRITE to allow gdb to COW and set breakpoints - */ - vdso_base = VDSO_BASE; - retval = install_special_mapping(mm, vdso_base, PAGE_SIZE, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_pages); + retval = setup_vdso_pages(); #ifndef __tilegx__ /* @@ -135,7 +131,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, if (!retval) { unsigned long addr = MEM_USER_INTRPT; addr = mmap_region(NULL, addr, INTRPT_SIZE, - MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0); if (addr > (unsigned long) -PAGE_SIZE) diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 84ce7abbf5a..6c0571216a9 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -34,6 +34,7 @@ #include <linux/hugetlb.h> #include <linux/syscalls.h> #include <linux/uaccess.h> +#include <linux/kdebug.h> #include <asm/pgalloc.h> #include <asm/sections.h> @@ -70,9 +71,10 @@ static noinline void force_sig_info_fault(const char *type, int si_signo, * Synthesize the fault a PL0 process would get by doing a word-load of * an unaligned address or a high kernel address. */ -SYSCALL_DEFINE2(cmpxchg_badaddr, unsigned long, address, - struct pt_regs *, regs) +SYSCALL_DEFINE1(cmpxchg_badaddr, unsigned long, address) { + struct pt_regs *regs = current_pt_regs(); + if (address >= PAGE_OFFSET) force_sig_info_fault("atomic segfault", SIGSEGV, SEGV_MAPERR, address, INT_DTLB_MISS, current, regs); @@ -121,10 +123,9 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) pmd_k = pmd_offset(pud_k, address); if (!pmd_present(*pmd_k)) return NULL; - if (!pmd_present(*pmd)) { + if (!pmd_present(*pmd)) set_pmd(pmd, *pmd_k); - arch_flush_lazy_mmu_mode(); - } else + else BUG_ON(pmd_ptfn(*pmd) != pmd_ptfn(*pmd_k)); return pmd_k; } @@ -148,8 +149,6 @@ static inline int vmalloc_fault(pgd_t *pgd, unsigned long address) pmd_k = vmalloc_sync_one(pgd, address); if (!pmd_k) return -1; - if (pmd_huge(*pmd_k)) - return 0; /* support TILE huge_vmap() API */ pte_k = pte_offset_kernel(pmd_k, address); if (!pte_present(*pte_k)) return -1; @@ -279,10 +278,9 @@ static int handle_page_fault(struct pt_regs *regs, if (!is_page_fault) write = 1; - flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0)); + flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; - is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL); + is_kernel_mode = !user_mode(regs); tsk = validate_current(); @@ -364,6 +362,9 @@ static int handle_page_fault(struct pt_regs *regs, goto bad_area_nosemaphore; } + if (!is_kernel_mode) + flags |= FAULT_FLAG_USER; + /* * When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the @@ -424,12 +425,12 @@ good_area: #endif if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (!is_page_fault || !(vma->vm_flags & VM_READ)) goto bad_area; } - survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo @@ -454,6 +455,7 @@ good_area: tsk->min_flt++; if (fault & VM_FAULT_RETRY) { flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; /* * No need to up_read(&mm->mmap_sem) as we would @@ -464,28 +466,15 @@ good_area: } } -#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() - /* - * If this was an asynchronous fault, - * restart the appropriate engine. - */ - switch (fault_num) { #if CHIP_HAS_TILE_DMA() + /* If this was a DMA TLB fault, restart the DMA engine. */ + switch (fault_num) { case INT_DMATLB_MISS: case INT_DMATLB_MISS_DWNCL: case INT_DMATLB_ACCESS: case INT_DMATLB_ACCESS_DWNCL: __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK); break; -#endif -#if CHIP_HAS_SN_PROC() - case INT_SNITLB_MISS: - case INT_SNITLB_MISS_DWNCL: - __insn_mtspr(SPR_SNCTL, - __insn_mfspr(SPR_SNCTL) & - ~SPR_SNCTL__FRZPROC_MASK); - break; -#endif } #endif @@ -566,15 +555,10 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (is_global_init(tsk)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; - } - pr_alert("VM: killing process %s\n", tsk->comm); - if (!is_kernel_mode) - do_group_exit(SIGKILL); - goto no_context; + if (is_kernel_mode) + goto no_context; + pagefault_out_of_memory(); + return 0; do_sigbus: up_read(&mm->mmap_sem); @@ -720,8 +704,60 @@ void do_page_fault(struct pt_regs *regs, int fault_num, { int is_page_fault; +#ifdef CONFIG_KPROBES + /* + * This is to notify the fault handler of the kprobes. The + * exception code is redundant as it is also carried in REGS, + * but we pass it anyhow. + */ + if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1, + regs->faultnum, SIGSEGV) == NOTIFY_STOP) + return; +#endif + +#ifdef __tilegx__ + /* + * We don't need early do_page_fault_ics() support, since unlike + * Pro we don't need to worry about unlocking the atomic locks. + * There is only one current case in GX where we touch any memory + * under ICS other than our own kernel stack, and we handle that + * here. (If we crash due to trying to touch our own stack, + * we're in too much trouble for C code to help out anyway.) + */ + if (write & ~1) { + unsigned long pc = write & ~1; + if (pc >= (unsigned long) __start_unalign_asm_code && + pc < (unsigned long) __end_unalign_asm_code) { + struct thread_info *ti = current_thread_info(); + /* + * Our EX_CONTEXT is still what it was from the + * initial unalign exception, but now we've faulted + * on the JIT page. We would like to complete the + * page fault however is appropriate, and then retry + * the instruction that caused the unalign exception. + * Our state has been "corrupted" by setting the low + * bit in "sp", and stashing r0..r3 in the + * thread_info area, so we revert all of that, then + * continue as if this were a normal page fault. + */ + regs->sp &= ~1UL; + regs->regs[0] = ti->unalign_jit_tmp[0]; + regs->regs[1] = ti->unalign_jit_tmp[1]; + regs->regs[2] = ti->unalign_jit_tmp[2]; + regs->regs[3] = ti->unalign_jit_tmp[3]; + write &= 1; + } else { + pr_alert("%s/%d: ICS set at page fault at %#lx: %#lx\n", + current->comm, current->pid, pc, address); + show_regs(regs); + do_group_exit(SIGKILL); + return; + } + } +#else /* This case should have been handled by do_page_fault_ics(). */ BUG_ON(write & ~1); +#endif #if CHIP_HAS_TILE_DMA() /* @@ -750,10 +786,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num, case INT_DMATLB_MISS: case INT_DMATLB_MISS_DWNCL: #endif -#if CHIP_HAS_SN_PROC() - case INT_SNITLB_MISS: - case INT_SNITLB_MISS_DWNCL: -#endif is_page_fault = 1; break; @@ -769,8 +801,8 @@ void do_page_fault(struct pt_regs *regs, int fault_num, panic("Bad fault number %d in do_page_fault", fault_num); } -#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() - if (EX1_PL(regs->ex1) != USER_PL) { +#if CHIP_HAS_TILE_DMA() + if (!user_mode(regs)) { struct async_tlb *async; switch (fault_num) { #if CHIP_HAS_TILE_DMA() @@ -781,12 +813,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num, async = ¤t->thread.dma_async_tlb; break; #endif -#if CHIP_HAS_SN_PROC() - case INT_SNITLB_MISS: - case INT_SNITLB_MISS_DWNCL: - async = ¤t->thread.sn_async_tlb; - break; -#endif default: async = NULL; } @@ -819,14 +845,22 @@ void do_page_fault(struct pt_regs *regs, int fault_num, } -#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() +#if CHIP_HAS_TILE_DMA() /* - * Check an async_tlb structure to see if a deferred fault is waiting, - * and if so pass it to the page-fault code. + * This routine effectively re-issues asynchronous page faults + * when we are returning to user space. */ -static void handle_async_page_fault(struct pt_regs *regs, - struct async_tlb *async) +void do_async_page_fault(struct pt_regs *regs) { + struct async_tlb *async = ¤t->thread.dma_async_tlb; + + /* + * Clear thread flag early. If we re-interrupt while processing + * code here, we will reset it and recall this routine before + * returning to user space. + */ + clear_thread_flag(TIF_ASYNC_TLB); + if (async->fault_num) { /* * Clear async->fault_num before calling the page-fault @@ -840,35 +874,15 @@ static void handle_async_page_fault(struct pt_regs *regs, async->address, async->is_write); } } - -/* - * This routine effectively re-issues asynchronous page faults - * when we are returning to user space. - */ -void do_async_page_fault(struct pt_regs *regs) -{ - /* - * Clear thread flag early. If we re-interrupt while processing - * code here, we will reset it and recall this routine before - * returning to user space. - */ - clear_thread_flag(TIF_ASYNC_TLB); - -#if CHIP_HAS_TILE_DMA() - handle_async_page_fault(regs, ¤t->thread.dma_async_tlb); -#endif -#if CHIP_HAS_SN_PROC() - handle_async_page_fault(regs, ¤t->thread.sn_async_tlb); -#endif -} -#endif /* CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() */ +#endif /* CHIP_HAS_TILE_DMA() */ void vmalloc_sync_all(void) { #ifdef __tilegx__ /* Currently all L1 kernel pmd's are static and shared. */ - BUG_ON(pgd_index(VMALLOC_END) != pgd_index(VMALLOC_START)); + BUILD_BUG_ON(pgd_index(VMALLOC_END - PAGE_SIZE) != + pgd_index(VMALLOC_START)); #else /* * Note that races in the updates of insync and start aren't diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c index ef8e5a62b6e..0dc21829477 100644 --- a/arch/tile/mm/highmem.c +++ b/arch/tile/mm/highmem.c @@ -93,7 +93,7 @@ static DEFINE_PER_CPU(struct kmap_amps, amps); * If we examine it earlier we are exposed to a race where it looks * writable earlier, but becomes immutable before we write the PTE. */ -static void kmap_atomic_register(struct page *page, enum km_type type, +static void kmap_atomic_register(struct page *page, int type, unsigned long va, pte_t *ptep, pte_t pteval) { unsigned long flags; @@ -114,7 +114,6 @@ static void kmap_atomic_register(struct page *page, enum km_type type, list_add(&->list, &_list); set_pte(ptep, pteval); - arch_flush_lazy_mmu_mode(); spin_unlock(&_lock); homecache_kpte_unlock(flags); @@ -259,7 +258,6 @@ void __kunmap_atomic(void *kvaddr) BUG_ON(vaddr >= (unsigned long)high_memory); } - arch_flush_lazy_mmu_mode(); pagefault_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index dbcbdf7b8aa..33294fdc402 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c @@ -43,12 +43,9 @@ #include "migrate.h" -#if CHIP_HAS_COHERENT_LOCAL_CACHE() - /* * The noallocl2 option suppresses all use of the L2 cache to cache - * locally from a remote home. There's no point in using it if we - * don't have coherent local caching, though. + * locally from a remote home. */ static int __write_once noallocl2; static int __init set_noallocl2(char *str) @@ -58,16 +55,6 @@ static int __init set_noallocl2(char *str) } early_param("noallocl2", set_noallocl2); -#else - -#define noallocl2 0 - -#endif - -/* Provide no-op versions of these routines to keep flush_remote() cleaner. */ -#define mark_caches_evicted_start() 0 -#define mark_caches_evicted_finish(mask, timestamp) do {} while (0) - /* * Update the irq_stat for cpus that we are going to interrupt @@ -107,7 +94,6 @@ static void hv_flush_update(const struct cpumask *cache_cpumask, * there's never any good reason for hv_flush_remote() to fail. * - Accepts a 32-bit PFN rather than a 64-bit PA, which generally * is the type that Linux wants to pass around anyway. - * - Centralizes the mark_caches_evicted() handling. * - Canonicalizes that lengths of zero make cpumasks NULL. * - Handles deferring TLB flushes for dataplane tiles. * - Tracks remote interrupts in the per-cpu irq_cpustat_t. @@ -126,7 +112,6 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control, HV_Remote_ASID *asids, int asidcount) { int rc; - int timestamp = 0; /* happy compiler */ struct cpumask cache_cpumask_copy, tlb_cpumask_copy; struct cpumask *cache_cpumask, *tlb_cpumask; HV_PhysAddr cache_pa; @@ -157,15 +142,11 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control, hv_flush_update(cache_cpumask, tlb_cpumask, tlb_va, tlb_length, asids, asidcount); cache_pa = (HV_PhysAddr)cache_pfn << PAGE_SHIFT; - if (cache_control & HV_FLUSH_EVICT_L2) - timestamp = mark_caches_evicted_start(); rc = hv_flush_remote(cache_pa, cache_control, cpumask_bits(cache_cpumask), tlb_va, tlb_length, tlb_pgsize, cpumask_bits(tlb_cpumask), asids, asidcount); - if (cache_control & HV_FLUSH_EVICT_L2) - mark_caches_evicted_finish(cache_cpumask, timestamp); if (rc == 0) return; cpumask_scnprintf(cache_buf, sizeof(cache_buf), &cache_cpumask_copy); @@ -180,85 +161,88 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control, panic("Unsafe to continue."); } -void flush_remote_page(struct page *page, int order) +static void homecache_finv_page_va(void* va, int home) { - int i, pages = (1 << order); - for (i = 0; i < pages; ++i, ++page) { - void *p = kmap_atomic(page); - int hfh = 0; - int home = page_home(page); -#if CHIP_HAS_CBOX_HOME_MAP() - if (home == PAGE_HOME_HASH) - hfh = 1; - else -#endif - BUG_ON(home < 0 || home >= NR_CPUS); - finv_buffer_remote(p, PAGE_SIZE, hfh); - kunmap_atomic(p); + int cpu = get_cpu(); + if (home == cpu) { + finv_buffer_local(va, PAGE_SIZE); + } else if (home == PAGE_HOME_HASH) { + finv_buffer_remote(va, PAGE_SIZE, 1); + } else { + BUG_ON(home < 0 || home >= NR_CPUS); + finv_buffer_remote(va, PAGE_SIZE, 0); } + put_cpu(); } -void homecache_evict(const struct cpumask *mask) +void homecache_finv_map_page(struct page *page, int home) { - flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0); + unsigned long flags; + unsigned long va; + pte_t *ptep; + pte_t pte; + + if (home == PAGE_HOME_UNCACHED) + return; + local_irq_save(flags); +#ifdef CONFIG_HIGHMEM + va = __fix_to_virt(FIX_KMAP_BEGIN + kmap_atomic_idx_push() + + (KM_TYPE_NR * smp_processor_id())); +#else + va = __fix_to_virt(FIX_HOMECACHE_BEGIN + smp_processor_id()); +#endif + ptep = virt_to_kpte(va); + pte = pfn_pte(page_to_pfn(page), PAGE_KERNEL); + __set_pte(ptep, pte_set_home(pte, home)); + homecache_finv_page_va((void *)va, home); + __pte_clear(ptep); + hv_flush_page(va, PAGE_SIZE); +#ifdef CONFIG_HIGHMEM + kmap_atomic_idx_pop(); +#endif + local_irq_restore(flags); } -/* - * Return a mask of the cpus whose caches currently own these pages. - * The return value is whether the pages are all coherently cached - * (i.e. none are immutable, incoherent, or uncached). - */ -static int homecache_mask(struct page *page, int pages, - struct cpumask *home_mask) +static void homecache_finv_page_home(struct page *page, int home) { - int i; - int cached_coherently = 1; - cpumask_clear(home_mask); - for (i = 0; i < pages; ++i) { - int home = page_home(&page[i]); - if (home == PAGE_HOME_IMMUTABLE || - home == PAGE_HOME_INCOHERENT) { - cpumask_copy(home_mask, cpu_possible_mask); - return 0; - } -#if CHIP_HAS_CBOX_HOME_MAP() - if (home == PAGE_HOME_HASH) { - cpumask_or(home_mask, home_mask, &hash_for_home_map); - continue; - } -#endif - if (home == PAGE_HOME_UNCACHED) { - cached_coherently = 0; - continue; - } - BUG_ON(home < 0 || home >= NR_CPUS); - cpumask_set_cpu(home, home_mask); - } - return cached_coherently; + if (!PageHighMem(page) && home == page_home(page)) + homecache_finv_page_va(page_address(page), home); + else + homecache_finv_map_page(page, home); } -/* - * Return the passed length, or zero if it's long enough that we - * believe we should evict the whole L2 cache. - */ -static unsigned long cache_flush_length(unsigned long length) +static inline bool incoherent_home(int home) { - return (length >= CHIP_L2_CACHE_SIZE()) ? HV_FLUSH_EVICT_L2 : length; + return home == PAGE_HOME_IMMUTABLE || home == PAGE_HOME_INCOHERENT; } -/* Flush a page out of whatever cache(s) it is in. */ -void homecache_flush_cache(struct page *page, int order) +static void homecache_finv_page_internal(struct page *page, int force_map) { - int pages = 1 << order; - int length = cache_flush_length(pages * PAGE_SIZE); - unsigned long pfn = page_to_pfn(page); - struct cpumask home_mask; - - homecache_mask(page, pages, &home_mask); - flush_remote(pfn, length, &home_mask, 0, 0, 0, NULL, NULL, 0); - sim_validate_lines_evicted(PFN_PHYS(pfn), pages * PAGE_SIZE); + int home = page_home(page); + if (home == PAGE_HOME_UNCACHED) + return; + if (incoherent_home(home)) { + int cpu; + for_each_cpu(cpu, &cpu_cacheable_map) + homecache_finv_map_page(page, cpu); + } else if (force_map) { + /* Force if, e.g., the normal mapping is migrating. */ + homecache_finv_map_page(page, home); + } else { + homecache_finv_page_home(page, home); + } + sim_validate_lines_evicted(PFN_PHYS(page_to_pfn(page)), PAGE_SIZE); } +void homecache_finv_page(struct page *page) +{ + homecache_finv_page_internal(page, 0); +} + +void homecache_evict(const struct cpumask *mask) +{ + flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0); +} /* Report the home corresponding to a given PTE. */ static int pte_to_home(pte_t pte) @@ -272,10 +256,8 @@ static int pte_to_home(pte_t pte) return PAGE_HOME_INCOHERENT; case HV_PTE_MODE_UNCACHED: return PAGE_HOME_UNCACHED; -#if CHIP_HAS_CBOX_HOME_MAP() case HV_PTE_MODE_CACHE_HASH_L3: return PAGE_HOME_HASH; -#endif } panic("Bad PTE %#llx\n", pte.val); } @@ -332,20 +314,16 @@ pte_t pte_set_home(pte_t pte, int home) HV_PTE_MODE_CACHE_NO_L3); } } else -#if CHIP_HAS_CBOX_HOME_MAP() if (hash_default) pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); else -#endif pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); pte = hv_pte_set_nc(pte); break; -#if CHIP_HAS_CBOX_HOME_MAP() case PAGE_HOME_HASH: pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); break; -#endif default: BUG_ON(home < 0 || home >= NR_CPUS || @@ -355,7 +333,6 @@ pte_t pte_set_home(pte_t pte, int home) break; } -#if CHIP_HAS_NC_AND_NOALLOC_BITS() if (noallocl2) pte = hv_pte_set_no_alloc_l2(pte); @@ -364,7 +341,6 @@ pte_t pte_set_home(pte_t pte, int home) hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_NO_L3) { pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED); } -#endif /* Checking this case here gives a better panic than from the hv. */ BUG_ON(hv_pte_get_mode(pte) == 0); @@ -380,19 +356,13 @@ EXPORT_SYMBOL(pte_set_home); * so they're not suitable for anything but infrequent use. */ -#if CHIP_HAS_CBOX_HOME_MAP() -static inline int initial_page_home(void) { return PAGE_HOME_HASH; } -#else -static inline int initial_page_home(void) { return 0; } -#endif - int page_home(struct page *page) { if (PageHighMem(page)) { - return initial_page_home(); + return PAGE_HOME_HASH; } else { unsigned long kva = (unsigned long)page_address(page); - return pte_to_home(*virt_to_pte(NULL, kva)); + return pte_to_home(*virt_to_kpte(kva)); } } EXPORT_SYMBOL(page_home); @@ -411,12 +381,13 @@ void homecache_change_page_home(struct page *page, int order, int home) NULL, 0); for (i = 0; i < pages; ++i, kva += PAGE_SIZE) { - pte_t *ptep = virt_to_pte(NULL, kva); + pte_t *ptep = virt_to_kpte(kva); pte_t pteval = *ptep; BUG_ON(!pte_present(pteval) || pte_huge(pteval)); __set_pte(ptep, pte_set_home(pteval, home)); } } +EXPORT_SYMBOL(homecache_change_page_home); struct page *homecache_alloc_pages(gfp_t gfp_mask, unsigned int order, int home) @@ -441,22 +412,25 @@ struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask, return page; } -void homecache_free_pages(unsigned long addr, unsigned int order) +void __homecache_free_pages(struct page *page, unsigned int order) { - struct page *page; - - if (addr == 0) - return; - - VM_BUG_ON(!virt_addr_valid((void *)addr)); - page = virt_to_page((void *)addr); if (put_page_testzero(page)) { - homecache_change_page_home(page, order, initial_page_home()); + homecache_change_page_home(page, order, PAGE_HOME_HASH); if (order == 0) { - free_hot_cold_page(page, 0); + free_hot_cold_page(page, false); } else { init_page_count(page); __free_pages(page, order); } } } +EXPORT_SYMBOL(__homecache_free_pages); + +void homecache_free_pages(unsigned long addr, unsigned int order) +{ + if (addr != 0) { + VM_BUG_ON(!virt_addr_valid((void *)addr)); + __homecache_free_pages(virt_to_page((void *)addr), order); + } +} +EXPORT_SYMBOL(homecache_free_pages); diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index 812e2d03797..e514899e110 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c @@ -49,38 +49,6 @@ int huge_shift[HUGE_SHIFT_ENTRIES] = { #endif }; -/* - * This routine is a hybrid of pte_alloc_map() and pte_alloc_kernel(). - * It assumes that L2 PTEs are never in HIGHMEM (we don't support that). - * It locks the user pagetable, and bumps up the mm->nr_ptes field, - * but otherwise allocate the page table using the kernel versions. - */ -static pte_t *pte_alloc_hugetlb(struct mm_struct *mm, pmd_t *pmd, - unsigned long address) -{ - pte_t *new; - - if (pmd_none(*pmd)) { - new = pte_alloc_one_kernel(mm, address); - if (!new) - return NULL; - - smp_wmb(); /* See comment in __pte_alloc */ - - spin_lock(&mm->page_table_lock); - if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ - mm->nr_ptes++; - pmd_populate_kernel(mm, pmd, new); - new = NULL; - } else - VM_BUG_ON(pmd_trans_splitting(*pmd)); - spin_unlock(&mm->page_table_lock); - if (new) - pte_free_kernel(mm, new); - } - - return pte_offset_kernel(pmd, address); -} #endif pte_t *huge_pte_alloc(struct mm_struct *mm, @@ -109,7 +77,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, else { if (sz != PAGE_SIZE << huge_shift[HUGE_SHIFT_PAGE]) panic("Unexpected page size %#lx\n", sz); - return pte_alloc_hugetlb(mm, pmd, addr); + return pte_alloc_map(mm, NULL, pmd, addr); } } #else @@ -144,14 +112,14 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) /* Get the top-level page table entry. */ pgd = (pgd_t *)get_pte((pte_t *)mm->pgd, pgd_index(addr), 0); - if (!pgd_present(*pgd)) - return NULL; /* We don't have four levels. */ pud = pud_offset(pgd, addr); #ifndef __PAGETABLE_PUD_FOLDED # error support fourth page table level #endif + if (!pud_present(*pud)) + return NULL; /* Check for an L0 huge PTE, if we have three levels. */ #ifndef __PAGETABLE_PMD_FOLDED @@ -231,42 +199,15 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long pgoff, unsigned long flags) { struct hstate *h = hstate_file(file); - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long start_addr; - - if (len > mm->cached_hole_size) { - start_addr = mm->free_area_cache; - } else { - start_addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - } - -full_search: - addr = ALIGN(start_addr, huge_page_size(h)); - - for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (TASK_SIZE - len < addr) { - /* - * Start a new search - just in case we missed - * some holes. - */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - goto full_search; - } - return -ENOMEM; - } - if (!vma || addr + len <= vma->vm_start) { - mm->free_area_cache = addr + len; - return addr; - } - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - addr = ALIGN(vma->vm_end, huge_page_size(h)); - } + struct vm_unmapped_area_info info; + + info.flags = 0; + info.length = len; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + return vm_unmapped_area(&info); } static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, @@ -274,92 +215,30 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long pgoff, unsigned long flags) { struct hstate *h = hstate_file(file); - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma, *prev_vma; - unsigned long base = mm->mmap_base, addr = addr0; - unsigned long largest_hole = mm->cached_hole_size; - int first_time = 1; - - /* don't allow allocations above current base */ - if (mm->free_area_cache > base) - mm->free_area_cache = base; - - if (len <= largest_hole) { - largest_hole = 0; - mm->free_area_cache = base; - } -try_again: - /* make sure it can fit in the remaining address space */ - if (mm->free_area_cache < len) - goto fail; - - /* either no address requested or can't fit in requested address hole */ - addr = (mm->free_area_cache - len) & huge_page_mask(h); - do { - /* - * Lookup failure means no vma is above this address, - * i.e. return with success: - */ - vma = find_vma_prev(mm, addr, &prev_vma); - if (!vma) { - return addr; - break; - } - - /* - * new region fits between prev_vma->vm_end and - * vma->vm_start, use it: - */ - if (addr + len <= vma->vm_start && - (!prev_vma || (addr >= prev_vma->vm_end))) { - /* remember the address as a hint for next time */ - mm->cached_hole_size = largest_hole; - mm->free_area_cache = addr; - return addr; - } else { - /* pull free_area_cache down to the first hole */ - if (mm->free_area_cache == vma->vm_end) { - mm->free_area_cache = vma->vm_start; - mm->cached_hole_size = largest_hole; - } - } - - /* remember the largest hole we saw so far */ - if (addr + largest_hole < vma->vm_start) - largest_hole = vma->vm_start - addr; + struct vm_unmapped_area_info info; + unsigned long addr; - /* try just below the current vma->vm_start */ - addr = (vma->vm_start - len) & huge_page_mask(h); + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = PAGE_SIZE; + info.high_limit = current->mm->mmap_base; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + addr = vm_unmapped_area(&info); - } while (len <= vma->vm_start); - -fail: - /* - * if hint left us with no space for the requested - * mapping then try again: - */ - if (first_time) { - mm->free_area_cache = base; - largest_hole = 0; - first_time = 0; - goto try_again; - } /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. This scenario * can happen with large stack limits and large mmap() * allocations. */ - mm->free_area_cache = TASK_UNMAPPED_BASE; - mm->cached_hole_size = ~0UL; - addr = hugetlb_get_unmapped_area_bottomup(file, addr0, - len, pgoff, flags); - - /* - * Restore the topdown base: - */ - mm->free_area_cache = base; - mm->cached_hole_size = ~0UL; + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + addr = vm_unmapped_area(&info); + } return addr; } diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 630dd2ce2af..bfb3127b4df 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -106,10 +106,8 @@ pte_t *get_prealloc_pte(unsigned long pfn) */ static int initial_heap_home(void) { -#if CHIP_HAS_CBOX_HOME_MAP() if (hash_default) return PAGE_HOME_HASH; -#endif return smp_processor_id(); } @@ -150,7 +148,21 @@ void __init shatter_pmd(pmd_t *pmd) assign_pte(pmd, pte); } -#ifdef CONFIG_HIGHMEM +#ifdef __tilegx__ +static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va) +{ + pud_t *pud = pud_offset(&pgtables[pgd_index(va)], va); + if (pud_none(*pud)) + assign_pmd(pud, alloc_pmd()); + return pmd_offset(pud, va); +} +#else +static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va) +{ + return pmd_offset(pud_offset(&pgtables[pgd_index(va)], va), va); +} +#endif + /* * This function initializes a certain range of kernel virtual memory * with new bootmem page tables, everywhere page tables are missing in @@ -163,34 +175,24 @@ void __init shatter_pmd(pmd_t *pmd) * checking the pgd every time. */ static void __init page_table_range_init(unsigned long start, - unsigned long end, pgd_t *pgd_base) + unsigned long end, pgd_t *pgd) { - pgd_t *pgd; - int pgd_idx; unsigned long vaddr; - - vaddr = start; - pgd_idx = pgd_index(vaddr); - pgd = pgd_base + pgd_idx; - - for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { - pmd_t *pmd = pmd_offset(pud_offset(pgd, vaddr), vaddr); + start = round_down(start, PMD_SIZE); + end = round_up(end, PMD_SIZE); + for (vaddr = start; vaddr < end; vaddr += PMD_SIZE) { + pmd_t *pmd = get_pmd(pgd, vaddr); if (pmd_none(*pmd)) assign_pte(pmd, alloc_pte()); - vaddr += PMD_SIZE; } } -#endif /* CONFIG_HIGHMEM */ - -#if CHIP_HAS_CBOX_HOME_MAP() static int __initdata ktext_hash = 1; /* .text pages */ static int __initdata kdata_hash = 1; /* .data and .bss pages */ int __write_once hash_default = 1; /* kernel allocator pages */ EXPORT_SYMBOL(hash_default); int __write_once kstack_hash = 1; /* if no homecaching, use h4h */ -#endif /* CHIP_HAS_CBOX_HOME_MAP */ /* * CPUs to use to for striping the pages of kernel data. If hash-for-home @@ -208,14 +210,12 @@ int __write_once kdata_huge; /* if no homecaching, small pages */ static pgprot_t __init construct_pgprot(pgprot_t prot, int home) { prot = pte_set_home(prot, home); -#if CHIP_HAS_CBOX_HOME_MAP() if (home == PAGE_HOME_IMMUTABLE) { if (ktext_hash) prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_HASH_L3); else prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_NO_L3); } -#endif return prot; } @@ -227,22 +227,17 @@ static pgprot_t __init init_pgprot(ulong address) { int cpu; unsigned long page; - enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET }; + enum { CODE_DELTA = MEM_SV_START - PAGE_OFFSET }; -#if CHIP_HAS_CBOX_HOME_MAP() /* For kdata=huge, everything is just hash-for-home. */ if (kdata_huge) return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); -#endif /* We map the aliased pages of permanent text inaccessible. */ if (address < (ulong) _sinittext - CODE_DELTA) return PAGE_NONE; - /* - * We map read-only data non-coherent for performance. We could - * use neighborhood caching on TILE64, but it's not clear it's a win. - */ + /* We map read-only data non-coherent for performance. */ if ((address >= (ulong) __start_rodata && address < (ulong) __end_rodata) || address == (ulong) empty_zero_page) { @@ -250,12 +245,10 @@ static pgprot_t __init init_pgprot(ulong address) } #ifndef __tilegx__ -#if !ATOMIC_LOCKS_FOUND_VIA_TABLE() /* Force the atomic_locks[] array page to be hash-for-home. */ if (address == (ulong) atomic_locks) return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); #endif -#endif /* * Everything else that isn't data or bss is heap, so mark it @@ -273,28 +266,18 @@ static pgprot_t __init init_pgprot(ulong address) if (address >= (ulong) _end || address < (ulong) _einitdata) return construct_pgprot(PAGE_KERNEL, initial_heap_home()); -#if CHIP_HAS_CBOX_HOME_MAP() /* Use hash-for-home if requested for data/bss. */ if (kdata_hash) return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); -#endif - - /* - * Make the w1data homed like heap to start with, to avoid - * making it part of the page-striped data area when we're just - * going to convert it to read-only soon anyway. - */ - if (address >= (ulong)__w1data_begin && address < (ulong)__w1data_end) - return construct_pgprot(PAGE_KERNEL, initial_heap_home()); /* * Otherwise we just hand out consecutive cpus. To avoid * requiring this function to hold state, we just walk forward from - * _sdata by PAGE_SIZE, skipping the readonly and init data, to reach - * the requested address, while walking cpu home around kdata_mask. - * This is typically no more than a dozen or so iterations. + * __end_rodata by PAGE_SIZE, skipping the readonly and init data, to + * reach the requested address, while walking cpu home around + * kdata_mask. This is typically no more than a dozen or so iterations. */ - page = (((ulong)__w1data_end) + PAGE_SIZE - 1) & PAGE_MASK; + page = (((ulong)__end_rodata) + PAGE_SIZE - 1) & PAGE_MASK; BUG_ON(address < page || address >= (ulong)_end); cpu = cpumask_first(&kdata_mask); for (; page < address; page += PAGE_SIZE) { @@ -304,11 +287,9 @@ static pgprot_t __init init_pgprot(ulong address) if (page == (ulong)empty_zero_page) continue; #ifndef __tilegx__ -#if !ATOMIC_LOCKS_FOUND_VIA_TABLE() if (page == (ulong)atomic_locks) continue; #endif -#endif cpu = cpumask_next(cpu, &kdata_mask); if (cpu == NR_CPUS) cpu = cpumask_first(&kdata_mask); @@ -351,7 +332,7 @@ static int __init setup_ktext(char *str) ktext_arg_seen = 1; - /* Default setting on Tile64: use a huge page */ + /* Default setting: use a huge page */ if (strcmp(str, "huge") == 0) pr_info("ktext: using one huge locally cached page\n"); @@ -397,28 +378,11 @@ static inline pgprot_t ktext_set_nocache(pgprot_t prot) { if (!ktext_nocache) prot = hv_pte_set_nc(prot); -#if CHIP_HAS_NC_AND_NOALLOC_BITS() else prot = hv_pte_set_no_alloc_l2(prot); -#endif return prot; } -#ifndef __tilegx__ -static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va) -{ - return pmd_offset(pud_offset(&pgtables[pgd_index(va)], va), va); -} -#else -static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va) -{ - pud_t *pud = pud_offset(&pgtables[pgd_index(va)], va); - if (pud_none(*pud)) - assign_pmd(pud, alloc_pmd()); - return pmd_offset(pud, va); -} -#endif - /* Temporary page table we use for staging. */ static pgd_t pgtables[PTRS_PER_PGD] __attribute__((aligned(HV_PAGE_TABLE_ALIGN))); @@ -448,7 +412,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) struct cpumask kstripe_mask; int rc, i; -#if CHIP_HAS_CBOX_HOME_MAP() if (ktext_arg_seen && ktext_hash) { pr_warning("warning: \"ktext\" boot argument ignored" " if \"kcache_hash\" sets up text hash-for-home\n"); @@ -465,7 +428,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) " kcache_hash=all or =allbutstack\n"); kdata_huge = 0; } -#endif /* * Set up a mask for cpus to use for kernel striping. @@ -546,7 +508,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) } } - address = MEM_SV_INTRPT; + address = MEM_SV_START; pmd = get_pmd(pgtables, address); pfn = 0; /* code starts at PA 0 */ if (ktext_small) { @@ -570,7 +532,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) prot = ktext_set_nocache(prot); } - BUG_ON(address != (unsigned long)_stext); + BUG_ON(address != (unsigned long)_text); pte = NULL; for (; address < (unsigned long)_einittext; pfn++, address += PAGE_SIZE) { @@ -593,13 +555,11 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) } else { pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC); pteval = pte_mkhuge(pteval); -#if CHIP_HAS_CBOX_HOME_MAP() if (ktext_hash) { pteval = hv_pte_set_mode(pteval, HV_PTE_MODE_CACHE_HASH_L3); pteval = ktext_set_nocache(pteval); } else -#endif /* CHIP_HAS_CBOX_HOME_MAP() */ if (cpumask_weight(&ktext_mask) == 1) { pteval = set_remote_cache_cpu(pteval, cpumask_first(&ktext_mask)); @@ -728,7 +688,7 @@ static void __init init_free_pfn_range(unsigned long start, unsigned long end) } init_page_count(page); __free_pages(page, order); - totalram_pages += count; + adjust_managed_page_count(page, count); page += count; pfn += count; @@ -741,16 +701,15 @@ static void __init set_non_bootmem_pages_init(void) for_each_zone(z) { unsigned long start, end; int nid = z->zone_pgdat->node_id; +#ifdef CONFIG_HIGHMEM int idx = zone_idx(z); +#endif start = z->zone_start_pfn; - if (start == 0) - continue; /* bootmem */ end = start + z->spanned_pages; - if (idx == ZONE_NORMAL) { - BUG_ON(start != node_start_pfn[nid]); - start = node_free_pfn[nid]; - } + start = max(start, node_free_pfn[nid]); + start = max(start, max_low_pfn); + #ifdef CONFIG_HIGHMEM if (idx == ZONE_HIGHMEM) totalhigh_pages += z->spanned_pages; @@ -779,9 +738,6 @@ static void __init set_non_bootmem_pages_init(void) */ void __init paging_init(void) { -#ifdef CONFIG_HIGHMEM - unsigned long vaddr, end; -#endif #ifdef __tilegx__ pud_t *pud; #endif @@ -789,14 +745,11 @@ void __init paging_init(void) kernel_physical_mapping_init(pgd_base); + /* Fixed mappings, only the page table structure has to be created. */ + page_table_range_init(fix_to_virt(__end_of_fixed_addresses - 1), + FIXADDR_TOP, pgd_base); + #ifdef CONFIG_HIGHMEM - /* - * Fixed mappings, only the page table structure has to be - * created - mappings will be set by set_fixmap(): - */ - vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; - end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; - page_table_range_init(vaddr, end, pgd_base); permanent_kmaps_init(pgd_base); #endif @@ -833,7 +786,6 @@ static void __init set_max_mapnr_init(void) void __init mem_init(void) { - int codesize, datasize, initsize; int i; #ifndef __tilegx__ void *last; @@ -858,26 +810,14 @@ void __init mem_init(void) set_max_mapnr_init(); /* this will put all bootmem onto the freelists */ - totalram_pages += free_all_bootmem(); + free_all_bootmem(); #ifndef CONFIG_64BIT /* count all remaining LOWMEM and give all HIGHMEM to page allocator */ set_non_bootmem_pages_init(); #endif - codesize = (unsigned long)&_etext - (unsigned long)&_text; - datasize = (unsigned long)&_end - (unsigned long)&_sdata; - initsize = (unsigned long)&_einittext - (unsigned long)&_sinittext; - initsize += (unsigned long)&_einitdata - (unsigned long)&_sinitdata; - - pr_info("Memory: %luk/%luk available (%dk kernel code, %dk data, %dk init, %ldk highmem)\n", - (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), - codesize >> 10, - datasize >> 10, - initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) - ); + mem_init_print_info(NULL); /* * In debug mode, dump some interesting memory mappings. @@ -888,10 +828,6 @@ void __init mem_init(void) printk(KERN_DEBUG " PKMAP %#lx - %#lx\n", PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP) - 1); #endif -#ifdef CONFIG_HUGEVMAP - printk(KERN_DEBUG " HUGEMAP %#lx - %#lx\n", - HUGE_VMAP_BASE, HUGE_VMAP_END - 1); -#endif printk(KERN_DEBUG " VMALLOC %#lx - %#lx\n", _VMALLOC_START, _VMALLOC_END - 1); #ifdef __tilegx__ @@ -947,6 +883,14 @@ int remove_memory(u64 start, u64 size) { return -EINVAL; } + +#ifdef CONFIG_MEMORY_HOTREMOVE +int arch_remove_memory(u64 start, u64 size) +{ + /* TODO */ + return -EBUSY; +} +#endif #endif struct kmem_cache *pgd_cache; @@ -958,26 +902,6 @@ void __init pgtable_cache_init(void) panic("pgtable_cache_init(): Cannot create pgd cache"); } -#if !CHIP_HAS_COHERENT_LOCAL_CACHE() -/* - * The __w1data area holds data that is only written during initialization, - * and is read-only and thus freely cacheable thereafter. Fix the page - * table entries that cover that region accordingly. - */ -static void mark_w1data_ro(void) -{ - /* Loop over page table entries */ - unsigned long addr = (unsigned long)__w1data_begin; - BUG_ON((addr & (PAGE_SIZE-1)) != 0); - for (; addr <= (unsigned long)__w1data_end - 1; addr += PAGE_SIZE) { - unsigned long pfn = kaddr_to_pfn((void *)addr); - pte_t *ptep = virt_to_pte(NULL, addr); - BUG_ON(pte_huge(*ptep)); /* not relevant for kdata_huge */ - set_pte_at(&init_mm, addr, ptep, pfn_pte(pfn, PAGE_KERNEL_RO)); - } -} -#endif - #ifdef CONFIG_DEBUG_PAGEALLOC static long __write_once initfree; #else @@ -988,7 +912,7 @@ static long __write_once initfree = 1; static int __init set_initfree(char *str) { long val; - if (strict_strtol(str, 0, &val) == 0) { + if (kstrtol(str, 0, &val) == 0) { initfree = val; pr_info("initfree: %s free init pages\n", initfree ? "will" : "won't"); @@ -1017,7 +941,7 @@ static void free_init_pages(char *what, unsigned long begin, unsigned long end) */ int pfn = kaddr_to_pfn((void *)addr); struct page *page = pfn_to_page(pfn); - pte_t *ptep = virt_to_pte(NULL, addr); + pte_t *ptep = virt_to_kpte(addr); if (!initfree) { /* * If debugging page accesses then do not free @@ -1028,31 +952,24 @@ static void free_init_pages(char *what, unsigned long begin, unsigned long end) pte_clear(&init_mm, addr, ptep); continue; } - __ClearPageReserved(page); - init_page_count(page); if (pte_huge(*ptep)) BUG_ON(!kdata_huge); else set_pte_at(&init_mm, addr, ptep, pfn_pte(pfn, PAGE_KERNEL)); memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - free_page(addr); - totalram_pages++; + free_reserved_page(page); } pr_info("Freeing %s: %ldk freed\n", what, (end - begin) >> 10); } void free_initmem(void) { - const unsigned long text_delta = MEM_SV_INTRPT - PAGE_OFFSET; + const unsigned long text_delta = MEM_SV_START - PAGE_OFFSET; /* - * Evict the dirty initdata on the boot cpu, evict the w1data - * wherever it's homed, and evict all the init code everywhere. - * We are guaranteed that no one will touch the init pages any - * more, and although other cpus may be touching the w1data, - * we only actually change the caching on tile64, which won't - * be keeping local copies in the other tiles' caches anyway. + * Evict the cache on all cores to avoid incoherence. + * We are guaranteed that no one will touch the init pages any more. */ homecache_evict(&cpu_cacheable_map); @@ -1063,26 +980,11 @@ void free_initmem(void) /* * Free the pages mapped from 0xc0000000 that correspond to code - * pages from MEM_SV_INTRPT that we won't use again after init. + * pages from MEM_SV_START that we won't use again after init. */ free_init_pages("unused kernel text", (unsigned long)_sinittext - text_delta, (unsigned long)_einittext - text_delta); - -#if !CHIP_HAS_COHERENT_LOCAL_CACHE() - /* - * Upgrade the .w1data section to globally cached. - * We don't do this on tilepro, since the cache architecture - * pretty much makes it irrelevant, and in any case we end - * up having racing issues with other tiles that may touch - * the data after we flush the cache but before we update - * the PTEs and flush the TLBs, causing sharer shootdowns - * later. Even though this is to clean data, it seems like - * an unnecessary complication. - */ - mark_w1data_ro(); -#endif - /* Do a global TLB flush so everyone sees the changes. */ flush_tlb_all(); } diff --git a/arch/tile/mm/migrate_32.S b/arch/tile/mm/migrate_32.S index 5305814bf18..772085491bf 100644 --- a/arch/tile/mm/migrate_32.S +++ b/arch/tile/mm/migrate_32.S @@ -136,7 +136,7 @@ STD_ENTRY(flush_and_install_context) move r8, zero /* asids */ move r9, zero /* asidcount */ } - jal hv_flush_remote + jal _hv_flush_remote bnz r0, .Ldone /* Now install the new page table. */ @@ -152,7 +152,7 @@ STD_ENTRY(flush_and_install_context) move r4, r_asid moveli r5, HV_CTX_DIRECTIO | CTX_PAGE_FLAG } - jal hv_install_context + jal _hv_install_context bnz r0, .Ldone /* Finally, flush the TLB. */ diff --git a/arch/tile/mm/migrate_64.S b/arch/tile/mm/migrate_64.S index 1d15b10833d..a49eee38f87 100644 --- a/arch/tile/mm/migrate_64.S +++ b/arch/tile/mm/migrate_64.S @@ -123,7 +123,7 @@ STD_ENTRY(flush_and_install_context) } { move r8, zero /* asidcount */ - jal hv_flush_remote + jal _hv_flush_remote } bnez r0, 1f @@ -136,7 +136,7 @@ STD_ENTRY(flush_and_install_context) move r2, r_asid moveli r3, HV_CTX_DIRECTIO | CTX_PAGE_FLAG } - jal hv_install_context + jal _hv_install_context bnez r0, 1f /* Finally, flush the TLB. */ diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c index f96f4cec602..851a94e6ae5 100644 --- a/arch/tile/mm/mmap.c +++ b/arch/tile/mm/mmap.c @@ -58,18 +58,36 @@ void arch_pick_mmap_layout(struct mm_struct *mm) #else int is_32bit = 0; #endif + unsigned long random_factor = 0UL; + + /* + * 8 bits of randomness in 32bit mmaps, 24 address space bits + * 12 bits of randomness in 64bit mmaps, 28 address space bits + */ + if (current->flags & PF_RANDOMIZE) { + if (is_32bit) + random_factor = get_random_int() % (1<<8); + else + random_factor = get_random_int() % (1<<12); + + random_factor <<= PAGE_SHIFT; + } /* * Use standard layout if the expected stack growth is unlimited * or we are running native 64 bits. */ - if (!is_32bit || rlimit(RLIMIT_STACK) == RLIM_INFINITY) { - mm->mmap_base = TASK_UNMAPPED_BASE; + if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) { + mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; mm->get_unmapped_area = arch_get_unmapped_area; - mm->unmap_area = arch_unmap_area; } else { mm->mmap_base = mmap_base(mm); mm->get_unmapped_area = arch_get_unmapped_area_topdown; - mm->unmap_area = arch_unmap_area_topdown; } } + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + unsigned long range_end = mm->brk + 0x02000000; + return randomize_range(mm->brk, range_end, 0) ? : mm->brk; +} diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 345edfed9fc..5e86eac4bfa 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -61,7 +61,7 @@ void show_mem(unsigned int filter) global_page_state(NR_PAGETABLE), global_page_state(NR_BOUNCE), global_page_state(NR_FILE_PAGES), - nr_swap_pages); + get_nr_swap_pages()); for_each_zone(zone) { unsigned long flags, order, total = 0, largest_order = -1; @@ -83,55 +83,6 @@ void show_mem(unsigned int filter) } } -/* - * Associate a virtual page frame with a given physical page frame - * and protection flags for that frame. - */ -static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - pgd = swapper_pg_dir + pgd_index(vaddr); - if (pgd_none(*pgd)) { - BUG(); - return; - } - pud = pud_offset(pgd, vaddr); - if (pud_none(*pud)) { - BUG(); - return; - } - pmd = pmd_offset(pud, vaddr); - if (pmd_none(*pmd)) { - BUG(); - return; - } - pte = pte_offset_kernel(pmd, vaddr); - /* <pfn,flags> stored as-is, to permit clearing entries */ - set_pte(pte, pfn_pte(pfn, flags)); - - /* - * It's enough to flush this one mapping. - * This appears conservative since it is only called - * from __set_fixmap. - */ - local_flush_tlb_page(NULL, vaddr, PAGE_SIZE); -} - -void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags) -{ - unsigned long address = __fix_to_virt(idx); - - if (idx >= __end_of_fixed_addresses) { - BUG(); - return; - } - set_pte_pfn(address, phys >> PAGE_SHIFT, flags); -} - /** * shatter_huge_page() - ensure a given address is mapped by a small page. * @@ -176,8 +127,7 @@ void shatter_huge_page(unsigned long addr) } /* Shatter the huge page into the preallocated L2 page table. */ - pmd_populate_kernel(&init_mm, pmd, - get_prealloc_pte(pte_pfn(*(pte_t *)pmd))); + pmd_populate_kernel(&init_mm, pmd, get_prealloc_pte(pmd_pfn(*pmd))); #ifdef __PAGETABLE_PMD_FOLDED /* Walk every pgd on the system and update the pmd there. */ @@ -291,6 +241,11 @@ struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address, if (p == NULL) return NULL; + if (!pgtable_page_ctor(p)) { + __free_pages(p, L2_USER_PGTABLE_ORDER); + return NULL; + } + /* * Make every page have a page_count() of one, not just the first. * We don't use __GFP_COMP since it doesn't look like it works @@ -301,7 +256,6 @@ struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address, inc_zone_page_state(p+i, NR_PAGETABLE); } - pgtable_page_ctor(p); return p; } @@ -374,6 +328,17 @@ void ptep_set_wrprotect(struct mm_struct *mm, #endif +/* + * Return a pointer to the PTE that corresponds to the given + * address in the given page table. A NULL page table just uses + * the standard kernel page table; the preferred API in this case + * is virt_to_kpte(). + * + * The returned pointer can point to a huge page in other levels + * of the page table than the bottom, if the huge page is present + * in the page table. For bottom-level PTEs, the returned pointer + * can point to a PTE that is either present or not. + */ pte_t *virt_to_pte(struct mm_struct* mm, unsigned long addr) { pgd_t *pgd; @@ -387,13 +352,23 @@ pte_t *virt_to_pte(struct mm_struct* mm, unsigned long addr) pud = pud_offset(pgd, addr); if (!pud_present(*pud)) return NULL; + if (pud_huge_page(*pud)) + return (pte_t *)pud; pmd = pmd_offset(pud, addr); - if (pmd_huge_page(*pmd)) - return (pte_t *)pmd; if (!pmd_present(*pmd)) return NULL; + if (pmd_huge_page(*pmd)) + return (pte_t *)pmd; return pte_offset_kernel(pmd, addr); } +EXPORT_SYMBOL(virt_to_pte); + +pte_t *virt_to_kpte(unsigned long kaddr) +{ + BUG_ON(kaddr < PAGE_OFFSET); + return virt_to_pte(NULL, kaddr); +} +EXPORT_SYMBOL(virt_to_kpte); pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu) { @@ -568,20 +543,13 @@ void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, addr = area->addr; if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size, phys_addr, pgprot)) { - remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr)); + free_vm_area(area); return NULL; } return (__force void __iomem *) (offset + (char *)addr); } EXPORT_SYMBOL(ioremap_prot); -/* Map a PCI MMIO bus address into VA space. */ -void __iomem *ioremap(resource_size_t phys_addr, unsigned long size) -{ - panic("ioremap for PCI MMIO is not supported"); -} -EXPORT_SYMBOL(ioremap); - /* Unmap an MMIO VA mapping. */ void iounmap(volatile void __iomem *addr_in) { @@ -599,12 +567,7 @@ void iounmap(volatile void __iomem *addr_in) in parallel. Reuse of the virtual address is prevented by leaving it in the global lists until we're done with it. cpa takes care of the direct mappings. */ - read_lock(&vmlist_lock); - for (p = vmlist; p; p = p->next) { - if (p->addr == addr) - break; - } - read_unlock(&vmlist_lock); + p = find_vm_area((void *)addr); if (!p) { pr_err("iounmap: bad address %p\n", addr); |
