diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 9 | ||||
-rw-r--r-- | mm/backing-dev.c | 25 | ||||
-rw-r--r-- | mm/bootmem.c | 24 | ||||
-rw-r--r-- | mm/highmem.c | 17 | ||||
-rw-r--r-- | mm/ksm.c | 1 | ||||
-rw-r--r-- | mm/memory-failure.c | 59 | ||||
-rw-r--r-- | mm/memory.c | 14 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 24 | ||||
-rw-r--r-- | mm/mempolicy.c | 13 | ||||
-rw-r--r-- | mm/migrate.c | 2 | ||||
-rw-r--r-- | mm/mmap.c | 4 | ||||
-rw-r--r-- | mm/nommu.c | 6 | ||||
-rw-r--r-- | mm/page_alloc.c | 7 | ||||
-rw-r--r-- | mm/percpu.c | 139 | ||||
-rw-r--r-- | mm/swapfile.c | 3 | ||||
-rw-r--r-- | mm/vmscan.c | 14 |
16 files changed, 242 insertions, 119 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 57963c6063d..44cf6f0a3a6 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -67,7 +67,7 @@ config DISCONTIGMEM config SPARSEMEM def_bool y - depends on SPARSEMEM_MANUAL + depends on (!SELECT_MEMORY_MODEL && ARCH_SPARSEMEM_ENABLE) || SPARSEMEM_MANUAL config FLATMEM def_bool y @@ -128,11 +128,8 @@ config SPARSEMEM_VMEMMAP config MEMORY_HOTPLUG bool "Allow for memory hot-add" depends on SPARSEMEM || X86_64_ACPI_NUMA - depends on HOTPLUG && !(HIBERNATION && !S390) && ARCH_ENABLE_MEMORY_HOTPLUG - depends on (IA64 || X86 || PPC64 || SUPERH || S390) - -comment "Memory hotplug is currently incompatible with Software Suspend" - depends on SPARSEMEM && HOTPLUG && HIBERNATION && !S390 + depends on HOTPLUG && ARCH_ENABLE_MEMORY_HOTPLUG + depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390) config MEMORY_HOTPLUG_SPARSE def_bool y diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 5a37e205571..67a33a5a1a9 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -604,15 +604,36 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) /* * Finally, kill the kernel threads. We don't need to be RCU - * safe anymore, since the bdi is gone from visibility. + * safe anymore, since the bdi is gone from visibility. Force + * unfreeze of the thread before calling kthread_stop(), otherwise + * it would never exet if it is currently stuck in the refrigerator. */ - list_for_each_entry(wb, &bdi->wb_list, list) + list_for_each_entry(wb, &bdi->wb_list, list) { + wb->task->flags &= ~PF_FROZEN; kthread_stop(wb->task); + } +} + +/* + * This bdi is going away now, make sure that no super_blocks point to it + */ +static void bdi_prune_sb(struct backing_dev_info *bdi) +{ + struct super_block *sb; + + spin_lock(&sb_lock); + list_for_each_entry(sb, &super_blocks, s_list) { + if (sb->s_bdi == bdi) + sb->s_bdi = NULL; + } + spin_unlock(&sb_lock); } void bdi_unregister(struct backing_dev_info *bdi) { if (bdi->dev) { + bdi_prune_sb(bdi); + if (!bdi_cap_flush_forker(bdi)) bdi_wb_shutdown(bdi); bdi_debug_unregister(bdi); diff --git a/mm/bootmem.c b/mm/bootmem.c index 555d5d2731c..d1dc23cc7f1 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -143,6 +143,30 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages) return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); } +/* + * free_bootmem_late - free bootmem pages directly to page allocator + * @addr: starting address of the range + * @size: size of the range in bytes + * + * This is only useful when the bootmem allocator has already been torn + * down, but we are still initializing the system. Pages are given directly + * to the page allocator, no bootmem metadata is updated because it is gone. + */ +void __init free_bootmem_late(unsigned long addr, unsigned long size) +{ + unsigned long cursor, end; + + kmemleak_free_part(__va(addr), size); + + cursor = PFN_UP(addr); + end = PFN_DOWN(addr + size); + + for (; cursor < end; cursor++) { + __free_pages_bootmem(pfn_to_page(cursor), 0); + totalram_pages++; + } +} + static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) { int aligned; diff --git a/mm/highmem.c b/mm/highmem.c index 25878cc49da..9c1e627f282 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -426,16 +426,21 @@ void __init page_address_init(void) void debug_kmap_atomic(enum km_type type) { - static unsigned warn_count = 10; + static int warn_count = 10; - if (unlikely(warn_count == 0)) + if (unlikely(warn_count < 0)) return; if (unlikely(in_interrupt())) { - if (in_irq()) { + if (in_nmi()) { + if (type != KM_NMI && type != KM_NMI_PTE) { + WARN_ON(1); + warn_count--; + } + } else if (in_irq()) { if (type != KM_IRQ0 && type != KM_IRQ1 && type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ && - type != KM_BOUNCE_READ) { + type != KM_BOUNCE_READ && type != KM_IRQ_PTE) { WARN_ON(1); warn_count--; } @@ -452,7 +457,9 @@ void debug_kmap_atomic(enum km_type type) } if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ || - type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) { + type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ || + type == KM_IRQ_PTE || type == KM_NMI || + type == KM_NMI_PTE ) { if (!irqs_disabled()) { WARN_ON(1); warn_count--; @@ -1012,6 +1012,7 @@ static struct rmap_item *unstable_tree_search_insert(struct page *page, struct rmap_item *tree_rmap_item; int ret; + cond_resched(); tree_rmap_item = rb_entry(*new, struct rmap_item, node); page2[0] = get_mergeable_page(tree_rmap_item); if (!page2[0]) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 729d4b15b64..dacc6418387 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -35,6 +35,7 @@ #include <linux/mm.h> #include <linux/page-flags.h> #include <linux/sched.h> +#include <linux/ksm.h> #include <linux/rmap.h> #include <linux/pagemap.h> #include <linux/swap.h> @@ -370,9 +371,6 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) int ret = FAILED; struct address_space *mapping; - if (!isolate_lru_page(p)) - page_cache_release(p); - /* * For anonymous pages we're done the only reference left * should be the one m_f() holds. @@ -498,30 +496,18 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn) */ static int me_swapcache_dirty(struct page *p, unsigned long pfn) { - int ret = FAILED; - ClearPageDirty(p); /* Trigger EIO in shmem: */ ClearPageUptodate(p); - if (!isolate_lru_page(p)) { - page_cache_release(p); - ret = DELAYED; - } - - return ret; + return DELAYED; } static int me_swapcache_clean(struct page *p, unsigned long pfn) { - int ret = FAILED; - - if (!isolate_lru_page(p)) { - page_cache_release(p); - ret = RECOVERED; - } delete_from_swap_cache(p); - return ret; + + return RECOVERED; } /* @@ -611,8 +597,6 @@ static struct page_state { { 0, 0, "unknown page state", me_unknown }, }; -#undef lru - static void action_result(unsigned long pfn, char *msg, int result) { struct page *page = NULL; @@ -629,13 +613,16 @@ static int page_action(struct page_state *ps, struct page *p, unsigned long pfn, int ref) { int result; + int count; result = ps->action(p, pfn); action_result(pfn, ps->msg, result); - if (page_count(p) != 1 + ref) + + count = page_count(p) - 1 - ref; + if (count != 0) printk(KERN_ERR "MCE %#lx: %s page still referenced by %d users\n", - pfn, ps->msg, page_count(p) - 1); + pfn, ps->msg, count); /* Could do more checks here if page looks ok */ /* @@ -661,12 +648,9 @@ static void hwpoison_user_mappings(struct page *p, unsigned long pfn, int i; int kill = 1; - if (PageReserved(p) || PageCompound(p) || PageSlab(p)) + if (PageReserved(p) || PageCompound(p) || PageSlab(p) || PageKsm(p)) return; - if (!PageLRU(p)) - lru_add_drain_all(); - /* * This check implies we don't kill processes if their pages * are in the swap cache early. Those are always late kills. @@ -738,6 +722,7 @@ static void hwpoison_user_mappings(struct page *p, unsigned long pfn, int __memory_failure(unsigned long pfn, int trapno, int ref) { + unsigned long lru_flag; struct page_state *ps; struct page *p; int res; @@ -775,6 +760,24 @@ int __memory_failure(unsigned long pfn, int trapno, int ref) } /* + * We ignore non-LRU pages for good reasons. + * - PG_locked is only well defined for LRU pages and a few others + * - to avoid races with __set_page_locked() + * - to avoid races with __SetPageSlab*() (and more non-atomic ops) + * The check (unnecessarily) ignores LRU pages being isolated and + * walked by the page reclaim code, however that's not a big loss. + */ + if (!PageLRU(p)) + lru_add_drain_all(); + lru_flag = p->flags & lru; + if (isolate_lru_page(p)) { + action_result(pfn, "non LRU", IGNORED); + put_page(p); + return -EBUSY; + } + page_cache_release(p); + + /* * Lock the page and wait for writeback to finish. * It's very difficult to mess with pages currently under IO * and in many cases impossible, so we just avoid it here. @@ -790,7 +793,7 @@ int __memory_failure(unsigned long pfn, int trapno, int ref) /* * Torn down by someone else? */ - if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) { + if ((lru_flag & lru) && !PageSwapCache(p) && p->mapping == NULL) { action_result(pfn, "already truncated LRU", IGNORED); res = 0; goto out; @@ -798,7 +801,7 @@ int __memory_failure(unsigned long pfn, int trapno, int ref) res = -EBUSY; for (ps = error_states;; ps++) { - if ((p->flags & ps->mask) == ps->res) { + if (((p->flags | lru_flag)& ps->mask) == ps->res) { res = page_action(ps, p, pfn, ref); break; } diff --git a/mm/memory.c b/mm/memory.c index 7e91b5f9f69..6ab19dd4a19 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -641,6 +641,7 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, unsigned long addr, unsigned long end) { + pte_t *orig_src_pte, *orig_dst_pte; pte_t *src_pte, *dst_pte; spinlock_t *src_ptl, *dst_ptl; int progress = 0; @@ -654,6 +655,8 @@ again: src_pte = pte_offset_map_nested(src_pmd, addr); src_ptl = pte_lockptr(src_mm, src_pmd); spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); + orig_src_pte = src_pte; + orig_dst_pte = dst_pte; arch_enter_lazy_mmu_mode(); do { @@ -677,9 +680,9 @@ again: arch_leave_lazy_mmu_mode(); spin_unlock(src_ptl); - pte_unmap_nested(src_pte - 1); + pte_unmap_nested(orig_src_pte); add_mm_rss(dst_mm, rss[0], rss[1]); - pte_unmap_unlock(dst_pte - 1, dst_ptl); + pte_unmap_unlock(orig_dst_pte, dst_ptl); cond_resched(); if (addr != end) goto again; @@ -1820,10 +1823,10 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, token = pmd_pgtable(*pmd); do { - err = fn(pte, token, addr, data); + err = fn(pte++, token, addr, data); if (err) break; - } while (pte++, addr += PAGE_SIZE, addr != end); + } while (addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); @@ -2539,7 +2542,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, } else if (PageHWPoison(page)) { ret = VM_FAULT_HWPOISON; delayacct_clear_flag(DELAYACCT_PF_SWAPIN); - goto out; + goto out_release; } lock_page(page); @@ -2611,6 +2614,7 @@ out_nomap: pte_unmap_unlock(page_table, ptl); out_page: unlock_page(page); +out_release: page_cache_release(page); return ret; } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 821dee59637..2047465cd27 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -26,6 +26,7 @@ #include <linux/migrate.h> #include <linux/page-isolation.h> #include <linux/pfn.h> +#include <linux/suspend.h> #include <asm/tlbflush.h> @@ -447,7 +448,8 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) } #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ -static pg_data_t *hotadd_new_pgdat(int nid, u64 start) +/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ +static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) { struct pglist_data *pgdat; unsigned long zones_size[MAX_NR_ZONES] = {0}; @@ -484,14 +486,18 @@ int __ref add_memory(int nid, u64 start, u64 size) struct resource *res; int ret; + lock_system_sleep(); + res = register_memory_resource(start, size); + ret = -EEXIST; if (!res) - return -EEXIST; + goto out; if (!node_online(nid)) { pgdat = hotadd_new_pgdat(nid, start); + ret = -ENOMEM; if (!pgdat) - return -ENOMEM; + goto out; new_pgdat = 1; } @@ -514,7 +520,8 @@ int __ref add_memory(int nid, u64 start, u64 size) BUG_ON(ret); } - return ret; + goto out; + error: /* rollback pgdat allocation and others */ if (new_pgdat) @@ -522,6 +529,8 @@ error: if (res) release_memory_resource(res); +out: + unlock_system_sleep(); return ret; } EXPORT_SYMBOL_GPL(add_memory); @@ -758,6 +767,8 @@ int offline_pages(unsigned long start_pfn, if (!test_pages_in_a_zone(start_pfn, end_pfn)) return -EINVAL; + lock_system_sleep(); + zone = page_zone(pfn_to_page(start_pfn)); node = zone_to_nid(zone); nr_pages = end_pfn - start_pfn; @@ -765,7 +776,7 @@ int offline_pages(unsigned long start_pfn, /* set above range as isolated */ ret = start_isolate_page_range(start_pfn, end_pfn); if (ret) - return ret; + goto out; arg.start_pfn = start_pfn; arg.nr_pages = nr_pages; @@ -843,6 +854,7 @@ repeat: writeback_set_ratelimit(); memory_notify(MEM_OFFLINE, &arg); + unlock_system_sleep(); return 0; failed_removal: @@ -852,6 +864,8 @@ failed_removal: /* pushback to free area */ undo_isolate_page_range(start_pfn, end_pfn); +out: + unlock_system_sleep(); return ret; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 7dd9d9f8069..4545d594424 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1024,7 +1024,7 @@ static long do_mbind(unsigned long start, unsigned long len, err = migrate_prep(); if (err) - return err; + goto mpol_out; } { NODEMASK_SCRATCH(scratch); @@ -1039,10 +1039,9 @@ static long do_mbind(unsigned long start, unsigned long len, err = -ENOMEM; NODEMASK_SCRATCH_FREE(scratch); } - if (err) { - mpol_put(new); - return err; - } + if (err) + goto mpol_out; + vma = check_range(mm, start, end, nmask, flags | MPOL_MF_INVERT, &pagelist); @@ -1058,9 +1057,11 @@ static long do_mbind(unsigned long start, unsigned long len, if (!err && nr_failed && (flags & MPOL_MF_STRICT)) err = -EIO; - } + } else + putback_lru_pages(&pagelist); up_write(&mm->mmap_sem); + mpol_out: mpol_put(new); return err; } diff --git a/mm/migrate.c b/mm/migrate.c index 1a4bf481378..7dbcb22316d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -602,7 +602,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, struct page *newpage = get_new_page(page, private, &result); int rcu_locked = 0; int charge = 0; - struct mem_cgroup *mem; + struct mem_cgroup *mem = NULL; if (!newpage) return -ENOMEM; diff --git a/mm/mmap.c b/mm/mmap.c index 73f5e4b6401..292ddc3cef9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -20,7 +20,6 @@ #include <linux/fs.h> #include <linux/personality.h> #include <linux/security.h> -#include <linux/ima.h> #include <linux/hugetlb.h> #include <linux/profile.h> #include <linux/module.h> @@ -1061,9 +1060,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, error = security_file_mmap(file, reqprot, prot, flags, addr, 0); if (error) return error; - error = ima_file_mmap(file, prot); - if (error) - return error; return mmap_region(file, addr, len, flags, vm_flags, pgoff); } diff --git a/mm/nommu.c b/mm/nommu.c index 5189b5aed8c..9876fa0c3ad 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1362,9 +1362,11 @@ share: error_just_free: up_write(&nommu_region_sem); error: - fput(region->vm_file); + if (region->vm_file) + fput(region->vm_file); kmem_cache_free(vm_region_jar, region); - fput(vma->vm_file); + if (vma->vm_file) + fput(vma->vm_file); if (vma->vm_flags & VM_EXECUTABLE) removed_exe_file_vma(vma->vm_mm); kmem_cache_free(vm_area_cachep, vma); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bf720550b44..2bc2ac63f41 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1769,7 +1769,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask) * See also cpuset_zone_allowed() comment in kernel/cpuset.c. */ alloc_flags &= ~ALLOC_CPUSET; - } else if (unlikely(rt_task(p))) + } else if (unlikely(rt_task(p)) && !in_interrupt()) alloc_flags |= ALLOC_HARDER; if (likely(!(gfp_mask & __GFP_NOMEMALLOC))) { @@ -1817,9 +1817,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE) goto nopage; +restart: wake_all_kswapd(order, zonelist, high_zoneidx); -restart: /* * OK, we're below the kswapd watermark and have kicked background * reclaim. Now things get more complex, so set up alloc_flags according @@ -2183,7 +2183,7 @@ void show_free_areas(void) printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n" " active_file:%lu inactive_file:%lu isolated_file:%lu\n" " unevictable:%lu" - " dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n" + " dirty:%lu writeback:%lu unstable:%lu\n" " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n" " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n", global_page_state(NR_ACTIVE_ANON), @@ -2196,7 +2196,6 @@ void show_free_areas(void) global_page_state(NR_FILE_DIRTY), global_page_state(NR_WRITEBACK), global_page_state(NR_UNSTABLE_NFS), - nr_blockdev_pages(), global_page_state(NR_FREE_PAGES), global_page_state(NR_SLAB_RECLAIMABLE), global_page_state(NR_SLAB_UNRECLAIMABLE), diff --git a/mm/percpu.c b/mm/percpu.c index 6af78c1ee70..5adfc268b40 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -153,7 +153,10 @@ static int pcpu_reserved_chunk_limit; * * During allocation, pcpu_alloc_mutex is kept locked all the time and * pcpu_lock is grabbed and released as necessary. All actual memory - * allocations are done using GFP_KERNEL with pcpu_lock released. + * allocations are done using GFP_KERNEL with pcpu_lock released. In + * general, percpu memory can't be allocated with irq off but + * irqsave/restore are still used in alloc path so that it can be used + * from early init path - sched_init() specifically. * * Free path accesses and alters only the index data structures, so it * can be safely called from atomic context. When memory needs to be @@ -352,62 +355,86 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) } /** - * pcpu_extend_area_map - extend area map for allocation - * @chunk: target chunk + * pcpu_need_to_extend - determine whether chunk area map needs to be extended + * @chunk: chunk of interest * - * Extend area map of @chunk so that it can accomodate an allocation. - * A single allocation can split an area into three areas, so this - * function makes sure that @chunk->map has at least two extra slots. + * Determine whether area map of @chunk needs to be extended to + * accomodate a new allocation. * * CONTEXT: - * pcpu_alloc_mutex, pcpu_lock. pcpu_lock is released and reacquired - * if area map is extended. + * pcpu_lock. * * RETURNS: - * 0 if noop, 1 if successfully extended, -errno on failure. + * New target map allocation length if extension is necessary, 0 + * otherwise. */ -static int pcpu_extend_area_map(struct pcpu_chunk *chunk) +static int pcpu_need_to_extend(struct pcpu_chunk *chunk) { int new_alloc; - int *new; - size_t size; - /* has enough? */ if (chunk->map_alloc >= chunk->map_used + 2) return 0; - spin_unlock_irq(&pcpu_lock); - new_alloc = PCPU_DFL_MAP_ALLOC; while (new_alloc < chunk->map_used + 2) new_alloc *= 2; - new = pcpu_mem_alloc(new_alloc * sizeof(new[0])); - if (!new) { - spin_lock_irq(&pcpu_lock); + return new_alloc; +} + +/** + * pcpu_extend_area_map - extend area map of a chunk + * @chunk: chunk of interest + * @new_alloc: new target allocation length of the area map + * + * Extend area map of @chunk to have @new_alloc entries. + * + * CONTEXT: + * Does GFP_KERNEL allocation. Grabs and releases pcpu_lock. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc) +{ + int *old = NULL, *new = NULL; + size_t old_size = 0, new_size = new_alloc * sizeof(new[0]); + unsigned long flags; + + new = pcpu_mem_alloc(new_size); + if (!new) return -ENOMEM; - } - /* - * Acquire pcpu_lock and switch to new area map. Only free - * could have happened inbetween, so map_used couldn't have - * grown. - */ - spin_lock_irq(&pcpu_lock); - BUG_ON(new_alloc < chunk->map_used + 2); + /* acquire pcpu_lock and switch to new area map */ + spin_lock_irqsave(&pcpu_lock, flags); + + if (new_alloc <= chunk->map_alloc) + goto out_unlock; - size = chunk->map_alloc * sizeof(chunk->map[0]); - memcpy(new, chunk->map, size); + old_size = chunk->map_alloc * sizeof(chunk->map[0]); + memcpy(new, chunk->map, old_size); /* * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is * one of the first chunks and still using static map. */ if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC) - pcpu_mem_free(chunk->map, size); + old = chunk->map; chunk->map_alloc = new_alloc; chunk->map = new; + new = NULL; + +out_unlock: + spin_unlock_irqrestore(&pcpu_lock, flags); + + /* + * pcpu_mem_free() might end up calling vfree() which uses + * IRQ-unsafe lock and thus can't be called under pcpu_lock. + */ + pcpu_mem_free(old, old_size); + pcpu_mem_free(new, new_size); + return 0; } @@ -1046,7 +1073,8 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved) static int warn_limit = 10; struct pcpu_chunk *chunk; const char *err; - int slot, off; + int slot, off, new_alloc; + unsigned long flags; if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { WARN(true, "illegal size (%zu) or align (%zu) for " @@ -1055,19 +1083,30 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved) } mutex_lock(&pcpu_alloc_mutex); - spin_lock_irq(&pcpu_lock); + spin_lock_irqsave(&pcpu_lock, flags); /* serve reserved allocations from the reserved chunk if available */ if (reserved && pcpu_reserved_chunk) { chunk = pcpu_reserved_chunk; - if (size > chunk->contig_hint || - pcpu_extend_area_map(chunk) < 0) { - err = "failed to extend area map of reserved chunk"; + + if (size > chunk->contig_hint) { + err = "alloc from reserved chunk failed"; goto fail_unlock; } + + while ((new_alloc = pcpu_need_to_extend(chunk))) { + spin_unlock_irqrestore(&pcpu_lock, flags); + if (pcpu_extend_area_map(chunk, new_alloc) < 0) { + err = "failed to extend area map of reserved chunk"; + goto fail_unlock_mutex; + } + spin_lock_irqsave(&pcpu_lock, flags); + } + off = pcpu_alloc_area(chunk, size, align); if (off >= 0) goto area_found; + err = "alloc from reserved chunk failed"; goto fail_unlock; } @@ -1079,14 +1118,20 @@ restart: if (size > chunk->contig_hint) continue; - switch (pcpu_extend_area_map(chunk)) { - case 0: - break; - case 1: - goto restart; /* pcpu_lock dropped, restart */ - default: - err = "failed to extend area map"; - goto fail_unlock; + new_alloc = pcpu_need_to_extend(chunk); + if (new_alloc) { + spin_unlock_irqrestore(&pcpu_lock, flags); + if (pcpu_extend_area_map(chunk, + new_alloc) < 0) { + err = "failed to extend area map"; + goto fail_unlock_mutex; + } + spin_lock_irqsave(&pcpu_lock, flags); + /* + * pcpu_lock has been dropped, need to + * restart cpu_slot list walking. + */ + goto restart; } off = pcpu_alloc_area(chunk, size, align); @@ -1096,7 +1141,7 @@ restart: } /* hmmm... no space left, create a new chunk */ - spin_unlock_irq(&pcpu_lock); + spin_unlock_irqrestore(&pcpu_lock, flags); chunk = alloc_pcpu_chunk(); if (!chunk) { @@ -1104,16 +1149,16 @@ restart: goto fail_unlock_mutex; } - spin_lock_irq(&pcpu_lock); + spin_lock_irqsave(&pcpu_lock, flags); pcpu_chunk_relocate(chunk, -1); goto restart; area_found: - spin_unlock_irq(&pcpu_lock); + spin_unlock_irqrestore(&pcpu_lock, flags); /* populate, map and clear the area */ if (pcpu_populate_chunk(chunk, off, size)) { - spin_lock_irq(&pcpu_lock); + spin_lock_irqsave(&pcpu_lock, flags); pcpu_free_area(chunk, off); err = "failed to populate"; goto fail_unlock; @@ -1125,7 +1170,7 @@ area_found: return __addr_to_pcpu_ptr(chunk->base_addr + off); fail_unlock: - spin_unlock_irq(&pcpu_lock); + spin_unlock_irqrestore(&pcpu_lock, flags); fail_unlock_mutex: mutex_unlock(&pcpu_alloc_mutex); if (warn_limit) { diff --git a/mm/swapfile.c b/mm/swapfile.c index a1bc6b9af9a..9c590eef791 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1151,8 +1151,7 @@ static int try_to_unuse(unsigned int type) } else retval = unuse_mm(mm, entry, page); - if (set_start_mm && - swap_count(*swap_map) < swcount) { + if (set_start_mm && *swap_map < swcount) { mmput(new_start_mm); atomic_inc(&mm->mm_users); new_start_mm = mm; diff --git a/mm/vmscan.c b/mm/vmscan.c index 64e43889883..777af57fd8c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -544,6 +544,16 @@ redo: */ lru = LRU_UNEVICTABLE; add_page_to_unevictable_list(page); + /* + * When racing with an mlock clearing (page is + * unlocked), make sure that if the other thread does + * not observe our setting of PG_lru and fails + * isolation, we see PG_mlocked cleared below and move + * the page back to the evictable list. + * + * The other side is TestClearPageMlocked(). + */ + smp_mb(); } /* @@ -1088,7 +1098,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, int lumpy_reclaim = 0; while (unlikely(too_many_isolated(zone, file, sc))) { - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); /* We are about to die and free our memory. Return now. */ if (fatal_signal_pending(current)) @@ -1356,7 +1366,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, * IO, plus JVM can create lots of anon VM_EXEC pages, * so we ignore them here. */ - if ((vm_flags & VM_EXEC) && !PageAnon(page)) { + if ((vm_flags & VM_EXEC) && page_is_file_cache(page)) { list_add(&page->lru, &l_active); continue; } |