diff options
author | Jiri Kosina <jkosina@suse.cz> | 2012-06-29 14:45:58 +0200 |
---|---|---|
committer | Jiri Kosina <jkosina@suse.cz> | 2012-06-29 14:45:58 +0200 |
commit | 59f91e5dd0504dc0ebfaa0b6f3a55e6931f96266 (patch) | |
tree | b913718405d44a921905ac71044fbde410256865 /mm | |
parent | 57bdfdd80077addf518a9b90c4a66890efc4f70e (diff) | |
parent | 89abfab133ef1f5902abafb744df72793213ac19 (diff) |
Merge branch 'master' into for-next
Conflicts:
include/linux/mmzone.h
Synced with Linus' tree so that a trivial patch can be applied
properly on top of up-to-date code.
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 10 | ||||
-rw-r--r-- | mm/Makefile | 9 | ||||
-rw-r--r-- | mm/bootmem.c | 134 | ||||
-rw-r--r-- | mm/compaction.c | 142 | ||||
-rw-r--r-- | mm/filemap.c | 39 | ||||
-rw-r--r-- | mm/huge_memory.c | 21 | ||||
-rw-r--r-- | mm/hugetlb.c | 32 | ||||
-rw-r--r-- | mm/internal.h | 14 | ||||
-rw-r--r-- | mm/madvise.c | 15 | ||||
-rw-r--r-- | mm/memblock.c | 42 | ||||
-rw-r--r-- | mm/memcontrol.c | 127 | ||||
-rw-r--r-- | mm/memory-failure.c | 8 | ||||
-rw-r--r-- | mm/memory.c | 20 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 14 | ||||
-rw-r--r-- | mm/mempolicy.c | 36 | ||||
-rw-r--r-- | mm/mmap.c | 53 | ||||
-rw-r--r-- | mm/nobootmem.c | 112 | ||||
-rw-r--r-- | mm/oom_kill.c | 44 | ||||
-rw-r--r-- | mm/page_alloc.c | 78 | ||||
-rw-r--r-- | mm/readahead.c | 40 | ||||
-rw-r--r-- | mm/rmap.c | 6 | ||||
-rw-r--r-- | mm/shmem.c | 513 | ||||
-rw-r--r-- | mm/sparse.c | 25 | ||||
-rw-r--r-- | mm/swap.c | 51 | ||||
-rw-r--r-- | mm/swapfile.c | 33 | ||||
-rw-r--r-- | mm/thrash.c | 155 | ||||
-rw-r--r-- | mm/truncate.c | 25 | ||||
-rw-r--r-- | mm/vmalloc.c | 7 | ||||
-rw-r--r-- | mm/vmscan.c | 306 | ||||
-rw-r--r-- | mm/vmstat.c | 10 |
30 files changed, 1137 insertions, 984 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 39220026c79..b2176374b98 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -349,6 +349,16 @@ choice benefit. endchoice +config CROSS_MEMORY_ATTACH + bool "Cross Memory Support" + depends on MMU + default y + help + Enabling this option adds the system calls process_vm_readv and + process_vm_writev which allow a process with the correct privileges + to directly read from or write to to another process's address space. + See the man page for more details. + # # UP and nommu archs use km based percpu allocator # diff --git a/mm/Makefile b/mm/Makefile index 8aada89efbb..a156285ce88 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -5,8 +5,11 @@ mmu-y := nommu.o mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ - vmalloc.o pagewalk.o pgtable-generic.o \ - process_vm_access.o + vmalloc.o pagewalk.o pgtable-generic.o + +ifdef CONFIG_CROSS_MEMORY_ATTACH +mmu-$(CONFIG_MMU) += process_vm_access.o +endif obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ maccess.o page_alloc.o page-writeback.o \ @@ -25,7 +28,7 @@ endif obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o obj-$(CONFIG_BOUNCE) += bounce.o -obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o +obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o obj-$(CONFIG_HAS_DMA) += dmapool.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o obj-$(CONFIG_NUMA) += mempolicy.o diff --git a/mm/bootmem.c b/mm/bootmem.c index 0131170c9d5..ec4fcb7a56c 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -77,16 +77,16 @@ unsigned long __init bootmem_bootmap_pages(unsigned long pages) */ static void __init link_bootmem(bootmem_data_t *bdata) { - struct list_head *iter; + bootmem_data_t *ent; - list_for_each(iter, &bdata_list) { - bootmem_data_t *ent; - - ent = list_entry(iter, bootmem_data_t, list); - if (bdata->node_min_pfn < ent->node_min_pfn) - break; + list_for_each_entry(ent, &bdata_list, list) { + if (bdata->node_min_pfn < 
ent->node_min_pfn) { + list_add_tail(&bdata->list, &ent->list); + return; + } } - list_add_tail(&bdata->list, iter); + + list_add_tail(&bdata->list, &bdata_list); } /* @@ -203,7 +203,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) } else { unsigned long off = 0; - while (vec && off < BITS_PER_LONG) { + vec >>= start & (BITS_PER_LONG - 1); + while (vec) { if (vec & 1) { page = pfn_to_page(start + off); __free_pages_bootmem(page, 0); @@ -467,7 +468,7 @@ static unsigned long __init align_off(struct bootmem_data *bdata, return ALIGN(base + off, align) - base; } -static void * __init alloc_bootmem_core(struct bootmem_data *bdata, +static void * __init alloc_bootmem_bdata(struct bootmem_data *bdata, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { @@ -588,14 +589,14 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata, p_bdata = bootmem_arch_preferred_node(bdata, size, align, goal, limit); if (p_bdata) - return alloc_bootmem_core(p_bdata, size, align, + return alloc_bootmem_bdata(p_bdata, size, align, goal, limit); } #endif return NULL; } -static void * __init ___alloc_bootmem_nopanic(unsigned long size, +static void * __init alloc_bootmem_core(unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) @@ -603,7 +604,6 @@ static void * __init ___alloc_bootmem_nopanic(unsigned long size, bootmem_data_t *bdata; void *region; -restart: region = alloc_arch_preferred_bootmem(NULL, size, align, goal, limit); if (region) return region; @@ -614,11 +614,25 @@ restart: if (limit && bdata->node_min_pfn >= PFN_DOWN(limit)) break; - region = alloc_bootmem_core(bdata, size, align, goal, limit); + region = alloc_bootmem_bdata(bdata, size, align, goal, limit); if (region) return region; } + return NULL; +} + +static void * __init ___alloc_bootmem_nopanic(unsigned long size, + unsigned long align, + unsigned long goal, + unsigned long limit) +{ + void *ptr; + +restart: + ptr = 
alloc_bootmem_core(size, align, goal, limit); + if (ptr) + return ptr; if (goal) { goal = 0; goto restart; @@ -684,21 +698,56 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, return ___alloc_bootmem(size, align, goal, limit); } -static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, +static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { void *ptr; - ptr = alloc_arch_preferred_bootmem(bdata, size, align, goal, limit); +again: + ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, + align, goal, limit); if (ptr) return ptr; - ptr = alloc_bootmem_core(bdata, size, align, goal, limit); + ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, goal, limit); if (ptr) return ptr; - return ___alloc_bootmem(size, align, goal, limit); + ptr = alloc_bootmem_core(size, align, goal, limit); + if (ptr) + return ptr; + + if (goal) { + goal = 0; + goto again; + } + + return NULL; +} + +void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, + unsigned long align, unsigned long goal) +{ + if (WARN_ON_ONCE(slab_is_available())) + return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); + + return ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0); +} + +void * __init ___alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, + unsigned long align, unsigned long goal, + unsigned long limit) +{ + void *ptr; + + ptr = ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0); + if (ptr) + return ptr; + + printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size); + panic("Out of memory"); + return NULL; } /** @@ -722,7 +771,7 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, if (WARN_ON_ONCE(slab_is_available())) return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); - return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); + return ___alloc_bootmem_node(pgdat, size, align, goal, 
0); } void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, @@ -743,7 +792,7 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, unsigned long new_goal; new_goal = MAX_DMA32_PFN << PAGE_SHIFT; - ptr = alloc_bootmem_core(pgdat->bdata, size, align, + ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, new_goal, 0); if (ptr) return ptr; @@ -754,47 +803,6 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, } -#ifdef CONFIG_SPARSEMEM -/** - * alloc_bootmem_section - allocate boot memory from a specific section - * @size: size of the request in bytes - * @section_nr: sparse map section to allocate from - * - * Return NULL on failure. - */ -void * __init alloc_bootmem_section(unsigned long size, - unsigned long section_nr) -{ - bootmem_data_t *bdata; - unsigned long pfn, goal; - - pfn = section_nr_to_pfn(section_nr); - goal = pfn << PAGE_SHIFT; - bdata = &bootmem_node_data[early_pfn_to_nid(pfn)]; - - return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, 0); -} -#endif - -void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, - unsigned long align, unsigned long goal) -{ - void *ptr; - - if (WARN_ON_ONCE(slab_is_available())) - return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); - - ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); - if (ptr) - return ptr; - - ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); - if (ptr) - return ptr; - - return __alloc_bootmem_nopanic(size, align, goal); -} - #ifndef ARCH_LOW_ADDRESS_LIMIT #define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL #endif @@ -839,6 +847,6 @@ void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, if (WARN_ON_ONCE(slab_is_available())) return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); - return ___alloc_bootmem_node(pgdat->bdata, size, align, - goal, ARCH_LOW_ADDRESS_LIMIT); + return ___alloc_bootmem_node(pgdat, size, align, + goal, 
ARCH_LOW_ADDRESS_LIMIT); } diff --git a/mm/compaction.c b/mm/compaction.c index da7d35ea510..840ee288e29 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -235,7 +235,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, */ while (unlikely(too_many_isolated(zone))) { /* async migration should just abort */ - if (!cc->sync) + if (cc->mode != COMPACT_SYNC) return 0; congestion_wait(BLK_RW_ASYNC, HZ/10); @@ -303,7 +303,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, * satisfies the allocation */ pageblock_nr = low_pfn >> pageblock_order; - if (!cc->sync && last_pageblock_nr != pageblock_nr && + if (cc->mode != COMPACT_SYNC && + last_pageblock_nr != pageblock_nr && !migrate_async_suitable(get_pageblock_migratetype(page))) { low_pfn += pageblock_nr_pages; low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; @@ -324,7 +325,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, continue; } - if (!cc->sync) + if (cc->mode != COMPACT_SYNC) mode |= ISOLATE_ASYNC_MIGRATE; /* Try isolate the page */ @@ -357,27 +358,90 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, #endif /* CONFIG_COMPACTION || CONFIG_CMA */ #ifdef CONFIG_COMPACTION +/* + * Returns true if MIGRATE_UNMOVABLE pageblock was successfully + * converted to MIGRATE_MOVABLE type, false otherwise. 
+ */ +static bool rescue_unmovable_pageblock(struct page *page) +{ + unsigned long pfn, start_pfn, end_pfn; + struct page *start_page, *end_page; + + pfn = page_to_pfn(page); + start_pfn = pfn & ~(pageblock_nr_pages - 1); + end_pfn = start_pfn + pageblock_nr_pages; + + start_page = pfn_to_page(start_pfn); + end_page = pfn_to_page(end_pfn); + + /* Do not deal with pageblocks that overlap zones */ + if (page_zone(start_page) != page_zone(end_page)) + return false; + + for (page = start_page, pfn = start_pfn; page < end_page; pfn++, + page++) { + if (!pfn_valid_within(pfn)) + continue; + + if (PageBuddy(page)) { + int order = page_order(page); + + pfn += (1 << order) - 1; + page += (1 << order) - 1; + + continue; + } else if (page_count(page) == 0 || PageLRU(page)) + continue; + + return false; + } + + set_pageblock_migratetype(page, MIGRATE_MOVABLE); + move_freepages_block(page_zone(page), page, MIGRATE_MOVABLE); + return true; +} -/* Returns true if the page is within a block suitable for migration to */ -static bool suitable_migration_target(struct page *page) +enum smt_result { + GOOD_AS_MIGRATION_TARGET, + FAIL_UNMOVABLE_TARGET, + FAIL_BAD_TARGET, +}; + +/* + * Returns GOOD_AS_MIGRATION_TARGET if the page is within a block + * suitable for migration to, FAIL_UNMOVABLE_TARGET if the page + * is within a MIGRATE_UNMOVABLE block, FAIL_BAD_TARGET otherwise. 
+ */ +static enum smt_result suitable_migration_target(struct page *page, + struct compact_control *cc) { int migratetype = get_pageblock_migratetype(page); /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) - return false; + return FAIL_BAD_TARGET; /* If the page is a large free page, then allow migration */ if (PageBuddy(page) && page_order(page) >= pageblock_order) - return true; + return GOOD_AS_MIGRATION_TARGET; /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ - if (migrate_async_suitable(migratetype)) - return true; + if (cc->mode != COMPACT_ASYNC_UNMOVABLE && + migrate_async_suitable(migratetype)) + return GOOD_AS_MIGRATION_TARGET; + + if (cc->mode == COMPACT_ASYNC_MOVABLE && + migratetype == MIGRATE_UNMOVABLE) + return FAIL_UNMOVABLE_TARGET; + + if (cc->mode != COMPACT_ASYNC_MOVABLE && + migratetype == MIGRATE_UNMOVABLE && + rescue_unmovable_pageblock(page)) + return GOOD_AS_MIGRATION_TARGET; /* Otherwise skip the block */ - return false; + return FAIL_BAD_TARGET; } /* @@ -411,6 +475,13 @@ static void isolate_freepages(struct zone *zone, zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; /* + * isolate_freepages() may be called more than once during + * compact_zone_order() run and we want only the most recent + * count. + */ + cc->nr_pageblocks_skipped = 0; + + /* * Isolate free pages until enough are available to migrate the * pages on cc->migratepages. We stop searching if the migrate * and free page scanners meet or enough free pages are isolated. 
@@ -418,6 +489,7 @@ static void isolate_freepages(struct zone *zone, for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; pfn -= pageblock_nr_pages) { unsigned long isolated; + enum smt_result ret; if (!pfn_valid(pfn)) continue; @@ -434,9 +506,12 @@ static void isolate_freepages(struct zone *zone, continue; /* Check the block is suitable for migration */ - if (!suitable_migration_target(page)) + ret = suitable_migration_target(page, cc); + if (ret != GOOD_AS_MIGRATION_TARGET) { + if (ret == FAIL_UNMOVABLE_TARGET) + cc->nr_pageblocks_skipped++; continue; - + } /* * Found a block suitable for isolating free pages from. Now * we disabled interrupts, double check things are ok and @@ -445,12 +520,14 @@ static void isolate_freepages(struct zone *zone, */ isolated = 0; spin_lock_irqsave(&zone->lock, flags); - if (suitable_migration_target(page)) { + ret = suitable_migration_target(page, cc); + if (ret == GOOD_AS_MIGRATION_TARGET) { end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); isolated = isolate_freepages_block(pfn, end_pfn, freelist, false); nr_freepages += isolated; - } + } else if (ret == FAIL_UNMOVABLE_TARGET) + cc->nr_pageblocks_skipped++; spin_unlock_irqrestore(&zone->lock, flags); /* @@ -682,8 +759,9 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) nr_migrate = cc->nr_migratepages; err = migrate_pages(&cc->migratepages, compaction_alloc, - (unsigned long)cc, false, - cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC); + (unsigned long)&cc->freepages, false, + (cc->mode == COMPACT_SYNC) ? 
MIGRATE_SYNC_LIGHT + : MIGRATE_ASYNC); update_nr_listpages(cc); nr_remaining = cc->nr_migratepages; @@ -712,7 +790,8 @@ out: static unsigned long compact_zone_order(struct zone *zone, int order, gfp_t gfp_mask, - bool sync) + enum compact_mode mode, + unsigned long *nr_pageblocks_skipped) { struct compact_control cc = { .nr_freepages = 0, @@ -720,12 +799,17 @@ static unsigned long compact_zone_order(struct zone *zone, .order = order, .migratetype = allocflags_to_migratetype(gfp_mask), .zone = zone, - .sync = sync, + .mode = mode, }; + unsigned long rc; + INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); - return compact_zone(zone, &cc); + rc = compact_zone(zone, &cc); + *nr_pageblocks_skipped = cc.nr_pageblocks_skipped; + + return rc; } int sysctl_extfrag_threshold = 500; @@ -750,6 +834,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, struct zoneref *z; struct zone *zone; int rc = COMPACT_SKIPPED; + unsigned long nr_pageblocks_skipped; + enum compact_mode mode; /* * Check whether it is worth even starting compaction. The order check is @@ -766,12 +852,22 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, nodemask) { int status; - status = compact_zone_order(zone, order, gfp_mask, sync); + mode = sync ? COMPACT_SYNC : COMPACT_ASYNC_MOVABLE; +retry: + status = compact_zone_order(zone, order, gfp_mask, mode, + &nr_pageblocks_skipped); rc = max(status, rc); /* If a normal allocation would succeed, stop compacting */ if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0)) break; + + if (rc == COMPACT_COMPLETE && mode == COMPACT_ASYNC_MOVABLE) { + if (nr_pageblocks_skipped) { + mode = COMPACT_ASYNC_UNMOVABLE; + goto retry; + } + } } return rc; @@ -805,7 +901,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc) if (ok && cc->order > zone->compact_order_failed) zone->compact_order_failed = cc->order + 1; /* Currently async compaction is never deferred. 
*/ - else if (!ok && cc->sync) + else if (!ok && cc->mode == COMPACT_SYNC) defer_compaction(zone, cc->order); } @@ -820,7 +916,7 @@ int compact_pgdat(pg_data_t *pgdat, int order) { struct compact_control cc = { .order = order, - .sync = false, + .mode = COMPACT_ASYNC_MOVABLE, }; return __compact_pgdat(pgdat, &cc); @@ -830,7 +926,7 @@ static int compact_node(int nid) { struct compact_control cc = { .order = -1, - .sync = true, + .mode = COMPACT_SYNC, }; return __compact_pgdat(NODE_DATA(nid), &cc); diff --git a/mm/filemap.c b/mm/filemap.c index 79c4b2b0b14..64b48f934b8 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -29,7 +29,6 @@ #include <linux/pagevec.h> #include <linux/blkdev.h> #include <linux/security.h> -#include <linux/syscalls.h> #include <linux/cpuset.h> #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ #include <linux/memcontrol.h> @@ -1478,44 +1477,6 @@ out: } EXPORT_SYMBOL(generic_file_aio_read); -static ssize_t -do_readahead(struct address_space *mapping, struct file *filp, - pgoff_t index, unsigned long nr) -{ - if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage) - return -EINVAL; - - force_page_cache_readahead(mapping, filp, index, nr); - return 0; -} - -SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count) -{ - ssize_t ret; - struct file *file; - - ret = -EBADF; - file = fget(fd); - if (file) { - if (file->f_mode & FMODE_READ) { - struct address_space *mapping = file->f_mapping; - pgoff_t start = offset >> PAGE_CACHE_SHIFT; - pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT; - unsigned long len = end - start + 1; - ret = do_readahead(mapping, file, start, len); - } - fput(file); - } - return ret; -} -#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -asmlinkage long SyS_readahead(long fd, loff_t offset, long count) -{ - return SYSC_readahead((int) fd, offset, (size_t) count); -} -SYSCALL_ALIAS(sys_readahead, SyS_readahead); -#endif - #ifdef CONFIG_MMU /** * page_cache_read - adds requested page to the page cache if not already 
there diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f0e5306eeb5..d0def42c121 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -636,16 +636,12 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, unsigned long haddr, pmd_t *pmd, struct page *page) { - int ret = 0; pgtable_t pgtable; VM_BUG_ON(!PageCompound(page)); pgtable = pte_alloc_one(mm, haddr); - if (unlikely(!pgtable)) { - mem_cgroup_uncharge_page(page); - put_page(page); + if (unlikely(!pgtable)) return VM_FAULT_OOM; - } clear_huge_page(page, haddr, HPAGE_PMD_NR); __SetPageUptodate(page); @@ -675,7 +671,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, spin_unlock(&mm->page_table_lock); } - return ret; + return 0; } static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp) @@ -724,8 +720,14 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, put_page(page); goto out; } + if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, + page))) { + mem_cgroup_uncharge_page(page); + put_page(page); + goto out; + } - return __do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page); + return 0; } out: /* @@ -950,6 +952,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, count_vm_event(THP_FAULT_FALLBACK); ret = do_huge_pmd_wp_page_fallback(mm, vma, address, pmd, orig_pmd, page, haddr); + if (ret & VM_FAULT_OOM) + split_huge_page(page); put_page(page); goto out; } @@ -957,6 +961,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { put_page(new_page); + split_huge_page(page); put_page(page); ret |= VM_FAULT_OOM; goto out; @@ -968,8 +973,10 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, spin_lock(&mm->page_table_lock); put_page(page); if (unlikely(!pmd_same(*pmd, orig_pmd))) { + spin_unlock(&mm->page_table_lock); mem_cgroup_uncharge_page(new_page); put_page(new_page); + goto out; 
} else { pmd_t entry; VM_BUG_ON(!PageHead(page)); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4e28416c47f..285a81e87ec 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -273,8 +273,8 @@ static long region_count(struct list_head *head, long f, long t) /* Locate each segment we overlap with, and count that overlap. */ list_for_each_entry(rg, head, link) { - int seg_from; - int seg_to; + long seg_from; + long seg_to; if (rg->to <= f) continue; @@ -2157,6 +2157,15 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma) kref_get(&reservations->refs); } +static void resv_map_put(struct vm_area_struct *vma) +{ + struct resv_map *reservations = vma_resv_map(vma); + + if (!reservations) + return; + kref_put(&reservations->refs, resv_map_release); +} + static void hugetlb_vm_op_close(struct vm_area_struct *vma) { struct hstate *h = hstate_vma(vma); @@ -2173,7 +2182,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) reserve = (end - start) - region_count(&reservations->regions, start, end); - kref_put(&reservations->refs, resv_map_release); + resv_map_put(vma); if (reserve) { hugetlb_acct_memory(h, -reserve); @@ -2991,12 +3000,16 @@ int hugetlb_reserve_pages(struct inode *inode, set_vma_resv_flags(vma, HPAGE_RESV_OWNER); } - if (chg < 0) - return chg; + if (chg < 0) { + ret = chg; + goto out_err; + } /* There must be enough pages in the subpool for the mapping */ - if (hugepage_subpool_get_pages(spool, chg)) - return -ENOSPC; + if (hugepage_subpool_get_pages(spool, chg)) { + ret = -ENOSPC; + goto out_err; + } /* * Check enough hugepages are available for the reservation. 
@@ -3005,7 +3018,7 @@ int hugetlb_reserve_pages(struct inode *inode, ret = hugetlb_acct_memory(h, chg); if (ret < 0) { hugepage_subpool_put_pages(spool, chg); - return ret; + goto out_err; } /* @@ -3022,6 +3035,9 @@ int hugetlb_reserve_pages(struct inode *inode, if (!vma || vma->vm_flags & VM_MAYSHARE) region_add(&inode->i_mapping->private_list, from, to); return 0; +out_err: + resv_map_put(vma); + return ret; } void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed) diff --git a/mm/internal.h b/mm/internal.h index aee4761cf9a..4194ab9dc19 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -94,6 +94,9 @@ extern void putback_lru_page(struct page *page); /* * in mm/page_alloc.c */ +extern void set_pageblock_migratetype(struct page *page, int migratetype); +extern int move_freepages_block(struct zone *zone, struct page *page, + int migratetype); extern void __free_pages_bootmem(struct page *page, unsigned int order); extern void prep_compound_page(struct page *page, unsigned long order); #ifdef CONFIG_MEMORY_FAILURE @@ -101,6 +104,7 @@ extern bool is_free_buddy_page(struct page *page); #endif #if defined CONFIG_COMPACTION || defined CONFIG_CMA +#include <linux/compaction.h> /* * in mm/compaction.c @@ -119,11 +123,14 @@ struct compact_control { unsigned long nr_migratepages; /* Number of pages to migrate */ unsigned long free_pfn; /* isolate_freepages search base */ unsigned long migrate_pfn; /* isolate_migratepages search base */ - bool sync; /* Synchronous migration */ + enum compact_mode mode; /* Compaction mode */ int order; /* order a direct compactor needs */ int migratetype; /* MOVABLE, RECLAIMABLE etc */ struct zone *zone; + + /* Number of UNMOVABLE destination pageblocks skipped during scan */ + unsigned long nr_pageblocks_skipped; }; unsigned long @@ -164,7 +171,8 @@ static inline void munlock_vma_pages_all(struct vm_area_struct *vma) * to determine if it's being mapped into a LOCKED vma. * If so, mark page as mlocked. 
*/ -static inline int is_mlocked_vma(struct vm_area_struct *vma, struct page *page) +static inline int mlocked_vma_newpage(struct vm_area_struct *vma, + struct page *page) { VM_BUG_ON(PageLRU(page)); @@ -222,7 +230,7 @@ extern unsigned long vma_address(struct page *page, struct vm_area_struct *vma); #endif #else /* !CONFIG_MMU */ -static inline int is_mlocked_vma(struct vm_area_struct *v, struct page *p) +static inline int mlocked_vma_newpage(struct vm_area_struct *v, struct page *p) { return 0; } diff --git a/mm/madvise.c b/mm/madvise.c index 1ccbba5b667..deff1b64a08 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -11,8 +11,10 @@ #include <linux/mempolicy.h> #include <linux/page-isolation.h> #include <linux/hugetlb.h> +#include <linux/falloc.h> #include <linux/sched.h> #include <linux/ksm.h> +#include <linux/fs.h> /* * Any behaviour which results in changes to the vma->vm_flags needs to @@ -200,8 +202,7 @@ static long madvise_remove(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end) { - struct address_space *mapping; - loff_t offset, endoff; + loff_t offset; int error; *prev = NULL; /* tell sys_madvise we drop mmap_sem */ @@ -217,16 +218,14 @@ static long madvise_remove(struct vm_area_struct *vma, if ((vma->vm_flags & (VM_SHARED|VM_WRITE)) != (VM_SHARED|VM_WRITE)) return -EACCES; - mapping = vma->vm_file->f_mapping; - offset = (loff_t)(start - vma->vm_start) + ((loff_t)vma->vm_pgo |