aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig17
-rw-r--r--mm/Makefile1
-rw-r--r--mm/compaction.c142
-rw-r--r--mm/frontswap.c314
-rw-r--r--mm/internal.h9
-rw-r--r--mm/memblock.c68
-rw-r--r--mm/memcontrol.c6
-rw-r--r--mm/memory.c12
-rw-r--r--mm/mempolicy.c2
-rw-r--r--mm/migrate.c5
-rw-r--r--mm/nommu.c2
-rw-r--r--mm/oom_kill.c21
-rw-r--r--mm/page_alloc.c8
-rw-r--r--mm/page_cgroup.c4
-rw-r--r--mm/page_io.c12
-rw-r--r--mm/pagewalk.c1
-rw-r--r--mm/percpu-vm.c1
-rw-r--r--mm/shmem.c57
-rw-r--r--mm/swapfile.c66
19 files changed, 543 insertions, 205 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index b2176374b98..82fed4eb2b6 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -389,3 +389,20 @@ config CLEANCACHE
in a negligible performance hit.
If unsure, say Y to enable cleancache
+
+config FRONTSWAP
+ bool "Enable frontswap to cache swap pages if tmem is present"
+ depends on SWAP
+ default n
+ help
+ Frontswap is so named because it can be thought of as the opposite
+ of a "backing" store for a swap device. The data is stored into
+ "transcendent memory", memory that is not directly accessible or
+ addressable by the kernel and is of unknown and possibly
+ time-varying size. When space in transcendent memory is available,
+ a significant swap I/O reduction may be achieved. When none is
+ available, all frontswap calls are reduced to a single pointer-
+ compare-against-NULL resulting in a negligible performance hit
+ and swap data is stored as normal on the matching swap device.
+
+ If unsure, say Y to enable frontswap.
diff --git a/mm/Makefile b/mm/Makefile
index a156285ce88..2e2fbbefb99 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
obj-$(CONFIG_BOUNCE) += bounce.o
obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o
+obj-$(CONFIG_FRONTSWAP) += frontswap.o
obj-$(CONFIG_HAS_DMA) += dmapool.o
obj-$(CONFIG_HUGETLBFS) += hugetlb.o
obj-$(CONFIG_NUMA) += mempolicy.o
diff --git a/mm/compaction.c b/mm/compaction.c
index 4ac338af512..7ea259d82a9 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -236,7 +236,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
*/
while (unlikely(too_many_isolated(zone))) {
/* async migration should just abort */
- if (cc->mode != COMPACT_SYNC)
+ if (!cc->sync)
return 0;
congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -304,8 +304,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
* satisfies the allocation
*/
pageblock_nr = low_pfn >> pageblock_order;
- if (cc->mode != COMPACT_SYNC &&
- last_pageblock_nr != pageblock_nr &&
+ if (!cc->sync && last_pageblock_nr != pageblock_nr &&
!migrate_async_suitable(get_pageblock_migratetype(page))) {
low_pfn += pageblock_nr_pages;
low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
@@ -326,7 +325,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
continue;
}
- if (cc->mode != COMPACT_SYNC)
+ if (!cc->sync)
mode |= ISOLATE_ASYNC_MIGRATE;
lruvec = mem_cgroup_page_lruvec(page, zone);
@@ -361,90 +360,27 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
#endif /* CONFIG_COMPACTION || CONFIG_CMA */
#ifdef CONFIG_COMPACTION
-/*
- * Returns true if MIGRATE_UNMOVABLE pageblock was successfully
- * converted to MIGRATE_MOVABLE type, false otherwise.
- */
-static bool rescue_unmovable_pageblock(struct page *page)
-{
- unsigned long pfn, start_pfn, end_pfn;
- struct page *start_page, *end_page;
-
- pfn = page_to_pfn(page);
- start_pfn = pfn & ~(pageblock_nr_pages - 1);
- end_pfn = start_pfn + pageblock_nr_pages;
-
- start_page = pfn_to_page(start_pfn);
- end_page = pfn_to_page(end_pfn);
-
- /* Do not deal with pageblocks that overlap zones */
- if (page_zone(start_page) != page_zone(end_page))
- return false;
-
- for (page = start_page, pfn = start_pfn; page < end_page; pfn++,
- page++) {
- if (!pfn_valid_within(pfn))
- continue;
-
- if (PageBuddy(page)) {
- int order = page_order(page);
-
- pfn += (1 << order) - 1;
- page += (1 << order) - 1;
-
- continue;
- } else if (page_count(page) == 0 || PageLRU(page))
- continue;
-
- return false;
- }
-
- set_pageblock_migratetype(page, MIGRATE_MOVABLE);
- move_freepages_block(page_zone(page), page, MIGRATE_MOVABLE);
- return true;
-}
-enum smt_result {
- GOOD_AS_MIGRATION_TARGET,
- FAIL_UNMOVABLE_TARGET,
- FAIL_BAD_TARGET,
-};
-
-/*
- * Returns GOOD_AS_MIGRATION_TARGET if the page is within a block
- * suitable for migration to, FAIL_UNMOVABLE_TARGET if the page
- * is within a MIGRATE_UNMOVABLE block, FAIL_BAD_TARGET otherwise.
- */
-static enum smt_result suitable_migration_target(struct page *page,
- struct compact_control *cc)
+/* Returns true if the page is within a block suitable for migration to */
+static bool suitable_migration_target(struct page *page)
{
int migratetype = get_pageblock_migratetype(page);
/* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE)
- return FAIL_BAD_TARGET;
+ return false;
/* If the page is a large free page, then allow migration */
if (PageBuddy(page) && page_order(page) >= pageblock_order)
- return GOOD_AS_MIGRATION_TARGET;
+ return true;
/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
- if (cc->mode != COMPACT_ASYNC_UNMOVABLE &&
- migrate_async_suitable(migratetype))
- return GOOD_AS_MIGRATION_TARGET;
-
- if (cc->mode == COMPACT_ASYNC_MOVABLE &&
- migratetype == MIGRATE_UNMOVABLE)
- return FAIL_UNMOVABLE_TARGET;
-
- if (cc->mode != COMPACT_ASYNC_MOVABLE &&
- migratetype == MIGRATE_UNMOVABLE &&
- rescue_unmovable_pageblock(page))
- return GOOD_AS_MIGRATION_TARGET;
+ if (migrate_async_suitable(migratetype))
+ return true;
/* Otherwise skip the block */
- return FAIL_BAD_TARGET;
+ return false;
}
/*
@@ -478,13 +414,6 @@ static void isolate_freepages(struct zone *zone,
zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
/*
- * isolate_freepages() may be called more than once during
- * compact_zone_order() run and we want only the most recent
- * count.
- */
- cc->nr_pageblocks_skipped = 0;
-
- /*
* Isolate free pages until enough are available to migrate the
* pages on cc->migratepages. We stop searching if the migrate
* and free page scanners meet or enough free pages are isolated.
@@ -492,7 +421,6 @@ static void isolate_freepages(struct zone *zone,
for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
pfn -= pageblock_nr_pages) {
unsigned long isolated;
- enum smt_result ret;
if (!pfn_valid(pfn))
continue;
@@ -509,12 +437,9 @@ static void isolate_freepages(struct zone *zone,
continue;
/* Check the block is suitable for migration */
- ret = suitable_migration_target(page, cc);
- if (ret != GOOD_AS_MIGRATION_TARGET) {
- if (ret == FAIL_UNMOVABLE_TARGET)
- cc->nr_pageblocks_skipped++;
+ if (!suitable_migration_target(page))
continue;
- }
+
/*
* Found a block suitable for isolating free pages from. Now
* we disabled interrupts, double check things are ok and
@@ -523,14 +448,12 @@ static void isolate_freepages(struct zone *zone,
*/
isolated = 0;
spin_lock_irqsave(&zone->lock, flags);
- ret = suitable_migration_target(page, cc);
- if (ret == GOOD_AS_MIGRATION_TARGET) {
+ if (suitable_migration_target(page)) {
end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
isolated = isolate_freepages_block(pfn, end_pfn,
freelist, false);
nr_freepages += isolated;
- } else if (ret == FAIL_UNMOVABLE_TARGET)
- cc->nr_pageblocks_skipped++;
+ }
spin_unlock_irqrestore(&zone->lock, flags);
/*
@@ -762,9 +685,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
nr_migrate = cc->nr_migratepages;
err = migrate_pages(&cc->migratepages, compaction_alloc,
- (unsigned long)&cc->freepages, false,
- (cc->mode == COMPACT_SYNC) ? MIGRATE_SYNC_LIGHT
- : MIGRATE_ASYNC);
+ (unsigned long)cc, false,
+ cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC);
update_nr_listpages(cc);
nr_remaining = cc->nr_migratepages;
@@ -793,8 +715,7 @@ out:
static unsigned long compact_zone_order(struct zone *zone,
int order, gfp_t gfp_mask,
- enum compact_mode mode,
- unsigned long *nr_pageblocks_skipped)
+ bool sync)
{
struct compact_control cc = {
.nr_freepages = 0,
@@ -802,17 +723,12 @@ static unsigned long compact_zone_order(struct zone *zone,
.order = order,
.migratetype = allocflags_to_migratetype(gfp_mask),
.zone = zone,
- .mode = mode,
+ .sync = sync,
};
- unsigned long rc;
-
INIT_LIST_HEAD(&cc.freepages);
INIT_LIST_HEAD(&cc.migratepages);
- rc = compact_zone(zone, &cc);
- *nr_pageblocks_skipped = cc.nr_pageblocks_skipped;
-
- return rc;
+ return compact_zone(zone, &cc);
}
int sysctl_extfrag_threshold = 500;
@@ -837,8 +753,6 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
struct zoneref *z;
struct zone *zone;
int rc = COMPACT_SKIPPED;
- unsigned long nr_pageblocks_skipped;
- enum compact_mode mode;
/*
* Check whether it is worth even starting compaction. The order check is
@@ -855,22 +769,12 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
nodemask) {
int status;
- mode = sync ? COMPACT_SYNC : COMPACT_ASYNC_MOVABLE;
-retry:
- status = compact_zone_order(zone, order, gfp_mask, mode,
- &nr_pageblocks_skipped);
+ status = compact_zone_order(zone, order, gfp_mask, sync);
rc = max(status, rc);
/* If a normal allocation would succeed, stop compacting */
if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
break;
-
- if (rc == COMPACT_COMPLETE && mode == COMPACT_ASYNC_MOVABLE) {
- if (nr_pageblocks_skipped) {
- mode = COMPACT_ASYNC_UNMOVABLE;
- goto retry;
- }
- }
}
return rc;
@@ -904,7 +808,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
if (ok && cc->order > zone->compact_order_failed)
zone->compact_order_failed = cc->order + 1;
/* Currently async compaction is never deferred. */
- else if (!ok && cc->mode == COMPACT_SYNC)
+ else if (!ok && cc->sync)
defer_compaction(zone, cc->order);
}
@@ -919,7 +823,7 @@ int compact_pgdat(pg_data_t *pgdat, int order)
{
struct compact_control cc = {
.order = order,
- .mode = COMPACT_ASYNC_MOVABLE,
+ .sync = false,
};
return __compact_pgdat(pgdat, &cc);
@@ -929,7 +833,7 @@ static int compact_node(int nid)
{
struct compact_control cc = {
.order = -1,
- .mode = COMPACT_SYNC,
+ .sync = true,
};
return __compact_pgdat(NODE_DATA(nid), &cc);
diff --git a/mm/frontswap.c b/mm/frontswap.c
new file mode 100644
index 00000000000..e25025574a0
--- /dev/null
+++ b/mm/frontswap.c
@@ -0,0 +1,314 @@
+/*
+ * Frontswap frontend
+ *
+ * This code provides the generic "frontend" layer to call a matching
+ * "backend" driver implementation of frontswap. See
+ * Documentation/vm/frontswap.txt for more information.
+ *
+ * Copyright (C) 2009-2012 Oracle Corp. All rights reserved.
+ * Author: Dan Magenheimer
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/proc_fs.h>
+#include <linux/security.h>
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/frontswap.h>
+#include <linux/swapfile.h>
+
+/*
+ * frontswap_ops is set by frontswap_register_ops to contain the pointers
+ * to the frontswap "backend" implementation functions.
+ */
+static struct frontswap_ops frontswap_ops __read_mostly;
+
+/*
+ * This global enablement flag reduces overhead on systems where frontswap_ops
+ * has not been registered, so is preferred to the slower alternative: a
+ * function call that checks a non-global.
+ */
+bool frontswap_enabled __read_mostly;
+EXPORT_SYMBOL(frontswap_enabled);
+
+/*
+ * If enabled, frontswap_store will return failure even on success. As
+ * a result, the swap subsystem will always write the page to swap, in
+ * effect converting frontswap into a writethrough cache. In this mode,
+ * there is no direct reduction in swap writes, but a frontswap backend
+ * can unilaterally "reclaim" any pages in use with no data loss, thus
+ * providing increases control over maximum memory usage due to frontswap.
+ */
+static bool frontswap_writethrough_enabled __read_mostly;
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * Counters available via /sys/kernel/debug/frontswap (if debugfs is
+ * properly configured). These are for information only so are not protected
+ * against increment races.
+ */
+static u64 frontswap_loads;
+static u64 frontswap_succ_stores;
+static u64 frontswap_failed_stores;
+static u64 frontswap_invalidates;
+
+static inline void inc_frontswap_loads(void) {
+ frontswap_loads++;
+}
+static inline void inc_frontswap_succ_stores(void) {
+ frontswap_succ_stores++;
+}
+static inline void inc_frontswap_failed_stores(void) {
+ frontswap_failed_stores++;
+}
+static inline void inc_frontswap_invalidates(void) {
+ frontswap_invalidates++;
+}
+#else
+static inline void inc_frontswap_loads(void) { }
+static inline void inc_frontswap_succ_stores(void) { }
+static inline void inc_frontswap_failed_stores(void) { }
+static inline void inc_frontswap_invalidates(void) { }
+#endif
+/*
+ * Register operations for frontswap, returning previous thus allowing
+ * detection of multiple backends and possible nesting.
+ */
+struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops)
+{
+ struct frontswap_ops old = frontswap_ops;
+
+ frontswap_ops = *ops;
+ frontswap_enabled = true;
+ return old;
+}
+EXPORT_SYMBOL(frontswap_register_ops);
+
+/*
+ * Enable/disable frontswap writethrough (see above).
+ */
+void frontswap_writethrough(bool enable)
+{
+ frontswap_writethrough_enabled = enable;
+}
+EXPORT_SYMBOL(frontswap_writethrough);
+
+/*
+ * Called when a swap device is swapon'd.
+ */
+void __frontswap_init(unsigned type)
+{
+ struct swap_info_struct *sis = swap_info[type];
+
+ BUG_ON(sis == NULL);
+ if (sis->frontswap_map == NULL)
+ return;
+ if (frontswap_enabled)
+ (*frontswap_ops.init)(type);
+}
+EXPORT_SYMBOL(__frontswap_init);
+
+/*
+ * "Store" data from a page to frontswap and associate it with the page's
+ * swaptype and offset. Page must be locked and in the swap cache.
+ * If frontswap already contains a page with matching swaptype and
+ * offset, the frontswap implmentation may either overwrite the data and
+ * return success or invalidate the page from frontswap and return failure.
+ */
+int __frontswap_store(struct page *page)
+{
+ int ret = -1, dup = 0;
+ swp_entry_t entry = { .val = page_private(page), };
+ int type = swp_type(entry);
+ struct swap_info_struct *sis = swap_info[type];
+ pgoff_t offset = swp_offset(entry);
+
+ BUG_ON(!PageLocked(page));
+ BUG_ON(sis == NULL);
+ if (frontswap_test(sis, offset))
+ dup = 1;
+ ret = (*frontswap_ops.store)(type, offset, page);
+ if (ret == 0) {
+ frontswap_set(sis, offset);
+ inc_frontswap_succ_stores();
+ if (!dup)
+ atomic_inc(&sis->frontswap_pages);
+ } else if (dup) {
+ /*
+ failed dup always results in automatic invalidate of
+ the (older) page from frontswap
+ */
+ frontswap_clear(sis, offset);
+ atomic_dec(&sis->frontswap_pages);
+ inc_frontswap_failed_stores();
+ } else
+ inc_frontswap_failed_stores();
+ if (frontswap_writethrough_enabled)
+ /* report failure so swap also writes to swap device */
+ ret = -1;
+ return ret;
+}
+EXPORT_SYMBOL(__frontswap_store);
+
+/*
+ * "Get" data from frontswap associated with swaptype and offset that were
+ * specified when the data was put to frontswap and use it to fill the
+ * specified page with data. Page must be locked and in the swap cache.
+ */
+int __frontswap_load(struct page *page)
+{
+ int ret = -1;
+ swp_entry_t entry = { .val = page_private(page), };
+ int type = swp_type(entry);
+ struct swap_info_struct *sis = swap_info[type];
+ pgoff_t offset = swp_offset(entry);
+
+ BUG_ON(!PageLocked(page));
+ BUG_ON(sis == NULL);
+ if (frontswap_test(sis, offset))
+ ret = (*frontswap_ops.load)(type, offset, page);
+ if (ret == 0)
+ inc_frontswap_loads();
+ return ret;
+}
+EXPORT_SYMBOL(__frontswap_load);
+
+/*
+ * Invalidate any data from frontswap associated with the specified swaptype
+ * and offset so that a subsequent "get" will fail.
+ */
+void __frontswap_invalidate_page(unsigned type, pgoff_t offset)
+{
+ struct swap_info_struct *sis = swap_info[type];
+
+ BUG_ON(sis == NULL);
+ if (frontswap_test(sis, offset)) {
+ (*frontswap_ops.invalidate_page)(type, offset);
+ atomic_dec(&sis->frontswap_pages);
+ frontswap_clear(sis, offset);
+ inc_frontswap_invalidates();
+ }
+}
+EXPORT_SYMBOL(__frontswap_invalidate_page);
+
+/*
+ * Invalidate all data from frontswap associated with all offsets for the
+ * specified swaptype.
+ */
+void __frontswap_invalidate_area(unsigned type)
+{
+ struct swap_info_struct *sis = swap_info[type];
+
+ BUG_ON(sis == NULL);
+ if (sis->frontswap_map == NULL)
+ return;
+ (*frontswap_ops.invalidate_area)(type);
+ atomic_set(&sis->frontswap_pages, 0);
+ memset(sis->frontswap_map, 0, sis->max / sizeof(long));
+}
+EXPORT_SYMBOL(__frontswap_invalidate_area);
+
+/*
+ * Frontswap, like a true swap device, may unnecessarily retain pages
+ * under certain circumstances; "shrink" frontswap is essentially a
+ * "partial swapoff" and works by calling try_to_unuse to attempt to
+ * unuse enough frontswap pages to attempt to -- subject to memory
+ * constraints -- reduce the number of pages in frontswap to the
+ * number given in the parameter target_pages.
+ */
+void frontswap_shrink(unsigned long target_pages)
+{
+ struct swap_info_struct *si = NULL;
+ int si_frontswap_pages;
+ unsigned long total_pages = 0, total_pages_to_unuse;
+ unsigned long pages = 0, pages_to_unuse = 0;
+ int type;
+ bool locked = false;
+
+ /*
+ * we don't want to hold swap_lock while doing a very
+ * lengthy try_to_unuse, but swap_list may change
+ * so restart scan from swap_list.head each time
+ */
+ spin_lock(&swap_lock);
+ locked = true;
+ total_pages = 0;
+ for (type = swap_list.head; type >= 0; type = si->next) {
+ si = swap_info[type];
+ total_pages += atomic_read(&si->frontswap_pages);
+ }
+ if (total_pages <= target_pages)
+ goto out;
+ total_pages_to_unuse = total_pages - target_pages;
+ for (type = swap_list.head; type >= 0; type = si->next) {
+ si = swap_info[type];
+ si_frontswap_pages = atomic_read(&si->frontswap_pages);
+ if (total_pages_to_unuse < si_frontswap_pages)
+ pages = pages_to_unuse = total_pages_to_unuse;
+ else {
+ pages = si_frontswap_pages;
+ pages_to_unuse = 0; /* unuse all */
+ }
+ /* ensure there is enough RAM to fetch pages from frontswap */
+ if (security_vm_enough_memory_mm(current->mm, pages))
+ continue;
+ vm_unacct_memory(pages);
+ break;
+ }
+ if (type < 0)
+ goto out;
+ locked = false;
+ spin_unlock(&swap_lock);
+ try_to_unuse(type, true, pages_to_unuse);
+out:
+ if (locked)
+ spin_unlock(&swap_lock);
+ return;
+}
+EXPORT_SYMBOL(frontswap_shrink);
+
+/*
+ * Count and return the number of frontswap pages across all
+ * swap devices. This is exported so that backend drivers can
+ * determine current usage without reading debugfs.
+ */
+unsigned long frontswap_curr_pages(void)
+{
+ int type;
+ unsigned long totalpages = 0;
+ struct swap_info_struct *si = NULL;
+
+ spin_lock(&swap_lock);
+ for (type = swap_list.head; type >= 0; type = si->next) {
+ si = swap_info[type];
+ totalpages += atomic_read(&si->frontswap_pages);
+ }
+ spin_unlock(&swap_lock);
+ return totalpages;
+}
+EXPORT_SYMBOL(frontswap_curr_pages);
+
+static int __init init_frontswap(void)
+{
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *root = debugfs_create_dir("frontswap", NULL);
+ if (root == NULL)
+ return -ENXIO;
+ debugfs_create_u64("loads", S_IRUGO, root, &frontswap_loads);
+ debugfs_create_u64("succ_stores", S_IRUGO, root, &frontswap_succ_stores);
+ debugfs_create_u64("failed_stores", S_IRUGO, root,
+ &frontswap_failed_stores);
+ debugfs_create_u64("invalidates", S_IRUGO,
+ root, &frontswap_invalidates);
+#endif
+ return 0;
+}
+
+module_init(init_frontswap);
diff --git a/mm/internal.h b/mm/internal.h
index 5cbb7819004..2ba87fbfb75 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -94,9 +94,6 @@ extern void putback_lru_page(struct page *page);
/*
* in mm/page_alloc.c
*/
-extern void set_pageblock_migratetype(struct page *page, int migratetype);
-extern int move_freepages_block(struct zone *zone, struct page *page,
- int migratetype);
extern void __free_pages_bootmem(struct page *page, unsigned int order);
extern void prep_compound_page(struct page *page, unsigned long order);
#ifdef CONFIG_MEMORY_FAILURE
@@ -104,7 +101,6 @@ extern bool is_free_buddy_page(struct page *page);
#endif
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
-#include <linux/compaction.h>
/*
* in mm/compaction.c
@@ -123,14 +119,11 @@ struct compact_control {
unsigned long nr_migratepages; /* Number of pages to migrate */
unsigned long free_pfn; /* isolate_freepages search base */
unsigned long migrate_pfn; /* isolate_migratepages search base */
- enum compact_mode mode; /* Compaction mode */
+ bool sync; /* Synchronous migration */
int order; /* order a direct compactor needs */
int migratetype; /* MOVABLE, RECLAIMABLE etc */
struct zone *zone;
-
- /* Number of UNMOVABLE destination pageblocks skipped during scan */
- unsigned long nr_pageblocks_skipped;
};
unsigned long
diff --git a/mm/memblock.c b/mm/memblock.c
index 952123eba43..d4382095f8b 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -184,7 +184,24 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
}
}
-static int __init_memblock memblock_double_array(struct memblock_type *type)
+/**
+ * memblock_double_array - double the size of the memblock regions array
+ * @type: memblock type of the regions array being doubled
+ * @new_area_start: starting address of memory range to avoid overlap with
+ * @new_area_size: size of memory range to avoid overlap with
+ *
+ * Double the size of the @type regions array. If memblock is being used to
+ * allocate memory for a new reserved regions array and there is a previously
+ * allocated memory range [@new_area_start,@new_area_start+@new_area_size]
+ * waiting to be reserved, ensure the memory used by the new array does
+ * not overlap.
+ *
+ * RETURNS:
+ * 0 on success, -1 on failure.
+ */
+static int __init_memblock memblock_double_array(struct memblock_type *type,
+ phys_addr_t new_area_start,
+ phys_addr_t new_area_size)
{
struct memblock_region *new_array, *old_array;
phys_addr_t old_size, new_size, addr;
@@ -222,7 +239,18 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
new_array = kmalloc(new_size, GFP_KERNEL);
addr = new_array ? __pa(new_array) : 0;
} else {
- addr = memblock_find_in_range(0, MEMBLOCK_ALLOC_ACCESSIBLE, new_size, sizeof(phys_addr_t));
+ /* only exclude range when trying to double reserved.regions */
+ if (type != &memblock.reserved)
+ new_area_start = new_area_size = 0;
+
+ addr = memblock_find_in_range(new_area_start + new_area_size,
+ memblock.current_limit,
+ new_size, sizeof(phys_addr_t));
+ if (!addr && new_area_size)
+ addr = memblock_find_in_range(0,
+ min(new_area_start, memblock.current_limit),
+ new_size, sizeof(phys_addr_t));
+
new_array = addr ? __va(addr) : 0;
}
if (!addr) {
@@ -399,7 +427,7 @@ repeat:
*/
if (!insert) {
while (type->cnt + nr_new > type->max)
- if (memblock_double_array(type) < 0)
+ if (memblock_double_array(type, obase, size) < 0)
return -ENOMEM;
insert = true;
goto repeat;
@@ -450,7 +478,7 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
/* we'll create at most two more regions */
while (type->cnt + 2 > type->max)
- if (memblock_double_array(type) < 0)
+ if (memblock_double_array(type, base, size) < 0)
return -ENOMEM;
for (i = 0; i < type->cnt; i++) {
@@ -540,9 +568,9 @@ int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
* __next_free_mem_range - next function for for_each_free_mem_range()
* @idx: pointer to u64 loop variable
* @nid: nid: node selector, %MAX_NUMNODES for all nodes
- * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
- * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
- * @p_nid: ptr to int for nid of the range, can be %NULL
+ * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ * @out_nid: ptr to int for nid of the range, can be %NULL
*
* Find the first free area from *@idx which matches @nid, fill the out
* parameters, and update *@idx for the next iteration. The lower 32bit of
@@ -616,9 +644,9 @@ void __init_memblock __next_free_mem_range(u64 *idx, int nid,
* __next_free_mem_range_rev - next function for for_each_free_mem_range_reverse()
* @idx: pointer to u64 loop variable
* @nid: nid: node selector, %MAX_NUMNODES for all nodes
- * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
- * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
- * @p_nid: ptr to int for nid of the range, can be %NULL
+ * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ * @out_nid: ptr to int for nid of the range, can be %NULL
*
* Reverse of __next_free_mem_range().
*/
@@ -867,6 +895,16 @@ int __init_memblock memblock_is_memory(phys_addr_t addr)
return memblock_search(&memblock.memory, addr) != -1;
}
+/**
+ * memblock_is_region_memory - check if a region is a subset of memory
+ * @base: base of region to check
+ * @size: size of region to check
+ *
+ * Check if the region [@base, @base+@size) is a subset of a memory block.
+ *
+ * RETURNS:
+ * 0 if false, non-zero if true
+ */
int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
{
int idx = memblock_search(&memblock.memory, base);
@@ -879,6 +917,16 @@ int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size
memblock.memory.regions[idx].size) >= end;
}
+/**
+ * memblock_is_region_reserved - check if a region intersects reserved memory
+ * @base: base of region to check
+ * @size: size of region to check
+ *
+ * Check if the region [@base, @base+@size) intersects a reserved memory block.
+ *
+ * RETURNS:
+ * 0 if false, non-zero if true
+ */
int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
{
memblock_cap_size(base, &size);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ac35bccadb7..f72b5e52451 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1148,7 +1148,7 @@ bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
{
if (root_memcg == memcg)
return true;
- if (!root_memcg->use_hierarchy)
+ if (!root_memcg->use_hierarchy || !memcg)
return false;
return css_is_ancestor(&memcg->css, &root_memcg->css);
}
@@ -1234,7 +1234,7 @@ int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec)
/**
* mem_cgroup_margin - calculate chargeable space of a memory cgroup
- * @mem: the memory cgroup
+ * @memcg: the memory cgroup
*
* Returns the maximum amount of memory @mem can be charged with, in
* pages.
@@ -1508,7 +1508,7 @@ static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
/**
* test_mem_cgroup_node_reclaimable
- * @mem: the target memcg
+ * @memcg: the target memcg
* @nid: the node ID to be checked.
* @noswap : specify true here if the user wants flle only information.
*
diff --git a/mm/memory.c b/mm/memory.c
index 1b7dc662bf9..2466d125023 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1225,7 +1225,15 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
next = pmd_addr_end(addr, end);
if (pmd_trans_huge(*pmd)) {
if (next - addr != HPAGE_PMD_SIZE) {
- VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
+#ifdef CONFIG_DEBUG_VM
+ if (!rwsem_is_locked(&tlb->mm->mmap_sem)) {
+ pr_err("%s: mmap_sem is unlocked! addr=0x%lx end=0x%lx vma->vm_start=0x%lx vma->vm_end=0x%lx\n",
+ __func__, addr, end,
+ vma->vm_start,
+ vma->vm_end);
+ BUG();
+ }
+#endif
split_huge_page_pmd(vma->vm_mm, pmd);
} else if (zap_huge_pmd(tlb, vma, pmd, addr))
goto next;
@@ -1366,7 +1374,7 @@ void unmap_vmas(struct mmu_gather *tlb,
/**
* zap_page_range - remove user pages in a given range
* @vma: vm_area_struct holding the applicable pages
- * @address: starting address of pages to zap
+ * @start: starting address of pages to zap
* @size: number of bytes to zap
* @details: details of nonlinear truncation or shared cache invalidation
*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index f15c1b24ca1..1d771e4200d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1177,7 +1177,7 @@ static long do_mbind(unsigned long start, unsigned long len,
if (!list_empty(&pagelist)) {
nr_failed = migrate_pages(&pagelist, new_vma_page,
(unsigned long)vma,
- false, true);
+ false, MIGRATE_SYNC);
if (nr_failed)
putback_lru_pages(&pagelist);
}
diff --git a/mm/migrate.c b/mm/migrate.c
index ab81d482ae6..be26d5cbe56 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -436,7 +436,10 @@ void migrate_page_copy(struct page *newpage, struct page *page)
* is actually a signal that all of the page has become dirty.
* Whereas only part of our page may be dirty.
*/
- __set_page_dirty_nobuffers(newpage);
+ if (PageSwapBacked(page))
+ SetPageDirty(newpage);
+ else
+ __set_page_dirty_nobuffers(newpage);
}
mlock_migrate_page(newpage, page);
diff --git a/mm/nommu.c b/mm/nommu.c
index c4acfbc0997..d4b0c10872d 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1486,7 +1486,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
- ret = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+ retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff);
if (file)
fput(file);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index ed0e1967736..ac300c99baf 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -183,7 +183,8 @@ static bool oom_unkillable_task(struct task_struct *p,
unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
const nodemask_t *nodemask, unsigned long totalpages)
{
- unsigned long points;
+ long points;
+ long adj;
if (oom_unkillable_task(p, memcg, nodemask))
return 0;
@@ -192,7 +193,8 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
if (!p)
return 0;
- if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
+ adj = p->signal->oom_score_adj;
+ if (adj == OOM_SCORE_ADJ_MIN) {
task_unlock(p);
return 0;
}