aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig27
-rw-r--r--mm/backing-dev.c11
-rw-r--r--mm/bootmem.c32
-rw-r--r--mm/bounce.c48
-rw-r--r--mm/cleancache.c2
-rw-r--r--mm/compaction.c133
-rw-r--r--mm/fadvise.c20
-rw-r--r--mm/filemap.c7
-rw-r--r--mm/fremap.c51
-rw-r--r--mm/huge_memory.c113
-rw-r--r--mm/hugetlb.c39
-rw-r--r--mm/internal.h7
-rw-r--r--mm/kmemleak.c14
-rw-r--r--mm/ksm.c670
-rw-r--r--mm/madvise.c105
-rw-r--r--mm/memblock.c70
-rw-r--r--mm/memcontrol.c477
-rw-r--r--mm/memory-failure.c202
-rw-r--r--mm/memory.c127
-rw-r--r--mm/memory_hotplug.c553
-rw-r--r--mm/mempolicy.c59
-rw-r--r--mm/migrate.c154
-rw-r--r--mm/mincore.c5
-rw-r--r--mm/mlock.c137
-rw-r--r--mm/mm_init.c31
-rw-r--r--mm/mmap.c123
-rw-r--r--mm/mmu_notifier.c102
-rw-r--r--mm/mmzone.c20
-rw-r--r--mm/mremap.c28
-rw-r--r--mm/nobootmem.c23
-rw-r--r--mm/nommu.c41
-rw-r--r--mm/oom_kill.c6
-rw-r--r--mm/page-writeback.c28
-rw-r--r--mm/page_alloc.c498
-rw-r--r--mm/rmap.c30
-rw-r--r--mm/shmem.c102
-rw-r--r--mm/slab.c2
-rw-r--r--mm/slob.c2
-rw-r--r--mm/slub.c4
-rw-r--r--mm/sparse.c12
-rw-r--r--mm/swap.c9
-rw-r--r--mm/swap_state.c58
-rw-r--r--mm/swapfile.c176
-rw-r--r--mm/util.c26
-rw-r--r--mm/vmalloc.c33
-rw-r--r--mm/vmscan.c397
-rw-r--r--mm/vmstat.c7
47 files changed, 3230 insertions, 1591 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 278e3ab1f16..ae55c1e04d1 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1,6 +1,6 @@
config SELECT_MEMORY_MODEL
def_bool y
- depends on EXPERIMENTAL || ARCH_SELECT_MEMORY_MODEL
+ depends on ARCH_SELECT_MEMORY_MODEL
choice
prompt "Memory model"
@@ -162,10 +162,16 @@ config MOVABLE_NODE
Say Y here if you want to hotplug a whole node.
Say N here if you want kernel to use memory on all nodes evenly.
+#
+# Only be set on architectures that have completely implemented memory hotplug
+# feature. If you are not sure, don't touch it.
+#
+config HAVE_BOOTMEM_INFO_NODE
+ def_bool n
+
# eventually, we can have this option just 'select SPARSEMEM'
config MEMORY_HOTPLUG
bool "Allow for memory hot-add"
- select MEMORY_ISOLATION
depends on SPARSEMEM || X86_64_ACPI_NUMA
depends on HOTPLUG && ARCH_ENABLE_MEMORY_HOTPLUG
depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390)
@@ -176,6 +182,8 @@ config MEMORY_HOTPLUG_SPARSE
config MEMORY_HOTREMOVE
bool "Allow for memory hot remove"
+ select MEMORY_ISOLATION
+ select HAVE_BOOTMEM_INFO_NODE if X86_64
depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE
depends on MIGRATION
@@ -258,6 +266,19 @@ config BOUNCE
def_bool y
depends on BLOCK && MMU && (ZONE_DMA || HIGHMEM)
+# On the 'tile' arch, USB OHCI needs the bounce pool since tilegx will often
+# have more than 4GB of memory, but we don't currently use the IOTLB to present
+# a 32-bit address to OHCI. So we need to use a bounce pool instead.
+#
+# We also use the bounce pool to provide stable page writes for jbd. jbd
+# initiates buffer writeback without locking the page or setting PG_writeback,
+# and fixing that behavior (a second time; jbd2 doesn't have this problem) is
+# a major rework effort. Instead, use the bounce buffer to snapshot pages
+# (until jbd goes away). The only jbd user is ext3.
+config NEED_BOUNCE_POOL
+ bool
+ default y if (TILE && USB_OHCI_HCD) || (BLK_DEV_INTEGRITY && JBD)
+
config NR_QUICK
int
depends on QUICKLIST
@@ -266,7 +287,7 @@ config NR_QUICK
config VIRT_TO_BUS
def_bool y
- depends on !ARCH_NO_VIRT_TO_BUS
+ depends on HAVE_VIRT_TO_BUS
config MMU_NOTIFIER
bool
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d3ca2b3ee17..41733c5dc82 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -221,12 +221,23 @@ static ssize_t max_ratio_store(struct device *dev,
}
BDI_SHOW(max_ratio, bdi->max_ratio)
+static ssize_t stable_pages_required_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ struct backing_dev_info *bdi = dev_get_drvdata(dev);
+
+ return snprintf(page, PAGE_SIZE-1, "%d\n",
+ bdi_cap_stable_pages_required(bdi) ? 1 : 0);
+}
+
#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)
static struct device_attribute bdi_dev_attrs[] = {
__ATTR_RW(read_ahead_kb),
__ATTR_RW(min_ratio),
__ATTR_RW(max_ratio),
+ __ATTR_RO(stable_pages_required),
__ATTR_NULL,
};
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 1324cd74fae..2b0bcb019ec 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -185,10 +185,23 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
while (start < end) {
unsigned long *map, idx, vec;
+ unsigned shift;
map = bdata->node_bootmem_map;
idx = start - bdata->node_min_pfn;
+ shift = idx & (BITS_PER_LONG - 1);
+ /*
+ * vec holds at most BITS_PER_LONG map bits,
+ * bit 0 corresponds to start.
+ */
vec = ~map[idx / BITS_PER_LONG];
+
+ if (shift) {
+ vec >>= shift;
+ if (end - start >= BITS_PER_LONG)
+ vec |= ~map[idx / BITS_PER_LONG + 1] <<
+ (BITS_PER_LONG - shift);
+ }
/*
* If we have a properly aligned and fully unreserved
* BITS_PER_LONG block of pages in front of us, free
@@ -201,19 +214,18 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
count += BITS_PER_LONG;
start += BITS_PER_LONG;
} else {
- unsigned long off = 0;
+ unsigned long cur = start;
- vec >>= start & (BITS_PER_LONG - 1);
- while (vec) {
+ start = ALIGN(start + 1, BITS_PER_LONG);
+ while (vec && cur != start) {
if (vec & 1) {
- page = pfn_to_page(start + off);
+ page = pfn_to_page(cur);
__free_pages_bootmem(page, 0);
count++;
}
vec >>= 1;
- off++;
+ ++cur;
}
- start = ALIGN(start + 1, BITS_PER_LONG);
}
}
@@ -821,6 +833,14 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
}
+void * __init __alloc_bootmem_low_nopanic(unsigned long size,
+ unsigned long align,
+ unsigned long goal)
+{
+ return ___alloc_bootmem_nopanic(size, align, goal,
+ ARCH_LOW_ADDRESS_LIMIT);
+}
+
/**
* __alloc_bootmem_low_node - allocate low boot memory from a specific node
* @pgdat: node to allocate from
diff --git a/mm/bounce.c b/mm/bounce.c
index 04208677556..5f890176860 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -178,8 +178,45 @@ static void bounce_end_io_read_isa(struct bio *bio, int err)
__bounce_end_io_read(bio, isa_page_pool, err);
}
+#ifdef CONFIG_NEED_BOUNCE_POOL
+static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio)
+{
+ struct page *page;
+ struct backing_dev_info *bdi;
+ struct address_space *mapping;
+ struct bio_vec *from;
+ int i;
+
+ if (bio_data_dir(bio) != WRITE)
+ return 0;
+
+ if (!bdi_cap_stable_pages_required(&q->backing_dev_info))
+ return 0;
+
+ /*
+ * Based on the first page that has a valid mapping, decide whether or
+ * not we have to employ bounce buffering to guarantee stable pages.
+ */
+ bio_for_each_segment(from, bio, i) {
+ page = from->bv_page;
+ mapping = page_mapping(page);
+ if (!mapping)
+ continue;
+ bdi = mapping->backing_dev_info;
+ return mapping->host->i_sb->s_flags & MS_SNAP_STABLE;
+ }
+
+ return 0;
+}
+#else
+static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio)
+{
+ return 0;
+}
+#endif /* CONFIG_NEED_BOUNCE_POOL */
+
static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
- mempool_t *pool)
+ mempool_t *pool, int force)
{
struct page *page;
struct bio *bio = NULL;
@@ -192,7 +229,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
/*
* is destination page below bounce pfn?
*/
- if (page_to_pfn(page) <= queue_bounce_pfn(q))
+ if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force)
continue;
/*
@@ -270,6 +307,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
{
+ int must_bounce;
mempool_t *pool;
/*
@@ -278,13 +316,15 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
if (!bio_has_data(*bio_orig))
return;
+ must_bounce = must_snapshot_stable_pages(q, *bio_orig);
+
/*
* for non-isa bounce case, just check if the bounce pfn is equal
* to or bigger than the highest pfn in the system -- in that case,
* don't waste time iterating over bio segments
*/
if (!(q->bounce_gfp & GFP_DMA)) {
- if (queue_bounce_pfn(q) >= blk_max_pfn)
+ if (queue_bounce_pfn(q) >= blk_max_pfn && !must_bounce)
return;
pool = page_pool;
} else {
@@ -295,7 +335,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
/*
* slow path
*/
- __blk_queue_bounce(q, bio_orig, pool);
+ __blk_queue_bounce(q, bio_orig, pool, must_bounce);
}
EXPORT_SYMBOL(blk_queue_bounce);
diff --git a/mm/cleancache.c b/mm/cleancache.c
index 32e6f4136fa..d76ba74be2d 100644
--- a/mm/cleancache.c
+++ b/mm/cleancache.c
@@ -89,7 +89,7 @@ static int cleancache_get_key(struct inode *inode,
fhfn = sb->s_export_op->encode_fh;
if (fhfn) {
len = (*fhfn)(inode, &key->u.fh[0], &maxlen, NULL);
- if (len <= 0 || len == 255)
+ if (len <= FILEID_ROOT || len == FILEID_INVALID)
return -1;
if (maxlen > CLEANCACHE_KEY_MAX)
return -1;
diff --git a/mm/compaction.c b/mm/compaction.c
index 6b807e46649..05ccb4cc0bd 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -15,6 +15,7 @@
#include <linux/sysctl.h>
#include <linux/sysfs.h>
#include <linux/balloon_compaction.h>
+#include <linux/page-isolation.h>
#include "internal.h"
#ifdef CONFIG_COMPACTION
@@ -85,7 +86,7 @@ static inline bool isolation_suitable(struct compact_control *cc,
static void __reset_isolation_suitable(struct zone *zone)
{
unsigned long start_pfn = zone->zone_start_pfn;
- unsigned long end_pfn = zone->zone_start_pfn + zone->spanned_pages;
+ unsigned long end_pfn = zone_end_pfn(zone);
unsigned long pfn;
zone->compact_cached_migrate_pfn = start_pfn;
@@ -215,7 +216,10 @@ static bool suitable_migration_target(struct page *page)
int migratetype = get_pageblock_migratetype(page);
/* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
- if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE)
+ if (migratetype == MIGRATE_RESERVE)
+ return false;
+
+ if (is_migrate_isolate(migratetype))
return false;
/* If the page is a large free page, then allow migration */
@@ -611,8 +615,7 @@ check_compact_cluster:
continue;
next_pageblock:
- low_pfn += pageblock_nr_pages;
- low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
+ low_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages) - 1;
last_pageblock_nr = pageblock_nr;
}
@@ -644,7 +647,7 @@ static void isolate_freepages(struct zone *zone,
struct compact_control *cc)
{
struct page *page;
- unsigned long high_pfn, low_pfn, pfn, zone_end_pfn, end_pfn;
+ unsigned long high_pfn, low_pfn, pfn, z_end_pfn, end_pfn;
int nr_freepages = cc->nr_freepages;
struct list_head *freelist = &cc->freepages;
@@ -663,7 +666,7 @@ static void isolate_freepages(struct zone *zone,
*/
high_pfn = min(low_pfn, pfn);
- zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
+ z_end_pfn = zone_end_pfn(zone);
/*
* Isolate free pages until enough are available to migrate the
@@ -706,7 +709,7 @@ static void isolate_freepages(struct zone *zone,
* only scans within a pageblock
*/
end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
- end_pfn = min(end_pfn, zone_end_pfn);
+ end_pfn = min(end_pfn, z_end_pfn);
isolated = isolate_freepages_block(cc, pfn, end_pfn,
freelist, false);
nr_freepages += isolated;
@@ -795,7 +798,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn);
/* Only scan within a pageblock boundary */
- end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages);
+ end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages);
/* Do not cross the free scanner or scan within a memory hole */
if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) {
@@ -816,6 +819,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
static int compact_finished(struct zone *zone,
struct compact_control *cc)
{
+ unsigned int order;
unsigned long watermark;
if (fatal_signal_pending(current))
@@ -850,22 +854,16 @@ static int compact_finished(struct zone *zone,
return COMPACT_CONTINUE;
/* Direct compactor: Is a suitable page free? */
- if (cc->page) {
- /* Was a suitable page captured? */
- if (*cc->page)
+ for (order = cc->order; order < MAX_ORDER; order++) {
+ struct free_area *area = &zone->free_area[order];
+
+ /* Job done if page is free of the right migratetype */
+ if (!list_empty(&area->free_list[cc->migratetype]))
+ return COMPACT_PARTIAL;
+
+ /* Job done if allocation would set block type */
+ if (cc->order >= pageblock_order && area->nr_free)
return COMPACT_PARTIAL;
- } else {
- unsigned int order;
- for (order = cc->order; order < MAX_ORDER; order++) {
- struct free_area *area = &zone->free_area[cc->order];
- /* Job done if page is free of the right migratetype */
- if (!list_empty(&area->free_list[cc->migratetype]))
- return COMPACT_PARTIAL;
-
- /* Job done if allocation would set block type */
- if (cc->order >= pageblock_order && area->nr_free)
- return COMPACT_PARTIAL;
- }
}
return COMPACT_CONTINUE;
@@ -921,65 +919,11 @@ unsigned long compaction_suitable(struct zone *zone, int order)
return COMPACT_CONTINUE;
}
-static void compact_capture_page(struct compact_control *cc)
-{
- unsigned long flags;
- int mtype, mtype_low, mtype_high;
-
- if (!cc->page || *cc->page)
- return;
-
- /*
- * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP
- * regardless of the migratetype of the freelist is is captured from.
- * This is fine because the order for a high-order MIGRATE_MOVABLE
- * allocation is typically at least a pageblock size and overall
- * fragmentation is not impaired. Other allocation types must
- * capture pages from their own migratelist because otherwise they
- * could pollute other pageblocks like MIGRATE_MOVABLE with
- * difficult to move pages and making fragmentation worse overall.
- */
- if (cc->migratetype == MIGRATE_MOVABLE) {
- mtype_low = 0;
- mtype_high = MIGRATE_PCPTYPES;
- } else {
- mtype_low = cc->migratetype;
- mtype_high = cc->migratetype + 1;
- }
-
- /* Speculatively examine the free lists without zone lock */
- for (mtype = mtype_low; mtype < mtype_high; mtype++) {
- int order;
- for (order = cc->order; order < MAX_ORDER; order++) {
- struct page *page;
- struct free_area *area;
- area = &(cc->zone->free_area[order]);
- if (list_empty(&area->free_list[mtype]))
- continue;
-
- /* Take the lock and attempt capture of the page */
- if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc))
- return;
- if (!list_empty(&area->free_list[mtype])) {
- page = list_entry(area->free_list[mtype].next,
- struct page, lru);
- if (capture_free_page(page, cc->order, mtype)) {
- spin_unlock_irqrestore(&cc->zone->lock,
- flags);
- *cc->page = page;
- return;
- }
- }
- spin_unlock_irqrestore(&cc->zone->lock, flags);
- }
- }
-}
-
static int compact_zone(struct zone *zone, struct compact_control *cc)
{
int ret;
unsigned long start_pfn = zone->zone_start_pfn;
- unsigned long end_pfn = zone->zone_start_pfn + zone->spanned_pages;
+ unsigned long end_pfn = zone_end_pfn(zone);
ret = compaction_suitable(zone, cc->order);
switch (ret) {
@@ -1036,7 +980,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
nr_migrate = cc->nr_migratepages;
err = migrate_pages(&cc->migratepages, compaction_alloc,
- (unsigned long)cc, false,
+ (unsigned long)cc,
cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC,
MR_COMPACTION);
update_nr_listpages(cc);
@@ -1054,9 +998,6 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
goto out;
}
}
-
- /* Capture a page now if it is a suitable size */
- compact_capture_page(cc);
}
out:
@@ -1069,8 +1010,7 @@ out:
static unsigned long compact_zone_order(struct zone *zone,
int order, gfp_t gfp_mask,
- bool sync, bool *contended,
- struct page **page)
+ bool sync, bool *contended)
{
unsigned long ret;
struct compact_control cc = {
@@ -1080,7 +1020,6 @@ static unsigned long compact_zone_order(struct zone *zone,
.migratetype = allocflags_to_migratetype(gfp_mask),
.zone = zone,
.sync = sync,
- .page = page,
};
INIT_LIST_HEAD(&cc.freepages);
INIT_LIST_HEAD(&cc.migratepages);
@@ -1110,7 +1049,7 @@ int sysctl_extfrag_threshold = 500;
*/
unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *nodemask,
- bool sync, bool *contended, struct page **page)
+ bool sync, bool *contended)
{
enum zone_type high_zoneidx = gfp_zone(gfp_mask);
int may_enter_fs = gfp_mask & __GFP_FS;
@@ -1136,7 +1075,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
int status;
status = compact_zone_order(zone, order, gfp_mask, sync,
- contended, page);
+ contended);
rc = max(status, rc);
/* If a normal allocation would succeed, stop compacting */
@@ -1150,7 +1089,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
/* Compact all zones within a node */
-static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
+static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
{
int zoneid;
struct zone *zone;
@@ -1183,34 +1122,30 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
VM_BUG_ON(!list_empty(&cc->freepages));
VM_BUG_ON(!list_empty(&cc->migratepages));
}
-
- return 0;
}
-int compact_pgdat(pg_data_t *pgdat, int order)
+void compact_pgdat(pg_data_t *pgdat, int order)
{
struct compact_control cc = {
.order = order,
.sync = false,
- .page = NULL,
};
- return __compact_pgdat(pgdat, &cc);
+ __compact_pgdat(pgdat, &cc);
}
-static int compact_node(int nid)
+static void compact_node(int nid)
{
struct compact_control cc = {
.order = -1,
.sync = true,
- .page = NULL,
};
- return __compact_pgdat(NODE_DATA(nid), &cc);
+ __compact_pgdat(NODE_DATA(nid), &cc);
}
/* Compact all nodes in the system */
-static int compact_nodes(void)
+static void compact_nodes(void)
{
int nid;
@@ -1219,8 +1154,6 @@ static int compact_nodes(void)
for_each_online_node(nid)
compact_node(nid);
-
- return COMPACT_COMPLETE;
}
/* The written value is actually unused, all memory is compacted */
@@ -1231,7 +1164,7 @@ int sysctl_compaction_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
if (write)
- return compact_nodes();
+ compact_nodes();
return 0;
}
diff --git a/mm/fadvise.c b/mm/fadvise.c
index a47f0f50c89..7e092689a12 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -17,6 +17,7 @@
#include <linux/fadvise.h>
#include <linux/writeback.h>
#include <linux/syscalls.h>
+#include <linux/swap.h>
#include <asm/unistd.h>
@@ -38,7 +39,7 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
if (!f.file)
return -EBADF;
- if (S_ISFIFO(f.file->f_path.dentry->d_inode->i_mode)) {
+ if (S_ISFIFO(file_inode(f.file)->i_mode)) {
ret = -ESPIPE;
goto out;
}
@@ -120,9 +121,22 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
start_index = (offset+(PAGE_CACHE_SIZE-1)) >> PAGE_CACHE_SHIFT;
end_index = (endbyte >> PAGE_CACHE_SHIFT);
- if (end_index >= start_index)
- invalidate_mapping_pages(mapping, start_index,
+ if (end_index >= start_index) {
+ unsigned long count = invalidate_mapping_pages(mapping,
+ start_index, end_index);
+
+ /*
+ * If fewer pages were invalidated than expected then
+ * it is possible that some of the pages were on
+ * a per-cpu pagevec for a remote CPU. Drain all
+ * pagevecs and try again.
+ */
+ if (count < (end_index - start_index + 1)) {
+ lru_add_drain_all();
+ invalidate_mapping_pages(mapping, start_index,
end_index);
+ }
+ }
break;
default:
ret = -EINVAL;
diff --git a/mm/filemap.c b/mm/filemap.c
index 83efee76a5c..e1979fdca80 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1711,7 +1711,7 @@ EXPORT_SYMBOL(filemap_fault);
int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(vma->vm_file);
int ret = VM_FAULT_LOCKED;
sb_start_pagefault(inode->i_sb);
@@ -1728,6 +1728,7 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
* see the dirty page and writeprotect it again.
*/
set_page_dirty(page);
+ wait_for_stable_page(page);
out:
sb_end_pagefault(inode->i_sb);
return ret;
@@ -2056,7 +2057,7 @@ EXPORT_SYMBOL(iov_iter_fault_in_readable);
/*
* Return the count of just the current iov_iter segment.
*/
-size_t iov_iter_single_seg_count(struct iov_iter *i)
+size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
const struct iovec *iov = i->iov;
if (i->nr_segs == 1)
@@ -2274,7 +2275,7 @@ repeat:
return NULL;
}
found:
- wait_on_page_writeback(page);
+ wait_for_stable_page(page);
return page;
}
EXPORT_SYMBOL(grab_cache_page_write_begin);
diff --git a/mm/fremap.c b/mm/fremap.c
index a0aaf0e5680..0cd4c11488e 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -129,6 +129,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
struct vm_area_struct *vma;
int err = -EINVAL;
int has_write_lock = 0;
+ vm_flags_t vm_flags;
if (prot)
return err;
@@ -160,15 +161,11 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
/*
* Make sure the vma is shared, that it supports prefaulting,
* and that the remapped range is valid and fully within
- * the single existing vma. vm_private_data is used as a
- * swapout cursor in a VM_NONLINEAR vma.
+ * the single existing vma.
*/
if (!vma || !(vma->vm_flags & VM_SHARED))
goto out;
- if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR))
- goto out;
-
if (!vma->vm_ops || !vma->vm_ops->remap_pages)
goto out;
@@ -177,6 +174,13 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
/* Must set VM_NONLINEAR before any pages are populated. */
if (!(vma->vm_flags & VM_NONLINEAR)) {
+ /*
+ * vm_private_data is used as a swapout cursor
+ * in a VM_NONLINEAR vma.
+ */
+ if (vma->vm_private_data)
+ goto out;
+
/* Don't need a nonlinear mapping, exit success */
if (pgoff == linear_page_index(vma, start)) {
err = 0;
@@ -184,6 +188,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
}
if (!has_write_lock) {
+get_write_lock:
up_read(&mm->mmap_sem);
down_writ