diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 27 | ||||
-rw-r--r-- | mm/backing-dev.c | 11 | ||||
-rw-r--r-- | mm/bootmem.c | 32 | ||||
-rw-r--r-- | mm/bounce.c | 48 | ||||
-rw-r--r-- | mm/cleancache.c | 2 | ||||
-rw-r--r-- | mm/compaction.c | 133 | ||||
-rw-r--r-- | mm/fadvise.c | 20 | ||||
-rw-r--r-- | mm/filemap.c | 7 | ||||
-rw-r--r-- | mm/fremap.c | 51 | ||||
-rw-r--r-- | mm/huge_memory.c | 113 | ||||
-rw-r--r-- | mm/hugetlb.c | 39 | ||||
-rw-r--r-- | mm/internal.h | 7 | ||||
-rw-r--r-- | mm/kmemleak.c | 14 | ||||
-rw-r--r-- | mm/ksm.c | 670 | ||||
-rw-r--r-- | mm/madvise.c | 105 | ||||
-rw-r--r-- | mm/memblock.c | 70 | ||||
-rw-r--r-- | mm/memcontrol.c | 477 | ||||
-rw-r--r-- | mm/memory-failure.c | 202 | ||||
-rw-r--r-- | mm/memory.c | 127 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 553 | ||||
-rw-r--r-- | mm/mempolicy.c | 59 | ||||
-rw-r--r-- | mm/migrate.c | 154 | ||||
-rw-r--r-- | mm/mincore.c | 5 | ||||
-rw-r--r-- | mm/mlock.c | 137 | ||||
-rw-r--r-- | mm/mm_init.c | 31 | ||||
-rw-r--r-- | mm/mmap.c | 123 | ||||
-rw-r--r-- | mm/mmu_notifier.c | 102 | ||||
-rw-r--r-- | mm/mmzone.c | 20 | ||||
-rw-r--r-- | mm/mremap.c | 28 | ||||
-rw-r--r-- | mm/nobootmem.c | 23 | ||||
-rw-r--r-- | mm/nommu.c | 41 | ||||
-rw-r--r-- | mm/oom_kill.c | 6 | ||||
-rw-r--r-- | mm/page-writeback.c | 28 | ||||
-rw-r--r-- | mm/page_alloc.c | 498 | ||||
-rw-r--r-- | mm/rmap.c | 30 | ||||
-rw-r--r-- | mm/shmem.c | 102 | ||||
-rw-r--r-- | mm/slab.c | 2 | ||||
-rw-r--r-- | mm/slob.c | 2 | ||||
-rw-r--r-- | mm/slub.c | 4 | ||||
-rw-r--r-- | mm/sparse.c | 12 | ||||
-rw-r--r-- | mm/swap.c | 9 | ||||
-rw-r--r-- | mm/swap_state.c | 58 | ||||
-rw-r--r-- | mm/swapfile.c | 176 | ||||
-rw-r--r-- | mm/util.c | 26 | ||||
-rw-r--r-- | mm/vmalloc.c | 33 | ||||
-rw-r--r-- | mm/vmscan.c | 397 | ||||
-rw-r--r-- | mm/vmstat.c | 7 |
47 files changed, 3230 insertions, 1591 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 278e3ab1f16..ae55c1e04d1 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1,6 +1,6 @@ config SELECT_MEMORY_MODEL def_bool y - depends on EXPERIMENTAL || ARCH_SELECT_MEMORY_MODEL + depends on ARCH_SELECT_MEMORY_MODEL choice prompt "Memory model" @@ -162,10 +162,16 @@ config MOVABLE_NODE Say Y here if you want to hotplug a whole node. Say N here if you want kernel to use memory on all nodes evenly. +# +# Only be set on architectures that have completely implemented memory hotplug +# feature. If you are not sure, don't touch it. +# +config HAVE_BOOTMEM_INFO_NODE + def_bool n + # eventually, we can have this option just 'select SPARSEMEM' config MEMORY_HOTPLUG bool "Allow for memory hot-add" - select MEMORY_ISOLATION depends on SPARSEMEM || X86_64_ACPI_NUMA depends on HOTPLUG && ARCH_ENABLE_MEMORY_HOTPLUG depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390) @@ -176,6 +182,8 @@ config MEMORY_HOTPLUG_SPARSE config MEMORY_HOTREMOVE bool "Allow for memory hot remove" + select MEMORY_ISOLATION + select HAVE_BOOTMEM_INFO_NODE if X86_64 depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE depends on MIGRATION @@ -258,6 +266,19 @@ config BOUNCE def_bool y depends on BLOCK && MMU && (ZONE_DMA || HIGHMEM) +# On the 'tile' arch, USB OHCI needs the bounce pool since tilegx will often +# have more than 4GB of memory, but we don't currently use the IOTLB to present +# a 32-bit address to OHCI. So we need to use a bounce pool instead. +# +# We also use the bounce pool to provide stable page writes for jbd. jbd +# initiates buffer writeback without locking the page or setting PG_writeback, +# and fixing that behavior (a second time; jbd2 doesn't have this problem) is +# a major rework effort. Instead, use the bounce buffer to snapshot pages +# (until jbd goes away). The only jbd user is ext3. +config NEED_BOUNCE_POOL + bool + default y if (TILE && USB_OHCI_HCD) || (BLK_DEV_INTEGRITY && JBD) + config NR_QUICK int depends on QUICKLIST @@ -266,7 +287,7 @@ config NR_QUICK config VIRT_TO_BUS def_bool y - depends on !ARCH_NO_VIRT_TO_BUS + depends on HAVE_VIRT_TO_BUS config MMU_NOTIFIER bool diff --git a/mm/backing-dev.c b/mm/backing-dev.c index d3ca2b3ee17..41733c5dc82 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -221,12 +221,23 @@ static ssize_t max_ratio_store(struct device *dev, } BDI_SHOW(max_ratio, bdi->max_ratio) +static ssize_t stable_pages_required_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return snprintf(page, PAGE_SIZE-1, "%d\n", + bdi_cap_stable_pages_required(bdi) ? 1 : 0); +} + #define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) static struct device_attribute bdi_dev_attrs[] = { __ATTR_RW(read_ahead_kb), __ATTR_RW(min_ratio), __ATTR_RW(max_ratio), + __ATTR_RO(stable_pages_required), __ATTR_NULL, }; diff --git a/mm/bootmem.c b/mm/bootmem.c index 1324cd74fae..2b0bcb019ec 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -185,10 +185,23 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) while (start < end) { unsigned long *map, idx, vec; + unsigned shift; map = bdata->node_bootmem_map; idx = start - bdata->node_min_pfn; + shift = idx & (BITS_PER_LONG - 1); + /* + * vec holds at most BITS_PER_LONG map bits, + * bit 0 corresponds to start. + */ vec = ~map[idx / BITS_PER_LONG]; + + if (shift) { + vec >>= shift; + if (end - start >= BITS_PER_LONG) + vec |= ~map[idx / BITS_PER_LONG + 1] << + (BITS_PER_LONG - shift); + } /* * If we have a properly aligned and fully unreserved * BITS_PER_LONG block of pages in front of us, free @@ -201,19 +214,18 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) count += BITS_PER_LONG; start += BITS_PER_LONG; } else { - unsigned long off = 0; + unsigned long cur = start; - vec >>= start & (BITS_PER_LONG - 1); - while (vec) { + start = ALIGN(start + 1, BITS_PER_LONG); + while (vec && cur != start) { if (vec & 1) { - page = pfn_to_page(start + off); + page = pfn_to_page(cur); __free_pages_bootmem(page, 0); count++; } vec >>= 1; - off++; + ++cur; } - start = ALIGN(start + 1, BITS_PER_LONG); } } @@ -821,6 +833,14 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT); } +void * __init __alloc_bootmem_low_nopanic(unsigned long size, + unsigned long align, + unsigned long goal) +{ + return ___alloc_bootmem_nopanic(size, align, goal, + ARCH_LOW_ADDRESS_LIMIT); +} + /** * __alloc_bootmem_low_node - allocate low boot memory from a specific node * @pgdat: node to allocate from diff --git a/mm/bounce.c b/mm/bounce.c index 04208677556..5f890176860 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -178,8 +178,45 @@ static void bounce_end_io_read_isa(struct bio *bio, int err) __bounce_end_io_read(bio, isa_page_pool, err); } +#ifdef CONFIG_NEED_BOUNCE_POOL +static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio) +{ + struct page *page; + struct backing_dev_info *bdi; + struct address_space *mapping; + struct bio_vec *from; + int i; + + if (bio_data_dir(bio) != WRITE) + return 0; + + if (!bdi_cap_stable_pages_required(&q->backing_dev_info)) + return 0; + + /* + * Based on the first page that has a valid mapping, decide whether or + * not we have to employ bounce buffering to guarantee stable pages. + */ + bio_for_each_segment(from, bio, i) { + page = from->bv_page; + mapping = page_mapping(page); + if (!mapping) + continue; + bdi = mapping->backing_dev_info; + return mapping->host->i_sb->s_flags & MS_SNAP_STABLE; + } + + return 0; +} +#else +static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio) +{ + return 0; +} +#endif /* CONFIG_NEED_BOUNCE_POOL */ + static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, - mempool_t *pool) + mempool_t *pool, int force) { struct page *page; struct bio *bio = NULL; @@ -192,7 +229,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, /* * is destination page below bounce pfn? */ - if (page_to_pfn(page) <= queue_bounce_pfn(q)) + if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force) continue; /* @@ -270,6 +307,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) { + int must_bounce; mempool_t *pool; /* @@ -278,13 +316,15 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) if (!bio_has_data(*bio_orig)) return; + must_bounce = must_snapshot_stable_pages(q, *bio_orig); + /* * for non-isa bounce case, just check if the bounce pfn is equal * to or bigger than the highest pfn in the system -- in that case, * don't waste time iterating over bio segments */ if (!(q->bounce_gfp & GFP_DMA)) { - if (queue_bounce_pfn(q) >= blk_max_pfn) + if (queue_bounce_pfn(q) >= blk_max_pfn && !must_bounce) return; pool = page_pool; } else { @@ -295,7 +335,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) /* * slow path */ - __blk_queue_bounce(q, bio_orig, pool); + __blk_queue_bounce(q, bio_orig, pool, must_bounce); } EXPORT_SYMBOL(blk_queue_bounce); diff --git a/mm/cleancache.c b/mm/cleancache.c index 32e6f4136fa..d76ba74be2d 100644 --- a/mm/cleancache.c +++ b/mm/cleancache.c @@ -89,7 +89,7 @@ static int cleancache_get_key(struct inode *inode, fhfn = sb->s_export_op->encode_fh; if (fhfn) { len = (*fhfn)(inode, &key->u.fh[0], &maxlen, NULL); - if (len <= 0 || len == 255) + if (len <= FILEID_ROOT || len == FILEID_INVALID) return -1; if (maxlen > CLEANCACHE_KEY_MAX) return -1; diff --git a/mm/compaction.c b/mm/compaction.c index 6b807e46649..05ccb4cc0bd 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -15,6 +15,7 @@ #include <linux/sysctl.h> #include <linux/sysfs.h> #include <linux/balloon_compaction.h> +#include <linux/page-isolation.h> #include "internal.h" #ifdef CONFIG_COMPACTION @@ -85,7 +86,7 @@ static inline bool isolation_suitable(struct compact_control *cc, static void __reset_isolation_suitable(struct zone *zone) { unsigned long start_pfn = zone->zone_start_pfn; - unsigned long end_pfn = zone->zone_start_pfn + zone->spanned_pages; + unsigned long end_pfn = zone_end_pfn(zone); unsigned long pfn; zone->compact_cached_migrate_pfn = start_pfn; @@ -215,7 +216,10 @@ static bool suitable_migration_target(struct page *page) int migratetype = get_pageblock_migratetype(page); /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ - if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) + if (migratetype == MIGRATE_RESERVE) + return false; + + if (is_migrate_isolate(migratetype)) return false; /* If the page is a large free page, then allow migration */ @@ -611,8 +615,7 @@ check_compact_cluster: continue; next_pageblock: - low_pfn += pageblock_nr_pages; - low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; + low_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages) - 1; last_pageblock_nr = pageblock_nr; } @@ -644,7 +647,7 @@ static void isolate_freepages(struct zone *zone, struct compact_control *cc) { struct page *page; - unsigned long high_pfn, low_pfn, pfn, zone_end_pfn, end_pfn; + unsigned long high_pfn, low_pfn, pfn, z_end_pfn, end_pfn; int nr_freepages = cc->nr_freepages; struct list_head *freelist = &cc->freepages; @@ -663,7 +666,7 @@ static void isolate_freepages(struct zone *zone, */ high_pfn = min(low_pfn, pfn); - zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; + z_end_pfn = zone_end_pfn(zone); /* * Isolate free pages until enough are available to migrate the @@ -706,7 +709,7 @@ static void isolate_freepages(struct zone *zone, * only scans within a pageblock */ end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); - end_pfn = min(end_pfn, zone_end_pfn); + end_pfn = min(end_pfn, z_end_pfn); isolated = isolate_freepages_block(cc, pfn, end_pfn, freelist, false); nr_freepages += isolated; @@ -795,7 +798,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn); /* Only scan within a pageblock boundary */ - end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages); + end_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages); /* Do not cross the free scanner or scan within a memory hole */ if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) { @@ -816,6 +819,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, static int compact_finished(struct zone *zone, struct compact_control *cc) { + unsigned int order; unsigned long watermark; if (fatal_signal_pending(current)) @@ -850,22 +854,16 @@ static int compact_finished(struct zone *zone, return COMPACT_CONTINUE; /* Direct compactor: Is a suitable page free? */ - if (cc->page) { - /* Was a suitable page captured? */ - if (*cc->page) + for (order = cc->order; order < MAX_ORDER; order++) { + struct free_area *area = &zone->free_area[order]; + + /* Job done if page is free of the right migratetype */ + if (!list_empty(&area->free_list[cc->migratetype])) + return COMPACT_PARTIAL; + + /* Job done if allocation would set block type */ + if (cc->order >= pageblock_order && area->nr_free) return COMPACT_PARTIAL; - } else { - unsigned int order; - for (order = cc->order; order < MAX_ORDER; order++) { - struct free_area *area = &zone->free_area[cc->order]; - /* Job done if page is free of the right migratetype */ - if (!list_empty(&area->free_list[cc->migratetype])) - return COMPACT_PARTIAL; - - /* Job done if allocation would set block type */ - if (cc->order >= pageblock_order && area->nr_free) - return COMPACT_PARTIAL; - } } return COMPACT_CONTINUE; @@ -921,65 +919,11 @@ unsigned long compaction_suitable(struct zone *zone, int order) return COMPACT_CONTINUE; } -static void compact_capture_page(struct compact_control *cc) -{ - unsigned long flags; - int mtype, mtype_low, mtype_high; - - if (!cc->page || *cc->page) - return; - - /* - * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP - * regardless of the migratetype of the freelist is is captured from. - * This is fine because the order for a high-order MIGRATE_MOVABLE - * allocation is typically at least a pageblock size and overall - * fragmentation is not impaired. Other allocation types must - * capture pages from their own migratelist because otherwise they - * could pollute other pageblocks like MIGRATE_MOVABLE with - * difficult to move pages and making fragmentation worse overall. - */ - if (cc->migratetype == MIGRATE_MOVABLE) { - mtype_low = 0; - mtype_high = MIGRATE_PCPTYPES; - } else { - mtype_low = cc->migratetype; - mtype_high = cc->migratetype + 1; - } - - /* Speculatively examine the free lists without zone lock */ - for (mtype = mtype_low; mtype < mtype_high; mtype++) { - int order; - for (order = cc->order; order < MAX_ORDER; order++) { - struct page *page; - struct free_area *area; - area = &(cc->zone->free_area[order]); - if (list_empty(&area->free_list[mtype])) - continue; - - /* Take the lock and attempt capture of the page */ - if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc)) - return; - if (!list_empty(&area->free_list[mtype])) { - page = list_entry(area->free_list[mtype].next, - struct page, lru); - if (capture_free_page(page, cc->order, mtype)) { - spin_unlock_irqrestore(&cc->zone->lock, - flags); - *cc->page = page; - return; - } - } - spin_unlock_irqrestore(&cc->zone->lock, flags); - } - } -} - static int compact_zone(struct zone *zone, struct compact_control *cc) { int ret; unsigned long start_pfn = zone->zone_start_pfn; - unsigned long end_pfn = zone->zone_start_pfn + zone->spanned_pages; + unsigned long end_pfn = zone_end_pfn(zone); ret = compaction_suitable(zone, cc->order); switch (ret) { @@ -1036,7 +980,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) nr_migrate = cc->nr_migratepages; err = migrate_pages(&cc->migratepages, compaction_alloc, - (unsigned long)cc, false, + (unsigned long)cc, cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC, MR_COMPACTION); update_nr_listpages(cc); @@ -1054,9 +998,6 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) goto out; } } - - /* Capture a page now if it is a suitable size */ - compact_capture_page(cc); } out: @@ -1069,8 +1010,7 @@ out: static unsigned long compact_zone_order(struct zone *zone, int order, gfp_t gfp_mask, - bool sync, bool *contended, - struct page **page) + bool sync, bool *contended) { unsigned long ret; struct compact_control cc = { @@ -1080,7 +1020,6 @@ static unsigned long compact_zone_order(struct zone *zone, .migratetype = allocflags_to_migratetype(gfp_mask), .zone = zone, .sync = sync, - .page = page, }; INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); @@ -1110,7 +1049,7 @@ int sysctl_extfrag_threshold = 500; */ unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *nodemask, - bool sync, bool *contended, struct page **page) + bool sync, bool *contended) { enum zone_type high_zoneidx = gfp_zone(gfp_mask); int may_enter_fs = gfp_mask & __GFP_FS; @@ -1136,7 +1075,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, int status; status = compact_zone_order(zone, order, gfp_mask, sync, - contended, page); + contended); rc = max(status, rc); /* If a normal allocation would succeed, stop compacting */ @@ -1150,7 +1089,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, /* Compact all zones within a node */ -static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc) +static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc) { int zoneid; struct zone *zone; @@ -1183,34 +1122,30 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc) VM_BUG_ON(!list_empty(&cc->freepages)); VM_BUG_ON(!list_empty(&cc->migratepages)); } - - return 0; } -int compact_pgdat(pg_data_t *pgdat, int order) +void compact_pgdat(pg_data_t *pgdat, int order) { struct compact_control cc = { .order = order, .sync = false, - .page = NULL, }; - return __compact_pgdat(pgdat, &cc); + __compact_pgdat(pgdat, &cc); } -static int compact_node(int nid) +static void compact_node(int nid) { struct compact_control cc = { .order = -1, .sync = true, - .page = NULL, }; - return __compact_pgdat(NODE_DATA(nid), &cc); + __compact_pgdat(NODE_DATA(nid), &cc); } /* Compact all nodes in the system */ -static int compact_nodes(void) +static void compact_nodes(void) { int nid; @@ -1219,8 +1154,6 @@ static int compact_nodes(void) for_each_online_node(nid) compact_node(nid); - - return COMPACT_COMPLETE; } /* The written value is actually unused, all memory is compacted */ @@ -1231,7 +1164,7 @@ int sysctl_compaction_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { if (write) - return compact_nodes(); + compact_nodes(); return 0; } diff --git a/mm/fadvise.c b/mm/fadvise.c index a47f0f50c89..7e092689a12 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -17,6 +17,7 @@ #include <linux/fadvise.h> #include <linux/writeback.h> #include <linux/syscalls.h> +#include <linux/swap.h> #include <asm/unistd.h> @@ -38,7 +39,7 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) if (!f.file) return -EBADF; - if (S_ISFIFO(f.file->f_path.dentry->d_inode->i_mode)) { + if (S_ISFIFO(file_inode(f.file)->i_mode)) { ret = -ESPIPE; goto out; } @@ -120,9 +121,22 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) start_index = (offset+(PAGE_CACHE_SIZE-1)) >> PAGE_CACHE_SHIFT; end_index = (endbyte >> PAGE_CACHE_SHIFT); - if (end_index >= start_index) - invalidate_mapping_pages(mapping, start_index, + if (end_index >= start_index) { + unsigned long count = invalidate_mapping_pages(mapping, + start_index, end_index); + + /* + * If fewer pages were invalidated than expected then + * it is possible that some of the pages were on + * a per-cpu pagevec for a remote CPU. Drain all + * pagevecs and try again. + */ + if (count < (end_index - start_index + 1)) { + lru_add_drain_all(); + invalidate_mapping_pages(mapping, start_index, end_index); + } + } break; default: ret = -EINVAL; diff --git a/mm/filemap.c b/mm/filemap.c index 83efee76a5c..e1979fdca80 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1711,7 +1711,7 @@ EXPORT_SYMBOL(filemap_fault); int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = file_inode(vma->vm_file); int ret = VM_FAULT_LOCKED; sb_start_pagefault(inode->i_sb); @@ -1728,6 +1728,7 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) * see the dirty page and writeprotect it again. */ set_page_dirty(page); + wait_for_stable_page(page); out: sb_end_pagefault(inode->i_sb); return ret; @@ -2056,7 +2057,7 @@ EXPORT_SYMBOL(iov_iter_fault_in_readable); /* * Return the count of just the current iov_iter segment. */ -size_t iov_iter_single_seg_count(struct iov_iter *i) +size_t iov_iter_single_seg_count(const struct iov_iter *i) { const struct iovec *iov = i->iov; if (i->nr_segs == 1) @@ -2274,7 +2275,7 @@ repeat: return NULL; } found: - wait_on_page_writeback(page); + wait_for_stable_page(page); return page; } EXPORT_SYMBOL(grab_cache_page_write_begin); diff --git a/mm/fremap.c b/mm/fremap.c index a0aaf0e5680..0cd4c11488e 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -129,6 +129,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, struct vm_area_struct *vma; int err = -EINVAL; int has_write_lock = 0; + vm_flags_t vm_flags; if (prot) return err; @@ -160,15 +161,11 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, /* * Make sure the vma is shared, that it supports prefaulting, * and that the remapped range is valid and fully within - * the single existing vma. vm_private_data is used as a - * swapout cursor in a VM_NONLINEAR vma. + * the single existing vma. */ if (!vma || !(vma->vm_flags & VM_SHARED)) goto out; - if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR)) - goto out; - if (!vma->vm_ops || !vma->vm_ops->remap_pages) goto out; @@ -177,6 +174,13 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, /* Must set VM_NONLINEAR before any pages are populated. */ if (!(vma->vm_flags & VM_NONLINEAR)) { + /* + * vm_private_data is used as a swapout cursor + * in a VM_NONLINEAR vma. + */ + if (vma->vm_private_data) + goto out; + /* Don't need a nonlinear mapping, exit success */ if (pgoff == linear_page_index(vma, start)) { err = 0; @@ -184,6 +188,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, } if (!has_write_lock) { +get_write_lock: up_read(&mm->mmap_sem); down_writ |