aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig3
-rw-r--r--mm/Makefile1
-rw-r--r--mm/allocpercpu.c20
-rw-r--r--mm/bootmem.c37
-rw-r--r--mm/bounce.c2
-rw-r--r--mm/filemap.c265
-rw-r--r--mm/filemap_xip.c70
-rw-r--r--mm/fremap.c3
-rw-r--r--mm/highmem.c5
-rw-r--r--mm/hugetlb.c97
-rw-r--r--mm/madvise.c4
-rw-r--r--mm/memcontrol.c23
-rw-r--r--mm/memory.c79
-rw-r--r--mm/mempolicy.c1
-rw-r--r--mm/migrate.c33
-rw-r--r--mm/mlock.c2
-rw-r--r--mm/mm_init.c10
-rw-r--r--mm/mmap.c172
-rw-r--r--mm/mmu_notifier.c277
-rw-r--r--mm/mmzone.c2
-rw-r--r--mm/mprotect.c3
-rw-r--r--mm/mremap.c6
-rw-r--r--mm/nommu.c25
-rw-r--r--mm/oom_kill.c6
-rw-r--r--mm/page-writeback.c12
-rw-r--r--mm/page_alloc.c45
-rw-r--r--mm/page_isolation.c13
-rw-r--r--mm/quicklist.c9
-rw-r--r--mm/readahead.c6
-rw-r--r--mm/rmap.c55
-rw-r--r--mm/shmem.c15
-rw-r--r--mm/shmem_acl.c2
-rw-r--r--mm/slab.c12
-rw-r--r--mm/slob.c16
-rw-r--r--mm/slub.c45
-rw-r--r--mm/sparse.c3
-rw-r--r--mm/swap.c9
-rw-r--r--mm/swap_state.c40
-rw-r--r--mm/swapfile.c16
-rw-r--r--mm/tiny-shmem.c26
-rw-r--r--mm/truncate.c16
-rw-r--r--mm/util.c70
-rw-r--r--mm/vmalloc.c13
-rw-r--r--mm/vmscan.c88
-rw-r--r--mm/vmstat.c19
45 files changed, 1304 insertions, 372 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index aa799007a11..0bd9c2dbb2a 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -205,3 +205,6 @@ config NR_QUICK
config VIRT_TO_BUS
def_bool y
depends on !ARCH_NO_VIRT_TO_BUS
+
+config MMU_NOTIFIER
+ bool
diff --git a/mm/Makefile b/mm/Makefile
index 06ca2381fef..da4ccf015ae 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_SHMEM) += shmem.o
obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
obj-$(CONFIG_SLOB) += slob.o
+obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
obj-$(CONFIG_SLAB) += slab.o
obj-$(CONFIG_SLUB) += slub.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
index 843364594e2..4297bc41bfd 100644
--- a/mm/allocpercpu.c
+++ b/mm/allocpercpu.c
@@ -18,27 +18,28 @@
* Depopulating per-cpu data for a cpu going offline would be a typical
* use case. You need to register a cpu hotplug handler for that purpose.
*/
-void percpu_depopulate(void *__pdata, int cpu)
+static void percpu_depopulate(void *__pdata, int cpu)
{
struct percpu_data *pdata = __percpu_disguise(__pdata);
kfree(pdata->ptrs[cpu]);
pdata->ptrs[cpu] = NULL;
}
-EXPORT_SYMBOL_GPL(percpu_depopulate);
/**
* percpu_depopulate_mask - depopulate per-cpu data for some cpu's
* @__pdata: per-cpu data to depopulate
* @mask: depopulate per-cpu data for cpu's selected through mask bits
*/
-void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
+static void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
{
int cpu;
for_each_cpu_mask_nr(cpu, *mask)
percpu_depopulate(__pdata, cpu);
}
-EXPORT_SYMBOL_GPL(__percpu_depopulate_mask);
+
+#define percpu_depopulate_mask(__pdata, mask) \
+ __percpu_depopulate_mask((__pdata), &(mask))
/**
* percpu_populate - populate per-cpu data for given cpu
@@ -51,7 +52,7 @@ EXPORT_SYMBOL_GPL(__percpu_depopulate_mask);
* use case. You need to register a cpu hotplug handler for that purpose.
* Per-cpu object is populated with zeroed buffer.
*/
-void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
+static void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
{
struct percpu_data *pdata = __percpu_disguise(__pdata);
int node = cpu_to_node(cpu);
@@ -68,7 +69,6 @@ void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
pdata->ptrs[cpu] = kzalloc(size, gfp);
return pdata->ptrs[cpu];
}
-EXPORT_SYMBOL_GPL(percpu_populate);
/**
* percpu_populate_mask - populate per-cpu data for more cpu's
@@ -79,8 +79,8 @@ EXPORT_SYMBOL_GPL(percpu_populate);
*
* Per-cpu objects are populated with zeroed buffers.
*/
-int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
- cpumask_t *mask)
+static int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+ cpumask_t *mask)
{
cpumask_t populated;
int cpu;
@@ -94,7 +94,9 @@ int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
cpu_set(cpu, populated);
return 0;
}
-EXPORT_SYMBOL_GPL(__percpu_populate_mask);
+
+#define percpu_populate_mask(__pdata, size, gfp, mask) \
+ __percpu_populate_mask((__pdata), (size), (gfp), &(mask))
/**
* percpu_alloc_mask - initial setup of per-cpu data
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 4af15d0340a..ad8eec6e44a 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -405,6 +405,29 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size,
}
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
+static unsigned long align_idx(struct bootmem_data *bdata, unsigned long idx,
+ unsigned long step)
+{
+ unsigned long base = bdata->node_min_pfn;
+
+ /*
+ * Align the index with respect to the node start so that the
+ * combination of both satisfies the requested alignment.
+ */
+
+ return ALIGN(base + idx, step) - base;
+}
+
+static unsigned long align_off(struct bootmem_data *bdata, unsigned long off,
+ unsigned long align)
+{
+ unsigned long base = PFN_PHYS(bdata->node_min_pfn);
+
+ /* Same as align_idx for byte offsets */
+
+ return ALIGN(base + off, align) - base;
+}
+
static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
unsigned long size, unsigned long align,
unsigned long goal, unsigned long limit)
@@ -441,7 +464,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
else
start = ALIGN(min, step);
- sidx = start - bdata->node_min_pfn;;
+ sidx = start - bdata->node_min_pfn;
midx = max - bdata->node_min_pfn;
if (bdata->hint_idx > sidx) {
@@ -450,7 +473,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
* catch the fallback below.
*/
fallback = sidx + 1;
- sidx = ALIGN(bdata->hint_idx, step);
+ sidx = align_idx(bdata, bdata->hint_idx, step);
}
while (1) {
@@ -459,7 +482,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
unsigned long eidx, i, start_off, end_off;
find_block:
sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx);
- sidx = ALIGN(sidx, step);
+ sidx = align_idx(bdata, sidx, step);
eidx = sidx + PFN_UP(size);
if (sidx >= midx || eidx > midx)
@@ -467,15 +490,15 @@ find_block:
for (i = sidx; i < eidx; i++)
if (test_bit(i, bdata->node_bootmem_map)) {
- sidx = ALIGN(i, step);
+ sidx = align_idx(bdata, i, step);
if (sidx == i)
sidx += step;
goto find_block;
}
- if (bdata->last_end_off &&
+ if (bdata->last_end_off & (PAGE_SIZE - 1) &&
PFN_DOWN(bdata->last_end_off) + 1 == sidx)
- start_off = ALIGN(bdata->last_end_off, align);
+ start_off = align_off(bdata, bdata->last_end_off, align);
else
start_off = PFN_PHYS(sidx);
@@ -499,7 +522,7 @@ find_block:
}
if (fallback) {
- sidx = ALIGN(fallback - 1, step);
+ sidx = align_idx(bdata, fallback - 1, step);
fallback = 0;
goto find_block;
}
diff --git a/mm/bounce.c b/mm/bounce.c
index b6d2d0f1019..06722c40305 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -267,7 +267,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
/*
* Data-less bio, nothing to bounce
*/
- if (bio_empty_barrier(*bio_orig))
+ if (!bio_has_data(*bio_orig))
return;
/*
diff --git a/mm/filemap.c b/mm/filemap.c
index 2d3ec1ffc66..876bc595d0f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -109,7 +109,7 @@
/*
* Remove a page from the page cache and free it. Caller has to make
* sure the page is locked and that nobody else uses it - or that usage
- * is safe. The caller must hold a write_lock on the mapping's tree_lock.
+ * is safe. The caller must hold the mapping's tree_lock.
*/
void __remove_from_page_cache(struct page *page)
{
@@ -141,9 +141,9 @@ void remove_from_page_cache(struct page *page)
BUG_ON(!PageLocked(page));
- write_lock_irq(&mapping->tree_lock);
+ spin_lock_irq(&mapping->tree_lock);
__remove_from_page_cache(page);
- write_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
}
static int sync_page(void *word)
@@ -442,48 +442,52 @@ int filemap_write_and_wait_range(struct address_space *mapping,
}
/**
- * add_to_page_cache - add newly allocated pagecache pages
+ * add_to_page_cache_locked - add a locked page to the pagecache
* @page: page to add
* @mapping: the page's address_space
* @offset: page index
* @gfp_mask: page allocation mode
*
- * This function is used to add newly allocated pagecache pages;
- * the page is new, so we can just run SetPageLocked() against it.
- * The other page state flags were set by rmqueue().
- *
+ * This function is used to add a page to the pagecache. It must be locked.
* This function does not add the page to the LRU. The caller must do that.
*/
-int add_to_page_cache(struct page *page, struct address_space *mapping,
+int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
pgoff_t offset, gfp_t gfp_mask)
{
- int error = mem_cgroup_cache_charge(page, current->mm,
+ int error;
+
+ VM_BUG_ON(!PageLocked(page));
+
+ error = mem_cgroup_cache_charge(page, current->mm,
gfp_mask & ~__GFP_HIGHMEM);
if (error)
goto out;
error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
if (error == 0) {
- write_lock_irq(&mapping->tree_lock);
+ page_cache_get(page);
+ page->mapping = mapping;
+ page->index = offset;
+
+ spin_lock_irq(&mapping->tree_lock);
error = radix_tree_insert(&mapping->page_tree, offset, page);
- if (!error) {
- page_cache_get(page);
- SetPageLocked(page);
- page->mapping = mapping;
- page->index = offset;
+ if (likely(!error)) {
mapping->nrpages++;
__inc_zone_page_state(page, NR_FILE_PAGES);
- } else
+ } else {
+ page->mapping = NULL;
mem_cgroup_uncharge_cache_page(page);
+ page_cache_release(page);
+ }
- write_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
radix_tree_preload_end();
} else
mem_cgroup_uncharge_cache_page(page);
out:
return error;
}
-EXPORT_SYMBOL(add_to_page_cache);
+EXPORT_SYMBOL(add_to_page_cache_locked);
int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
pgoff_t offset, gfp_t gfp_mask)
@@ -554,14 +558,14 @@ EXPORT_SYMBOL(wait_on_page_bit);
* But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
*
* The first mb is necessary to safely close the critical section opened by the
- * TestSetPageLocked(), the second mb is necessary to enforce ordering between
- * the clear_bit and the read of the waitqueue (to avoid SMP races with a
- * parallel wait_on_page_locked()).
+ * test_and_set_bit() to lock the page; the second mb is necessary to enforce
+ * ordering between the clear_bit and the read of the waitqueue (to avoid SMP
+ * races with a parallel wait_on_page_locked()).
*/
void unlock_page(struct page *page)
{
smp_mb__before_clear_bit();
- if (!TestClearPageLocked(page))
+ if (!test_and_clear_bit(PG_locked, &page->flags))
BUG();
smp_mb__after_clear_bit();
wake_up_page(page, PG_locked);
@@ -633,15 +637,35 @@ void __lock_page_nosync(struct page *page)
* Is there a pagecache struct page at the given (mapping, offset) tuple?
* If yes, increment its refcount and return it; if no, return NULL.
*/
-struct page * find_get_page(struct address_space *mapping, pgoff_t offset)
+struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
{
+ void **pagep;
struct page *page;
- read_lock_irq(&mapping->tree_lock);
- page = radix_tree_lookup(&mapping->page_tree, offset);
- if (page)
- page_cache_get(page);
- read_unlock_irq(&mapping->tree_lock);
+ rcu_read_lock();
+repeat:
+ page = NULL;
+ pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
+ if (pagep) {
+ page = radix_tree_deref_slot(pagep);
+ if (unlikely(!page || page == RADIX_TREE_RETRY))
+ goto repeat;
+
+ if (!page_cache_get_speculative(page))
+ goto repeat;
+
+ /*
+ * Has the page moved?
+ * This is part of the lockless pagecache protocol. See
+ * include/linux/pagemap.h for details.
+ */
+ if (unlikely(page != *pagep)) {
+ page_cache_release(page);
+ goto repeat;
+ }
+ }
+ rcu_read_unlock();
+
return page;
}
EXPORT_SYMBOL(find_get_page);
@@ -656,32 +680,22 @@ EXPORT_SYMBOL(find_get_page);
*
* Returns zero if the page was not present. find_lock_page() may sleep.
*/
-struct page *find_lock_page(struct address_space *mapping,
- pgoff_t offset)
+struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
{
struct page *page;
repeat:
- read_lock_irq(&mapping->tree_lock);
- page = radix_tree_lookup(&mapping->page_tree, offset);
+ page = find_get_page(mapping, offset);
if (page) {
- page_cache_get(page);
- if (TestSetPageLocked(page)) {
- read_unlock_irq(&mapping->tree_lock);
- __lock_page(page);
-
- /* Has the page been truncated while we slept? */
- if (unlikely(page->mapping != mapping)) {
- unlock_page(page);
- page_cache_release(page);
- goto repeat;
- }
- VM_BUG_ON(page->index != offset);
- goto out;
+ lock_page(page);
+ /* Has the page been truncated? */
+ if (unlikely(page->mapping != mapping)) {
+ unlock_page(page);
+ page_cache_release(page);
+ goto repeat;
}
+ VM_BUG_ON(page->index != offset);
}
- read_unlock_irq(&mapping->tree_lock);
-out:
return page;
}
EXPORT_SYMBOL(find_lock_page);
@@ -747,13 +761,39 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
{
unsigned int i;
unsigned int ret;
+ unsigned int nr_found;
+
+ rcu_read_lock();
+restart:
+ nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+ (void ***)pages, start, nr_pages);
+ ret = 0;
+ for (i = 0; i < nr_found; i++) {
+ struct page *page;
+repeat:
+ page = radix_tree_deref_slot((void **)pages[i]);
+ if (unlikely(!page))
+ continue;
+ /*
+ * this can only trigger if nr_found == 1, making livelock
+ * a non issue.
+ */
+ if (unlikely(page == RADIX_TREE_RETRY))
+ goto restart;
+
+ if (!page_cache_get_speculative(page))
+ goto repeat;
+
+ /* Has the page moved? */
+ if (unlikely(page != *((void **)pages[i]))) {
+ page_cache_release(page);
+ goto repeat;
+ }
- read_lock_irq(&mapping->tree_lock);
- ret = radix_tree_gang_lookup(&mapping->page_tree,
- (void **)pages, start, nr_pages);
- for (i = 0; i < ret; i++)
- page_cache_get(pages[i]);
- read_unlock_irq(&mapping->tree_lock);
+ pages[ret] = page;
+ ret++;
+ }
+ rcu_read_unlock();
return ret;
}
@@ -774,19 +814,44 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
{
unsigned int i;
unsigned int ret;
+ unsigned int nr_found;
+
+ rcu_read_lock();
+restart:
+ nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+ (void ***)pages, index, nr_pages);
+ ret = 0;
+ for (i = 0; i < nr_found; i++) {
+ struct page *page;
+repeat:
+ page = radix_tree_deref_slot((void **)pages[i]);
+ if (unlikely(!page))
+ continue;
+ /*
+ * this can only trigger if nr_found == 1, making livelock
+ * a non issue.
+ */
+ if (unlikely(page == RADIX_TREE_RETRY))
+ goto restart;
- read_lock_irq(&mapping->tree_lock);
- ret = radix_tree_gang_lookup(&mapping->page_tree,
- (void **)pages, index, nr_pages);
- for (i = 0; i < ret; i++) {
- if (pages[i]->mapping == NULL || pages[i]->index != index)
+ if (page->mapping == NULL || page->index != index)
break;
- page_cache_get(pages[i]);
+ if (!page_cache_get_speculative(page))
+ goto repeat;
+
+ /* Has the page moved? */
+ if (unlikely(page != *((void **)pages[i]))) {
+ page_cache_release(page);
+ goto repeat;
+ }
+
+ pages[ret] = page;
+ ret++;
index++;
}
- read_unlock_irq(&mapping->tree_lock);
- return i;
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL(find_get_pages_contig);
@@ -806,15 +871,43 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
{
unsigned int i;
unsigned int ret;
+ unsigned int nr_found;
+
+ rcu_read_lock();
+restart:
+ nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree,
+ (void ***)pages, *index, nr_pages, tag);
+ ret = 0;
+ for (i = 0; i < nr_found; i++) {
+ struct page *page;
+repeat:
+ page = radix_tree_deref_slot((void **)pages[i]);
+ if (unlikely(!page))
+ continue;
+ /*
+ * this can only trigger if nr_found == 1, making livelock
+ * a non issue.
+ */
+ if (unlikely(page == RADIX_TREE_RETRY))
+ goto restart;
+
+ if (!page_cache_get_speculative(page))
+ goto repeat;
+
+ /* Has the page moved? */
+ if (unlikely(page != *((void **)pages[i]))) {
+ page_cache_release(page);
+ goto repeat;
+ }
+
+ pages[ret] = page;
+ ret++;
+ }
+ rcu_read_unlock();
- read_lock_irq(&mapping->tree_lock);
- ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
- (void **)pages, *index, nr_pages, tag);
- for (i = 0; i < ret; i++)
- page_cache_get(pages[i]);
if (ret)
*index = pages[ret - 1]->index + 1;
- read_unlock_irq(&mapping->tree_lock);
+
return ret;
}
EXPORT_SYMBOL(find_get_pages_tag);
@@ -838,7 +931,7 @@ grab_cache_page_nowait(struct address_space *mapping, pgoff_t index)
struct page *page = find_get_page(mapping, index);
if (page) {
- if (!TestSetPageLocked(page))
+ if (trylock_page(page))
return page;
page_cache_release(page);
return NULL;
@@ -930,8 +1023,17 @@ find_page:
ra, filp, page,
index, last_index - index);
}
- if (!PageUptodate(page))
- goto page_not_up_to_date;
+ if (!PageUptodate(page)) {
+ if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
+ !mapping->a_ops->is_partially_uptodate)
+ goto page_not_up_to_date;
+ if (!trylock_page(page))
+ goto page_not_up_to_date;
+ if (!mapping->a_ops->is_partially_uptodate(page,
+ desc, offset))
+ goto page_not_up_to_date_locked;
+ unlock_page(page);
+ }
page_ok:
/*
* i_size must be checked after we know the page is Uptodate.
@@ -1001,6 +1103,7 @@ page_not_up_to_date:
if (lock_page_killable(page))
goto readpage_eio;
+page_not_up_to_date_locked:
/* Did it get truncated before we got the lock? */
if (!page->mapping) {
unlock_page(page);
@@ -1665,8 +1768,9 @@ static int __remove_suid(struct dentry *dentry, int kill)
return notify_change(dentry, &newattrs);
}
-int remove_suid(struct dentry *dentry)
+int file_remove_suid(struct file *file)
{
+ struct dentry *dentry = file->f_path.dentry;
int killsuid = should_remove_suid(dentry);
int killpriv = security_inode_need_killpriv(dentry);
int error = 0;
@@ -1680,7 +1784,7 @@ int remove_suid(struct dentry *dentry)
return error;
}
-EXPORT_SYMBOL(remove_suid);
+EXPORT_SYMBOL(file_remove_suid);
static size_t __iovec_copy_from_user_inatomic(char *vaddr,
const struct iovec *iov, size_t base, size_t bytes)
@@ -1775,7 +1879,7 @@ void iov_iter_advance(struct iov_iter *i, size_t bytes)
* The !iov->iov_len check ensures we skip over unlikely
* zero-length segments (without overruning the iovec).
*/
- while (bytes || unlikely(!iov->iov_len && i->count)) {
+ while (bytes || unlikely(i->count && !iov->iov_len)) {
int copy;
copy = min(bytes, iov->iov_len - base);
@@ -2025,13 +2129,20 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
* After a write we want buffered reads to be sure to go to disk to get
* the new data. We invalidate clean cached page from the region we're
* about to write. We do this *before* the write so that we can return
- * -EIO without clobbering -EIOCBQUEUED from ->direct_IO().
+ * without clobbering -EIOCBQUEUED from ->direct_IO().
*/
if (mapping->nrpages) {
written = invalidate_inode_pages2_range(mapping,
pos >> PAGE_CACHE_SHIFT, end);
- if (written)
+ /*
+ * If a page can not be invalidated, return 0 to fall back
+ * to buffered write.
+ */
+ if (written) {
+ if (written == -EBUSY)
+ return 0;
goto out;
+ }
}
written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs);
@@ -2436,7 +2547,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
if (count == 0)
goto out;
- err = remove_suid(file->f_path.dentry);
+ err = file_remove_suid(file);
if (err)
goto out;
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 3e744abcce9..b5167dfb2f2 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -13,7 +13,10 @@
#include <linux/module.h>
#include <linux/uio.h>
#include <linux/rmap.h>
+#include <linux/mmu_notifier.h>
#include <linux/sched.h>
+#include <linux/seqlock.h>
+#include <linux/mutex.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
@@ -21,22 +24,18 @@
* We do use our own empty page to avoid interference with other users
* of ZERO_PAGE(), such as /dev/zero
*/
+static DEFINE_MUTEX(xip_sparse_mutex);
+static seqcount_t xip_sparse_seq = SEQCNT_ZERO;
static struct page *__xip_sparse_page;
+/* called under xip_sparse_mutex */
static struct page *xip_sparse_page(void)
{
if (!__xip_sparse_page) {
struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
- if (page) {
- static DEFINE_SPINLOCK(xip_alloc_lock);
- spin_lock(&xip_alloc_lock);
- if (!__xip_sparse_page)
- __xip_sparse_page = page;
- else
- __free_page(page);
- spin_unlock(&xip_alloc_lock);
- }
+ if (page)
+ __xip_sparse_page = page;
}
return __xip_sparse_page;
}
@@ -173,22 +172,27 @@ __xip_unmap (struct address_space * mapping,
pte_t pteval;
spinlock_t *ptl;
struct page *page;
+ unsigned count;
+ int locked = 0;
+
+ count = read_seqcount_begin(&xip_sparse_seq);
page = __xip_sparse_page;
if (!page)
return;
+retry:
spin_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
mm = vma->vm_mm;
address = vma->vm_start +
((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
- pte = page_check_address(page, mm, address, &ptl);
+ pte = page_check_address(page, mm, address, &ptl, 1);
if (pte) {
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte));
- pteval = ptep_clear_flush(vma, address, pte);
+ pteval = ptep_clear_flush_notify(vma, address, pte);
page_remove_rmap(page, vma);
dec_mm_counter(mm, file_rss);
BUG_ON(pte_dirty(pteval));
@@ -197,6 +201,14 @@ __xip_unmap (struct address_space * mapping,
}
}
spin_unlock(&mapping->i_mmap_lock);
+
+ if (locked) {
+ mutex_unlock(&xip_sparse_mutex);
+ } else if (read_seqcount_retry(&xip_sparse_seq, count)) {
+ mutex_lock(&xip_sparse_mutex);
+ locked = 1;
+ goto retry;
+ }
}
/*
@@ -217,7 +229,7 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
int error;
/* XXX: are VM_FAULT_ codes OK? */
-
+again:
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
if (vmf->pgoff >= size)
return VM_FAULT_SIGBUS;
@@ -236,8 +248,10 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
int err;
/* maybe shared writable, allocate new block */
+ mutex_lock(&xip_sparse_mutex);
error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 1,
&xip_mem, &xip_pfn);
+ mutex_unlock(&xip_sparse_mutex);
if (error)
return VM_FAULT_SIGBUS;