about | summary | refs | log | tree | commit | diff
path: root/mm
diff options
context:
space:
mode:
author Jens Axboe <jaxboe@fusionio.com> 2011-03-10 08:58:35 +0100
committer Jens Axboe <jaxboe@fusionio.com> 2011-03-10 08:58:35 +0100
commit 4c63f5646e405b5010cc9499419060bf2e838f5b (patch)
tree df91ba315032c8ec4aafeb3ab96fdfa7c6c656e1 /mm
parent cafb0bfca1a73efd6d8a4a6a6a716e6134b96c24 (diff)
parent 69d60eb96ae8a73cf9b79cf28051caf973006011 (diff)
Merge branch 'for-2.6.39/stack-plug' into for-2.6.39/core
Conflicts:
    block/blk-core.c
    block/blk-flush.c
    drivers/md/raid1.c
    drivers/md/raid10.c
    drivers/md/raid5.c
    fs/nilfs2/btnode.c
    fs/nilfs2/mdt.c

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'mm')
-rw-r--r-- mm/backing-dev.c 6
-rw-r--r-- mm/filemap.c 74
-rw-r--r-- mm/memory-failure.c 8
-rw-r--r-- mm/nommu.c 4
-rw-r--r-- mm/page-writeback.c 2
-rw-r--r-- mm/page_io.c 2
-rw-r--r-- mm/readahead.c 18
-rw-r--r-- mm/shmem.c 1
-rw-r--r-- mm/swap_state.c 5
-rw-r--r-- mm/swapfile.c 37
-rw-r--r-- mm/vmscan.c 2
11 files changed, 27 insertions, 132 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 027100d3022..c91e139a652 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -14,17 +14,11 @@
static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
-void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-}
-EXPORT_SYMBOL(default_unplug_io_fn);
-
struct backing_dev_info default_backing_dev_info = {
.name = "default",
.ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
.state = 0,
.capabilities = BDI_CAP_MAP_COPY,
- .unplug_io_fn = default_unplug_io_fn,
};
EXPORT_SYMBOL_GPL(default_backing_dev_info);
diff --git a/mm/filemap.c b/mm/filemap.c
index 83a45d35468..f9a29c87a2c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -155,45 +155,15 @@ void remove_from_page_cache(struct page *page)
}
EXPORT_SYMBOL(remove_from_page_cache);
-static int sync_page(void *word)
+static int sleep_on_page(void *word)
{
- struct address_space *mapping;
- struct page *page;
-
- page = container_of((unsigned long *)word, struct page, flags);
-
- /*
- * page_mapping() is being called without PG_locked held.
- * Some knowledge of the state and use of the page is used to
- * reduce the requirements down to a memory barrier.
- * The danger here is of a stale page_mapping() return value
- * indicating a struct address_space different from the one it's
- * associated with when it is associated with one.
- * After smp_mb(), it's either the correct page_mapping() for
- * the page, or an old page_mapping() and the page's own
- * page_mapping() has gone NULL.
- * The ->sync_page() address_space operation must tolerate
- * page_mapping() going NULL. By an amazing coincidence,
- * this comes about because none of the users of the page
- * in the ->sync_page() methods make essential use of the
- * page_mapping(), merely passing the page down to the backing
- * device's unplug functions when it's non-NULL, which in turn
- * ignore it for all cases but swap, where only page_private(page) is
- * of interest. When page_mapping() does go NULL, the entire
- * call stack gracefully ignores the page and returns.
- * -- wli
- */
- smp_mb();
- mapping = page_mapping(page);
- if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
- mapping->a_ops->sync_page(page);
io_schedule();
return 0;
}
-static int sync_page_killable(void *word)
+static int sleep_on_page_killable(void *word)
{
- sync_page(word);
+ sleep_on_page(word);
return fatal_signal_pending(current) ? -EINTR : 0;
}
@@ -479,12 +449,6 @@ struct page *__page_cache_alloc(gfp_t gfp)
EXPORT_SYMBOL(__page_cache_alloc);
#endif
-static int __sleep_on_page_lock(void *word)
-{
- io_schedule();
- return 0;
-}
-
/*
* In order to wait for pages to become available there must be
* waitqueues associated with pages. By using a hash table of
@@ -512,7 +476,7 @@ void wait_on_page_bit(struct page *page, int bit_nr)
DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
if (test_bit(bit_nr, &page->flags))
- __wait_on_bit(page_waitqueue(page), &wait, sync_page,
+ __wait_on_bit(page_waitqueue(page), &wait, sleep_on_page,
TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(wait_on_page_bit);
@@ -576,17 +540,12 @@ EXPORT_SYMBOL(end_page_writeback);
/**
* __lock_page - get a lock on the page, assuming we need to sleep to get it
* @page: the page to lock
- *
- * Ugly. Running sync_page() in state TASK_UNINTERRUPTIBLE is scary. If some
- * random driver's requestfn sets TASK_RUNNING, we could busywait. However
- * chances are that on the second loop, the block layer's plug list is empty,
- * so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
*/
void __lock_page(struct page *page)
{
DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
- __wait_on_bit_lock(page_waitqueue(page), &wait, sync_page,
+ __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page,
TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__lock_page);
@@ -596,24 +555,10 @@ int __lock_page_killable(struct page *page)
DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
return __wait_on_bit_lock(page_waitqueue(page), &wait,
- sync_page_killable, TASK_KILLABLE);
+ sleep_on_page_killable, TASK_KILLABLE);
}
EXPORT_SYMBOL_GPL(__lock_page_killable);
-/**
- * __lock_page_nosync - get a lock on the page, without calling sync_page()
- * @page: the page to lock
- *
- * Variant of lock_page that does not require the caller to hold a reference
- * on the page's mapping.
- */
-void __lock_page_nosync(struct page *page)
-{
- DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
- __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
- TASK_UNINTERRUPTIBLE);
-}
-
int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags)
{
@@ -1298,12 +1243,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long seg = 0;
size_t count;
loff_t *ppos = &iocb->ki_pos;
+ struct blk_plug plug;
count = 0;
retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
if (retval)
return retval;
+ blk_start_plug(&plug);
+
/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
if (filp->f_flags & O_DIRECT) {
loff_t size;
@@ -1376,6 +1324,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
break;
}
out:
+ blk_finish_plug(&plug);
return retval;
}
EXPORT_SYMBOL(generic_file_aio_read);
@@ -2487,11 +2436,13 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
+ struct blk_plug plug;
ssize_t ret;
BUG_ON(iocb->ki_pos != pos);
mutex_lock(&inode->i_mutex);
+ blk_start_plug(&plug);
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);
@@ -2502,6 +2453,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (err < 0 && ret > 0)
ret = err;
}
+ blk_finish_plug(&plug);
return ret;
}
EXPORT_SYMBOL(generic_file_aio_write);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 0207c2f6f8b..bfba796d374 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -945,7 +945,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
collect_procs(ppage, &tokill);
if (hpage != ppage)
- lock_page_nosync(ppage);
+ lock_page(ppage);
ret = try_to_unmap(ppage, ttu);
if (ret != SWAP_SUCCESS)
@@ -1038,7 +1038,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
* Check "just unpoisoned", "filter hit", and
* "race with other subpage."
*/
- lock_page_nosync(hpage);
+ lock_page(hpage);
if (!PageHWPoison(hpage)
|| (hwpoison_filter(p) && TestClearPageHWPoison(p))
|| (p != hpage && TestSetPageHWPoison(hpage))) {
@@ -1088,7 +1088,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
* It's very difficult to mess with pages currently under IO
* and in many cases impossible, so we just avoid it here.
*/
- lock_page_nosync(hpage);
+ lock_page(hpage);
/*
* unpoison always clear PG_hwpoison inside page lock
@@ -1231,7 +1231,7 @@ int unpoison_memory(unsigned long pfn)
return 0;
}
- lock_page_nosync(page);
+ lock_page(page);
/*
* This test is racy because PG_hwpoison is set outside of page lock.
* That's acceptable because that won't trigger kernel panic. Instead,
diff --git a/mm/nommu.c b/mm/nommu.c
index f59e1424d3d..fb6cbd6abe1 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1842,10 +1842,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
}
EXPORT_SYMBOL(remap_vmalloc_range);
-void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-}
-
unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2cb01f6ec5d..cc0ede169e4 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1239,7 +1239,7 @@ int set_page_dirty_lock(struct page *page)
{
int ret;
- lock_page_nosync(page);
+ lock_page(page);
ret = set_page_dirty(page);
unlock_page(page);
return ret;
diff --git a/mm/page_io.c b/mm/page_io.c
index 2dee975bf46..dc76b4d0611 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -106,7 +106,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
goto out;
}
if (wbc->sync_mode == WB_SYNC_ALL)
- rw |= REQ_SYNC | REQ_UNPLUG;
+ rw |= REQ_SYNC;
count_vm_event(PSWPOUT);
set_page_writeback(page);
unlock_page(page);
diff --git a/mm/readahead.c b/mm/readahead.c
index 77506a291a2..2c0cc489e28 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -109,9 +109,12 @@ EXPORT_SYMBOL(read_cache_pages);
static int read_pages(struct address_space *mapping, struct file *filp,
struct list_head *pages, unsigned nr_pages)
{
+ struct blk_plug plug;
unsigned page_idx;
int ret;
+ blk_start_plug(&plug);
+
if (mapping->a_ops->readpages) {
ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
/* Clean up the remaining pages */
@@ -129,7 +132,10 @@ static int read_pages(struct address_space *mapping, struct file *filp,
page_cache_release(page);
}
ret = 0;
+
out:
+ blk_finish_plug(&plug);
+
return ret;
}
@@ -554,17 +560,5 @@ page_cache_async_readahead(struct address_space *mapping,
/* do read-ahead */
ondemand_readahead(mapping, ra, filp, true, offset, req_size);
-
-#ifdef CONFIG_BLOCK
- /*
- * Normally the current page is !uptodate and lock_page() will be
- * immediately called to implicitly unplug the device. However this
- * is not always true for RAID conifgurations, where data arrives
- * not strictly in their submission order. In this case we need to
- * explicitly kick off the IO.
- */
- if (PageUptodate(page))
- blk_run_backing_dev(mapping->backing_dev_info, NULL);
-#endif
}
EXPORT_SYMBOL_GPL(page_cache_async_readahead);
diff --git a/mm/shmem.c b/mm/shmem.c
index 5ee67c99060..24d23f5bedf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -224,7 +224,6 @@ static const struct vm_operations_struct shmem_vm_ops;
static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
.ra_pages = 0, /* No readahead */
.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
- .unplug_io_fn = default_unplug_io_fn,
};
static LIST_HEAD(shmem_swaplist);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 5c8cfabbc9b..46680461785 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -24,12 +24,10 @@
/*
* swapper_space is a fiction, retained to simplify the path through
- * vmscan's shrink_page_list, to make sync_page look nicer, and to allow
- * future use of radix_tree tags in the swap cache.
+ * vmscan's shrink_page_list.
*/
static const struct address_space_operations swap_aops = {
.writepage = swap_writepage,
- .sync_page = block_sync_page,
.set_page_dirty = __set_page_dirty_nobuffers,
.migratepage = migrate_page,
};
@@ -37,7 +35,6 @@ static const struct address_space_operations swap_aops = {
static struct backing_dev_info swap_backing_dev_info = {
.name = "swap",
.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
- .unplug_io_fn = swap_unplug_io_fn,
};
struct address_space swapper_space = {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 0341c5700e3..64d627ab624 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -95,39 +95,6 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
}
/*
- * We need this because the bdev->unplug_fn can sleep and we cannot
- * hold swap_lock while calling the unplug_fn. And swap_lock
- * cannot be turned into a mutex.
- */
-static DECLARE_RWSEM(swap_unplug_sem);
-
-void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
-{
- swp_entry_t entry;
-
- down_read(&swap_unplug_sem);
- entry.val = page_private(page);
- if (PageSwapCache(page)) {
- struct block_device *bdev = swap_info[swp_type(entry)]->bdev;
- struct backing_dev_info *bdi;
-
- /*
- * If the page is removed from swapcache from under us (with a
- * racy try_to_unuse/swapoff) we need an additional reference
- * count to avoid reading garbage from page_private(page) above.
- * If the WARN_ON triggers during a swapoff it maybe the race
- * condition and it's harmless. However if it triggers without
- * swapoff it signals a problem.
- */
- WARN_ON(page_count(page) <= 1);
-
- bdi = bdev->bd_inode->i_mapping->backing_dev_info;
- blk_run_backing_dev(bdi, page);
- }
- up_read(&swap_unplug_sem);
-}
-
-/*
* swapon tell device that all the old swap contents can be discarded,
* to allow the swap device to optimize its wear-levelling.
*/
@@ -1643,10 +1610,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
goto out_dput;
}
- /* wait for any unplug function to finish */
- down_write(&swap_unplug_sem);
- up_write(&swap_unplug_sem);
-
destroy_swap_extents(p);
if (p->flags & SWP_CONTINUED)
free_swap_count_continuations(p);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6771ea70bfe..951cac21c2e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -358,7 +358,7 @@ static int may_write_to_queue(struct backing_dev_info *bdi,
static void handle_write_error(struct address_space *mapping,
struct page *page, int error)
{
- lock_page_nosync(page);
+ lock_page(page);
if (page_mapping(page) == mapping)
mapping_set_error(mapping, error);
unlock_page(page);