diff options
author | Jiri Kosina <jkosina@suse.cz> | 2011-04-26 10:22:15 +0200 |
---|---|---|
committer | Jiri Kosina <jkosina@suse.cz> | 2011-04-26 10:22:59 +0200 |
commit | 07f9479a40cc778bc1462ada11f95b01360ae4ff (patch) | |
tree | 0676cf38df3844004bb3ebfd99dfa67a4a8998f5 /mm | |
parent | 9d5e6bdb3013acfb311ab407eeca0b6a6a3dedbf (diff) | |
parent | cd2e49e90f1cae7726c9a2c54488d881d7f1cd1c (diff) |
Merge branch 'master' into for-next
Fast-forwarded to current state of Linus' tree as there are patches to be
applied for files that didn't exist on the old branch.
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig.debug | 25 | ||||
-rw-r--r-- | mm/backing-dev.c | 18 | ||||
-rw-r--r-- | mm/bootmem.c | 8 | ||||
-rw-r--r-- | mm/compaction.c | 65 | ||||
-rw-r--r-- | mm/filemap.c | 211 | ||||
-rw-r--r-- | mm/huge_memory.c | 69 | ||||
-rw-r--r-- | mm/hugetlb.c | 16 | ||||
-rw-r--r-- | mm/hwpoison-inject.c | 2 | ||||
-rw-r--r-- | mm/internal.h | 2 | ||||
-rw-r--r-- | mm/kmemleak.c | 6 | ||||
-rw-r--r-- | mm/ksm.c | 25 | ||||
-rw-r--r-- | mm/memblock.c | 241 | ||||
-rw-r--r-- | mm/memcontrol.c | 669 | ||||
-rw-r--r-- | mm/memory-failure.c | 16 | ||||
-rw-r--r-- | mm/memory.c | 106 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 4 | ||||
-rw-r--r-- | mm/mempolicy.c | 3 | ||||
-rw-r--r-- | mm/migrate.c | 58 | ||||
-rw-r--r-- | mm/mlock.c | 17 | ||||
-rw-r--r-- | mm/mmap.c | 15 | ||||
-rw-r--r-- | mm/mremap.c | 11 | ||||
-rw-r--r-- | mm/nobootmem.c | 10 | ||||
-rw-r--r-- | mm/nommu.c | 58 | ||||
-rw-r--r-- | mm/oom_kill.c | 89 | ||||
-rw-r--r-- | mm/page-writeback.c | 25 | ||||
-rw-r--r-- | mm/page_alloc.c | 95 | ||||
-rw-r--r-- | mm/page_cgroup.c | 140 | ||||
-rw-r--r-- | mm/page_io.c | 2 | ||||
-rw-r--r-- | mm/pagewalk.c | 24 | ||||
-rw-r--r-- | mm/percpu.c | 13 | ||||
-rw-r--r-- | mm/readahead.c | 18 | ||||
-rw-r--r-- | mm/rmap.c | 85 | ||||
-rw-r--r-- | mm/shmem.c | 11 | ||||
-rw-r--r-- | mm/slab.c | 61 | ||||
-rw-r--r-- | mm/slob.c | 6 | ||||
-rw-r--r-- | mm/slub.c | 376 | ||||
-rw-r--r-- | mm/sparse.c | 2 | ||||
-rw-r--r-- | mm/swap.c | 189 | ||||
-rw-r--r-- | mm/swap_state.c | 5 | ||||
-rw-r--r-- | mm/swapfile.c | 411 | ||||
-rw-r--r-- | mm/truncate.c | 22 | ||||
-rw-r--r-- | mm/util.c | 2 | ||||
-rw-r--r-- | mm/vmalloc.c | 158 | ||||
-rw-r--r-- | mm/vmscan.c | 66 | ||||
-rw-r--r-- | mm/vmstat.c | 27 |
45 files changed, 2123 insertions, 1359 deletions
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index af7cfb43d2f..8b1a477162d 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -1,27 +1,24 @@ config DEBUG_PAGEALLOC bool "Debug page memory allocations" - depends on DEBUG_KERNEL && ARCH_SUPPORTS_DEBUG_PAGEALLOC - depends on !HIBERNATION || !PPC && !SPARC + depends on DEBUG_KERNEL + depends on !HIBERNATION || ARCH_SUPPORTS_DEBUG_PAGEALLOC && !PPC && !SPARC depends on !KMEMCHECK + select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC ---help--- Unmap pages from the kernel linear mapping after free_pages(). This results in a large slowdown, but helps to find certain types of memory corruption. + For architectures which don't enable ARCH_SUPPORTS_DEBUG_PAGEALLOC, + fill the pages with poison patterns after free_pages() and verify + the patterns before alloc_pages(). Additionally, + this option cannot be enabled in combination with hibernation as + that would result in incorrect warnings of memory corruption after + a resume because free pages are not saved to the suspend image. + config WANT_PAGE_DEBUG_FLAGS bool config PAGE_POISONING - bool "Debug page memory allocations" - depends on DEBUG_KERNEL && !ARCH_SUPPORTS_DEBUG_PAGEALLOC - depends on !HIBERNATION - select DEBUG_PAGEALLOC + bool select WANT_PAGE_DEBUG_FLAGS - ---help--- - Fill the pages with poison patterns after free_pages() and verify - the patterns before alloc_pages(). This results in a large slowdown, - but helps to find certain types of memory corruption. - - This option cannot be enabled in combination with hibernation as - that would result in incorrect warnings of memory corruption after - a resume because free pages are not saved to the suspend image. diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 027100d3022..befc87531e4 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -14,17 +14,11 @@ static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); -void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) -{ -} -EXPORT_SYMBOL(default_unplug_io_fn); - struct backing_dev_info default_backing_dev_info = { .name = "default", .ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE, .state = 0, .capabilities = BDI_CAP_MAP_COPY, - .unplug_io_fn = default_unplug_io_fn, }; EXPORT_SYMBOL_GPL(default_backing_dev_info); @@ -73,14 +67,14 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) struct inode *inode; nr_wb = nr_dirty = nr_io = nr_more_io = 0; - spin_lock(&inode_lock); + spin_lock(&inode_wb_list_lock); list_for_each_entry(inode, &wb->b_dirty, i_wb_list) nr_dirty++; list_for_each_entry(inode, &wb->b_io, i_wb_list) nr_io++; list_for_each_entry(inode, &wb->b_more_io, i_wb_list) nr_more_io++; - spin_unlock(&inode_lock); + spin_unlock(&inode_wb_list_lock); global_dirty_limits(&background_thresh, &dirty_thresh); bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh); @@ -604,7 +598,7 @@ static void bdi_prune_sb(struct backing_dev_info *bdi) spin_lock(&sb_lock); list_for_each_entry(sb, &super_blocks, s_list) { if (sb->s_bdi == bdi) - sb->s_bdi = NULL; + sb->s_bdi = &default_backing_dev_info; } spin_unlock(&sb_lock); } @@ -682,11 +676,11 @@ void bdi_destroy(struct backing_dev_info *bdi) if (bdi_has_dirty_io(bdi)) { struct bdi_writeback *dst = &default_backing_dev_info.wb; - spin_lock(&inode_lock); + spin_lock(&inode_wb_list_lock); list_splice(&bdi->wb.b_dirty, &dst->b_dirty); list_splice(&bdi->wb.b_io, &dst->b_io); list_splice(&bdi->wb.b_more_io, &dst->b_more_io); - spin_unlock(&inode_lock); + spin_unlock(&inode_wb_list_lock); } bdi_unregister(bdi); @@ -793,7 +787,7 @@ EXPORT_SYMBOL(congestion_wait); * jiffies for either a BDI to exit congestion of the given @sync queue * or a write to complete. * - * In the absense of zone congestion, cond_resched() is called to yield + * In the absence of zone congestion, cond_resched() is called to yield * the processor if necessary but otherwise does not sleep. * * The return value is 0 if the sleep is for the full timeout. Otherwise, diff --git a/mm/bootmem.c b/mm/bootmem.c index 07aeb89e396..01d5a4b3dd0 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -34,14 +34,6 @@ unsigned long max_low_pfn; unsigned long min_low_pfn; unsigned long max_pfn; -#ifdef CONFIG_CRASH_DUMP -/* - * If we have booted due to a crash, max_pfn will be a very low value. We need - * to know the amount of memory that the previous kernel used. - */ -unsigned long saved_max_pfn; -#endif - bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); diff --git a/mm/compaction.c b/mm/compaction.c index 8be430b812d..021a2960ef9 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -42,8 +42,6 @@ struct compact_control { unsigned int order; /* order a direct compactor needs */ int migratetype; /* MOVABLE, RECLAIMABLE etc */ struct zone *zone; - - int compact_mode; }; static unsigned long release_freepages(struct list_head *freelist) @@ -155,7 +153,6 @@ static void isolate_freepages(struct zone *zone, * pages on cc->migratepages. We stop searching if the migrate * and free page scanners meet or enough free pages are isolated. */ - spin_lock_irqsave(&zone->lock, flags); for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; pfn -= pageblock_nr_pages) { unsigned long isolated; @@ -178,9 +175,19 @@ static void isolate_freepages(struct zone *zone, if (!suitable_migration_target(page)) continue; - /* Found a block suitable for isolating free pages from */ - isolated = isolate_freepages_block(zone, pfn, freelist); - nr_freepages += isolated; + /* + * Found a block suitable for isolating free pages from. Now + * we disabled interrupts, double check things are ok and + * isolate the pages. This is to minimise the time IRQs + * are disabled + */ + isolated = 0; + spin_lock_irqsave(&zone->lock, flags); + if (suitable_migration_target(page)) { + isolated = isolate_freepages_block(zone, pfn, freelist); + nr_freepages += isolated; + } + spin_unlock_irqrestore(&zone->lock, flags); /* * Record the highest PFN we isolated pages from. When next @@ -190,7 +197,6 @@ static void isolate_freepages(struct zone *zone, if (isolated) high_pfn = max(high_pfn, pfn); } - spin_unlock_irqrestore(&zone->lock, flags); /* split_free_page does not map the pages */ list_for_each_entry(page, freelist, lru) { @@ -271,9 +277,27 @@ static unsigned long isolate_migratepages(struct zone *zone, } /* Time to isolate some pages for migration */ + cond_resched(); spin_lock_irq(&zone->lru_lock); for (; low_pfn < end_pfn; low_pfn++) { struct page *page; + bool locked = true; + + /* give a chance to irqs before checking need_resched() */ + if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) { + spin_unlock_irq(&zone->lru_lock); + locked = false; + } + if (need_resched() || spin_is_contended(&zone->lru_lock)) { + if (locked) + spin_unlock_irq(&zone->lru_lock); + cond_resched(); + spin_lock_irq(&zone->lru_lock); + if (fatal_signal_pending(current)) + break; + } else if (!locked) + spin_lock_irq(&zone->lru_lock); + if (!pfn_valid_within(low_pfn)) continue; nr_scanned++; @@ -397,10 +421,7 @@ static int compact_finished(struct zone *zone, return COMPACT_COMPLETE; /* Compaction run is not finished if the watermark is not met */ - if (cc->compact_mode != COMPACT_MODE_KSWAPD) - watermark = low_wmark_pages(zone); - else - watermark = high_wmark_pages(zone); + watermark = low_wmark_pages(zone); watermark += (1 << cc->order); if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0)) @@ -413,15 +434,6 @@ static int compact_finished(struct zone *zone, if (cc->order == -1) return COMPACT_CONTINUE; - /* - * Generating only one page of the right order is not enough - * for kswapd, we must continue until we're above the high - * watermark as a pool for high order GFP_ATOMIC allocations - * too. - */ - if (cc->compact_mode == COMPACT_MODE_KSWAPD) - return COMPACT_CONTINUE; - /* Direct compactor: Is a suitable page free? */ for (order = cc->order; order < MAX_ORDER; order++) { /* Job done if page is free of the right migratetype */ @@ -508,12 +520,13 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) { unsigned long nr_migrate, nr_remaining; + int err; if (!isolate_migratepages(zone, cc)) continue; nr_migrate = cc->nr_migratepages; - migrate_pages(&cc->migratepages, compaction_alloc, + err = migrate_pages(&cc->migratepages, compaction_alloc, (unsigned long)cc, false, cc->sync); update_nr_listpages(cc); @@ -527,7 +540,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) nr_remaining); /* Release LRU pages not migrated */ - if (!list_empty(&cc->migratepages)) { + if (err) { putback_lru_pages(&cc->migratepages); cc->nr_migratepages = 0; } @@ -543,8 +556,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) unsigned long compact_zone_order(struct zone *zone, int order, gfp_t gfp_mask, - bool sync, - int compact_mode) + bool sync) { struct compact_control cc = { .nr_freepages = 0, @@ -553,7 +565,6 @@ unsigned long compact_zone_order(struct zone *zone, .migratetype = allocflags_to_migratetype(gfp_mask), .zone = zone, .sync = sync, - .compact_mode = compact_mode, }; INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); @@ -599,8 +610,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, nodemask) { int status; - status = compact_zone_order(zone, order, gfp_mask, sync, - COMPACT_MODE_DIRECT_RECLAIM); + status = compact_zone_order(zone, order, gfp_mask, sync); rc = max(status, rc); /* If a normal allocation would succeed, stop compacting */ @@ -631,7 +641,6 @@ static int compact_node(int nid) .nr_freepages = 0, .nr_migratepages = 0, .order = -1, - .compact_mode = COMPACT_MODE_DIRECT_RECLAIM, }; zone = &pgdat->node_zones[zoneid]; diff --git a/mm/filemap.c b/mm/filemap.c index 83a45d35468..c641edf553a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -80,8 +80,8 @@ * ->i_mutex * ->i_alloc_sem (various) * - * ->inode_lock - * ->sb_lock (fs/fs-writeback.c) + * inode_wb_list_lock + * sb_lock (fs/fs-writeback.c) * ->mapping->tree_lock (__sync_single_inode) * * ->i_mmap_lock @@ -98,8 +98,10 @@ * ->zone.lru_lock (check_pte_range->isolate_lru_page) * ->private_lock (page_remove_rmap->set_page_dirty) * ->tree_lock (page_remove_rmap->set_page_dirty) - * ->inode_lock (page_remove_rmap->set_page_dirty) - * ->inode_lock (zap_pte_range->set_page_dirty) + * inode_wb_list_lock (page_remove_rmap->set_page_dirty) + * ->inode->i_lock (page_remove_rmap->set_page_dirty) + * inode_wb_list_lock (zap_pte_range->set_page_dirty) + * ->inode->i_lock (zap_pte_range->set_page_dirty) * ->private_lock (zap_pte_range->__set_page_dirty_buffers) * * (code doesn't rely on that order, so you could switch it around) @@ -108,11 +110,11 @@ */ /* - * Remove a page from the page cache and free it. Caller has to make + * Delete a page from the page cache and free it. Caller has to make * sure the page is locked and that nobody else uses it - or that usage * is safe. The caller must hold the mapping's tree_lock. */ -void __remove_from_page_cache(struct page *page) +void __delete_from_page_cache(struct page *page) { struct address_space *mapping = page->mapping; @@ -137,7 +139,15 @@ void __remove_from_page_cache(struct page *page) } } -void remove_from_page_cache(struct page *page) +/** + * delete_from_page_cache - delete page from page cache + * @page: the page which the kernel is trying to remove from page cache + * + * This must be called only on pages that have been verified to be in the page + * cache and locked. It will never put the page into the free list, the caller + * has a reference on the page. + */ +void delete_from_page_cache(struct page *page) { struct address_space *mapping = page->mapping; void (*freepage)(struct page *); @@ -146,54 +156,25 @@ void remove_from_page_cache(struct page *page) freepage = mapping->a_ops->freepage; spin_lock_irq(&mapping->tree_lock); - __remove_from_page_cache(page); + __delete_from_page_cache(page); spin_unlock_irq(&mapping->tree_lock); mem_cgroup_uncharge_cache_page(page); if (freepage) freepage(page); + page_cache_release(page); } -EXPORT_SYMBOL(remove_from_page_cache); +EXPORT_SYMBOL(delete_from_page_cache); -static int sync_page(void *word) +static int sleep_on_page(void *word) { - struct address_space *mapping; - struct page *page; - - page = container_of((unsigned long *)word, struct page, flags); - - /* - * page_mapping() is being called without PG_locked held. - * Some knowledge of the state and use of the page is used to - * reduce the requirements down to a memory barrier. - * The danger here is of a stale page_mapping() return value - * indicating a struct address_space different from the one it's - * associated with when it is associated with one. - * After smp_mb(), it's either the correct page_mapping() for - * the page, or an old page_mapping() and the page's own - * page_mapping() has gone NULL. - * The ->sync_page() address_space operation must tolerate - * page_mapping() going NULL. By an amazing coincidence, - * this comes about because none of the users of the page - * in the ->sync_page() methods make essential use of the - * page_mapping(), merely passing the page down to the backing - * device's unplug functions when it's non-NULL, which in turn - * ignore it for all cases but swap, where only page_private(page) is - * of interest. When page_mapping() does go NULL, the entire - * call stack gracefully ignores the page and returns. - * -- wli - */ - smp_mb(); - mapping = page_mapping(page); - if (mapping && mapping->a_ops && mapping->a_ops->sync_page) - mapping->a_ops->sync_page(page); io_schedule(); return 0; } -static int sync_page_killable(void *word) +static int sleep_on_page_killable(void *word) { - sync_page(word); + sleep_on_page(word); return fatal_signal_pending(current) ? -EINTR : 0; } @@ -387,6 +368,76 @@ int filemap_write_and_wait_range(struct address_space *mapping, EXPORT_SYMBOL(filemap_write_and_wait_range); /** + * replace_page_cache_page - replace a pagecache page with a new one + * @old: page to be replaced + * @new: page to replace with + * @gfp_mask: allocation mode + * + * This function replaces a page in the pagecache with a new one. On + * success it acquires the pagecache reference for the new page and + * drops it for the old page. Both the old and new pages must be + * locked. This function does not add the new page to the LRU, the + * caller must do that. + * + * The remove + add is atomic. The only way this function can fail is + * memory allocation failure. + */ +int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) +{ + int error; + struct mem_cgroup *memcg = NULL; + + VM_BUG_ON(!PageLocked(old)); + VM_BUG_ON(!PageLocked(new)); + VM_BUG_ON(new->mapping); + + /* + * This is not page migration, but prepare_migration and + * end_migration does enough work for charge replacement. + * + * In the longer term we probably want a specialized function + * for moving the charge from old to new in a more efficient + * manner. + */ + error = mem_cgroup_prepare_migration(old, new, &memcg, gfp_mask); + if (error) + return error; + + error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); + if (!error) { + struct address_space *mapping = old->mapping; + void (*freepage)(struct page *); + + pgoff_t offset = old->index; + freepage = mapping->a_ops->freepage; + + page_cache_get(new); + new->mapping = mapping; + new->index = offset; + + spin_lock_irq(&mapping->tree_lock); + __delete_from_page_cache(old); + error = radix_tree_insert(&mapping->page_tree, offset, new); + BUG_ON(error); + mapping->nrpages++; + __inc_zone_page_state(new, NR_FILE_PAGES); + if (PageSwapBacked(new)) + __inc_zone_page_state(new, NR_SHMEM); + spin_unlock_irq(&mapping->tree_lock); + radix_tree_preload_end(); + if (freepage) + freepage(old); + page_cache_release(old); + mem_cgroup_end_migration(memcg, old, new, true); + } else { + mem_cgroup_end_migration(memcg, old, new, false); + } + + return error; +} +EXPORT_SYMBOL_GPL(replace_page_cache_page); + +/** * add_to_page_cache_locked - add a locked page to the pagecache * @page: page to add * @mapping: the page's address_space @@ -479,12 +530,6 @@ struct page *__page_cache_alloc(gfp_t gfp) EXPORT_SYMBOL(__page_cache_alloc); #endif -static int __sleep_on_page_lock(void *word) -{ - io_schedule(); - return 0; -} - /* * In order to wait for pages to become available there must be * waitqueues associated with pages. By using a hash table of @@ -512,7 +557,7 @@ void wait_on_page_bit(struct page *page, int bit_nr) DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); if (test_bit(bit_nr, &page->flags)) - __wait_on_bit(page_waitqueue(page), &wait, sync_page, + __wait_on_bit(page_waitqueue(page), &wait, sleep_on_page, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(wait_on_page_bit); @@ -576,17 +621,12 @@ EXPORT_SYMBOL(end_page_writeback); /** * __lock_page - get a lock on the page, assuming we need to sleep to get it * @page: the page to lock - * - * Ugly. Running sync_page() in state TASK_UNINTERRUPTIBLE is scary. If some - * random driver's requestfn sets TASK_RUNNING, we could busywait. However - * chances are that on the second loop, the block layer's plug list is empty, - * so sync_page() will then return in state TASK_UNINTERRUPTIBLE. */ void __lock_page(struct page *page) { DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); - __wait_on_bit_lock(page_waitqueue(page), &wait, sync_page, + __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(__lock_page); @@ -596,24 +636,10 @@ int __lock_page_killable(struct page *page) DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); return __wait_on_bit_lock(page_waitqueue(page), &wait, - sync_page_killable, TASK_KILLABLE); + sleep_on_page_killable, TASK_KILLABLE); } EXPORT_SYMBOL_GPL(__lock_page_killable); -/** - * __lock_page_nosync - get a lock on the page, without calling sync_page() - * @page: the page to lock - * - * Variant of lock_page that does not require the caller to hold a reference - * on the page's mapping. - */ -void __lock_page_nosync(struct page *page) -{ - DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); - __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock, - TASK_UNINTERRUPTIBLE); -} - int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags) { @@ -621,8 +647,10 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm, __lock_page(page); return 1; } else { - up_read(&mm->mmap_sem); - wait_on_page_locked(page); + if (!(flags & FAULT_FLAG_RETRY_NOWAIT)) { + up_read(&mm->mmap_sem); + wait_on_page_locked(page); + } return 0; } } @@ -782,9 +810,13 @@ repeat: page = radix_tree_deref_slot((void **)pages[i]); if (unlikely(!page)) continue; + + /* + * This can only trigger when the entry at index 0 moves out + * of or back to the root: none yet gotten, safe to restart. + */ if (radix_tree_deref_retry(page)) { - if (ret) - start = pages[ret-1]->index; + WARN_ON(start | i); goto restart; } @@ -800,6 +832,13 @@ repeat: pages[ret] = page; ret++; } + + /* + * If all entries were removed before we could secure them, + * try again, because callers stop trying once 0 is returned. + */ + if (unlikely(!ret && nr_found)) + goto restart; rcu_read_unlock(); return ret; } @@ -834,6 +873,11 @@ repeat: page = radix_tree_deref_slot((void **)pages[i]); if (unlikely(!page)) continue; + + /* + * This can only trigger when the entry at index 0 moves out + * of or back to the root: none yet gotten, safe to restart. + */ if (radix_tree_deref_retry(page)) goto restart; @@ -894,6 +938,11 @@ repeat: page = radix_tree_deref_slot((void **)pages[i]); if (unlikely(!page)) continue; + + /* + * This can only trigger when the entry at index 0 moves out + * of or back to the root: none yet gotten, safe to restart. + */ if (radix_tree_deref_retry(page)) goto restart; @@ -909,6 +958,13 @@ repeat: pages[ret] = page; ret++; } + + /* + * If all entries were removed before we could secure them, + * try again, because callers stop trying once 0 is returned. + */ + if (unlikely(!ret && nr_found)) + goto restart; rcu_read_unlock(); if (ret) @@ -1298,12 +1354,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long seg = 0; size_t count; loff_t *ppos = &iocb->ki_pos; + struct blk_plug plug; count = 0; retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); if (retval) return retval; + blk_start_plug(&plug); + /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (filp->f_flags & O_DIRECT) { loff_t size; @@ -1376,6 +1435,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, break; } out: + blk_finish_plug(&plug); return retval; } EXPORT_SYMBOL(generic_file_aio_read); @@ -2487,11 +2547,13 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + struct blk_plug plug; ssize_t ret; BUG_ON(iocb->ki_pos != pos); mutex_lock(&inode->i_mutex); + blk_start_plug(&plug); ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); mutex_unlock(&inode->i_mutex); @@ -2502,6 +2564,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err < 0 && ret > 0) ret = err; } + blk_finish_plug(&plug); return ret; } EXPORT_SYMBOL(generic_file_aio_write); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 113e35c4750..470dcda10ad 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -244,24 +244,28 @@ static ssize_t single_flag_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf, enum transparent_hugepage_flag flag) { - if (test_bit(flag, &transparent_hugepage_flags)) - return sprintf(buf, "[yes] no\n"); - else - return sprintf(buf, "yes [no]\n"); + return sprintf(buf, "%d\n", + !!test_bit(flag, &transparent_hugepage_flags)); } + static ssize_t single_flag_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count, enum transparent_hugepage_flag flag) { - if (!memcmp("yes", buf, - min(sizeof("yes")-1, count))) { + unsigned long value; + int ret; + + ret = kstrtoul(buf, 10, &value); + if (ret < 0) + return ret; + if (value > 1) + return -EINVAL; + + if (value) set_bit(flag, &transparent_hugepage_flags); - } else if (!memcmp("no", buf, - min(sizeof("no")-1, count))) { + else clear_bit(flag, &transparent_hugepage_flags); - } else - return -EINVAL; return count; } @@ -643,23 +647,24 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, |