diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-07 16:38:06 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-07 16:38:06 -0700 |
commit | 26c12d93348f0bda0756aff83f4867d9ae58a5a6 (patch) | |
tree | 65221f6837c66a9260c5c973e5fb908b10e0d504 /mm | |
parent | dc5ed40686a4da95881c35d913b60f867755cbe2 (diff) | |
parent | fdc5813fbbd484a54c88477f91a78934cda8bb32 (diff) |
Merge branch 'akpm' (incoming from Andrew)
Merge second patch-bomb from Andrew Morton:
- the rest of MM
- zram updates
- zswap updates
- exit
- procfs
- exec
- wait
- crash dump
- lib/idr
- rapidio
- adfs, affs, bfs, ufs
- cris
- Kconfig things
- initramfs
- small amount of IPC material
- percpu enhancements
- early ioremap support
- various other misc things
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (156 commits)
MAINTAINERS: update Intel C600 SAS driver maintainers
fs/ufs: remove unused ufs_super_block_third pointer
fs/ufs: remove unused ufs_super_block_second pointer
fs/ufs: remove unused ufs_super_block_first pointer
fs/ufs/super.c: add __init to init_inodecache()
doc/kernel-parameters.txt: add early_ioremap_debug
arm64: add early_ioremap support
arm64: initialize pgprot info earlier in boot
x86: use generic early_ioremap
mm: create generic early_ioremap() support
x86/mm: sparse warning fix for early_memremap
lglock: map to spinlock when !CONFIG_SMP
percpu: add preemption checks to __this_cpu ops
vmstat: use raw_cpu_ops to avoid false positives on preemption checks
slub: use raw_cpu_inc for incrementing statistics
net: replace __this_cpu_inc in route.c with raw_cpu_inc
modules: use raw_cpu_write for initialization of per cpu refcount.
mm: use raw_cpu ops for determining current NUMA node
percpu: add raw_cpu_ops
slub: fix leak of 'name' in sysfs_slab_add
...
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 4 | ||||
-rw-r--r-- | mm/Makefile | 3 | ||||
-rw-r--r-- | mm/compaction.c | 84 | ||||
-rw-r--r-- | mm/early_ioremap.c | 245 | ||||
-rw-r--r-- | mm/filemap.c | 86 | ||||
-rw-r--r-- | mm/huge_memory.c | 21 | ||||
-rw-r--r-- | mm/hugetlb.c | 14 | ||||
-rw-r--r-- | mm/internal.h | 16 | ||||
-rw-r--r-- | mm/memblock.c | 28 | ||||
-rw-r--r-- | mm/memcontrol.c | 453 | ||||
-rw-r--r-- | mm/memory.c | 147 | ||||
-rw-r--r-- | mm/mempolicy.c | 46 | ||||
-rw-r--r-- | mm/mempool.c | 4 | ||||
-rw-r--r-- | mm/mlock.c | 2 | ||||
-rw-r--r-- | mm/mmap.c | 55 | ||||
-rw-r--r-- | mm/mprotect.c | 56 | ||||
-rw-r--r-- | mm/nommu.c | 49 | ||||
-rw-r--r-- | mm/page-writeback.c | 4 | ||||
-rw-r--r-- | mm/page_alloc.c | 118 | ||||
-rw-r--r-- | mm/readahead.c | 21 | ||||
-rw-r--r-- | mm/rmap.c | 14 | ||||
-rw-r--r-- | mm/shmem.c | 7 | ||||
-rw-r--r-- | mm/slab.c | 8 | ||||
-rw-r--r-- | mm/slab.h | 21 | ||||
-rw-r--r-- | mm/slab_common.c | 250 | ||||
-rw-r--r-- | mm/slub.c | 87 | ||||
-rw-r--r-- | mm/sparse.c | 4 | ||||
-rw-r--r-- | mm/util.c | 5 | ||||
-rw-r--r-- | mm/vmacache.c | 112 | ||||
-rw-r--r-- | mm/vmalloc.c | 10 | ||||
-rw-r--r-- | mm/vmscan.c | 12 | ||||
-rw-r--r-- | mm/zswap.c | 78 |
32 files changed, 1342 insertions, 722 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 2888024e0b0..ebe5880c29d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -216,6 +216,7 @@ config PAGEFLAGS_EXTENDED # config SPLIT_PTLOCK_CPUS int + default "999999" if !MMU default "999999" if ARM && !CPU_CACHE_VIPT default "999999" if PARISC && !PA20 default "4" @@ -577,3 +578,6 @@ config PGTABLE_MAPPING You can check speed with zsmalloc benchmark: https://github.com/spartacus06/zsmapbench + +config GENERIC_EARLY_IOREMAP + bool diff --git a/mm/Makefile b/mm/Makefile index cdd741519ee..9e5aaf92197 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -16,7 +16,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ mm_init.o mmu_context.o percpu.o slab_common.o \ - compaction.o balloon_compaction.o \ + compaction.o balloon_compaction.o vmacache.o \ interval_tree.o list_lru.o workingset.o $(mmu-y) obj-y += init-mm.o @@ -61,3 +61,4 @@ obj-$(CONFIG_CLEANCACHE) += cleancache.o obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o obj-$(CONFIG_ZBUD) += zbud.o obj-$(CONFIG_ZSMALLOC) += zsmalloc.o +obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o diff --git a/mm/compaction.c b/mm/compaction.c index b6ab7716006..37f97628706 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -217,21 +217,12 @@ static inline bool compact_trylock_irqsave(spinlock_t *lock, /* Returns true if the page is within a block suitable for migration to */ static bool suitable_migration_target(struct page *page) { - int migratetype = get_pageblock_migratetype(page); - - /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ - if (migratetype == MIGRATE_RESERVE) - return false; - - if (is_migrate_isolate(migratetype)) - return false; - - /* If the page is a large free page, then allow migration */ + /* If the page is a large free page, then disallow migration */ if (PageBuddy(page) && page_order(page) >= pageblock_order) - return true; + return false; /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ - if (migrate_async_suitable(migratetype)) + if (migrate_async_suitable(get_pageblock_migratetype(page))) return true; /* Otherwise skip the block */ @@ -253,6 +244,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, struct page *cursor, *valid_page = NULL; unsigned long flags; bool locked = false; + bool checked_pageblock = false; cursor = pfn_to_page(blockpfn); @@ -284,8 +276,16 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, break; /* Recheck this is a suitable migration target under lock */ - if (!strict && !suitable_migration_target(page)) - break; + if (!strict && !checked_pageblock) { + /* + * We need to check suitability of pageblock only once + * and this isolate_freepages_block() is called with + * pageblock range, so just check once is sufficient. + */ + checked_pageblock = true; + if (!suitable_migration_target(page)) + break; + } /* Recheck this is a buddy page under lock */ if (!PageBuddy(page)) @@ -460,12 +460,13 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, unsigned long last_pageblock_nr = 0, pageblock_nr; unsigned long nr_scanned = 0, nr_isolated = 0; struct list_head *migratelist = &cc->migratepages; - isolate_mode_t mode = 0; struct lruvec *lruvec; unsigned long flags; bool locked = false; struct page *page = NULL, *valid_page = NULL; bool skipped_async_unsuitable = false; + const isolate_mode_t mode = (!cc->sync ? ISOLATE_ASYNC_MIGRATE : 0) | + (unevictable ? ISOLATE_UNEVICTABLE : 0); /* * Ensure that there are not too many pages isolated from the LRU @@ -487,7 +488,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, cond_resched(); for (; low_pfn < end_pfn; low_pfn++) { /* give a chance to irqs before checking need_resched() */ - if (locked && !((low_pfn+1) % SWAP_CLUSTER_MAX)) { + if (locked && !(low_pfn % SWAP_CLUSTER_MAX)) { if (should_release_lock(&zone->lru_lock)) { spin_unlock_irqrestore(&zone->lru_lock, flags); locked = false; @@ -526,8 +527,25 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, /* If isolation recently failed, do not retry */ pageblock_nr = low_pfn >> pageblock_order; - if (!isolation_suitable(cc, page)) - goto next_pageblock; + if (last_pageblock_nr != pageblock_nr) { + int mt; + + last_pageblock_nr = pageblock_nr; + if (!isolation_suitable(cc, page)) + goto next_pageblock; + + /* + * For async migration, also only scan in MOVABLE + * blocks. Async migration is optimistic to see if + * the minimum amount of work satisfies the allocation + */ + mt = get_pageblock_migratetype(page); + if (!cc->sync && !migrate_async_suitable(mt)) { + cc->finished_update_migrate = true; + skipped_async_unsuitable = true; + goto next_pageblock; + } + } /* * Skip if free. page_order cannot be used without zone->lock @@ -537,18 +555,6 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, continue; /* - * For async migration, also only scan in MOVABLE blocks. Async - * migration is optimistic to see if the minimum amount of work - * satisfies the allocation - */ - if (!cc->sync && last_pageblock_nr != pageblock_nr && - !migrate_async_suitable(get_pageblock_migratetype(page))) { - cc->finished_update_migrate = true; - skipped_async_unsuitable = true; - goto next_pageblock; - } - - /* * Check may be lockless but that's ok as we recheck later. * It's possible to migrate LRU pages and balloon pages * Skip any other type of page @@ -557,11 +563,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, if (unlikely(balloon_page_movable(page))) { if (locked && balloon_page_isolate(page)) { /* Successfully isolated */ - cc->finished_update_migrate = true; - list_add(&page->lru, migratelist); - cc->nr_migratepages++; - nr_isolated++; - goto check_compact_cluster; + goto isolate_success; } } continue; @@ -607,12 +609,6 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, continue; } - if (!cc->sync) - mode |= ISOLATE_ASYNC_MIGRATE; - - if (unevictable) - mode |= ISOLATE_UNEVICTABLE; - lruvec = mem_cgroup_page_lruvec(page, zone); /* Try isolate the page */ @@ -622,13 +618,14 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, VM_BUG_ON_PAGE(PageTransCompound(page), page); /* Successfully isolated */ - cc->finished_update_migrate = true; del_page_from_lru_list(page, lruvec, page_lru(page)); + +isolate_success: + cc->finished_update_migrate = true; list_add(&page->lru, migratelist); cc->nr_migratepages++; nr_isolated++; -check_compact_cluster: /* Avoid isolating too much */ if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) { ++low_pfn; @@ -639,7 +636,6 @@ check_compact_cluster: next_pageblock: low_pfn = ALIGN(low_pfn + 1, pageblock_nr_pages) - 1; - last_pageblock_nr = pageblock_nr; } acct_isolated(zone, locked, cc); diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c new file mode 100644 index 00000000000..e10ccd299d6 --- /dev/null +++ b/mm/early_ioremap.c @@ -0,0 +1,245 @@ +/* + * Provide common bits of early_ioremap() support for architectures needing + * temporary mappings during boot before ioremap() is available. + * + * This is mostly a direct copy of the x86 early_ioremap implementation. + * + * (C) Copyright 1995 1996, 2014 Linus Torvalds + * + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <asm/fixmap.h> + +#ifdef CONFIG_MMU +static int early_ioremap_debug __initdata; + +static int __init early_ioremap_debug_setup(char *str) +{ + early_ioremap_debug = 1; + + return 0; +} +early_param("early_ioremap_debug", early_ioremap_debug_setup); + +static int after_paging_init __initdata; + +void __init __weak early_ioremap_shutdown(void) +{ +} + +void __init early_ioremap_reset(void) +{ + early_ioremap_shutdown(); + after_paging_init = 1; +} + +/* + * Generally, ioremap() is available after paging_init() has been called. + * Architectures wanting to allow early_ioremap after paging_init() can + * define __late_set_fixmap and __late_clear_fixmap to do the right thing. + */ +#ifndef __late_set_fixmap +static inline void __init __late_set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t prot) +{ + BUG(); +} +#endif + +#ifndef __late_clear_fixmap +static inline void __init __late_clear_fixmap(enum fixed_addresses idx) +{ + BUG(); +} +#endif + +static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; +static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; +static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; + +void __init early_ioremap_setup(void) +{ + int i; + + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) + if (WARN_ON(prev_map[i])) + break; + + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) + slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); +} + +static int __init check_early_ioremap_leak(void) +{ + int count = 0; + int i; + + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) + if (prev_map[i]) + count++; + + if (WARN(count, KERN_WARNING + "Debug warning: early ioremap leak of %d areas detected.\n" + "please boot with early_ioremap_debug and report the dmesg.\n", + count)) + return 1; + return 0; +} +late_initcall(check_early_ioremap_leak); + +static void __init __iomem * +__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) +{ + unsigned long offset; + resource_size_t last_addr; + unsigned int nrpages; + enum fixed_addresses idx; + int i, slot; + + WARN_ON(system_state != SYSTEM_BOOTING); + + slot = -1; + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { + if (!prev_map[i]) { + slot = i; + break; + } + } + + if (WARN(slot < 0, "%s(%08llx, %08lx) not found slot\n", + __func__, (u64)phys_addr, size)) + return NULL; + + /* Don't allow wraparound or zero size */ + last_addr = phys_addr + size - 1; + if (WARN_ON(!size || last_addr < phys_addr)) + return NULL; + + prev_size[slot] = size; + /* + * Mappings have to be page-aligned + */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr + 1) - phys_addr; + + /* + * Mappings have to fit in the FIX_BTMAP area. + */ + nrpages = size >> PAGE_SHIFT; + if (WARN_ON(nrpages > NR_FIX_BTMAPS)) + return NULL; + + /* + * Ok, go for it.. + */ + idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; + while (nrpages > 0) { + if (after_paging_init) + __late_set_fixmap(idx, phys_addr, prot); + else + __early_set_fixmap(idx, phys_addr, prot); + phys_addr += PAGE_SIZE; + --idx; + --nrpages; + } + WARN(early_ioremap_debug, "%s(%08llx, %08lx) [%d] => %08lx + %08lx\n", + __func__, (u64)phys_addr, size, slot, offset, slot_virt[slot]); + + prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); + return prev_map[slot]; +} + +void __init early_iounmap(void __iomem *addr, unsigned long size) +{ + unsigned long virt_addr; + unsigned long offset; + unsigned int nrpages; + enum fixed_addresses idx; + int i, slot; + + slot = -1; + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { + if (prev_map[i] == addr) { + slot = i; + break; + } + } + + if (WARN(slot < 0, "early_iounmap(%p, %08lx) not found slot\n", + addr, size)) + return; + + if (WARN(prev_size[slot] != size, + "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n", + addr, size, slot, prev_size[slot])) + return; + + WARN(early_ioremap_debug, "early_iounmap(%p, %08lx) [%d]\n", + addr, size, slot); + + virt_addr = (unsigned long)addr; + if (WARN_ON(virt_addr < fix_to_virt(FIX_BTMAP_BEGIN))) + return; + + offset = virt_addr & ~PAGE_MASK; + nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT; + + idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; + while (nrpages > 0) { + if (after_paging_init) + __late_clear_fixmap(idx); + else + __early_set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR); + --idx; + --nrpages; + } + prev_map[slot] = NULL; +} + +/* Remap an IO device */ +void __init __iomem * +early_ioremap(resource_size_t phys_addr, unsigned long size) +{ + return __early_ioremap(phys_addr, size, FIXMAP_PAGE_IO); +} + +/* Remap memory */ +void __init * +early_memremap(resource_size_t phys_addr, unsigned long size) +{ + return (__force void *)__early_ioremap(phys_addr, size, + FIXMAP_PAGE_NORMAL); +} +#else /* CONFIG_MMU */ + +void __init __iomem * +early_ioremap(resource_size_t phys_addr, unsigned long size) +{ + return (__force void __iomem *)phys_addr; +} + +/* Remap memory */ +void __init * +early_memremap(resource_size_t phys_addr, unsigned long size) +{ + return (void *)phys_addr; +} + +void __init early_iounmap(void __iomem *addr, unsigned long size) +{ +} + +#endif /* CONFIG_MMU */ + + +void __init early_memunmap(void *addr, unsigned long size) +{ + early_iounmap((__force void __iomem *)addr, size); +} diff --git a/mm/filemap.c b/mm/filemap.c index 21781f1fe52..27ebc0c9571 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -33,6 +33,7 @@ #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */ #include <linux/memcontrol.h> #include <linux/cleancache.h> +#include <linux/rmap.h> #include "internal.h" #define CREATE_TRACE_POINTS @@ -562,7 +563,7 @@ static int __add_to_page_cache_locked(struct page *page, VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageSwapBacked(page), page); - error = mem_cgroup_cache_charge(page, current->mm, + error = mem_cgroup_charge_file(page, current->mm, gfp_mask & GFP_RECLAIM_MASK); if (error) return error; @@ -1952,11 +1953,11 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = mapping->host; pgoff_t offset = vmf->pgoff; struct page *page; - pgoff_t size; + loff_t size; int ret = 0; - size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (offset >= size) + size = round_up(i_size_read(inode), PAGE_CACHE_SIZE); + if (offset >= size >> PAGE_CACHE_SHIFT) return VM_FAULT_SIGBUS; /* @@ -2005,8 +2006,8 @@ retry_find: * Found the page and have a reference on it. * We must recheck i_size under page lock. */ - size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (unlikely(offset >= size)) { + size = round_up(i_size_read(inode), PAGE_CACHE_SIZE); + if (unlikely(offset >= size >> PAGE_CACHE_SHIFT)) { unlock_page(page); page_cache_release(page); return VM_FAULT_SIGBUS; @@ -2064,6 +2065,78 @@ page_not_uptodate: } EXPORT_SYMBOL(filemap_fault); +void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct radix_tree_iter iter; + void **slot; + struct file *file = vma->vm_file; + struct address_space *mapping = file->f_mapping; + loff_t size; + struct page *page; + unsigned long address = (unsigned long) vmf->virtual_address; + unsigned long addr; + pte_t *pte; + + rcu_read_lock(); + radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, vmf->pgoff) { + if (iter.index > vmf->max_pgoff) + break; +repeat: + page = radix_tree_deref_slot(slot); + if (unlikely(!page)) + goto next; + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) + break; + else + goto next; + } + + if (!page_cache_get_speculative(page)) + goto repeat; + + /* Has the page moved? */ + if (unlikely(page != *slot)) { + page_cache_release(page); + goto repeat; + } + + if (!PageUptodate(page) || + PageReadahead(page) || + PageHWPoison(page)) + goto skip; + if (!trylock_page(page)) + goto skip; + + if (page->mapping != mapping || !PageUptodate(page)) + goto unlock; + + size = round_up(i_size_read(mapping->host), PAGE_CACHE_SIZE); + if (page->index >= size >> PAGE_CACHE_SHIFT) + goto unlock; + + pte = vmf->pte + page->index - vmf->pgoff; + if (!pte_none(*pte)) + goto unlock; + + if (file->f_ra.mmap_miss > 0) + file->f_ra.mmap_miss--; + addr = address + (page->index - vmf->pgoff) * PAGE_SIZE; + do_set_pte(vma, addr, page, pte, false, false); + unlock_page(page); + goto next; +unlock: + unlock_page(page); +skip: + page_cache_release(page); +next: + if (iter.index == vmf->max_pgoff) + break; + } + rcu_read_unlock(); +} +EXPORT_SYMBOL(filemap_map_pages); + int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; @@ -2093,6 +2166,7 @@ EXPORT_SYMBOL(filemap_page_mkwrite); const struct vm_operations_struct generic_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = filemap_page_mkwrite, .remap_pages = generic_file_remap_pages, }; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 6ac89e9f82e..64635f5278f 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -827,7 +827,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; } - if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { + if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_KERNEL))) { put_page(page); count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; @@ -968,7 +968,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, __GFP_OTHER_NODE, vma, address, page_to_nid(page)); if (unlikely(!pages[i] || - mem_cgroup_newpage_charge(pages[i], mm, + mem_cgroup_charge_anon(pages[i], mm, GFP_KERNEL))) { if (pages[i]) put_page(pages[i]); @@ -1101,7 +1101,7 @@ alloc: goto out; } - if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { + if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) { put_page(new_page); if (page) { split_huge_page(page); @@ -1891,17 +1891,22 @@ out: int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) { - struct mm_struct *mm = vma->vm_mm; - switch (advice) { case MADV_HUGEPAGE: +#ifdef CONFIG_S390 + /* + * qemu blindly sets MADV_HUGEPAGE on all allocations, but s390 + * can't handle this properly after s390_enable_sie, so we simply + * ignore the madvise to prevent qemu from causing a SIGSEGV. + */ + if (mm_has_pgste(vma->vm_mm)) + return 0; +#endif /* * Be somewhat over-protective like KSM for now! */ if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP)) return -EINVAL; - if (mm->def_flags & VM_NOHUGEPAGE) - return -EINVAL; *vm_flags &= ~VM_NOHUGEPAGE; *vm_flags |= VM_HUGEPAGE; /* @@ -2354,7 +2359,7 @@ static void collapse_huge_page(struct mm_struct *mm, if (!new_page) return; - if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) + if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))) return; /* diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7c02b9dadfb..dd30f22b35e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -13,6 +13,7 @@ #include <linux/nodemask.h> #include <linux/pagemap.h> #include <linux/mempolicy.h> +#include <linux/compiler.h> #include <linux/cpuset.h> #include <linux/mutex.h> #include <linux/bootmem.h> @@ -1535,6 +1536,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count, while (min_count < persistent_huge_pages(h)) { if (!free_pool_huge_page(h, nodes_allowed, 0)) break; + cond_resched_lock(&hugetlb_lock); } while (count < persistent_huge_pages(h)) { if (!adjust_pool_surplus(h, nodes_allowed, 1)) @@ -2690,7 +2692,8 @@ retry_avoidcopy: BUG_ON(huge_pte_none(pte)); spin_lock(ptl); ptep = huge_pte_offset(mm, address & huge_page_mask(h)); - if (likely(pte_same(huge_ptep_get(ptep), pte))) + if (likely(ptep && + pte_same(huge_ptep_get(ptep), pte))) goto retry_avoidcopy; /* * race occurs while re-acquiring page table @@ -2734,7 +2737,7 @@ retry_avoidcopy: */ spin_lock(ptl); ptep = huge_pte_offset(mm, address & huge_page_mask(h)); - if (likely(pte_same(huge_ptep_get(ptep), pte))) { + if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) { ClearPagePrivate(new_page); /* Break COW */ @@ -2896,8 +2899,7 @@ retry: if (anon_rmap) { ClearPagePrivate(page); hugepage_add_new_anon_rmap(page, vma, address); - } - else + } else page_dup_rmap(page); new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_SHARED))); @@ -3185,6 +3187,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, BUG_ON(address >= end); flush_cache_range(vma, address, end); + mmu_notifier_invalidate_range_start(mm, start, end); mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex); for (; address < end; address += huge_page_size(h)) { spinlock_t *ptl; @@ -3214,6 +3217,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, */ flush_tlb_range(vma, start, end); mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); + mmu_notifier_invalidate_range_end(mm, start, end); return pages << h->order; } @@ -3518,7 +3522,7 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address, #else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */ /* Can be overriden by architectures */ -__attribute__((weak)) struct page * +struct page * __weak follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int write) { diff --git a/mm/internal.h b/mm/internal.h index 29e1e761f9e..07b67361a40 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -11,6 +11,7 @@ #ifndef __MM_INTERNAL_H #define __MM_INTERNAL_H +#include <linux/fs.h> #include <linux/mm.h> void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, @@ -21,6 +22,20 @@ static inline void set_page_count(struct page *page, int v) atomic_set(&page->_count, v); } +extern int __do_page_cache_readahead(struct address_space *mapping, + struct file *filp, pgoff_t offset, unsigned long nr_to_read, + unsigned long lookahead_size); + +/* + * Submit IO for the read-ahead request in file_ra_state. + */ +static inline unsigned long ra_submit(struct file_ra_state *ra, + struct address_space *mapping, struct file *filp) +{ + return __do_page_cache_readahead(mapping, filp, + ra->start, ra->size, ra->async_size); +} + /* * Turn a non-refcounted page (->_count == 0) into refcounted with * a count of one. @@ -370,5 +385,6 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, #define ALLOC_HIGH 0x20 /* __GFP_HIGH set */ #define ALLOC_CPUSET 0x40 /* check for correct cpuset */ #define ALLOC_CMA 0x80 /* allow allocations from CMA areas */ +#define ALLOC_FAIR 0x100 /* fair zone allocation */ #endif /* __MM_INTERNAL_H */ diff --git a/mm/memblock.c b/mm/memblock.c index 7fe5354e755..e9d6ca9a01a 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1253,7 +1253,7 @@ phys_addr_t __init memblock_mem_size(unsigned long limit_pfn) pages += end_pfn - start_pfn; } - return (phys_addr_t)pages << PAGE_SHIFT; + return PFN_PHYS(pages); } /* lowest address */ @@ -1271,16 +1271,14 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void) void __init memblock_enforce_memory_limit(phys_addr_t limit) { - unsigned long i; phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX; + struct memblock_region *r; if (!limit) return; /* find out max address */ - for (i = 0; i < memblock.memory.cnt; i++) { - struct memblock_region *r = &memblock.memory.regions[i]; - + for_each_memblock(memory, r) { if (limit <= r->size) { max_addr = r->base + limit; break; @@ -1326,7 +1324,7 @@ int __init_memblock memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, unsigned long *end_pfn) { struct memblock_type *type = &memblock.memory; - int mid = memblock_search(type, (phys_addr_t)pfn << PAGE_SHIFT); + int mid = memblock_search(type, PFN_PHYS(pfn)); if (mid == -1) return -1; @@ -1379,13 +1377,12 @@ int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t si void __init_memblock memblock_trim_memory(phys_addr_t align) { - int i; phys_addr_t start, end, orig_start, orig_end; - struct memblock_type *mem = &memblock.memory; + struct memblock_region *r; - for (i = 0; i < mem->cnt; i++) { - orig_start = mem->regions[i].base; - orig_end = mem->regions[i].base + mem->regions[i].size; + for_each_memblock(memory, r) { + orig_start = r->base; + orig_end = r->base + r->size; start = round_up(orig_start, align); end = round_down(orig_end, align); @@ -1393,11 +1390,12 @@ void __init_memblock memblock_trim_memory(phys_addr_t align) continue; |