diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/hugetlb.c | 54 | ||||
-rw-r--r-- | mm/internal.h | 29 | ||||
-rw-r--r-- | mm/mempolicy.c | 18 | ||||
-rw-r--r-- | mm/migrate.c | 2 | ||||
-rw-r--r-- | mm/mlock.c | 16 | ||||
-rw-r--r-- | mm/mmap.c | 2 | ||||
-rw-r--r-- | mm/oom_kill.c | 3 | ||||
-rw-r--r-- | mm/page_alloc.c | 32 | ||||
-rw-r--r-- | mm/page_cgroup.c | 4 | ||||
-rw-r--r-- | mm/page_isolation.c | 5 | ||||
-rw-r--r-- | mm/sparse-vmemmap.c | 2 | ||||
-rw-r--r-- | mm/vmalloc.c | 9 |
12 files changed, 139 insertions, 37 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 421aee99b84..6058b53dcb8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -354,11 +354,26 @@ static int vma_has_reserves(struct vm_area_struct *vma) return 0; } +static void clear_gigantic_page(struct page *page, + unsigned long addr, unsigned long sz) +{ + int i; + struct page *p = page; + + might_sleep(); + for (i = 0; i < sz/PAGE_SIZE; i++, p = mem_map_next(p, page, i)) { + cond_resched(); + clear_user_highpage(p, addr + i * PAGE_SIZE); + } +} static void clear_huge_page(struct page *page, unsigned long addr, unsigned long sz) { int i; + if (unlikely(sz > MAX_ORDER_NR_PAGES)) + return clear_gigantic_page(page, addr, sz); + might_sleep(); for (i = 0; i < sz/PAGE_SIZE; i++) { cond_resched(); @@ -366,12 +381,32 @@ static void clear_huge_page(struct page *page, } } +static void copy_gigantic_page(struct page *dst, struct page *src, + unsigned long addr, struct vm_area_struct *vma) +{ + int i; + struct hstate *h = hstate_vma(vma); + struct page *dst_base = dst; + struct page *src_base = src; + might_sleep(); + for (i = 0; i < pages_per_huge_page(h); ) { + cond_resched(); + copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma); + + i++; + dst = mem_map_next(dst, dst_base, i); + src = mem_map_next(src, src_base, i); + } +} static void copy_huge_page(struct page *dst, struct page *src, unsigned long addr, struct vm_area_struct *vma) { int i; struct hstate *h = hstate_vma(vma); + if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) + return copy_gigantic_page(dst, src, addr, vma); + might_sleep(); for (i = 0; i < pages_per_huge_page(h); i++) { cond_resched(); @@ -456,6 +491,8 @@ static void update_and_free_page(struct hstate *h, struct page *page) { int i; + VM_BUG_ON(h->order >= MAX_ORDER); + h->nr_huge_pages--; h->nr_huge_pages_node[page_to_nid(page)]--; for (i = 0; i < pages_per_huge_page(h); i++) { @@ -970,6 +1007,14 @@ found: return 1; } +static void prep_compound_huge_page(struct page *page, int order) +{ + if (unlikely(order > (MAX_ORDER - 1))) + prep_compound_gigantic_page(page, order); + else + prep_compound_page(page, order); +} + /* Put bootmem huge pages into the standard lists after mem_map is up */ static void __init gather_bootmem_prealloc(void) { @@ -980,7 +1025,7 @@ static void __init gather_bootmem_prealloc(void) struct hstate *h = m->hstate; __ClearPageReserved(page); WARN_ON(page_count(page) != 1); - prep_compound_page(page, h->order); + prep_compound_huge_page(page, h->order); prep_new_huge_page(h, page, page_to_nid(page)); } } @@ -1751,6 +1796,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page, unsigned long address) { + struct hstate *h = hstate_vma(vma); struct vm_area_struct *iter_vma; struct address_space *mapping; struct prio_tree_iter iter; @@ -1760,7 +1806,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, * vm_pgoff is in PAGE_SIZE units, hence the different calculation * from page cache lookup which is in HPAGE_SIZE units. */ - address = address & huge_page_mask(hstate_vma(vma)); + address = address & huge_page_mask(h); pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + (vma->vm_pgoff >> PAGE_SHIFT); mapping = (struct address_space *)page_private(page); @@ -1779,7 +1825,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, */ if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER)) unmap_hugepage_range(iter_vma, - address, address + HPAGE_SIZE, + address, address + huge_page_size(h), page); } @@ -2130,7 +2176,7 @@ same_page: if (zeropage_ok) pages[i] = ZERO_PAGE(0); else - pages[i] = page + pfn_offset; + pages[i] = mem_map_offset(page, pfn_offset); get_page(pages[i]); } diff --git a/mm/internal.h b/mm/internal.h index e4e728bdf32..13333bc2eb6 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -17,6 +17,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long floor, unsigned long ceiling); extern void prep_compound_page(struct page *page, unsigned long order); +extern void prep_compound_gigantic_page(struct page *page, unsigned long order); static inline void set_page_count(struct page *page, int v) { @@ -176,6 +177,34 @@ static inline void free_page_mlock(struct page *page) { } #endif /* CONFIG_UNEVICTABLE_LRU */ /* + * Return the mem_map entry representing the 'offset' subpage within + * the maximally aligned gigantic page 'base'. Handle any discontiguity + * in the mem_map at MAX_ORDER_NR_PAGES boundaries. + */ +static inline struct page *mem_map_offset(struct page *base, int offset) +{ + if (unlikely(offset >= MAX_ORDER_NR_PAGES)) + return pfn_to_page(page_to_pfn(base) + offset); + return base + offset; +} + +/* + * Iterator over all subpages withing the maximally aligned gigantic + * page 'base'. Handle any discontiguity in the mem_map. + */ +static inline struct page *mem_map_next(struct page *iter, + struct page *base, int offset) +{ + if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) { + unsigned long pfn = page_to_pfn(base) + offset; + if (!pfn_valid(pfn)) + return NULL; + return pfn_to_page(pfn); + } + return iter + 1; +} + +/* * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node, * so all functions starting at paging_init should be marked __init * in those cases. SPARSEMEM, however, allows for memory hotplug, diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 7555219c535..e412ffa8e52 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -489,12 +489,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, int err; struct vm_area_struct *first, *vma, *prev; - if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { - - err = migrate_prep(); - if (err) - return ERR_PTR(err); - } first = find_vma(mm, start); if (!first) @@ -809,9 +803,13 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) { int busy = 0; - int err = 0; + int err; nodemask_t tmp; + err = migrate_prep(); + if (err) + return err; + down_read(&mm->mmap_sem); err = migrate_vmas(mm, from_nodes, to_nodes, flags); @@ -974,6 +972,12 @@ static long do_mbind(unsigned long start, unsigned long len, start, start + len, mode, mode_flags, nmask ? nodes_addr(*nmask)[0] : -1); + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { + + err = migrate_prep(); + if (err) + return err; + } down_write(&mm->mmap_sem); vma = check_range(mm, start, end, nmask, flags | MPOL_MF_INVERT, &pagelist); diff --git a/mm/migrate.c b/mm/migrate.c index 142284229ce..9dd10da1cc2 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -841,12 +841,12 @@ static int do_move_page_to_node_array(struct mm_struct *mm, struct page_to_node *pp; LIST_HEAD(pagelist); + migrate_prep(); down_read(&mm->mmap_sem); /* * Build a list of pages to migrate */ - migrate_prep(); for (pp = pm; pp->node != MAX_NUMNODES; pp++) { struct vm_area_struct *vma; struct page *page; diff --git a/mm/mlock.c b/mm/mlock.c index 008ea70b7af..a6da2aee940 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -66,14 +66,10 @@ void __clear_page_mlock(struct page *page) putback_lru_page(page); } else { /* - * Page not on the LRU yet. Flush all pagevecs and retry. + * We lost the race. the page already moved to evictable list. */ - lru_add_drain_all(); - if (!isolate_lru_page(page)) - putback_lru_page(page); - else if (PageUnevictable(page)) + if (PageUnevictable(page)) count_vm_event(UNEVICTABLE_PGSTRANDED); - } } @@ -187,8 +183,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, if (vma->vm_flags & VM_WRITE) gup_flags |= GUP_FLAGS_WRITE; - lru_add_drain_all(); /* push cached pages to LRU */ - while (nr_pages > 0) { int i; @@ -251,8 +245,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, ret = 0; } - lru_add_drain_all(); /* to update stats */ - return ret; /* count entire vma as locked_vm */ } @@ -546,6 +538,8 @@ asmlinkage long sys_mlock(unsigned long start, size_t len) if (!can_do_mlock()) return -EPERM; + lru_add_drain_all(); /* flush pagevec */ + down_write(¤t->mm->mmap_sem); len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; @@ -612,6 +606,8 @@ asmlinkage long sys_mlockall(int flags) if (!can_do_mlock()) goto out; + lru_add_drain_all(); /* flush pagevec */ + down_write(¤t->mm->mmap_sem); lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; diff --git a/mm/mmap.c b/mm/mmap.c index de14ac21e5b..d4855a682ab 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1704,7 +1704,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) vma = find_vma_prev(mm, addr, &prev); if (vma && (vma->vm_start <= addr)) return vma; - if (expand_stack(prev, addr)) + if (!prev || expand_stack(prev, addr)) return NULL; if (prev->vm_flags & VM_LOCKED) { if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 0e0b282a207..558f9afe6e4 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -38,7 +38,6 @@ static DEFINE_SPINLOCK(zone_scan_mutex); * badness - calculate a numeric value for how bad this task has been * @p: task struct of which task we should calculate * @uptime: current uptime in seconds - * @mem: target memory controller * * The formula used is relatively simple and documented inline in the * function. The main rationale is that we want to select a good task @@ -295,6 +294,8 @@ static void dump_tasks(const struct mem_cgroup *mem) continue; if (mem && !task_in_mem_cgroup(p, mem)) continue; + if (!thread_group_leader(p)) + continue; task_lock(p); printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d0a240fbb8b..d8ac0147456 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -263,24 +263,39 @@ void prep_compound_page(struct page *page, unsigned long order) { int i; int nr_pages = 1 << order; + + set_compound_page_dtor(page, free_compound_page); + set_compound_order(page, order); + __SetPageHead(page); + for (i = 1; i < nr_pages; i++) { + struct page *p = page + i; + + __SetPageTail(p); + p->first_page = page; + } +} + +#ifdef CONFIG_HUGETLBFS +void prep_compound_gigantic_page(struct page *page, unsigned long order) +{ + int i; + int nr_pages = 1 << order; struct page *p = page + 1; set_compound_page_dtor(page, free_compound_page); set_compound_order(page, order); __SetPageHead(page); - for (i = 1; i < nr_pages; i++, p++) { - if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0)) - p = pfn_to_page(page_to_pfn(page) + i); + for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { __SetPageTail(p); p->first_page = page; } } +#endif static void destroy_compound_page(struct page *page, unsigned long order) { int i; int nr_pages = 1 << order; - struct page *p = page + 1; if (unlikely(compound_order(page) != order)) bad_page(page); @@ -288,9 +303,8 @@ static void destroy_compound_page(struct page *page, unsigned long order) if (unlikely(!PageHead(page))) bad_page(page); __ClearPageHead(page); - for (i = 1; i < nr_pages; i++, p++) { - if (unlikely((i & (MAX_ORDER_NR_PAGES - 1)) == 0)) - p = pfn_to_page(page_to_pfn(page) + i); + for (i = 1; i < nr_pages; i++) { + struct page *p = page + i; if (unlikely(!PageTail(p) | (p->first_page != page))) @@ -1547,6 +1561,10 @@ nofail_alloc: /* We now go into synchronous reclaim */ cpuset_memory_pressure_bump(); + /* + * The task's cpuset might have expanded its set of allowable nodes + */ + cpuset_update_task_memory_state(); p->flags |= PF_MEMALLOC; reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index f59d797dc5a..1223d927904 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -165,7 +165,7 @@ int online_page_cgroup(unsigned long start_pfn, unsigned long start, end, pfn; int fail = 0; - start = start_pfn & (PAGES_PER_SECTION - 1); + start = start_pfn & ~(PAGES_PER_SECTION - 1); end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) { @@ -188,7 +188,7 @@ int offline_page_cgroup(unsigned long start_pfn, { unsigned long start, end, pfn; - start = start_pfn & (PAGES_PER_SECTION - 1); + start = start_pfn & ~(PAGES_PER_SECTION - 1); end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) diff --git a/mm/page_isolation.c b/mm/page_isolation.c index b70a7fec1ff..5e0ffd96745 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -130,10 +130,11 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn) if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE) break; } - if (pfn < end_pfn) + page = __first_valid_page(start_pfn, end_pfn - start_pfn); + if ((pfn < end_pfn) || !page) return -EBUSY; /* Check all pages are free or Marked as ISOLATED */ - zone = page_zone(pfn_to_page(pfn)); + zone = page_zone(page); spin_lock_irqsave(&zone->lock, flags); ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn); spin_unlock_irqrestore(&zone->lock, flags); diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index a91b5f8fcaf..a13ea6401ae 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -64,7 +64,7 @@ void __meminit vmemmap_verify(pte_t *pte, int node, unsigned long pfn = pte_pfn(*pte); int actual_node = early_pfn_to_nid(pfn); - if (actual_node != node) + if (node_distance(actual_node, node) > LOCAL_DISTANCE) printk(KERN_WARNING "[%lx-%lx] potential offnode " "page_structs\n", start, end - 1); } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index f1cc03bbf6a..ba6b0f5f7fa 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -178,7 +178,7 @@ static int vmap_page_range(unsigned long addr, unsigned long end, static inline int is_vmalloc_or_module_addr(const void *x) { /* - * x86-64 and sparc64 put modules in a special place, + * ARM, x86-64 and sparc64 put modules in a special place, * and fall back on vmalloc() if that fails. Others * just put it in the vmalloc space. */ @@ -592,6 +592,8 @@ static void free_unmap_vmap_area_addr(unsigned long addr) #define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE) +static bool vmap_initialized __read_mostly = false; + struct vmap_block_queue { spinlock_t lock; struct list_head free; @@ -828,6 +830,9 @@ void vm_unmap_aliases(void) int cpu; int flush = 0; + if (unlikely(!vmap_initialized)) + return; + for_each_possible_cpu(cpu) { struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); struct vmap_block *vb; @@ -942,6 +947,8 @@ void __init vmalloc_init(void) INIT_LIST_HEAD(&vbq->dirty); vbq->nr_dirty = 0; } + + vmap_initialized = true; } void unmap_kernel_range(unsigned long addr, unsigned long size) |