Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig           |    6
-rw-r--r--  mm/Kconfig.debug     |    5
-rw-r--r--  mm/backing-dev.c     |    8
-rw-r--r--  mm/bootmem.c         |   24
-rw-r--r--  mm/compaction.c      |   19
-rw-r--r--  mm/debug-pagealloc.c |    3
-rw-r--r--  mm/fadvise.c         |    3
-rw-r--r--  mm/failslab.c        |    2
-rw-r--r--  mm/filemap.c         |   38
-rw-r--r--  mm/huge_memory.c     |   93
-rw-r--r--  mm/hugetlb.c         |   55
-rw-r--r--  mm/ksm.c             |   11
-rw-r--r--  mm/memblock.c        |  961
-rw-r--r--  mm/memcontrol.c      | 1230
-rw-r--r--  mm/memory-failure.c  |    2
-rw-r--r--  mm/memory.c          |    4
-rw-r--r--  mm/memory_hotplug.c  |    2
-rw-r--r--  mm/mempolicy.c       |   27
-rw-r--r--  mm/mempool.c         |  104
-rw-r--r--  mm/migrate.c         |  187
-rw-r--r--  mm/mmap.c            |   60
-rw-r--r--  mm/mremap.c          |    9
-rw-r--r--  mm/nobootmem.c       |   45
-rw-r--r--  mm/oom_kill.c        |   52
-rw-r--r--  mm/page-writeback.c  |  568
-rw-r--r--  mm/page_alloc.c      |  818
-rw-r--r--  mm/page_cgroup.c     |  164
-rw-r--r--  mm/percpu.c          |    6
-rw-r--r--  mm/rmap.c            |   65
-rw-r--r--  mm/shmem.c           |   17
-rw-r--r--  mm/slab.c            |   41
-rw-r--r--  mm/slub.c            |   99
-rw-r--r--  mm/swap.c            |   93
-rw-r--r--  mm/swap_state.c      |   11
-rw-r--r--  mm/swapfile.c        |   15
-rw-r--r--  mm/vmalloc.c         |   48
-rw-r--r--  mm/vmscan.c          |  730
-rw-r--r--  mm/vmstat.c          |    2
38 files changed, 3067 insertions, 2560 deletions
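Before the diff itself: the mm/bootmem.c hunks below rework free_all_bootmem_core() to free boot pages a bitmap word at a time — when the page cursor is BITS_PER_LONG-aligned and the inverted map word is all ones (every page in that word unreserved), a whole word's worth of pages is released as one higher-order free; otherwise the free pages are released bit by bit and the cursor snaps forward to the next word boundary. The following is a minimal userspace sketch of that scan, not kernel code: free_range(), free_pages_stub(), ALIGN_UP() and the map layout are illustrative stand-ins, and the cursor is assumed word-aligned at entry (as it is in the kernel loop, which starts at a node's first pfn and realigns after every partial word).

#include <stdio.h>
#include <limits.h>

#define BITS_PER_LONG	(sizeof(long) * CHAR_BIT)
#define IS_ALIGNED(x, a) (((x) & ((a) - 1)) == 0)
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

/* Stand-in for __free_pages_bootmem(): just counts pages "freed". */
static unsigned long freed;
static void free_pages_stub(unsigned long pfn, unsigned long count)
{
	freed += count;
}

/*
 * Walk a reservation bitmap (one bit per page, set = reserved) from
 * page 'start' to 'end'.  When the cursor sits on a word boundary and
 * the whole word is clear, release BITS_PER_LONG pages in one go;
 * otherwise release the free pages one by one and snap the cursor to
 * the next word boundary -- the same shape as the reworked
 * free_all_bootmem_core() loop in the mm/bootmem.c hunks below.
 */
static void free_range(const unsigned long *map, unsigned long start,
		       unsigned long end)
{
	while (start < end) {
		unsigned long vec = ~map[start / BITS_PER_LONG];

		if (IS_ALIGNED(start, BITS_PER_LONG) && vec == ~0UL &&
		    start + BITS_PER_LONG <= end) {
			/* Fully unreserved word: bulk-free the block. */
			free_pages_stub(start, BITS_PER_LONG);
			start += BITS_PER_LONG;
		} else {
			unsigned long off = 0;

			/* Partial word: free page by page, then realign. */
			while (vec && start + off < end && off < BITS_PER_LONG) {
				if (vec & 1)
					free_pages_stub(start + off, 1);
				vec >>= 1;
				off++;
			}
			start = ALIGN_UP(start + 1, BITS_PER_LONG);
		}
	}
}

int main(void)
{
	unsigned long map[2] = { 0 };	/* two words of pages */
	unsigned long npages = 2 * BITS_PER_LONG;

	map[0] |= 1UL << 3;	/* reserve page 3: word 0 goes bit by bit */
				/* word 1 is clear: taken in one bulk free */
	free_range(map, 0, npages);
	printf("freed %lu of %lu pages\n", freed, npages);	/* npages - 1 */
	return 0;
}

Run against this map, the first word takes the slow path (one reserved page) and the second word takes the bulk path, exercising both branches.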
diff --git a/mm/Kconfig b/mm/Kconfig
index 011b110365c..e338407f122 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -131,6 +131,12 @@ config SPARSEMEM_VMEMMAP
 config HAVE_MEMBLOCK
 	boolean
 
+config HAVE_MEMBLOCK_NODE_MAP
+	boolean
+
+config ARCH_DISCARD_MEMBLOCK
+	boolean
+
 config NO_BOOTMEM
 	boolean
 
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 8b1a477162d..4b2443254de 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -4,6 +4,7 @@ config DEBUG_PAGEALLOC
 	depends on !HIBERNATION || ARCH_SUPPORTS_DEBUG_PAGEALLOC && !PPC && !SPARC
 	depends on !KMEMCHECK
 	select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC
+	select PAGE_GUARD if ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	---help---
 	  Unmap pages from the kernel linear mapping after free_pages().
 	  This results in a large slowdown, but helps to find certain types
@@ -22,3 +23,7 @@ config WANT_PAGE_DEBUG_FLAGS
 config PAGE_POISONING
 	bool
 	select WANT_PAGE_DEBUG_FLAGS
+
+config PAGE_GUARD
+	bool
+	select WANT_PAGE_DEBUG_FLAGS
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 71034f41a2b..7ba8feae11b 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -600,14 +600,10 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 
 	/*
 	 * Finally, kill the kernel thread. We don't need to be RCU
-	 * safe anymore, since the bdi is gone from visibility. Force
-	 * unfreeze of the thread before calling kthread_stop(), otherwise
-	 * it would never exet if it is currently stuck in the refrigerator.
+	 * safe anymore, since the bdi is gone from visibility.
 	 */
-	if (bdi->wb.task) {
-		thaw_process(bdi->wb.task);
+	if (bdi->wb.task)
 		kthread_stop(bdi->wb.task);
-	}
 }
 
 /*
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 1a77012ecdb..668e94df8cf 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -56,7 +56,7 @@ early_param("bootmem_debug", bootmem_debug_setup);
 
 static unsigned long __init bootmap_bytes(unsigned long pages)
 {
-	unsigned long bytes = (pages + 7) / 8;
+	unsigned long bytes = DIV_ROUND_UP(pages, 8);
 
 	return ALIGN(bytes, sizeof(long));
 }
@@ -171,7 +171,6 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size)
 
 static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 {
-	int aligned;
 	struct page *page;
 	unsigned long start, end, pages, count = 0;
 
@@ -181,14 +180,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 	start = bdata->node_min_pfn;
 	end = bdata->node_low_pfn;
 
-	/*
-	 * If the start is aligned to the machines wordsize, we might
-	 * be able to free pages in bulks of that order.
-	 */
-	aligned = !(start & (BITS_PER_LONG - 1));
-
-	bdebug("nid=%td start=%lx end=%lx aligned=%d\n",
-		bdata - bootmem_node_data, start, end, aligned);
+	bdebug("nid=%td start=%lx end=%lx\n",
+		bdata - bootmem_node_data, start, end);
 
 	while (start < end) {
 		unsigned long *map, idx, vec;
@@ -196,12 +189,17 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 		map = bdata->node_bootmem_map;
 		idx = start - bdata->node_min_pfn;
 		vec = ~map[idx / BITS_PER_LONG];
-
-		if (aligned && vec == ~0UL && start + BITS_PER_LONG < end) {
+		/*
+		 * If we have a properly aligned and fully unreserved
+		 * BITS_PER_LONG block of pages in front of us, free
+		 * it in one go.
+		 */
+		if (IS_ALIGNED(start, BITS_PER_LONG) && vec == ~0UL) {
 			int order = ilog2(BITS_PER_LONG);
 
 			__free_pages_bootmem(pfn_to_page(start), order);
 			count += BITS_PER_LONG;
+			start += BITS_PER_LONG;
 		} else {
 			unsigned long off = 0;
 
@@ -214,8 +212,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 				vec >>= 1;
 				off++;
 			}
+			start = ALIGN(start + 1, BITS_PER_LONG);
 		}
-		start += BITS_PER_LONG;
 	}
 
 	page = virt_to_page(bdata->node_bootmem_map);
diff --git a/mm/compaction.c b/mm/compaction.c
index 899d9563858..71a58f67f48 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -350,7 +350,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 		}
 
 		if (!cc->sync)
-			mode |= ISOLATE_CLEAN;
+			mode |= ISOLATE_ASYNC_MIGRATE;
 
 		/* Try isolate the page */
 		if (__isolate_lru_page(page, mode, 0) != 0)
@@ -365,8 +365,10 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 		nr_isolated++;
 
 		/* Avoid isolating too much */
-		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
+		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) {
+			++low_pfn;
 			break;
+		}
 	}
 
 	acct_isolated(zone, cc);
@@ -555,7 +557,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 		nr_migrate = cc->nr_migratepages;
 		err = migrate_pages(&cc->migratepages, compaction_alloc,
 				(unsigned long)cc, false,
-				cc->sync);
+				cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC);
 		update_nr_listpages(cc);
 		nr_remaining = cc->nr_migratepages;
 
@@ -669,6 +671,7 @@ static int compact_node(int nid)
 			.nr_freepages = 0,
 			.nr_migratepages = 0,
 			.order = -1,
+			.sync = true,
 		};
 
 		zone = &pgdat->node_zones[zoneid];
@@ -721,23 +724,23 @@ int sysctl_extfrag_handler(struct ctl_table *table, int write,
 }
 
 #if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
-ssize_t sysfs_compact_node(struct sys_device *dev,
-			struct sysdev_attribute *attr,
+ssize_t sysfs_compact_node(struct device *dev,
+			struct device_attribute *attr,
 			const char *buf, size_t count)
 {
 	compact_node(dev->id);
 
 	return count;
 }
-static SYSDEV_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node);
+static DEVICE_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node);
 
 int compaction_register_node(struct node *node)
 {
-	return sysdev_create_file(&node->sysdev, &attr_compact);
+	return device_create_file(&node->dev, &dev_attr_compact);
 }
 
 void compaction_unregister_node(struct node *node)
 {
-	return sysdev_remove_file(&node->sysdev, &attr_compact);
+	return device_remove_file(&node->dev, &dev_attr_compact);
 }
 #endif /* CONFIG_SYSFS && CONFIG_NUMA */
diff --git a/mm/debug-pagealloc.c b/mm/debug-pagealloc.c
index 7cea557407f..789ff70c8a4 100644
--- a/mm/debug-pagealloc.c
+++ b/mm/debug-pagealloc.c
@@ -95,9 +95,6 @@ static void unpoison_pages(struct page *page, int n)
 
 void kernel_map_pages(struct page *page, int numpages, int enable)
 {
-	if (!debug_pagealloc_enabled)
-		return;
-
 	if (enable)
 		unpoison_pages(page, numpages);
 	else
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 8d723c9e8b7..469491e0af7 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -117,7 +117,8 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
 		break;
 	case POSIX_FADV_DONTNEED:
 		if (!bdi_write_congested(mapping->backing_dev_info))
-			filemap_flush(mapping);
+			__filemap_fdatawrite_range(mapping, offset, endbyte,
+						   WB_SYNC_NONE);
 
 		/* First and last FULL page! */
 		start_index = (offset+(PAGE_CACHE_SIZE-1)) >> PAGE_CACHE_SHIFT;
diff --git a/mm/failslab.c b/mm/failslab.c
index 0dd7b8fec71..fefaabaab76 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -35,7 +35,7 @@ __setup("failslab=", setup_failslab);
 static int __init failslab_debugfs_init(void)
 {
 	struct dentry *dir;
-	mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
 
 	dir = fault_create_debugfs_attr("failslab", NULL, &failslab.attr);
 	if (IS_ERR(dir))
diff --git a/mm/filemap.c b/mm/filemap.c
index c0018f2d50e..97f49ed35bd 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -393,24 +393,11 @@ EXPORT_SYMBOL(filemap_write_and_wait_range);
 int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 {
 	int error;
-	struct mem_cgroup *memcg = NULL;
 
 	VM_BUG_ON(!PageLocked(old));
 	VM_BUG_ON(!PageLocked(new));
 	VM_BUG_ON(new->mapping);
 
-	/*
-	 * This is not page migration, but prepare_migration and
-	 * end_migration does enough work for charge replacement.
-	 *
-	 * In the longer term we probably want a specialized function
-	 * for moving the charge from old to new in a more efficient
-	 * manner.
-	 */
-	error = mem_cgroup_prepare_migration(old, new, &memcg, gfp_mask);
-	if (error)
-		return error;
-
 	error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 	if (!error) {
 		struct address_space *mapping = old->mapping;
@@ -432,13 +419,12 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 		if (PageSwapBacked(new))
 			__inc_zone_page_state(new, NR_SHMEM);
 		spin_unlock_irq(&mapping->tree_lock);
+		/* mem_cgroup codes must not be called under tree_lock */
+		mem_cgroup_replace_page_cache(old, new);
 		radix_tree_preload_end();
 		if (freepage)
 			freepage(old);
 		page_cache_release(old);
-		mem_cgroup_end_migration(memcg, old, new, true);
-	} else {
-		mem_cgroup_end_migration(memcg, old, new, false);
 	}
 
 	return error;
@@ -1828,7 +1814,7 @@ repeat:
 		page = __page_cache_alloc(gfp | __GFP_COLD);
 		if (!page)
 			return ERR_PTR(-ENOMEM);
-		err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+		err = add_to_page_cache_lru(page, mapping, index, gfp);
 		if (unlikely(err)) {
 			page_cache_release(page);
 			if (err == -EEXIST)
@@ -1925,10 +1911,7 @@ static struct page *wait_on_page_read(struct page *page)
  * @gfp:	the page allocator flags to use if allocating
  *
  * This is the same as "read_mapping_page(mapping, index, NULL)", but with
- * any new page allocations done using the specified allocation flags. Note
- * that the Radix tree operations will still use GFP_KERNEL, so you can't
- * expect to do this atomically or anything like that - but you can pass in
- * other page requirements.
+ * any new page allocations done using the specified allocation flags.
  *
  * If the page does not get brought uptodate, return -EIO.
 */
@@ -1971,7 +1954,7 @@ EXPORT_SYMBOL(read_cache_page);
 */
 int should_remove_suid(struct dentry *dentry)
 {
-	mode_t mode = dentry->d_inode->i_mode;
+	umode_t mode = dentry->d_inode->i_mode;
 	int kill = 0;
 
 	/* suid always must be killed */
@@ -2354,8 +2337,11 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
 					pgoff_t index, unsigned flags)
 {
 	int status;
+	gfp_t gfp_mask;
 	struct page *page;
 	gfp_t gfp_notmask = 0;
+
+	gfp_mask = mapping_gfp_mask(mapping) | __GFP_WRITE;
 	if (flags & AOP_FLAG_NOFS)
 		gfp_notmask = __GFP_FS;
 repeat:
@@ -2363,7 +2349,7 @@ repeat:
 	if (page)
 		goto found;
 
-	page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask);
+	page = __page_cache_alloc(gfp_mask & ~gfp_notmask);
 	if (!page)
 		return NULL;
 	status = add_to_page_cache_lru(page, mapping, index,
@@ -2407,7 +2393,6 @@ static ssize_t generic_perform_write(struct file *file,
 						iov_iter_count(i));
 
 again:
-
 		/*
 		 * Bring in the user page that we will copy from _first_.
 		 * Otherwise there's a nasty deadlock on copying from the
@@ -2463,7 +2448,10 @@ again:
 		written += copied;
 
 		balance_dirty_pages_ratelimited(mapping);
-
+		if (fatal_signal_pending(current)) {
+			status = -EINTR;
+			break;
+		}
 	} while (iov_iter_count(i));
 
 	return written ? written : status;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 36b3d988b4e..b3ffc21ce80 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -487,41 +487,68 @@ static struct attribute_group khugepaged_attr_group = {
 	.attrs = khugepaged_attr,
 	.name = "khugepaged",
 };
-#endif /* CONFIG_SYSFS */
 
-static int __init hugepage_init(void)
+static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
 {
 	int err;
-#ifdef CONFIG_SYSFS
-	static struct kobject *hugepage_kobj;
-#endif
-
-	err = -EINVAL;
-	if (!has_transparent_hugepage()) {
-		transparent_hugepage_flags = 0;
-		goto out;
-	}
 
-#ifdef CONFIG_SYSFS
-	err = -ENOMEM;
-	hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
-	if (unlikely(!hugepage_kobj)) {
+	*hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
+	if (unlikely(!*hugepage_kobj)) {
 		printk(KERN_ERR "hugepage: failed kobject create\n");
-		goto out;
+		return -ENOMEM;
 	}
 
-	err = sysfs_create_group(hugepage_kobj, &hugepage_attr_group);
+	err = sysfs_create_group(*hugepage_kobj, &hugepage_attr_group);
 	if (err) {
 		printk(KERN_ERR "hugepage: failed register hugeage group\n");
-		goto out;
+		goto delete_obj;
 	}
 
-	err = sysfs_create_group(hugepage_kobj, &khugepaged_attr_group);
+	err = sysfs_create_group(*hugepage_kobj, &khugepaged_attr_group);
 	if (err) {
 		printk(KERN_ERR "hugepage: failed register hugeage group\n");
-		goto out;
+		goto remove_hp_group;
 	}
-#endif
+
+	return 0;
+
+remove_hp_group:
+	sysfs_remove_group(*hugepage_kobj, &hugepage_attr_group);
+delete_obj:
+	kobject_put(*hugepage_kobj);
+	return err;
+}
+
+static void __init hugepage_exit_sysfs(struct kobject *hugepage_kobj)
+{
+	sysfs_remove_group(hugepage_kobj, &khugepaged_attr_group);
+	sysfs_remove_group(hugepage_kobj, &hugepage_attr_group);
+	kobject_put(hugepage_kobj);
+}
+#else
+static inline int hugepage_init_sysfs(struct kobject **hugepage_kobj)
+{
+	return 0;
+}
+
+static inline void hugepage_exit_sysfs(struct kobject *hugepage_kobj)
+{
+}
+#endif /* CONFIG_SYSFS */
+
+static int __init hugepage_init(void)
+{
+	int err;
+	struct kobject *hugepage_kobj;
+
+	if (!has_transparent_hugepage()) {
+		transparent_hugepage_flags = 0;
+		return -EINVAL;
+	}
+
+	err = hugepage_init_sysfs(&hugepage_kobj);
+	if (err)
+		return err;
 
 	err = khugepaged_slab_init();
 	if (err)
@@ -545,7 +572,9 @@ static int __init hugepage_init(void)
 
 	set_recommended_min_free_kbytes();
 
+	return 0;
 out:
+	hugepage_exit_sysfs(hugepage_kobj);
 	return err;
 }
 module_init(hugepage_init)
@@ -997,7 +1026,7 @@ out:
 }
 
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
-		 pmd_t *pmd)
+		 pmd_t *pmd, unsigned long addr)
 {
 	int ret = 0;
 
@@ -1013,6 +1042,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		pgtable = get_pmd_huge_pte(tlb->mm);
 		page = pmd_page(*pmd);
 		pmd_clear(pmd);
+		tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
 		page_remove_rmap(page);
 		VM_BUG_ON(page_mapcount(page) < 0);
 		add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
@@ -1116,7 +1146,6 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			entry = pmd_modify(entry, newprot);
 			set_pmd_at(mm, addr, pmd, entry);
 			spin_unlock(&vma->vm_mm->page_table_lock);
-			flush_tlb_range(vma, addr, addr + HPAGE_PMD_SIZE);
 			ret = 1;
 		}
 	} else
@@ -1199,16 +1228,16 @@ static int __split_huge_page_splitting(struct page *page,
 static void __split_huge_page_refcount(struct page *page)
 {
 	int i;
-	unsigned long head_index = page->index;
 	struct zone *zone = page_zone(page);
-	int zonestat;
 	int tail_count = 0;
 
 	/* prevent PageLRU to go away from under us, and freeze lru stats */
 	spin_lock_irq(&zone->lru_lock);
 	compound_lock(page);
+	/* complete memcg works before add pages to LRU */
+	mem_cgroup_split_huge_fixup(page);
 
-	for (i = 1; i < HPAGE_PMD_NR; i++) {
+	for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
 		struct page *page_tail = page + i;
 
 		/* tail_page->_mapcount cannot change */
@@ -1271,14 +1300,13 @@ static void __split_huge_page_refcount(struct page *page)
 
 		BUG_ON(page_tail->mapping);
 		page_tail->mapping = page->mapping;
-		page_tail->index = ++head_index;
+		page_tail->index = page->index + i;
 
 		BUG_ON(!PageAnon(page_tail));
 		BUG_ON(!PageUptodate(page_tail));
 		BUG_ON(!PageDirty(page_tail));
 		BUG_ON(!PageSwapBacked(page_tail));
 
-		mem_cgroup_split_huge_fixup(page, page_tail);
 		lru_add_page_tail(zone, page, page_tail);
 	}
 
@@ -1288,15 +1316,6 @@ static void __split_huge_page_refcount(struct page *page)
 	__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
 	__mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
 
-	/*
-	 * A hugepage counts for HPAGE_PMD_NR pages on the LRU statistics,
-	 * so adjust those appropriately if this page is on the LRU.
-	 */
-	if (PageLRU(page)) {
-		zonestat = NR_LRU_BASE + page_lru(page);
-		__mod_zone_page_state(zone, zonestat, -(HPAGE_PMD_NR-1));
-	}
-
 	ClearPageCompound(page);
 	compound_unlock(page);
 	spin_unlock_irq(&zone->lru_lock);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 73f17c0293c..ea8c3a4cd2a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -800,7 +800,7 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
 
 	if (page && arch_prepare_hugepage(page)) {
 		__free_pages(page, huge_page_order(h));
-		return NULL;
+		page = NULL;
 	}
 
 	spin_lock(&hugetlb_lock);
@@ -901,7 +901,6 @@ retry:
 	h->resv_huge_pages += delta;
 	ret = 0;
 
-	spin_unlock(&hugetlb_lock);
 	/* Free the needed pages to the hugetlb pool */
 	list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
 		if ((--needed) < 0)
@@ -915,6 +914,7 @@ retry:
 		VM_BUG_ON(page_count(page));
 		enqueue_huge_page(h, page);
 	}
+	spin_unlock(&hugetlb_lock);
 
 	/* Free unnecessary surplus pages to the buddy allocator */
 free:
@@ -1592,9 +1592,9 @@ static void __init hugetlb_sysfs_init(void)
 
 /*
  * node_hstate/s - associate per node hstate attributes, via their kobjects,
- * with node sysdevs in node_devices[] using a parallel array.  The array
- * index of a node sysdev or _hstate == node id.
- * This is here to avoid any static dependency of the node sysdev driver, in
+ * with node devices in node_devices[] using a parallel array.  The array
+ * index of a node device or _hstate == node id.
+ * This is here to avoid any static dependency of the node device driver, in
 * the base kernel, on the hugetlb module.
 */
struct node_hstate {
@@ -1604,7 +1604,7 @@ struct node_hstate {
struct node_hstate node_hstates[MAX_NUMNODES];

/*
- * A subset of global hstate attributes for node sysdevs
+ * A subset of global hstate attributes for node devices
 */
static struct attribute *per_node_hstate_attrs[] = {
	&nr_hugepages_attr.attr,
@@ -1618,7 +1618,7 @@ static struct attribute_group per_node_hstate_attr_group = {
};

/*
- * kobj_to_node_hstate - lookup global hstate for node sysdev hstate attr kobj.
+ * kobj_to_node_hstate - lookup global hstate for node device hstate attr kobj.
 * Returns node id via non-NULL nidp.
 */
static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
@@ -1641,13 +1641,13 @@ static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
}

/*
- * Unregister hstate attributes from a single node sysdev.
+ * Unregister hstate attributes from a single node device.
 * No-op if no hstate attributes attached.
 */
void hugetlb_unregister_node(struct node *node)
{
	struct hstate *h;
-	struct node_hstate *nhs = &node_hstates[node->sysdev.id];
+	struct node_hstate *nhs = &node_hstates[node->dev.id];

	if (!nhs->hugepages_kobj)
		return;		/* no hstate attributes */
@@ -1663,7 +1663,7 @@ void hugetlb_unregister_node(struct node *node)
}

/*
- * hugetlb module exit:  unregister hstate attributes from node sysdevs
+ * hugetlb module exit:  unregister hstate attributes from node devices
 * that have them.
 */
static void hugetlb_unregister_all_nodes(void)
@@ -1671,7 +1671,7 @@ static void hugetlb_unregister_all_nodes(void)
	int nid;

	/*
-	 * disable node sysdev registrations.
+	 * disable node device registrations.
	 */
	register_hugetlbfs_with_node(NULL, NULL);

@@ -1683,20 +1683,20 @@ static void hugetlb_unregister_all_nodes(void)
}

/*
- * Register hstate attributes for a single node sysdev.
+ * Register hstate attributes for a single node device.
 * No-op if attributes already registered.
 */
void hugetlb_register_node(struct node *node)
{
	struct hstate *h;
-	struct node_hstate *nhs = &node_hstates[node->sysdev.id];
+	struct node_hstate *nhs = &node_hstates[node->dev.id];
	int err;

	if (nhs->hugepages_kobj)
		return;		/* already allocated */

	nhs->hugepages_kobj = kobject_create_and_add("hugepages",
-							&node->sysdev.kobj);
+							&node->dev.kobj);
	if (!nhs->hugepages_kobj)
		return;

@@ -1707,7 +1707,7 @@ void hugetlb_register_node(struct node *node)
		if (err) {
			printk(KERN_ERR "Hugetlb: Unable to add hstate %s"
					" for node %d\n",
-						h->name, node->sysdev.id);
+						h->name, node->dev.id);
			hugetlb_unregister_node(node);
			break;
		}
@@ -1716,8 +1716,8 @@ void hugetlb_register_node(struct node *node)

/*
 * hugetlb init time:  register hstate attributes for all registered node
- * sysdevs of nodes that have memory.  All on-line nodes should have
- * registered their associated sysdev by this time.
+ * devices of nodes that have memory.  All on-line nodes should have
+ * registered their associated device by this time.
 */
static void hugetlb_register_all_nodes(void)
{
@@ -1725,12 +1725,12 @@ static void hugetlb_register_all_nodes(void)
	for_each_node_state(nid, N_HIGH_MEMORY) {
		struct node *node = &node_devices[nid];

-		if (node->sysdev.id == nid)
+		if (node->dev.id == nid)
			hugetlb_register_node(node);
	}

	/*
-	 * Let the node sysdev driver know we're here so it can
+	 * Let the node device driver know we're here so it can
	 * [un]register hstate attributes on node hotplug.
	 */
	register_hugetlbfs_with_node(hugetlb_register_node,
@@ -2315,8 +2315,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
	 * from page cache lookup which is in HPAGE_SIZE units.
	 */
	address = address & huge_page_mask(h);
-	pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
-		+ (vma->vm_pgoff >> PAGE_SHIFT);
+	pgoff = vma_hugecache_offset(h, vma, address);
	mapping = (struct address_space *)page_private(page);

	/*
@@ -2349,6 +2348,9 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,

/*
 * Hugetlb_cow() should be called with page lock of the original hugepage held.
+ * Called with hugetlb_instantiation_mutex held and pte_page locked so we
+ * cannot race with other handlers or page migration.
+ * Keep the pte_same checks anyway to make transition from the mutex easier.
 */
static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, pte_t *ptep, pte_t pte,
@@ -2408,7 +2410,14 @@ retry_avoidcopy:
			BUG_ON(page_count(old_page) != 1);
			BUG_ON(huge_pte_none(pte));
			spin_lock(&mm->page_table_lock);
-			goto retry_avoidcopy;
+			ptep = huge_pte_offset(mm, address & huge_page_mask(h));
+			if (likely(pte_same(huge_ptep_get(ptep), pte)))
+				goto retry_avoidcopy;
+			/*
+			 * race occurs while re-acquiring page_table_lock, and
+			 * our job is done.
+			 */
+			return 0;
		}
		WARN_ON_ONCE(1);
	}
@@ -2630,6 +2639,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
	static DEFINE_MUTEX(hugetlb_instantiation_mutex);
	struct hstate *h = hstate_vma(vma);

+	address &= huge_page_mask(h);
+
	ptep = huge_pte_offset(mm, address);
	if (ptep) {
		entry = huge_ptep_get(ptep);
diff --git a/mm/ksm.c b/mm/ksm.c
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -28,6 +28,7 @@
 #include <linux/kthread.h>
 #include <linux/wait.h>
 #include <linux/slab.h>
+#include <linux/memcontrol.h>
 #include <linux/rbtree.h>
 #include <linux/memory.h>
 #include <linux/mmu_notifier.h>
@@ -1571,6 +1572,16 @@ struct page *ksm_does_need_to_copy(struct page *page,
 
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 	if (new_page) {
+		/*
+		 * The memcg-specific accounting when moving
+		 * pages around the LRU lists relies on the
+		 * page's owner (memcg) to be valid.  Usually,
+		 * pages are assigned to a new owner before
+		 * being put on the LRU list, but since this
+		 * is not the case here, the stale owner from
+		 * a previous allocation cycle must be reset.
+		 */
+		mem_cgroup_reset_owner(new_page);
 		copy_user_highpage(new_page, page, address, vma);
 
 		SetPageDirty(new_page);
diff --git a/mm/memblock.c b/mm/memblock.c
index 84bec4969ed..2f55f19b7c8 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -20,12 +20,23 @@
 #include <linux/seq_file.h>
 #include <linux/memblock.h>
 
-struct memblock memblock __initdata_memblock;
+static struct memblock_region
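(The section is truncated here, mid-way through the mm/memblock.c diff.)

One pattern worth calling out from the hunks above: the mm/huge_memory.c rework replaces a single catch-all "out:" label with staged unwind labels in hugepage_init_sysfs() — each later failure jumps to a label that undoes only the steps already completed, in reverse order (remove_hp_group, then delete_obj). Below is a self-contained userspace sketch of that goto-unwind idiom; the step and undo helpers are hypothetical stand-ins for the kobject/sysfs calls, not kernel APIs.

#include <stdio.h>
#include <stdlib.h>

static int step_a(void)  { puts("a: acquired"); return 0; }
static void undo_a(void) { puts("a: released"); }
static int step_b(void)  { puts("b: acquired"); return 0; }
static void undo_b(void) { puts("b: released"); }
static int step_c(void)  { puts("c: failed");   return -1; }

/*
 * Acquire a, then b, then c.  On failure, jump to the label that
 * unwinds exactly the steps completed so far, falling through the
 * remaining cleanups in reverse order of acquisition.
 */
static int init_all(void)
{
	int err;

	err = step_a();
	if (err)
		return err;	/* nothing to unwind yet */

	err = step_b();
	if (err)
		goto undo_a;	/* only a succeeded so far */

	err = step_c();
	if (err)
		goto undo_b;	/* unwind b, then fall through to a */

	return 0;

undo_b:
	undo_b();
undo_a:
	undo_a();
	return err;
}

int main(void)
{
	return init_all() ? EXIT_FAILURE : EXIT_SUCCESS;
}

Here step_c() always fails, so running the program prints the acquisitions of a and b followed by their releases in reverse order — the same shape the patch gives the sysfs setup path.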