aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig6
-rw-r--r--mm/Kconfig.debug5
-rw-r--r--mm/backing-dev.c8
-rw-r--r--mm/bootmem.c24
-rw-r--r--mm/compaction.c19
-rw-r--r--mm/debug-pagealloc.c3
-rw-r--r--mm/fadvise.c3
-rw-r--r--mm/failslab.c2
-rw-r--r--mm/filemap.c38
-rw-r--r--mm/huge_memory.c93
-rw-r--r--mm/hugetlb.c55
-rw-r--r--mm/ksm.c11
-rw-r--r--mm/memblock.c961
-rw-r--r--mm/memcontrol.c1230
-rw-r--r--mm/memory-failure.c2
-rw-r--r--mm/memory.c4
-rw-r--r--mm/memory_hotplug.c2
-rw-r--r--mm/mempolicy.c27
-rw-r--r--mm/mempool.c104
-rw-r--r--mm/migrate.c187
-rw-r--r--mm/mmap.c60
-rw-r--r--mm/mremap.c9
-rw-r--r--mm/nobootmem.c45
-rw-r--r--mm/oom_kill.c52
-rw-r--r--mm/page-writeback.c568
-rw-r--r--mm/page_alloc.c818
-rw-r--r--mm/page_cgroup.c164
-rw-r--r--mm/percpu.c6
-rw-r--r--mm/rmap.c65
-rw-r--r--mm/shmem.c17
-rw-r--r--mm/slab.c41
-rw-r--r--mm/slub.c99
-rw-r--r--mm/swap.c93
-rw-r--r--mm/swap_state.c11
-rw-r--r--mm/swapfile.c15
-rw-r--r--mm/vmalloc.c48
-rw-r--r--mm/vmscan.c730
-rw-r--r--mm/vmstat.c2
38 files changed, 3067 insertions, 2560 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 011b110365c..e338407f122 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -131,6 +131,12 @@ config SPARSEMEM_VMEMMAP
config HAVE_MEMBLOCK
boolean
+config HAVE_MEMBLOCK_NODE_MAP
+ boolean
+
+config ARCH_DISCARD_MEMBLOCK
+ boolean
+
config NO_BOOTMEM
boolean
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 8b1a477162d..4b2443254de 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -4,6 +4,7 @@ config DEBUG_PAGEALLOC
depends on !HIBERNATION || ARCH_SUPPORTS_DEBUG_PAGEALLOC && !PPC && !SPARC
depends on !KMEMCHECK
select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC
+ select PAGE_GUARD if ARCH_SUPPORTS_DEBUG_PAGEALLOC
---help---
Unmap pages from the kernel linear mapping after free_pages().
This results in a large slowdown, but helps to find certain types
@@ -22,3 +23,7 @@ config WANT_PAGE_DEBUG_FLAGS
config PAGE_POISONING
bool
select WANT_PAGE_DEBUG_FLAGS
+
+config PAGE_GUARD
+ bool
+ select WANT_PAGE_DEBUG_FLAGS
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 71034f41a2b..7ba8feae11b 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -600,14 +600,10 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
/*
* Finally, kill the kernel thread. We don't need to be RCU
- * safe anymore, since the bdi is gone from visibility. Force
- * unfreeze of the thread before calling kthread_stop(), otherwise
- * it would never exet if it is currently stuck in the refrigerator.
+ * safe anymore, since the bdi is gone from visibility.
*/
- if (bdi->wb.task) {
- thaw_process(bdi->wb.task);
+ if (bdi->wb.task)
kthread_stop(bdi->wb.task);
- }
}
/*
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 1a77012ecdb..668e94df8cf 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -56,7 +56,7 @@ early_param("bootmem_debug", bootmem_debug_setup);
static unsigned long __init bootmap_bytes(unsigned long pages)
{
- unsigned long bytes = (pages + 7) / 8;
+ unsigned long bytes = DIV_ROUND_UP(pages, 8);
return ALIGN(bytes, sizeof(long));
}
@@ -171,7 +171,6 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size)
static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
{
- int aligned;
struct page *page;
unsigned long start, end, pages, count = 0;
@@ -181,14 +180,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
start = bdata->node_min_pfn;
end = bdata->node_low_pfn;
- /*
- * If the start is aligned to the machines wordsize, we might
- * be able to free pages in bulks of that order.
- */
- aligned = !(start & (BITS_PER_LONG - 1));
-
- bdebug("nid=%td start=%lx end=%lx aligned=%d\n",
- bdata - bootmem_node_data, start, end, aligned);
+ bdebug("nid=%td start=%lx end=%lx\n",
+ bdata - bootmem_node_data, start, end);
while (start < end) {
unsigned long *map, idx, vec;
@@ -196,12 +189,17 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
map = bdata->node_bootmem_map;
idx = start - bdata->node_min_pfn;
vec = ~map[idx / BITS_PER_LONG];
-
- if (aligned && vec == ~0UL && start + BITS_PER_LONG < end) {
+ /*
+ * If we have a properly aligned and fully unreserved
+ * BITS_PER_LONG block of pages in front of us, free
+ * it in one go.
+ */
+ if (IS_ALIGNED(start, BITS_PER_LONG) && vec == ~0UL) {
int order = ilog2(BITS_PER_LONG);
__free_pages_bootmem(pfn_to_page(start), order);
count += BITS_PER_LONG;
+ start += BITS_PER_LONG;
} else {
unsigned long off = 0;
@@ -214,8 +212,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
vec >>= 1;
off++;
}
+ start = ALIGN(start + 1, BITS_PER_LONG);
}
- start += BITS_PER_LONG;
}
page = virt_to_page(bdata->node_bootmem_map);
diff --git a/mm/compaction.c b/mm/compaction.c
index 899d9563858..71a58f67f48 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -350,7 +350,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
}
if (!cc->sync)
- mode |= ISOLATE_CLEAN;
+ mode |= ISOLATE_ASYNC_MIGRATE;
/* Try isolate the page */
if (__isolate_lru_page(page, mode, 0) != 0)
@@ -365,8 +365,10 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
nr_isolated++;
/* Avoid isolating too much */
- if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
+ if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) {
+ ++low_pfn;
break;
+ }
}
acct_isolated(zone, cc);
@@ -555,7 +557,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
nr_migrate = cc->nr_migratepages;
err = migrate_pages(&cc->migratepages, compaction_alloc,
(unsigned long)cc, false,
- cc->sync);
+ cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC);
update_nr_listpages(cc);
nr_remaining = cc->nr_migratepages;
@@ -669,6 +671,7 @@ static int compact_node(int nid)
.nr_freepages = 0,
.nr_migratepages = 0,
.order = -1,
+ .sync = true,
};
zone = &pgdat->node_zones[zoneid];
@@ -721,23 +724,23 @@ int sysctl_extfrag_handler(struct ctl_table *table, int write,
}
#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
-ssize_t sysfs_compact_node(struct sys_device *dev,
- struct sysdev_attribute *attr,
+ssize_t sysfs_compact_node(struct device *dev,
+ struct device_attribute *attr,
const char *buf, size_t count)
{
compact_node(dev->id);
return count;
}
-static SYSDEV_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node);
+static DEVICE_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node);
int compaction_register_node(struct node *node)
{
- return sysdev_create_file(&node->sysdev, &attr_compact);
+ return device_create_file(&node->dev, &dev_attr_compact);
}
void compaction_unregister_node(struct node *node)
{
- return sysdev_remove_file(&node->sysdev, &attr_compact);
+ return device_remove_file(&node->dev, &dev_attr_compact);
}
#endif /* CONFIG_SYSFS && CONFIG_NUMA */
diff --git a/mm/debug-pagealloc.c b/mm/debug-pagealloc.c
index 7cea557407f..789ff70c8a4 100644
--- a/mm/debug-pagealloc.c
+++ b/mm/debug-pagealloc.c
@@ -95,9 +95,6 @@ static void unpoison_pages(struct page *page, int n)
void kernel_map_pages(struct page *page, int numpages, int enable)
{
- if (!debug_pagealloc_enabled)
- return;
-
if (enable)
unpoison_pages(page, numpages);
else
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 8d723c9e8b7..469491e0af7 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -117,7 +117,8 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
break;
case POSIX_FADV_DONTNEED:
if (!bdi_write_congested(mapping->backing_dev_info))
- filemap_flush(mapping);
+ __filemap_fdatawrite_range(mapping, offset, endbyte,
+ WB_SYNC_NONE);
/* First and last FULL page! */
start_index = (offset+(PAGE_CACHE_SIZE-1)) >> PAGE_CACHE_SHIFT;
diff --git a/mm/failslab.c b/mm/failslab.c
index 0dd7b8fec71..fefaabaab76 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -35,7 +35,7 @@ __setup("failslab=", setup_failslab);
static int __init failslab_debugfs_init(void)
{
struct dentry *dir;
- mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+ umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
dir = fault_create_debugfs_attr("failslab", NULL, &failslab.attr);
if (IS_ERR(dir))
diff --git a/mm/filemap.c b/mm/filemap.c
index c0018f2d50e..97f49ed35bd 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -393,24 +393,11 @@ EXPORT_SYMBOL(filemap_write_and_wait_range);
int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
{
int error;
- struct mem_cgroup *memcg = NULL;
VM_BUG_ON(!PageLocked(old));
VM_BUG_ON(!PageLocked(new));
VM_BUG_ON(new->mapping);
- /*
- * This is not page migration, but prepare_migration and
- * end_migration does enough work for charge replacement.
- *
- * In the longer term we probably want a specialized function
- * for moving the charge from old to new in a more efficient
- * manner.
- */
- error = mem_cgroup_prepare_migration(old, new, &memcg, gfp_mask);
- if (error)
- return error;
-
error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
if (!error) {
struct address_space *mapping = old->mapping;
@@ -432,13 +419,12 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
if (PageSwapBacked(new))
__inc_zone_page_state(new, NR_SHMEM);
spin_unlock_irq(&mapping->tree_lock);
+ /* mem_cgroup codes must not be called under tree_lock */
+ mem_cgroup_replace_page_cache(old, new);
radix_tree_preload_end();
if (freepage)
freepage(old);
page_cache_release(old);
- mem_cgroup_end_migration(memcg, old, new, true);
- } else {
- mem_cgroup_end_migration(memcg, old, new, false);
}
return error;
@@ -1828,7 +1814,7 @@ repeat:
page = __page_cache_alloc(gfp | __GFP_COLD);
if (!page)
return ERR_PTR(-ENOMEM);
- err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+ err = add_to_page_cache_lru(page, mapping, index, gfp);
if (unlikely(err)) {
page_cache_release(page);
if (err == -EEXIST)
@@ -1925,10 +1911,7 @@ static struct page *wait_on_page_read(struct page *page)
* @gfp: the page allocator flags to use if allocating
*
* This is the same as "read_mapping_page(mapping, index, NULL)", but with
- * any new page allocations done using the specified allocation flags. Note
- * that the Radix tree operations will still use GFP_KERNEL, so you can't
- * expect to do this atomically or anything like that - but you can pass in
- * other page requirements.
+ * any new page allocations done using the specified allocation flags.
*
* If the page does not get brought uptodate, return -EIO.
*/
@@ -1971,7 +1954,7 @@ EXPORT_SYMBOL(read_cache_page);
*/
int should_remove_suid(struct dentry *dentry)
{
- mode_t mode = dentry->d_inode->i_mode;
+ umode_t mode = dentry->d_inode->i_mode;
int kill = 0;
/* suid always must be killed */
@@ -2354,8 +2337,11 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
pgoff_t index, unsigned flags)
{
int status;
+ gfp_t gfp_mask;
struct page *page;
gfp_t gfp_notmask = 0;
+
+ gfp_mask = mapping_gfp_mask(mapping) | __GFP_WRITE;
if (flags & AOP_FLAG_NOFS)
gfp_notmask = __GFP_FS;
repeat:
@@ -2363,7 +2349,7 @@ repeat:
if (page)
goto found;
- page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask);
+ page = __page_cache_alloc(gfp_mask & ~gfp_notmask);
if (!page)
return NULL;
status = add_to_page_cache_lru(page, mapping, index,
@@ -2407,7 +2393,6 @@ static ssize_t generic_perform_write(struct file *file,
iov_iter_count(i));
again:
-
/*
* Bring in the user page that we will copy from _first_.
* Otherwise there's a nasty deadlock on copying from the
@@ -2463,7 +2448,10 @@ again:
written += copied;
balance_dirty_pages_ratelimited(mapping);
-
+ if (fatal_signal_pending(current)) {
+ status = -EINTR;
+ break;
+ }
} while (iov_iter_count(i));
return written ? written : status;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 36b3d988b4e..b3ffc21ce80 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -487,41 +487,68 @@ static struct attribute_group khugepaged_attr_group = {
.attrs = khugepaged_attr,
.name = "khugepaged",
};
-#endif /* CONFIG_SYSFS */
-static int __init hugepage_init(void)
+static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
{
int err;
-#ifdef CONFIG_SYSFS
- static struct kobject *hugepage_kobj;
-#endif
-
- err = -EINVAL;
- if (!has_transparent_hugepage()) {
- transparent_hugepage_flags = 0;
- goto out;
- }
-#ifdef CONFIG_SYSFS
- err = -ENOMEM;
- hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
- if (unlikely(!hugepage_kobj)) {
+ *hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
+ if (unlikely(!*hugepage_kobj)) {
printk(KERN_ERR "hugepage: failed kobject create\n");
- goto out;
+ return -ENOMEM;
}
- err = sysfs_create_group(hugepage_kobj, &hugepage_attr_group);
+ err = sysfs_create_group(*hugepage_kobj, &hugepage_attr_group);
if (err) {
printk(KERN_ERR "hugepage: failed register hugeage group\n");
- goto out;
+ goto delete_obj;
}
- err = sysfs_create_group(hugepage_kobj, &khugepaged_attr_group);
+ err = sysfs_create_group(*hugepage_kobj, &khugepaged_attr_group);
if (err) {
printk(KERN_ERR "hugepage: failed register hugeage group\n");
- goto out;
+ goto remove_hp_group;
}
-#endif
+
+ return 0;
+
+remove_hp_group:
+ sysfs_remove_group(*hugepage_kobj, &hugepage_attr_group);
+delete_obj:
+ kobject_put(*hugepage_kobj);
+ return err;
+}
+
+static void __init hugepage_exit_sysfs(struct kobject *hugepage_kobj)
+{
+ sysfs_remove_group(hugepage_kobj, &khugepaged_attr_group);
+ sysfs_remove_group(hugepage_kobj, &hugepage_attr_group);
+ kobject_put(hugepage_kobj);
+}
+#else
+static inline int hugepage_init_sysfs(struct kobject **hugepage_kobj)
+{
+ return 0;
+}
+
+static inline void hugepage_exit_sysfs(struct kobject *hugepage_kobj)
+{
+}
+#endif /* CONFIG_SYSFS */
+
+static int __init hugepage_init(void)
+{
+ int err;
+ struct kobject *hugepage_kobj;
+
+ if (!has_transparent_hugepage()) {
+ transparent_hugepage_flags = 0;
+ return -EINVAL;
+ }
+
+ err = hugepage_init_sysfs(&hugepage_kobj);
+ if (err)
+ return err;
err = khugepaged_slab_init();
if (err)
@@ -545,7 +572,9 @@ static int __init hugepage_init(void)
set_recommended_min_free_kbytes();
+ return 0;
out:
+ hugepage_exit_sysfs(hugepage_kobj);
return err;
}
module_init(hugepage_init)
@@ -997,7 +1026,7 @@ out:
}
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
- pmd_t *pmd)
+ pmd_t *pmd, unsigned long addr)
{
int ret = 0;
@@ -1013,6 +1042,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pgtable = get_pmd_huge_pte(tlb->mm);
page = pmd_page(*pmd);
pmd_clear(pmd);
+ tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
page_remove_rmap(page);
VM_BUG_ON(page_mapcount(page) < 0);
add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
@@ -1116,7 +1146,6 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
entry = pmd_modify(entry, newprot);
set_pmd_at(mm, addr, pmd, entry);
spin_unlock(&vma->vm_mm->page_table_lock);
- flush_tlb_range(vma, addr, addr + HPAGE_PMD_SIZE);
ret = 1;
}
} else
@@ -1199,16 +1228,16 @@ static int __split_huge_page_splitting(struct page *page,
static void __split_huge_page_refcount(struct page *page)
{
int i;
- unsigned long head_index = page->index;
struct zone *zone = page_zone(page);
- int zonestat;
int tail_count = 0;
/* prevent PageLRU to go away from under us, and freeze lru stats */
spin_lock_irq(&zone->lru_lock);
compound_lock(page);
+ /* complete memcg works before add pages to LRU */
+ mem_cgroup_split_huge_fixup(page);
- for (i = 1; i < HPAGE_PMD_NR; i++) {
+ for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
struct page *page_tail = page + i;
/* tail_page->_mapcount cannot change */
@@ -1271,14 +1300,13 @@ static void __split_huge_page_refcount(struct page *page)
BUG_ON(page_tail->mapping);
page_tail->mapping = page->mapping;
- page_tail->index = ++head_index;
+ page_tail->index = page->index + i;
BUG_ON(!PageAnon(page_tail));
BUG_ON(!PageUptodate(page_tail));
BUG_ON(!PageDirty(page_tail));
BUG_ON(!PageSwapBacked(page_tail));
- mem_cgroup_split_huge_fixup(page, page_tail);
lru_add_page_tail(zone, page, page_tail);
}
@@ -1288,15 +1316,6 @@ static void __split_huge_page_refcount(struct page *page)
__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
__mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
- /*
- * A hugepage counts for HPAGE_PMD_NR pages on the LRU statistics,
- * so adjust those appropriately if this page is on the LRU.
- */
- if (PageLRU(page)) {
- zonestat = NR_LRU_BASE + page_lru(page);
- __mod_zone_page_state(zone, zonestat, -(HPAGE_PMD_NR-1));
- }
-
ClearPageCompound(page);
compound_unlock(page);
spin_unlock_irq(&zone->lru_lock);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 73f17c0293c..ea8c3a4cd2a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -800,7 +800,7 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
if (page && arch_prepare_hugepage(page)) {
__free_pages(page, huge_page_order(h));
- return NULL;
+ page = NULL;
}
spin_lock(&hugetlb_lock);
@@ -901,7 +901,6 @@ retry:
h->resv_huge_pages += delta;
ret = 0;
- spin_unlock(&hugetlb_lock);
/* Free the needed pages to the hugetlb pool */
list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
if ((--needed) < 0)
@@ -915,6 +914,7 @@ retry:
VM_BUG_ON(page_count(page));
enqueue_huge_page(h, page);
}
+ spin_unlock(&hugetlb_lock);
/* Free unnecessary surplus pages to the buddy allocator */
free:
@@ -1592,9 +1592,9 @@ static void __init hugetlb_sysfs_init(void)
/*
* node_hstate/s - associate per node hstate attributes, via their kobjects,
- * with node sysdevs in node_devices[] using a parallel array. The array
- * index of a node sysdev or _hstate == node id.
- * This is here to avoid any static dependency of the node sysdev driver, in
+ * with node devices in node_devices[] using a parallel array. The array
+ * index of a node device or _hstate == node id.
+ * This is here to avoid any static dependency of the node device driver, in
* the base kernel, on the hugetlb module.
*/
struct node_hstate {
@@ -1604,7 +1604,7 @@ struct node_hstate {
struct node_hstate node_hstates[MAX_NUMNODES];
/*
- * A subset of global hstate attributes for node sysdevs
+ * A subset of global hstate attributes for node devices
*/
static struct attribute *per_node_hstate_attrs[] = {
&nr_hugepages_attr.attr,
@@ -1618,7 +1618,7 @@ static struct attribute_group per_node_hstate_attr_group = {
};
/*
- * kobj_to_node_hstate - lookup global hstate for node sysdev hstate attr kobj.
+ * kobj_to_node_hstate - lookup global hstate for node device hstate attr kobj.
* Returns node id via non-NULL nidp.
*/
static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
@@ -1641,13 +1641,13 @@ static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
}
/*
- * Unregister hstate attributes from a single node sysdev.
+ * Unregister hstate attributes from a single node device.
* No-op if no hstate attributes attached.
*/
void hugetlb_unregister_node(struct node *node)
{
struct hstate *h;
- struct node_hstate *nhs = &node_hstates[node->sysdev.id];
+ struct node_hstate *nhs = &node_hstates[node->dev.id];
if (!nhs->hugepages_kobj)
return; /* no hstate attributes */
@@ -1663,7 +1663,7 @@ void hugetlb_unregister_node(struct node *node)
}
/*
- * hugetlb module exit: unregister hstate attributes from node sysdevs
+ * hugetlb module exit: unregister hstate attributes from node devices
* that have them.
*/
static void hugetlb_unregister_all_nodes(void)
@@ -1671,7 +1671,7 @@ static void hugetlb_unregister_all_nodes(void)
int nid;
/*
- * disable node sysdev registrations.
+ * disable node device registrations.
*/
register_hugetlbfs_with_node(NULL, NULL);
@@ -1683,20 +1683,20 @@ static void hugetlb_unregister_all_nodes(void)
}
/*
- * Register hstate attributes for a single node sysdev.
+ * Register hstate attributes for a single node device.
* No-op if attributes already registered.
*/
void hugetlb_register_node(struct node *node)
{
struct hstate *h;
- struct node_hstate *nhs = &node_hstates[node->sysdev.id];
+ struct node_hstate *nhs = &node_hstates[node->dev.id];
int err;
if (nhs->hugepages_kobj)
return; /* already allocated */
nhs->hugepages_kobj = kobject_create_and_add("hugepages",
- &node->sysdev.kobj);
+ &node->dev.kobj);
if (!nhs->hugepages_kobj)
return;
@@ -1707,7 +1707,7 @@ void hugetlb_register_node(struct node *node)
if (err) {
printk(KERN_ERR "Hugetlb: Unable to add hstate %s"
" for node %d\n",
- h->name, node->sysdev.id);
+ h->name, node->dev.id);
hugetlb_unregister_node(node);
break;
}
@@ -1716,8 +1716,8 @@ void hugetlb_register_node(struct node *node)
/*
* hugetlb init time: register hstate attributes for all registered node
- * sysdevs of nodes that have memory. All on-line nodes should have
- * registered their associated sysdev by this time.
+ * devices of nodes that have memory. All on-line nodes should have
+ * registered their associated device by this time.
*/
static void hugetlb_register_all_nodes(void)
{
@@ -1725,12 +1725,12 @@ static void hugetlb_register_all_nodes(void)
for_each_node_state(nid, N_HIGH_MEMORY) {
struct node *node = &node_devices[nid];
- if (node->sysdev.id == nid)
+ if (node->dev.id == nid)
hugetlb_register_node(node);
}
/*
- * Let the node sysdev driver know we're here so it can
+ * Let the node device driver know we're here so it can
* [un]register hstate attributes on node hotplug.
*/
register_hugetlbfs_with_node(hugetlb_register_node,
@@ -2315,8 +2315,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
* from page cache lookup which is in HPAGE_SIZE units.
*/
address = address & huge_page_mask(h);
- pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
- + (vma->vm_pgoff >> PAGE_SHIFT);
+ pgoff = vma_hugecache_offset(h, vma, address);
mapping = (struct address_space *)page_private(page);
/*
@@ -2349,6 +2348,9 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
/*
* Hugetlb_cow() should be called with page lock of the original hugepage held.
+ * Called with hugetlb_instantiation_mutex held and pte_page locked so we
+ * cannot race with other handlers or page migration.
+ * Keep the pte_same checks anyway to make transition from the mutex easier.
*/
static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *ptep, pte_t pte,
@@ -2408,7 +2410,14 @@ retry_avoidcopy:
BUG_ON(page_count(old_page) != 1);
BUG_ON(huge_pte_none(pte));
spin_lock(&mm->page_table_lock);
- goto retry_avoidcopy;
+ ptep = huge_pte_offset(mm, address & huge_page_mask(h));
+ if (likely(pte_same(huge_ptep_get(ptep), pte)))
+ goto retry_avoidcopy;
+ /*
+ * race occurs while re-acquiring page_table_lock, and
+ * our job is done.
+ */
+ return 0;
}
WARN_ON_ONCE(1);
}
@@ -2630,6 +2639,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
static DEFINE_MUTEX(hugetlb_instantiation_mutex);
struct hstate *h = hstate_vma(vma);
+ address &= huge_page_mask(h);
+
ptep = huge_pte_offset(mm, address);
if (ptep) {
entry = huge_ptep_get(ptep);
diff --git a/mm/ksm.c b/mm/ksm.c
index 310544a379a..1925ffbfb27 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -28,6 +28,7 @@
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/slab.h>
+#include <linux/memcontrol.h>
#include <linux/rbtree.h>
#include <linux/memory.h>
#include <linux/mmu_notifier.h>
@@ -1571,6 +1572,16 @@ struct page *ksm_does_need_to_copy(struct page *page,
new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
if (new_page) {
+ /*
+ * The memcg-specific accounting when moving
+ * pages around the LRU lists relies on the
+ * page's owner (memcg) to be valid. Usually,
+ * pages are assigned to a new owner before
+ * being put on the LRU list, but since this
+ * is not the case here, the stale owner from
+ * a previous allocation cycle must be reset.
+ */
+ mem_cgroup_reset_owner(new_page);
copy_user_highpage(new_page, page, address, vma);
SetPageDirty(new_page);
diff --git a/mm/memblock.c b/mm/memblock.c
index 84bec4969ed..2f55f19b7c8 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -20,12 +20,23 @@
#include <linux/seq_file.h>
#include <linux/memblock.h>
-struct memblock memblock __initdata_memblock;
+static struct memblock_region