aboutsummaryrefslogtreecommitdiff
path: root/mm/migrate.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/migrate.c')
-rw-r--r--mm/migrate.c381
1 files changed, 272 insertions, 109 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index a26bccd44cc..be6dbf995c0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -36,6 +36,7 @@
#include <linux/hugetlb_cgroup.h>
#include <linux/gfp.h>
#include <linux/balloon_compaction.h>
+#include <linux/mmu_notifier.h>
#include <asm/tlbflush.h>
@@ -71,28 +72,12 @@ int migrate_prep_local(void)
}
/*
- * Add isolated pages on the list back to the LRU under page lock
- * to avoid leaking evictable pages back onto unevictable list.
- */
-void putback_lru_pages(struct list_head *l)
-{
- struct page *page;
- struct page *page2;
-
- list_for_each_entry_safe(page, page2, l, lru) {
- list_del(&page->lru);
- dec_zone_page_state(page, NR_ISOLATED_ANON +
- page_is_file_cache(page));
- putback_lru_page(page);
- }
-}
-
-/*
* Put previously isolated pages back onto the appropriate lists
* from where they were once taken off for compaction/migration.
*
- * This function shall be used instead of putback_lru_pages(),
- * whenever the isolated pageset has been built by isolate_migratepages_range()
+ * This function shall be used whenever the isolated pageset has been
+ * built from lru, balloon, hugetlbfs page. See isolate_migratepages_range()
+ * and isolate_huge_page().
*/
void putback_movable_pages(struct list_head *l)
{
@@ -130,13 +115,11 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
ptep = huge_pte_offset(mm, addr);
if (!ptep)
goto out;
- ptl = &mm->page_table_lock;
+ ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep);
} else {
pmd = mm_find_pmd(mm, addr);
if (!pmd)
goto out;
- if (pmd_trans_huge(*pmd))
- goto out;
ptep = pte_offset_map(pmd, addr);
@@ -161,6 +144,8 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
get_page(new);
pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
+ if (pte_swp_soft_dirty(*ptep))
+ pte = pte_mksoft_dirty(pte);
if (is_write_migration_entry(entry))
pte = pte_mkwrite(pte);
#ifdef CONFIG_HUGETLB_PAGE
@@ -191,12 +176,49 @@ out:
}
/*
+ * Congratulations to trinity for discovering this bug.
+ * mm/fremap.c's remap_file_pages() accepts any range within a single vma to
+ * convert that vma to VM_NONLINEAR; and generic_file_remap_pages() will then
+ * replace the specified range by file ptes throughout (maybe populated after).
+ * If page migration finds a page within that range, while it's still located
+ * by vma_interval_tree rather than lost to i_mmap_nonlinear list, no problem:
+ * zap_pte() clears the temporary migration entry before mmap_sem is dropped.
+ * But if the migrating page is in a part of the vma outside the range to be
+ * remapped, then it will not be cleared, and remove_migration_ptes() needs to
+ * deal with it. Fortunately, this part of the vma is of course still linear,
+ * so we just need to use linear location on the nonlinear list.
+ */
+static int remove_linear_migration_ptes_from_nonlinear(struct page *page,
+ struct address_space *mapping, void *arg)
+{
+ struct vm_area_struct *vma;
+ /* hugetlbfs does not support remap_pages, so no huge pgoff worries */
+ pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ unsigned long addr;
+
+ list_for_each_entry(vma,
+ &mapping->i_mmap_nonlinear, shared.nonlinear) {
+
+ addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+ if (addr >= vma->vm_start && addr < vma->vm_end)
+ remove_migration_pte(page, vma, addr, arg);
+ }
+ return SWAP_AGAIN;
+}
+
+/*
* Get rid of all migration entries and replace them by
* references to the indicated page.
*/
static void remove_migration_ptes(struct page *old, struct page *new)
{
- rmap_walk(new, remove_migration_pte, old);
+ struct rmap_walk_control rwc = {
+ .rmap_one = remove_migration_pte,
+ .arg = old,
+ .file_nonlinear = remove_linear_migration_ptes_from_nonlinear,
+ };
+
+ rmap_walk(new, &rwc);
}
/*
@@ -247,9 +269,10 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
__migration_entry_wait(mm, ptep, ptl);
}
-void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte)
+void migration_entry_wait_huge(struct vm_area_struct *vma,
+ struct mm_struct *mm, pte_t *pte)
{
- spinlock_t *ptl = &(mm)->page_table_lock;
+ spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
__migration_entry_wait(mm, pte, ptl);
}
@@ -313,14 +336,15 @@ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
*/
int migrate_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page,
- struct buffer_head *head, enum migrate_mode mode)
+ struct buffer_head *head, enum migrate_mode mode,
+ int extra_count)
{
- int expected_count = 0;
+ int expected_count = 1 + extra_count;
void **pslot;
if (!mapping) {
/* Anonymous page without mapping */
- if (page_count(page) != 1)
+ if (page_count(page) != expected_count)
return -EAGAIN;
return MIGRATEPAGE_SUCCESS;
}
@@ -330,7 +354,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
pslot = radix_tree_lookup_slot(&mapping->page_tree,
page_index(page));
- expected_count = 2 + page_has_private(page);
+ expected_count += 1 + page_has_private(page);
if (page_count(page) != expected_count ||
radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
spin_unlock_irq(&mapping->tree_lock);
@@ -439,10 +463,60 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
}
/*
+ * Gigantic pages are so large that we do not guarantee that page++ pointer
+ * arithmetic will work across the entire page. We need something more
+ * specialized.
+ */
+static void __copy_gigantic_page(struct page *dst, struct page *src,
+ int nr_pages)
+{
+ int i;
+ struct page *dst_base = dst;
+ struct page *src_base = src;
+
+ for (i = 0; i < nr_pages; ) {
+ cond_resched();
+ copy_highpage(dst, src);
+
+ i++;
+ dst = mem_map_next(dst, dst_base, i);
+ src = mem_map_next(src, src_base, i);
+ }
+}
+
+static void copy_huge_page(struct page *dst, struct page *src)
+{
+ int i;
+ int nr_pages;
+
+ if (PageHuge(src)) {
+ /* hugetlbfs page */
+ struct hstate *h = page_hstate(src);
+ nr_pages = pages_per_huge_page(h);
+
+ if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
+ __copy_gigantic_page(dst, src, nr_pages);
+ return;
+ }
+ } else {
+ /* thp page */
+ BUG_ON(!PageTransHuge(src));
+ nr_pages = hpage_nr_pages(src);
+ }
+
+ for (i = 0; i < nr_pages; i++) {
+ cond_resched();
+ copy_highpage(dst + i, src + i);
+ }
+}
+
+/*
* Copy the page to its new location
*/
void migrate_page_copy(struct page *newpage, struct page *page)
{
+ int cpupid;
+
if (PageHuge(page) || PageTransHuge(page))
copy_huge_page(newpage, page);
else
@@ -455,7 +529,7 @@ void migrate_page_copy(struct page *newpage, struct page *page)
if (PageUptodate(page))
SetPageUptodate(newpage);
if (TestClearPageActive(page)) {
- VM_BUG_ON(PageUnevictable(page));
+ VM_BUG_ON_PAGE(PageUnevictable(page), page);
SetPageActive(newpage);
} else if (TestClearPageUnevictable(page))
SetPageUnevictable(newpage);
@@ -479,6 +553,13 @@ void migrate_page_copy(struct page *newpage, struct page *page)
__set_page_dirty_nobuffers(newpage);
}
+ /*
+ * Copy NUMA information to the new page, to prevent over-eager
+ * future migrations of this same page.
+ */
+ cpupid = page_cpupid_xchg_last(page, -1);
+ page_cpupid_xchg_last(newpage, cpupid);
+
mlock_migrate_page(newpage, page);
ksm_migrate_page(newpage, page);
/*
@@ -501,14 +582,6 @@ void migrate_page_copy(struct page *newpage, struct page *page)
* Migration functions
***********************************************************/
-/* Always fail migration. Used for mappings that are not movable */
-int fail_migrate_page(struct address_space *mapping,
- struct page *newpage, struct page *page)
-{
- return -EIO;
-}
-EXPORT_SYMBOL(fail_migrate_page);
-
/*
* Common logic to directly migrate a single page suitable for
* pages that do not use PagePrivate/PagePrivate2.
@@ -523,7 +596,7 @@ int migrate_page(struct address_space *mapping,
BUG_ON(PageWriteback(page)); /* Writeback must be complete */
- rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode);
+ rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
@@ -550,7 +623,7 @@ int buffer_migrate_page(struct address_space *mapping,
head = page_buffers(page);
- rc = migrate_page_move_mapping(mapping, newpage, page, head, mode);
+ rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0);
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
@@ -828,7 +901,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
* free the metadata, so the page can be freed.
*/
if (!page->mapping) {
- VM_BUG_ON(PageAnon(page));
+ VM_BUG_ON_PAGE(PageAnon(page), page);
if (page_has_private(page)) {
try_to_free_buffers(page);
goto uncharge;
@@ -863,8 +936,9 @@ out:
* Obtain the lock on page, remove all ptes and migrate the page
* to the newly allocated page in newpage.
*/
-static int unmap_and_move(new_page_t get_new_page, unsigned long private,
- struct page *page, int force, enum migrate_mode mode)
+static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page,
+ unsigned long private, struct page *page, int force,
+ enum migrate_mode mode)
{
int rc = 0;
int *result = NULL;
@@ -908,11 +982,18 @@ out:
page_is_file_cache(page));
putback_lru_page(page);
}
+
/*
- * Move the new page to the LRU. If migration was not successful
- * then this will free the page.
+ * If migration was not successful and there's a freeing callback, use
+ * it. Otherwise, putback_lru_page() will drop the reference grabbed
+ * during isolation.
*/
- putback_lru_page(newpage);
+ if (rc != MIGRATEPAGE_SUCCESS && put_new_page) {
+ ClearPageSwapBacked(newpage);
+ put_new_page(newpage, private);
+ } else
+ putback_lru_page(newpage);
+
if (result) {
if (rc)
*result = rc;
@@ -941,12 +1022,13 @@ out:
* will wait in the page fault for migration to complete.
*/
static int unmap_and_move_huge_page(new_page_t get_new_page,
- unsigned long private, struct page *hpage,
- int force, enum migrate_mode mode)
+ free_page_t put_new_page, unsigned long private,
+ struct page *hpage, int force,
+ enum migrate_mode mode)
{
int rc = 0;
int *result = NULL;
- struct page *new_hpage = get_new_page(hpage, private, &result);
+ struct page *new_hpage;
struct anon_vma *anon_vma = NULL;
/*
@@ -956,9 +1038,12 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
* tables or check whether the hugepage is pmd-based or not before
* kicking migration.
*/
- if (!hugepage_migration_support(page_hstate(hpage)))
+ if (!hugepage_migration_supported(page_hstate(hpage))) {
+ putback_active_hugepage(hpage);
return -ENOSYS;
+ }
+ new_hpage = get_new_page(hpage, private, &result);
if (!new_hpage)
return -ENOMEM;
@@ -978,20 +1063,30 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
if (!page_mapped(hpage))
rc = move_to_new_page(new_hpage, hpage, 1, mode);
- if (rc)
+ if (rc != MIGRATEPAGE_SUCCESS)
remove_migration_ptes(hpage, hpage);
if (anon_vma)
put_anon_vma(anon_vma);
- if (!rc)
+ if (rc == MIGRATEPAGE_SUCCESS)
hugetlb_cgroup_migrate(hpage, new_hpage);
unlock_page(hpage);
out:
if (rc != -EAGAIN)
putback_active_hugepage(hpage);
- put_page(new_hpage);
+
+ /*
+ * If migration was not successful and there's a freeing callback, use
+ * it. Otherwise, put_page() will drop the reference grabbed during
+ * isolation.
+ */
+ if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
+ put_new_page(new_hpage, private);
+ else
+ put_page(new_hpage);
+
if (result) {
if (rc)
*result = rc;
@@ -1008,6 +1103,8 @@ out:
* @from: The list of pages to be migrated.
* @get_new_page: The function used to allocate free pages to be used
* as the target of the page migration.
+ * @put_new_page: The function used to free target pages if migration
+ * fails, or NULL if no special handling is necessary.
* @private: Private data to be passed on to get_new_page()
* @mode: The migration mode that specifies the constraints for
* page migration, if any.
@@ -1021,7 +1118,8 @@ out:
* Returns the number of pages that were not migrated, or an error code.
*/
int migrate_pages(struct list_head *from, new_page_t get_new_page,
- unsigned long private, enum migrate_mode mode, int reason)
+ free_page_t put_new_page, unsigned long private,
+ enum migrate_mode mode, int reason)
{
int retry = 1;
int nr_failed = 0;
@@ -1043,10 +1141,11 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
if (PageHuge(page))
rc = unmap_and_move_huge_page(get_new_page,
- private, page, pass > 2, mode);
+ put_new_page, private, page,
+ pass > 2, mode);
else
- rc = unmap_and_move(get_new_page, private,
- page, pass > 2, mode);
+ rc = unmap_and_move(get_new_page, put_new_page,
+ private, page, pass > 2, mode);
switch(rc) {
case -ENOMEM:
@@ -1058,7 +1157,12 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
nr_succeeded++;
break;
default:
- /* Permanent failure */
+ /*
+ * Permanent failure (-EBUSY, -ENOSYS, etc.):
+ * unlike -EAGAIN case, the failed page is
+ * removed from migration page list and not
+ * retried in the next outer loop.
+ */
nr_failed++;
break;
}
@@ -1107,7 +1211,7 @@ static struct page *new_page_node(struct page *p, unsigned long private,
pm->node);
else
return alloc_pages_exact_node(pm->node,
- GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0);
+ GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
}
/*
@@ -1190,7 +1294,7 @@ set_status:
err = 0;
if (!list_empty(&pagelist)) {
- err = migrate_pages(&pagelist, new_page_node,
+ err = migrate_pages(&pagelist, new_page_node, NULL,
(unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL);
if (err)
putback_movable_pages(&pagelist);
@@ -1493,12 +1597,10 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
struct page *newpage;
newpage = alloc_pages_exact_node(nid,
- (GFP_HIGHUSER_MOVABLE | GFP_THISNODE |
- __GFP_NOMEMALLOC | __GFP_NORETRY |
- __GFP_NOWARN) &
+ (GFP_HIGHUSER_MOVABLE |
+ __GFP_THISNODE | __GFP_NOMEMALLOC |
+ __GFP_NORETRY | __GFP_NOWARN) &
~GFP_IOFS, 0);
- if (newpage)
- page_nid_xchg_last(newpage, page_nid_last(page));
return newpage;
}
@@ -1532,35 +1634,42 @@ bool migrate_ratelimited(int node)
}
/* Returns true if the node is migrate rate-limited after the update */
-bool numamigrate_update_ratelimit(pg_data_t *pgdat, unsigned long nr_pages)
+static bool numamigrate_update_ratelimit(pg_data_t *pgdat,
+ unsigned long nr_pages)
{
- bool rate_limited = false;
-
/*
* Rate-limit the amount of data that is being migrated to a node.
* Optimal placement is no good if the memory bus is saturated and
* all the time is being spent migrating!
*/
- spin_lock(&pgdat->numabalancing_migrate_lock);
if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
+ spin_lock(&pgdat->numabalancing_migrate_lock);
pgdat->numabalancing_migrate_nr_pages = 0;
pgdat->numabalancing_migrate_next_window = jiffies +
msecs_to_jiffies(migrate_interval_millisecs);
+ spin_unlock(&pgdat->numabalancing_migrate_lock);
}
- if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages)
- rate_limited = true;
- else
- pgdat->numabalancing_migrate_nr_pages += nr_pages;
- spin_unlock(&pgdat->numabalancing_migrate_lock);
-
- return rate_limited;
+ if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
+ trace_mm_numa_migrate_ratelimit(current, pgdat->node_id,
+ nr_pages);
+ return true;
+ }
+
+ /*
+ * This is an unlocked non-atomic update so errors are possible.
+ * The consequences are failing to migrate when we potentiall should
+ * have which is not severe enough to warrant locking. If it is ever
+ * a problem, it can be converted to a per-cpu counter.
+ */
+ pgdat->numabalancing_migrate_nr_pages += nr_pages;
+ return false;
}
-int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
+static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
{
int page_lru;
- VM_BUG_ON(compound_order(page) && !PageTransHuge(page));
+ VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
/* Avoid migrating to a node that is nearly full */
if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
@@ -1594,12 +1703,25 @@ int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
return 1;
}
+bool pmd_trans_migrating(pmd_t pmd)
+{
+ struct page *page = pmd_page(pmd);
+ return PageLocked(page);
+}
+
+void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd)
+{
+ struct page *page = pmd_page(*pmd);
+ wait_on_page_locked(page);
+}
+
/*
* Attempt to migrate a misplaced page to the specified destination
* node. Caller is expected to have an elevated reference count on
* the page that will be dropped by this function before returning.
*/
-int migrate_misplaced_page(struct page *page, int node)
+int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
+ int node)
{
pg_data_t *pgdat = NODE_DATA(node);
int isolated;
@@ -1607,10 +1729,11 @@ int migrate_misplaced_page(struct page *page, int node)
LIST_HEAD(migratepages);
/*
- * Don't migrate pages that are mapped in multiple processes.
- * TODO: Handle false sharing detection instead of this hammer
+ * Don't migrate file pages that are mapped in multiple processes
+ * with execute permissions as they are probably shared libraries.
*/
- if (page_mapcount(page) != 1)
+ if (page_mapcount(page) != 1 && page_is_file_cache(page) &&
+ (vma->vm_flags & VM_EXEC))
goto out;
/*
@@ -1627,9 +1750,15 @@ int migrate_misplaced_page(struct page *page, int node)
list_add(&page->lru, &migratepages);
nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
- node, MIGRATE_ASYNC, MR_NUMA_MISPLACED);
+ NULL, node, MIGRATE_ASYNC,
+ MR_NUMA_MISPLACED);
if (nr_remaining) {
- putback_lru_pages(&migratepages);
+ if (!list_empty(&migratepages)) {
+ list_del(&page->lru);
+ dec_zone_page_state(page, NR_ISOLATED_ANON +
+ page_is_file_cache(page));
+ putback_lru_page(page);
+ }
isolated = 0;
} else
count_vm_numa_event(NUMA_PAGE_MIGRATE);
@@ -1653,19 +1782,15 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
unsigned long address,
struct page *page, int node)
{
- unsigned long haddr = address & HPAGE_PMD_MASK;
+ spinlock_t *ptl;
pg_data_t *pgdat = NODE_DATA(node);
int isolated = 0;
struct page *new_page = NULL;
struct mem_cgroup *memcg = NULL;
int page_lru = page_is_file_cache(page);
-
- /*
- * Don't migrate pages that are mapped in multiple processes.
- * TODO: Handle false sharing detection instead of this hammer
- */
- if (page_mapcount(page) != 1)
- goto out_dropref;
+ unsigned long mmun_start = address & HPAGE_PMD_MASK;
+ unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
+ pmd_t orig_entry;
/*
* Rate-limit the amount of data that is being migrated to a node.
@@ -1676,18 +1801,20 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
goto out_dropref;
new_page = alloc_pages_node(node,
- (GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER);
+ (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT,
+ HPAGE_PMD_ORDER);
if (!new_page)
goto out_fail;
- page_nid_xchg_last(new_page, page_nid_last(page));
-
isolated = numamigrate_isolate_page(pgdat, page);
if (!isolated) {
put_page(new_page);
goto out_fail;
}
+ if (mm_tlb_flush_pending(mm))
+ flush_tlb_range(vma, mmun_start, mmun_end);
+
/* Prepare a page as a migration target */
__set_page_locked(new_page);
SetPageSwapBacked(new_page);
@@ -1699,9 +1826,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
WARN_ON(PageLRU(new_page));
/* Recheck the target PMD */
- spin_lock(&mm->page_table_lock);
- if (unlikely(!pmd_same(*pmd, entry))) {
- spin_unlock(&mm->page_table_lock);
+ mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+ ptl = pmd_lock(mm, pmd);
+ if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) {
+fail_putback:
+ spin_unlock(ptl);
+ mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
/* Reverse changes made by migrate_page_copy() */
if (TestClearPageActive(new_page))
@@ -1713,12 +1843,13 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
unlock_page(new_page);
put_page(new_page); /* Free it */
- unlock_page(page);
+ /* Retake the callers reference and putback on LRU */
+ get_page(page);
putback_lru_page(page);
+ mod_zone_page_state(page_zone(page),
+ NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
- count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
- isolated = 0;
- goto out;
+ goto out_unlock;
}
/*
@@ -1730,23 +1861,47 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
*/
mem_cgroup_prepare_migration(page, new_page, &memcg);
+ orig_entry = *pmd;
entry = mk_pmd(new_page, vma->vm_page_prot);
- entry = pmd_mknonnuma(entry);
- entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
entry = pmd_mkhuge(entry);
+ entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
- page_add_new_anon_rmap(new_page, vma, haddr);
-
- set_pmd_at(mm, haddr, pmd, entry);
+ /*
+ * Clear the old entry under pagetable lock and establish the new PTE.
+ * Any parallel GUP will either observe the old page blocking on the
+ * page lock, block on the page table lock or observe the new page.
+ * The SetPageUptodate on the new page and page_add_new_anon_rmap
+ * guarantee the copy is visible before the pagetable update.
+ */
+ flush_cache_range(vma, mmun_start, mmun_end);
+ page_add_anon_rmap(new_page, vma, mmun_start);
+ pmdp_clear_flush(vma, mmun_start, pmd);
+ set_pmd_at(mm, mmun_start, pmd, entry);
+ flush_tlb_range(vma, mmun_start, mmun_end);
update_mmu_cache_pmd(vma, address, &entry);
+
+ if (page_count(page) != 2) {
+ set_pmd_at(mm, mmun_start, pmd, orig_entry);
+ flush_tlb_range(vma, mmun_start, mmun_end);
+ update_mmu_cache_pmd(vma, address, &entry);
+ page_remove_rmap(new_page);
+ goto fail_putback;
+ }
+
page_remove_rmap(page);
+
/*
* Finish the charge transaction under the page table lock to
* prevent split_huge_page() from dividing up the charge
* before it's fully transferred to the new page.
*/
mem_cgroup_end_migration(memcg, page, new_page, true);
- spin_unlock(&mm->page_table_lock);
+ spin_unlock(ptl);
+ mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+
+ /* Take an "isolate" reference and put new page on the LRU. */
+ get_page(new_page);
+ putback_lru_page(new_page);
unlock_page(new_page);
unlock_page(page);
@@ -1756,7 +1911,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
-out:
mod_zone_page_state(page_zone(page),
NR_ISOLATED_ANON + page_lru,
-HPAGE_PMD_NR);
@@ -1765,6 +1919,15 @@ out:
out_fail:
count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
out_dropref:
+ ptl = pmd_lock(mm, pmd);
+ if (pmd_same(*pmd, entry)) {
+ entry = pmd_mknonnuma(entry);
+ set_pmd_at(mm, mmun_start, pmd, entry);
+ update_mmu_cache_pmd(vma, address, &entry);
+ }
+ spin_unlock(ptl);
+
+out_unlock:
unlock_page(page);
put_page(page);
return 0;