Diffstat (limited to 'mm/migrate.c')
 -rw-r--r--	mm/migrate.c	381

1 file changed, 272 insertions, 109 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index a26bccd44cc..be6dbf995c0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -36,6 +36,7 @@
 #include <linux/hugetlb_cgroup.h>
 #include <linux/gfp.h>
 #include <linux/balloon_compaction.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/tlbflush.h>
 
@@ -71,28 +72,12 @@ int migrate_prep_local(void)
 }
 
 /*
- * Add isolated pages on the list back to the LRU under page lock
- * to avoid leaking evictable pages back onto unevictable list.
- */
-void putback_lru_pages(struct list_head *l)
-{
-	struct page *page;
-	struct page *page2;
-
-	list_for_each_entry_safe(page, page2, l, lru) {
-		list_del(&page->lru);
-		dec_zone_page_state(page, NR_ISOLATED_ANON +
-				page_is_file_cache(page));
-			putback_lru_page(page);
-	}
-}
-
-/*
  * Put previously isolated pages back onto the appropriate lists
  * from where they were once taken off for compaction/migration.
  *
- * This function shall be used instead of putback_lru_pages(),
- * whenever the isolated pageset has been built by isolate_migratepages_range()
+ * This function shall be used whenever the isolated pageset has been
+ * built from lru, balloon, hugetlbfs page. See isolate_migratepages_range()
+ * and isolate_huge_page().
  */
 void putback_movable_pages(struct list_head *l)
 {
@@ -130,13 +115,11 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 		ptep = huge_pte_offset(mm, addr);
 		if (!ptep)
 			goto out;
-		ptl = &mm->page_table_lock;
+		ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep);
 	} else {
 		pmd = mm_find_pmd(mm, addr);
 		if (!pmd)
 			goto out;
-		if (pmd_trans_huge(*pmd))
-			goto out;
 
 		ptep = pte_offset_map(pmd, addr);
@@ -161,6 +144,8 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 
 	get_page(new);
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
+	if (pte_swp_soft_dirty(*ptep))
+		pte = pte_mksoft_dirty(pte);
 	if (is_write_migration_entry(entry))
 		pte = pte_mkwrite(pte);
 #ifdef CONFIG_HUGETLB_PAGE
@@ -191,12 +176,49 @@ out:
 }
 
 /*
+ * Congratulations to trinity for discovering this bug.
+ * mm/fremap.c's remap_file_pages() accepts any range within a single vma to
+ * convert that vma to VM_NONLINEAR; and generic_file_remap_pages() will then
+ * replace the specified range by file ptes throughout (maybe populated after).
+ * If page migration finds a page within that range, while it's still located
+ * by vma_interval_tree rather than lost to i_mmap_nonlinear list, no problem:
+ * zap_pte() clears the temporary migration entry before mmap_sem is dropped.
+ * But if the migrating page is in a part of the vma outside the range to be
+ * remapped, then it will not be cleared, and remove_migration_ptes() needs to
+ * deal with it.  Fortunately, this part of the vma is of course still linear,
+ * so we just need to use linear location on the nonlinear list.
+ */
+static int remove_linear_migration_ptes_from_nonlinear(struct page *page,
+		struct address_space *mapping, void *arg)
+{
+	struct vm_area_struct *vma;
+	/* hugetlbfs does not support remap_pages, so no huge pgoff worries */
+	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	unsigned long addr;
+
+	list_for_each_entry(vma,
+		&mapping->i_mmap_nonlinear, shared.nonlinear) {
+
+		addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+		if (addr >= vma->vm_start && addr < vma->vm_end)
+			remove_migration_pte(page, vma, addr, arg);
+	}
+	return SWAP_AGAIN;
+}
+
+/*
  * Get rid of all migration entries and replace them by
  * references to the indicated page.
  */
 static void remove_migration_ptes(struct page *old, struct page *new)
 {
-	rmap_walk(new, remove_migration_pte, old);
+	struct rmap_walk_control rwc = {
+		.rmap_one = remove_migration_pte,
+		.arg = old,
+		.file_nonlinear = remove_linear_migration_ptes_from_nonlinear,
+	};
+
+	rmap_walk(new, &rwc);
 }
 
 /*
@@ -247,9 +269,10 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 	__migration_entry_wait(mm, ptep, ptl);
 }
 
-void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte)
+void migration_entry_wait_huge(struct vm_area_struct *vma,
+		struct mm_struct *mm, pte_t *pte)
 {
-	spinlock_t *ptl = &(mm)->page_table_lock;
+	spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
 	__migration_entry_wait(mm, pte, ptl);
 }
 
@@ -313,14 +336,15 @@ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
  */
 int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page,
-		struct buffer_head *head, enum migrate_mode mode)
+		struct buffer_head *head, enum migrate_mode mode,
+		int extra_count)
 {
-	int expected_count = 0;
+	int expected_count = 1 + extra_count;
 	void **pslot;
 
 	if (!mapping) {
 		/* Anonymous page without mapping */
-		if (page_count(page) != 1)
+		if (page_count(page) != expected_count)
 			return -EAGAIN;
 		return MIGRATEPAGE_SUCCESS;
 	}
@@ -330,7 +354,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	pslot = radix_tree_lookup_slot(&mapping->page_tree,
 					page_index(page));
 
-	expected_count = 2 + page_has_private(page);
+	expected_count += 1 + page_has_private(page);
 	if (page_count(page) != expected_count ||
 		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
 		spin_unlock_irq(&mapping->tree_lock);
@@ -439,10 +463,60 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 }
 
 /*
+ * Gigantic pages are so large that we do not guarantee that page++ pointer
+ * arithmetic will work across the entire page.  We need something more
+ * specialized.
+ */
+static void __copy_gigantic_page(struct page *dst, struct page *src,
+				int nr_pages)
+{
+	int i;
+	struct page *dst_base = dst;
+	struct page *src_base = src;
+
+	for (i = 0; i < nr_pages; ) {
+		cond_resched();
+		copy_highpage(dst, src);
+
+		i++;
+		dst = mem_map_next(dst, dst_base, i);
+		src = mem_map_next(src, src_base, i);
+	}
+}
+
+static void copy_huge_page(struct page *dst, struct page *src)
+{
+	int i;
+	int nr_pages;
+
+	if (PageHuge(src)) {
+		/* hugetlbfs page */
+		struct hstate *h = page_hstate(src);
+		nr_pages = pages_per_huge_page(h);
+
+		if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) {
+			__copy_gigantic_page(dst, src, nr_pages);
+			return;
+		}
+	} else {
+		/* thp page */
+		BUG_ON(!PageTransHuge(src));
+		nr_pages = hpage_nr_pages(src);
+	}
+
+	for (i = 0; i < nr_pages; i++) {
+		cond_resched();
+		copy_highpage(dst + i, src + i);
+	}
+}
+
+/*
  * Copy the page to its new location
  */
 void migrate_page_copy(struct page *newpage, struct page *page)
 {
+	int cpupid;
+
 	if (PageHuge(page) || PageTransHuge(page))
 		copy_huge_page(newpage, page);
 	else
@@ -455,7 +529,7 @@ void migrate_page_copy(struct page *newpage, struct page *page)
 	if (PageUptodate(page))
 		SetPageUptodate(newpage);
 	if (TestClearPageActive(page)) {
-		VM_BUG_ON(PageUnevictable(page));
+		VM_BUG_ON_PAGE(PageUnevictable(page), page);
 		SetPageActive(newpage);
 	} else if (TestClearPageUnevictable(page))
 		SetPageUnevictable(newpage);
@@ -479,6 +553,13 @@ void migrate_page_copy(struct page *newpage, struct page *page)
 			__set_page_dirty_nobuffers(newpage);
 	}
 
+	/*
+	 * Copy NUMA information to the new page, to prevent over-eager
+	 * future migrations of this same page.
+	 */
+	cpupid = page_cpupid_xchg_last(page, -1);
+	page_cpupid_xchg_last(newpage, cpupid);
+
 	mlock_migrate_page(newpage, page);
 	ksm_migrate_page(newpage, page);
 	/*
@@ -501,14 +582,6 @@ void migrate_page_copy(struct page *newpage, struct page *page)
  *                    Migration functions
  ***********************************************************/
 
-/* Always fail migration. Used for mappings that are not movable */
-int fail_migrate_page(struct address_space *mapping,
-			struct page *newpage, struct page *page)
-{
-	return -EIO;
-}
-EXPORT_SYMBOL(fail_migrate_page);
-
 /*
  * Common logic to directly migrate a single page suitable for
  * pages that do not use PagePrivate/PagePrivate2.
@@ -523,7 +596,7 @@ int migrate_page(struct address_space *mapping,
 
 	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
 
-	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode);
+	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
 
 	if (rc != MIGRATEPAGE_SUCCESS)
 		return rc;
@@ -550,7 +623,7 @@ int buffer_migrate_page(struct address_space *mapping,
 
 	head = page_buffers(page);
 
-	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode);
+	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0);
 
 	if (rc != MIGRATEPAGE_SUCCESS)
 		return rc;
@@ -828,7 +901,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 	 * free the metadata, so the page can be freed.
 	 */
 	if (!page->mapping) {
-		VM_BUG_ON(PageAnon(page));
+		VM_BUG_ON_PAGE(PageAnon(page), page);
 		if (page_has_private(page)) {
 			try_to_free_buffers(page);
 			goto uncharge;
@@ -863,8 +936,9 @@ out:
  * Obtain the lock on page, remove all ptes and migrate the page
  * to the newly allocated page in newpage.
  */
-static int unmap_and_move(new_page_t get_new_page, unsigned long private,
-			struct page *page, int force, enum migrate_mode mode)
+static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page,
+			unsigned long private, struct page *page, int force,
+			enum migrate_mode mode)
 {
 	int rc = 0;
 	int *result = NULL;
@@ -908,11 +982,18 @@ out:
 				page_is_file_cache(page));
 		putback_lru_page(page);
 	}
+
 	/*
-	 * Move the new page to the LRU. If migration was not successful
-	 * then this will free the page.
+	 * If migration was not successful and there's a freeing callback, use
+	 * it.  Otherwise, putback_lru_page() will drop the reference grabbed
+	 * during isolation.
 	 */
-	putback_lru_page(newpage);
+	if (rc != MIGRATEPAGE_SUCCESS && put_new_page) {
+		ClearPageSwapBacked(newpage);
+		put_new_page(newpage, private);
+	} else
+		putback_lru_page(newpage);
+
 	if (result) {
 		if (rc)
 			*result = rc;
@@ -941,12 +1022,13 @@ out:
  * will wait in the page fault for migration to complete.
  */
 static int unmap_and_move_huge_page(new_page_t get_new_page,
-				unsigned long private, struct page *hpage,
-				int force, enum migrate_mode mode)
+				free_page_t put_new_page, unsigned long private,
+				struct page *hpage, int force,
+				enum migrate_mode mode)
 {
 	int rc = 0;
 	int *result = NULL;
-	struct page *new_hpage = get_new_page(hpage, private, &result);
+	struct page *new_hpage;
 	struct anon_vma *anon_vma = NULL;
 
 	/*
@@ -956,9 +1038,12 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	 * tables or check whether the hugepage is pmd-based or not before
 	 * kicking migration.
 	 */
-	if (!hugepage_migration_support(page_hstate(hpage)))
+	if (!hugepage_migration_supported(page_hstate(hpage))) {
+		putback_active_hugepage(hpage);
 		return -ENOSYS;
+	}
 
+	new_hpage = get_new_page(hpage, private, &result);
 	if (!new_hpage)
 		return -ENOMEM;
 
@@ -978,20 +1063,30 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	if (!page_mapped(hpage))
 		rc = move_to_new_page(new_hpage, hpage, 1, mode);
 
-	if (rc)
+	if (rc != MIGRATEPAGE_SUCCESS)
 		remove_migration_ptes(hpage, hpage);
 
 	if (anon_vma)
 		put_anon_vma(anon_vma);
 
-	if (!rc)
+	if (rc == MIGRATEPAGE_SUCCESS)
 		hugetlb_cgroup_migrate(hpage, new_hpage);
 
 	unlock_page(hpage);
 out:
 	if (rc != -EAGAIN)
 		putback_active_hugepage(hpage);
-	put_page(new_hpage);
+
+	/*
+	 * If migration was not successful and there's a freeing callback, use
+	 * it.  Otherwise, put_page() will drop the reference grabbed during
+	 * isolation.
+	 */
+	if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
+		put_new_page(new_hpage, private);
+	else
+		put_page(new_hpage);
+
 	if (result) {
 		if (rc)
 			*result = rc;
@@ -1008,6 +1103,8 @@ out:
  * @from:		The list of pages to be migrated.
  * @get_new_page:	The function used to allocate free pages to be used
  *			as the target of the page migration.
+ * @put_new_page:	The function used to free target pages if migration
+ *			fails, or NULL if no special handling is necessary.
  * @private:		Private data to be passed on to get_new_page()
  * @mode:		The migration mode that specifies the constraints for
  *			page migration, if any.
@@ -1021,7 +1118,8 @@ out:
  * Returns the number of pages that were not migrated, or an error code.
  */
 int migrate_pages(struct list_head *from, new_page_t get_new_page,
-		unsigned long private, enum migrate_mode mode, int reason)
+		free_page_t put_new_page, unsigned long private,
+		enum migrate_mode mode, int reason)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -1043,10 +1141,11 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 
 			if (PageHuge(page))
 				rc = unmap_and_move_huge_page(get_new_page,
-						private, page, pass > 2, mode);
+						put_new_page, private, page,
+						pass > 2, mode);
 			else
-				rc = unmap_and_move(get_new_page, private,
-						page, pass > 2, mode);
+				rc = unmap_and_move(get_new_page, put_new_page,
+						private, page, pass > 2, mode);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -1058,7 +1157,12 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 				nr_succeeded++;
 				break;
 			default:
-				/* Permanent failure */
+				/*
+				 * Permanent failure (-EBUSY, -ENOSYS, etc.):
+				 * unlike -EAGAIN case, the failed page is
+				 * removed from migration page list and not
+				 * retried in the next outer loop.
+				 */
 				nr_failed++;
 				break;
 			}
@@ -1107,7 +1211,7 @@ static struct page *new_page_node(struct page *p, unsigned long private,
 					pm->node);
 	else
 		return alloc_pages_exact_node(pm->node,
-				GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0);
+				GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
 }
 
 /*
@@ -1190,7 +1294,7 @@ set_status:
 
 	err = 0;
 	if (!list_empty(&pagelist)) {
-		err = migrate_pages(&pagelist, new_page_node,
+		err = migrate_pages(&pagelist, new_page_node, NULL,
 				(unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL);
 		if (err)
 			putback_movable_pages(&pagelist);
@@ -1493,12 +1597,10 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
 	struct page *newpage;
 
 	newpage = alloc_pages_exact_node(nid,
-					 (GFP_HIGHUSER_MOVABLE | GFP_THISNODE |
-					  __GFP_NOMEMALLOC | __GFP_NORETRY |
-					  __GFP_NOWARN) &
+					 (GFP_HIGHUSER_MOVABLE |
+					  __GFP_THISNODE | __GFP_NOMEMALLOC |
+					  __GFP_NORETRY | __GFP_NOWARN) &
 					 ~GFP_IOFS, 0);
-	if (newpage)
-		page_nid_xchg_last(newpage, page_nid_last(page));
 
 	return newpage;
 }
@@ -1532,35 +1634,42 @@ bool migrate_ratelimited(int node)
 }
 
 /* Returns true if the node is migrate rate-limited after the update */
-bool numamigrate_update_ratelimit(pg_data_t *pgdat, unsigned long nr_pages)
+static bool numamigrate_update_ratelimit(pg_data_t *pgdat,
+					unsigned long nr_pages)
 {
-	bool rate_limited = false;
-
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
 	 * Optimal placement is no good if the memory bus is saturated and
 	 * all the time is being spent migrating!
 	 */
-	spin_lock(&pgdat->numabalancing_migrate_lock);
 	if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {
+		spin_lock(&pgdat->numabalancing_migrate_lock);
 		pgdat->numabalancing_migrate_nr_pages = 0;
 		pgdat->numabalancing_migrate_next_window = jiffies +
 			msecs_to_jiffies(migrate_interval_millisecs);
+		spin_unlock(&pgdat->numabalancing_migrate_lock);
 	}
-	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages)
-		rate_limited = true;
-	else
-		pgdat->numabalancing_migrate_nr_pages += nr_pages;
-	spin_unlock(&pgdat->numabalancing_migrate_lock);
-
-	return rate_limited;
+	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
+		trace_mm_numa_migrate_ratelimit(current, pgdat->node_id,
+								nr_pages);
+		return true;
+	}
+
+	/*
+	 * This is an unlocked non-atomic update so errors are possible.
+	 * The consequences are failing to migrate when we potentially should
+	 * have, which is not severe enough to warrant locking. If it is ever
+	 * a problem, it can be converted to a per-cpu counter.
+	 */
+	pgdat->numabalancing_migrate_nr_pages += nr_pages;
+	return false;
 }
 
-int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
+static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 {
 	int page_lru;
 
-	VM_BUG_ON(compound_order(page) && !PageTransHuge(page));
+	VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
 
 	/* Avoid migrating to a node that is nearly full */
 	if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
@@ -1594,12 +1703,25 @@ int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 	return 1;
 }
 
+bool pmd_trans_migrating(pmd_t pmd)
+{
+	struct page *page = pmd_page(pmd);
+	return PageLocked(page);
+}
+
+void wait_migrate_huge_page(struct anon_vma *anon_vma, pmd_t *pmd)
+{
+	struct page *page = pmd_page(*pmd);
+	wait_on_page_locked(page);
+}
+
 /*
  * Attempt to migrate a misplaced page to the specified destination
  * node. Caller is expected to have an elevated reference count on
  * the page that will be dropped by this function before returning.
  */
-int migrate_misplaced_page(struct page *page, int node)
+int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
+			   int node)
 {
 	pg_data_t *pgdat = NODE_DATA(node);
 	int isolated;
@@ -1607,10 +1729,11 @@ int migrate_misplaced_page(struct page *page, int node)
 	LIST_HEAD(migratepages);
 
 	/*
-	 * Don't migrate pages that are mapped in multiple processes.
-	 * TODO: Handle false sharing detection instead of this hammer
+	 * Don't migrate file pages that are mapped in multiple processes
+	 * with execute permissions as they are probably shared libraries.
 	 */
-	if (page_mapcount(page) != 1)
+	if (page_mapcount(page) != 1 && page_is_file_cache(page) &&
+	    (vma->vm_flags & VM_EXEC))
 		goto out;
 
 	/*
@@ -1627,9 +1750,15 @@ int migrate_misplaced_page(struct page *page, int node)
 	list_add(&page->lru, &migratepages);
 	nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
-				     node, MIGRATE_ASYNC, MR_NUMA_MISPLACED);
+				     NULL, node, MIGRATE_ASYNC,
+				     MR_NUMA_MISPLACED);
 	if (nr_remaining) {
-		putback_lru_pages(&migratepages);
+		if (!list_empty(&migratepages)) {
+			list_del(&page->lru);
+			dec_zone_page_state(page, NR_ISOLATED_ANON +
+					page_is_file_cache(page));
+			putback_lru_page(page);
+		}
 		isolated = 0;
 	} else
 		count_vm_numa_event(NUMA_PAGE_MIGRATE);
@@ -1653,19 +1782,15 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 				unsigned long address,
 				struct page *page, int node)
 {
-	unsigned long haddr = address & HPAGE_PMD_MASK;
+	spinlock_t *ptl;
 	pg_data_t *pgdat = NODE_DATA(node);
 	int isolated = 0;
 	struct page *new_page = NULL;
 	struct mem_cgroup *memcg = NULL;
 	int page_lru = page_is_file_cache(page);
-
-	/*
-	 * Don't migrate pages that are mapped in multiple processes.
-	 * TODO: Handle false sharing detection instead of this hammer
-	 */
-	if (page_mapcount(page) != 1)
-		goto out_dropref;
+	unsigned long mmun_start = address & HPAGE_PMD_MASK;
+	unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
+	pmd_t orig_entry;
 
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
@@ -1676,18 +1801,20 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		goto out_dropref;
 
 	new_page = alloc_pages_node(node,
-		(GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER);
+		(GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT,
+		HPAGE_PMD_ORDER);
 	if (!new_page)
 		goto out_fail;
 
-	page_nid_xchg_last(new_page, page_nid_last(page));
-
 	isolated = numamigrate_isolate_page(pgdat, page);
 	if (!isolated) {
 		put_page(new_page);
 		goto out_fail;
 	}
 
+	if (mm_tlb_flush_pending(mm))
+		flush_tlb_range(vma, mmun_start, mmun_end);
+
 	/* Prepare a page as a migration target */
 	__set_page_locked(new_page);
 	SetPageSwapBacked(new_page);
@@ -1699,9 +1826,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	WARN_ON(PageLRU(new_page));
 
 	/* Recheck the target PMD */
-	spin_lock(&mm->page_table_lock);
-	if (unlikely(!pmd_same(*pmd, entry))) {
-		spin_unlock(&mm->page_table_lock);
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+	ptl = pmd_lock(mm, pmd);
+	if (unlikely(!pmd_same(*pmd, entry) || page_count(page) != 2)) {
+fail_putback:
+		spin_unlock(ptl);
+		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
 		/* Reverse changes made by migrate_page_copy() */
 		if (TestClearPageActive(new_page))
@@ -1713,12 +1843,13 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		unlock_page(new_page);
 		put_page(new_page);		/* Free it */
 
-		unlock_page(page);
+		/* Retake the caller's reference and putback on LRU */
+		get_page(page);
 		putback_lru_page(page);
+		mod_zone_page_state(page_zone(page),
+			 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
 
-		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
-		isolated = 0;
-		goto out;
+		goto out_unlock;
 	}
 
 	/*
@@ -1730,23 +1861,47 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	 */
 	mem_cgroup_prepare_migration(page, new_page, &memcg);
 
+	orig_entry = *pmd;
 	entry = mk_pmd(new_page, vma->vm_page_prot);
-	entry = pmd_mknonnuma(entry);
-	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 	entry = pmd_mkhuge(entry);
+	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
-	page_add_new_anon_rmap(new_page, vma, haddr);
-
-	set_pmd_at(mm, haddr, pmd, entry);
+	/*
+	 * Clear the old entry under pagetable lock and establish the new PTE.
+	 * Any parallel GUP will either observe the old page blocking on the
+	 * page lock, block on the page table lock or observe the new page.
+	 * The SetPageUptodate on the new page and page_add_new_anon_rmap
+	 * guarantee the copy is visible before the pagetable update.
+	 */
+	flush_cache_range(vma, mmun_start, mmun_end);
+	page_add_anon_rmap(new_page, vma, mmun_start);
+	pmdp_clear_flush(vma, mmun_start, pmd);
+	set_pmd_at(mm, mmun_start, pmd, entry);
+	flush_tlb_range(vma, mmun_start, mmun_end);
 	update_mmu_cache_pmd(vma, address, &entry);
+
+	if (page_count(page) != 2) {
+		set_pmd_at(mm, mmun_start, pmd, orig_entry);
+		flush_tlb_range(vma, mmun_start, mmun_end);
+		update_mmu_cache_pmd(vma, address, &entry);
+		page_remove_rmap(new_page);
+		goto fail_putback;
+	}
+
 	page_remove_rmap(page);
+
 	/*
 	 * Finish the charge transaction under the page table lock to
 	 * prevent split_huge_page() from dividing up the charge
	 * before it's fully transferred to the new page.
 	 */
 	mem_cgroup_end_migration(memcg, page, new_page, true);
-	spin_unlock(&mm->page_table_lock);
+	spin_unlock(ptl);
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+
+	/* Take an "isolate" reference and put new page on the LRU. */
+	get_page(new_page);
+	putback_lru_page(new_page);
 
 	unlock_page(new_page);
 	unlock_page(page);
@@ -1756,7 +1911,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
 	count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
 
-out:
 	mod_zone_page_state(page_zone(page),
 			NR_ISOLATED_ANON + page_lru,
 			-HPAGE_PMD_NR);
@@ -1765,6 +1919,15 @@ out:
 out_fail:
 	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
 out_dropref:
+	ptl = pmd_lock(mm, pmd);
+	if (pmd_same(*pmd, entry)) {
+		entry = pmd_mknonnuma(entry);
+		set_pmd_at(mm, mmun_start, pmd, entry);
+		update_mmu_cache_pmd(vma, address, &entry);
+	}
+	spin_unlock(ptl);
+
+out_unlock:
 	unlock_page(page);
 	put_page(page);
 	return 0;
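
The interface change that threads through most of the hunks above is the new free_page_t callback taken by migrate_pages(), unmap_and_move() and unmap_and_move_huge_page(): destination pages that migration never consumed are handed back to the caller instead of being dropped onto the LRU. The sketch below is illustrative only and is not part of this diff; the helpers alloc_target_page(), free_target_page() and migrate_list_to_node() are hypothetical names, assuming the new_page_t/free_page_t typedefs and the migrate_pages() signature shown in the diff.

#include <linux/migrate.h>
#include <linux/gfp.h>
#include <linux/mm.h>

/* Allocate a destination page on the node passed through 'private'. */
static struct page *alloc_target_page(struct page *page, unsigned long private,
				      int **resultp)
{
	int nid = (int)private;

	return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0);
}

/* Only called for destination pages that migration did not end up using. */
static void free_target_page(struct page *page, unsigned long private)
{
	__free_page(page);
}

/* Migrate an isolated page list to a node, freeing unused targets. */
static int migrate_list_to_node(struct list_head *pagelist, int nid)
{
	return migrate_pages(pagelist, alloc_target_page, free_target_page,
			     (unsigned long)nid, MIGRATE_SYNC,
			     MR_MEMORY_HOTPLUG);
}

Callers that have no special bookkeeping for unused destination pages can keep passing NULL for put_new_page, as the sys_move_pages and NUMA-balancing call sites in the diff do.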
