Diffstat (limited to 'mm/mprotect.c')
-rw-r--r--	mm/mprotect.c	161
1 file changed, 92 insertions, 69 deletions
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 94722a4d6b4..c43d557941f 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -23,6 +23,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/migrate.h>
 #include <linux/perf_event.h>
+#include <linux/ksm.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
@@ -35,18 +36,47 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 }
 #endif
 
+/*
+ * For a prot_numa update we only hold mmap_sem for read so there is a
+ * potential race with faulting where a pmd was temporarily none. This
+ * function checks for a transhuge pmd under the appropriate lock. It
+ * returns a pte if it was successfully locked or NULL if it raced with
+ * a transhuge insertion.
+ */
+static pte_t *lock_pte_protection(struct vm_area_struct *vma, pmd_t *pmd,
+			unsigned long addr, int prot_numa, spinlock_t **ptl)
+{
+	pte_t *pte;
+	spinlock_t *pmdl;
+
+	/* !prot_numa is protected by mmap_sem held for write */
+	if (!prot_numa)
+		return pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);
+
+	pmdl = pmd_lock(vma->vm_mm, pmd);
+	if (unlikely(pmd_trans_huge(*pmd) || pmd_none(*pmd))) {
+		spin_unlock(pmdl);
+		return NULL;
+	}
+
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);
+	spin_unlock(pmdl);
+	return pte;
+}
+
 static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end, pgprot_t newprot,
-		int dirty_accountable, int prot_numa, bool *ret_all_same_node)
+		int dirty_accountable, int prot_numa)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
 	unsigned long pages = 0;
-	bool all_same_node = true;
-	int last_nid = -1;
 
-	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	pte = lock_pte_protection(vma, pmd, addr, prot_numa, &ptl);
+	if (!pte)
+		return 0;
+
 	arch_enter_lazy_mmu_mode();
 	do {
 		oldpte = *pte;
@@ -54,117 +84,111 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 			pte_t ptent;
 			bool updated = false;
 
-			ptent = ptep_modify_prot_start(mm, addr, pte);
 			if (!prot_numa) {
+				ptent = ptep_modify_prot_start(mm, addr, pte);
+				if (pte_numa(ptent))
+					ptent = pte_mknonnuma(ptent);
 				ptent = pte_modify(ptent, newprot);
+				/*
+				 * Avoid taking write faults for pages we
+				 * know to be dirty.
+				 */
+				if (dirty_accountable && pte_dirty(ptent))
+					ptent = pte_mkwrite(ptent);
+				ptep_modify_prot_commit(mm, addr, pte, ptent);
 				updated = true;
 			} else {
 				struct page *page;
 
 				page = vm_normal_page(vma, addr, oldpte);
-				if (page) {
-					int this_nid = page_to_nid(page);
-					if (last_nid == -1)
-						last_nid = this_nid;
-					if (last_nid != this_nid)
-						all_same_node = false;
-
-					/* only check non-shared pages */
-					if (!pte_numa(oldpte) &&
-					    page_mapcount(page) == 1) {
-						ptent = pte_mknuma(ptent);
+				if (page && !PageKsm(page)) {
+					if (!pte_numa(oldpte)) {
+						ptep_set_numa(mm, addr, pte);
 						updated = true;
 					}
 				}
 			}
-
-			/*
-			 * Avoid taking write faults for pages we know to be
-			 * dirty.
-			 */
-			if (dirty_accountable && pte_dirty(ptent)) {
-				ptent = pte_mkwrite(ptent);
-				updated = true;
-			}
-
 			if (updated)
 				pages++;
-			ptep_modify_prot_commit(mm, addr, pte, ptent);
 		} else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 
 			if (is_write_migration_entry(entry)) {
+				pte_t newpte;
 				/*
 				 * A protection check is difficult so
 				 * just be safe and disable write
 				 */
 				make_migration_entry_read(&entry);
-				set_pte_at(mm, addr, pte,
-					swp_entry_to_pte(entry));
+				newpte = swp_entry_to_pte(entry);
+				if (pte_swp_soft_dirty(oldpte))
+					newpte = pte_swp_mksoft_dirty(newpte);
+				set_pte_at(mm, addr, pte, newpte);
+
+				pages++;
 			}
-			pages++;
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
-	*ret_all_same_node = all_same_node;
 
 	return pages;
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
-				       pmd_t *pmd)
-{
-	spin_lock(&mm->page_table_lock);
-	set_pmd_at(mm, addr & PMD_MASK, pmd, pmd_mknuma(*pmd));
-	spin_unlock(&mm->page_table_lock);
-}
-#else
-static inline void change_pmd_protnuma(struct mm_struct *mm, unsigned long addr,
-				       pmd_t *pmd)
-{
-	BUG();
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
 static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 		pud_t *pud, unsigned long addr, unsigned long end,
 		pgprot_t newprot, int dirty_accountable, int prot_numa)
 {
 	pmd_t *pmd;
+	struct mm_struct *mm = vma->vm_mm;
 	unsigned long next;
 	unsigned long pages = 0;
-	bool all_same_node;
+	unsigned long nr_huge_updates = 0;
+	unsigned long mni_start = 0;
 
 	pmd = pmd_offset(pud, addr);
 	do {
+		unsigned long this_pages;
+
 		next = pmd_addr_end(addr, end);
+		if (!pmd_trans_huge(*pmd) && pmd_none_or_clear_bad(pmd))
+			continue;
+
+		/* invoke the mmu notifier if the pmd is populated */
+		if (!mni_start) {
+			mni_start = addr;
+			mmu_notifier_invalidate_range_start(mm, mni_start, end);
+		}
+
 		if (pmd_trans_huge(*pmd)) {
 			if (next - addr != HPAGE_PMD_SIZE)
 				split_huge_page_pmd(vma, addr, pmd);
-			else if (change_huge_pmd(vma, pmd, addr, newprot,
-						 prot_numa)) {
-				pages += HPAGE_PMD_NR;
-				continue;
+			else {
+				int nr_ptes = change_huge_pmd(vma, pmd, addr,
+						newprot, prot_numa);
+
+				if (nr_ptes) {
+					if (nr_ptes == HPAGE_PMD_NR) {
+						pages += HPAGE_PMD_NR;
+						nr_huge_updates++;
+					}
+
+					/* huge pmd was handled */
+					continue;
+				}
 			}
-			/* fall through */
+			/* fall through, the trans huge pmd just split */
 		}
-		if (pmd_none_or_clear_bad(pmd))
-			continue;
-		pages += change_pte_range(vma, pmd, addr, next, newprot,
-				 dirty_accountable, prot_numa, &all_same_node);
-
-		/*
-		 * If we are changing protections for NUMA hinting faults then
-		 * set pmd_numa if the examined pages were all on the same
-		 * node. This allows a regular PMD to be handled as one fault
-		 * and effectively batches the taking of the PTL
-		 */
-		if (prot_numa && all_same_node)
-			change_pmd_protnuma(vma->vm_mm, addr, pmd);
+		this_pages = change_pte_range(vma, pmd, addr, next, newprot,
+				 dirty_accountable, prot_numa);
+		pages += this_pages;
 	} while (pmd++, addr = next, addr != end);
 
+	if (mni_start)
+		mmu_notifier_invalidate_range_end(mm, mni_start, end);
+
+	if (nr_huge_updates)
+		count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
 	return pages;
 }
@@ -201,6 +225,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 	BUG_ON(addr >= end);
 	pgd = pgd_offset(mm, addr);
 	flush_cache_range(vma, addr, end);
+	set_tlb_flush_pending(mm);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
@@ -212,6 +237,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 	/* Only flush the TLB if we actually modified any entries: */
 	if (pages)
 		flush_tlb_range(vma, start, end);
+	clear_tlb_flush_pending(mm);
 
 	return pages;
 }
@@ -220,15 +246,12 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 		       unsigned long end, pgprot_t newprot,
 		       int dirty_accountable, int prot_numa)
 {
-	struct mm_struct *mm = vma->vm_mm;
 	unsigned long pages;
 
-	mmu_notifier_invalidate_range_start(mm, start, end);
 	if (is_vm_hugetlb_page(vma))
 		pages = hugetlb_change_protection(vma, start, end, newprot);
 	else
 		pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa);
-	mmu_notifier_invalidate_range_end(mm, start, end);
 
 	return pages;
 }
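For context: change_protection() above is the kernel-side worker behind the mprotect(2) system call, and the rewritten change_pte_range() is what walks and updates each populated PTE in the affected range. The following minimal userspace sketch (not part of the commit; file name and sizes are illustrative) exercises that path by populating PTEs and then downgrading the mapping to read-only:

/* mprotect_demo.c - build with: cc -O2 -o mprotect_demo mprotect_demo.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	size_t len = 16 * (size_t)page;

	/* Map an anonymous read-write region and dirty every page so
	 * the PTEs are populated before the protection change. */
	char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}
	memset(buf, 0xaa, len);

	/* This call ends up in change_protection() ->
	 * change_protection_range() in mm/mprotect.c; each populated
	 * PTE in the range is rewritten with the new protection bits. */
	if (mprotect(buf, len, PROT_READ) != 0) {
		perror("mprotect");
		return EXIT_FAILURE;
	}

	/* Reads still work; a write to buf here would now fault
	 * (SIGSEGV), since the PTEs were made read-only. */
	printf("first byte after mprotect: 0x%02x\n",
	       (unsigned char)buf[0]);

	munmap(buf, len);
	return EXIT_SUCCESS;
}

The prot_numa side of the diff is not reachable this way; it is driven internally by NUMA-balancing hinting-fault updates rather than by a userspace protection change.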
