Diffstat (limited to 'mm/pagewalk.c')
 -rw-r--r--  mm/pagewalk.c  98
1 files changed, 76 insertions, 22 deletions
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 8b1a2ce21ee..2beeabf502c 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -33,18 +33,35 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 
 	pmd = pmd_offset(pud, addr);
 	do {
+again:
 		next = pmd_addr_end(addr, end);
-		if (pmd_none_or_clear_bad(pmd)) {
+		if (pmd_none(*pmd)) {
 			if (walk->pte_hole)
 				err = walk->pte_hole(addr, next, walk);
 			if (err)
 				break;
 			continue;
 		}
+		/*
+		 * This implies that each ->pmd_entry() handler
+		 * needs to know about pmd_trans_huge() pmds
+		 */
 		if (walk->pmd_entry)
 			err = walk->pmd_entry(pmd, addr, next, walk);
-		if (!err && walk->pte_entry)
-			err = walk_pte_range(pmd, addr, next, walk);
+		if (err)
+			break;
+
+		/*
+		 * Check this here so we only break down trans_huge
+		 * pages when we _need_ to
+		 */
+		if (!walk->pte_entry)
+			continue;
+
+		split_huge_page_pmd_mm(walk->mm, addr, pmd);
+		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
+			goto again;
+		err = walk_pte_range(pmd, addr, next, walk);
 		if (err)
 			break;
 	} while (pmd++, addr = next, addr != end);
@@ -109,11 +126,21 @@ static int walk_hugetlb_range(struct vm_area_struct *vma,
 
 	return 0;
 }
-#endif
+
+#else /* CONFIG_HUGETLB_PAGE */
+static int walk_hugetlb_range(struct vm_area_struct *vma,
+			      unsigned long addr, unsigned long end,
+			      struct mm_walk *walk)
+{
+	return 0;
+}
+
+#endif /* CONFIG_HUGETLB_PAGE */
+
+
 
 /**
  * walk_page_range - walk a memory map's page tables with a callback
- * @mm: memory map to walk
  * @addr: starting address
  * @end: ending address
  * @walk: set of callbacks to invoke for each level of the tree
@@ -127,11 +154,15 @@ static int walk_hugetlb_range(struct vm_area_struct *vma,
  * associated range, and a copy of the original mm_walk for access to
  * the ->private or ->mm fields.
  *
- * No locks are taken, but the bottom level iterator will map PTE
+ * Usually no locks are taken, but splitting transparent huge page may
+ * take page table lock. And the bottom level iterator will map PTE
  * directories from highmem if necessary.
  *
  * If any callback returns a non-zero value, the walk is aborted and
 * the return value is propagated back to the caller. Otherwise 0 is returned.
+ *
+ * walk->mm->mmap_sem must be held for at least read if walk->hugetlb_entry
+ * is !NULL.
  */
 int walk_page_range(unsigned long addr, unsigned long end,
 		    struct mm_walk *walk)
@@ -139,7 +170,6 @@ int walk_page_range(unsigned long addr, unsigned long end,
 	pgd_t *pgd;
 	unsigned long next;
 	int err = 0;
-	struct vm_area_struct *vma;
 
 	if (addr >= end)
 		return err;
@@ -147,31 +177,55 @@ int walk_page_range(unsigned long addr, unsigned long end,
 	if (!walk->mm)
 		return -EINVAL;
 
+	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
+
 	pgd = pgd_offset(walk->mm, addr);
 	do {
+		struct vm_area_struct *vma = NULL;
+
 		next = pgd_addr_end(addr, end);
 
 		/*
-		 * handle hugetlb vma individually because pagetable walk for
-		 * the hugetlb page is dependent on the architecture and
-		 * we can't handled it in the same manner as non-huge pages.
+		 * This function was not intended to be vma based.
+		 * But there are vma special cases to be handled:
+		 * - hugetlb vma's
+		 * - VM_PFNMAP vma's
 		 */
 		vma = find_vma(walk->mm, addr);
-#ifdef CONFIG_HUGETLB_PAGE
-		if (vma && is_vm_hugetlb_page(vma)) {
-			if (vma->vm_end < next)
+		if (vma) {
+			/*
+			 * There are no page structures backing a VM_PFNMAP
+			 * range, so do not allow split_huge_page_pmd().
+			 */
+			if ((vma->vm_start <= addr) &&
+			    (vma->vm_flags & VM_PFNMAP)) {
 				next = vma->vm_end;
+				pgd = pgd_offset(walk->mm, next);
+				continue;
+			}
 			/*
-			 * Hugepage is very tightly coupled with vma, so
-			 * walk through hugetlb entries within a given vma.
+			 * Handle hugetlb vma individually because pagetable
+			 * walk for the hugetlb page is dependent on the
+			 * architecture and we can't handled it in the same
+			 * manner as non-huge pages.
 			 */
-			err = walk_hugetlb_range(vma, addr, next, walk);
-			if (err)
-				break;
-			pgd = pgd_offset(walk->mm, next);
-			continue;
+			if (walk->hugetlb_entry && (vma->vm_start <= addr) &&
+			    is_vm_hugetlb_page(vma)) {
+				if (vma->vm_end < next)
+					next = vma->vm_end;
+				/*
+				 * Hugepage is very tightly coupled with vma,
+				 * so walk through hugetlb entries within a
+				 * given vma.
+				 */
+				err = walk_hugetlb_range(vma, addr, next, walk);
+				if (err)
+					break;
+				pgd = pgd_offset(walk->mm, next);
+				continue;
+			}
 		}
-#endif
+
 		if (pgd_none_or_clear_bad(pgd)) {
 			if (walk->pte_hole)
 				err = walk->pte_hole(addr, next, walk);
@@ -188,7 +242,7 @@ int walk_page_range(unsigned long addr, unsigned long end,
 		if (err)
 			break;
 		pgd++;
-	} while (addr = next, addr != end);
+	} while (addr = next, addr < end);
 
 	return err;
 }
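The hunks above change the contract for walkers in two visible ways: a walker that registers only ->pmd_entry (and no ->pte_entry) now sees pmd_trans_huge() pmds instead of having them split for it, and walk_page_range() now asserts that walk->mm->mmap_sem is held. Below is a minimal, illustrative sketch of a walker written against those rules. The names count_present_pages, count_pmd_entry and struct count_private are invented for this example and are not part of the kernel; the pmd_trans_splitting() corner case is deliberately glossed over, and start/end are assumed page aligned.

/* Illustrative sketch only - not kernel code from this commit. */
#include <linux/mm.h>
#include <linux/huge_mm.h>
#include <linux/rwsem.h>

struct count_private {
	unsigned long pages;		/* present base pages seen so far */
};

static int count_pmd_entry(pmd_t *pmd, unsigned long addr,
			   unsigned long end, struct mm_walk *walk)
{
	struct count_private *cp = walk->private;
	pte_t *pte;
	spinlock_t *ptl;

	/*
	 * Only ->pmd_entry is registered, so walk_pmd_range() never calls
	 * split_huge_page_pmd_mm() on our behalf: a pmd_trans_huge() pmd
	 * can reach this handler and must be handled explicitly.  Take
	 * mm->page_table_lock to stabilize the check (THP pmds are
	 * protected by it in this era); a production handler would also
	 * care about pmd_trans_splitting().
	 */
	spin_lock(&walk->mm->page_table_lock);
	if (pmd_trans_huge(*pmd)) {
		/* A huge pmd maps HPAGE_PMD_NR base pages. */
		cp->pages += HPAGE_PMD_NR;
		spin_unlock(&walk->mm->page_table_lock);
		return 0;
	}
	spin_unlock(&walk->mm->page_table_lock);

	/* Regular pmd: walk the pte page it points to. */
	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	for (; addr != end; pte++, addr += PAGE_SIZE)
		if (pte_present(*pte))
			cp->pages++;
	pte_unmap_unlock(pte - 1, ptl);

	return 0;
}

static unsigned long count_present_pages(struct mm_struct *mm,
					 unsigned long start,
					 unsigned long end)
{
	struct count_private cp = { .pages = 0 };
	struct mm_walk count_walk = {
		.pmd_entry	= count_pmd_entry,
		.mm		= mm,
		.private	= &cp,
	};

	/* walk_page_range() now asserts that mmap_sem is held. */
	down_read(&mm->mmap_sem);
	walk_page_range(start, end, &count_walk);
	up_read(&mm->mmap_sem);

	return cp.pages;
}

Leaving ->pte_entry NULL is what keeps walk_pmd_range() from calling split_huge_page_pmd_mm() here, which is exactly the "only break down trans_huge pages when we _need_ to" behaviour the first hunk introduces.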
