From 6a60f1b3588aef6ddceaa14192df475d430cce45 Mon Sep 17 00:00:00 2001
From: Johannes Weiner
Date: Mon, 24 May 2010 14:32:09 -0700
Subject: mincore: cleanups

This fixes some minor issues that bugged me while going over the code:

o adjust argument order of do_mincore() to match the syscall
o simplify range length calculation
o drop superfluous shift in huge tlb calculation, address is page aligned
o drop dead nr_huge calculation
o check pte_none() before pte_present()
o comment and whitespace fixes

No semantic changes intended.

Signed-off-by: Johannes Weiner
Cc: Andrea Arcangeli
Cc: Naoya Horiguchi
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/mincore.c | 76 +++++++++++++++++++++---------------------------------------
 1 file changed, 27 insertions(+), 49 deletions(-)

(limited to 'mm/mincore.c')

diff --git a/mm/mincore.c b/mm/mincore.c
index f77433c2027..1f6574c5167 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -54,7 +54,7 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
  * all the arguments, we hold the mmap semaphore: we should
  * just return the amount of info we're asked for.
  */
-static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pages)
+static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec)
 {
         pgd_t *pgd;
         pud_t *pud;
@@ -64,35 +64,29 @@ static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pag
         unsigned long nr;
         int i;
         pgoff_t pgoff;
-        struct vm_area_struct *vma = find_vma(current->mm, addr);
+        struct vm_area_struct *vma;
 
-        /*
-         * find_vma() didn't find anything above us, or we're
-         * in an unmapped hole in the address space: ENOMEM.
-         */
+        vma = find_vma(current->mm, addr);
         if (!vma || addr < vma->vm_start)
                 return -ENOMEM;
 
+        nr = min(pages, (vma->vm_end - addr) >> PAGE_SHIFT);
+
 #ifdef CONFIG_HUGETLB_PAGE
         if (is_vm_hugetlb_page(vma)) {
                 struct hstate *h;
-                unsigned long nr_huge;
-                unsigned char present;
 
                 i = 0;
-                nr = min(pages, (vma->vm_end - addr) >> PAGE_SHIFT);
                 h = hstate_vma(vma);
-                nr_huge = ((addr + pages * PAGE_SIZE - 1) >> huge_page_shift(h))
-                          - (addr >> huge_page_shift(h)) + 1;
-                nr_huge = min(nr_huge,
-                              (vma->vm_end - addr) >> huge_page_shift(h));
                 while (1) {
-                        /* hugepage always in RAM for now,
-                         * but generally it needs to be check */
+                        unsigned char present;
+                        /*
+                         * Huge pages are always in RAM for now, but
+                         * theoretically it needs to be checked.
+                         */
                         ptep = huge_pte_offset(current->mm,
                                                addr & huge_page_mask(h));
-                        present = !!(ptep &&
-                                     !huge_pte_none(huge_ptep_get(ptep)));
+                        present = ptep && !huge_pte_none(huge_ptep_get(ptep));
                         while (1) {
                                 vec[i++] = present;
                                 addr += PAGE_SIZE;
@@ -100,8 +94,7 @@ static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pag
                                 if (i == nr)
                                         return nr;
                                 /* check hugepage border */
-                                if (!((addr & ~huge_page_mask(h))
-                                      >> PAGE_SHIFT))
+                                if (!(addr & ~huge_page_mask(h)))
                                         break;
                         }
                 }
@@ -113,17 +106,7 @@ static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pag
          * Calculate how many pages there are left in the last level of the
          * PTE array for our address.
          */
-        nr = PTRS_PER_PTE - ((addr >> PAGE_SHIFT) & (PTRS_PER_PTE-1));
-
-        /*
-         * Don't overrun this vma
-         */
-        nr = min(nr, (vma->vm_end - addr) >> PAGE_SHIFT);
-
-        /*
-         * Don't return more than the caller asked for
-         */
-        nr = min(nr, pages);
+        nr = min(nr, PTRS_PER_PTE - ((addr >> PAGE_SHIFT) & (PTRS_PER_PTE-1)));
 
         pgd = pgd_offset(vma->vm_mm, addr);
         if (pgd_none_or_clear_bad(pgd))
@@ -137,43 +120,38 @@ static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pag
 
         ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
         for (i = 0; i < nr; i++, ptep++, addr += PAGE_SIZE) {
-                unsigned char present;
                 pte_t pte = *ptep;
 
-                if (pte_present(pte)) {
-                        present = 1;
-
-                } else if (pte_none(pte)) {
+                if (pte_none(pte)) {
                         if (vma->vm_file) {
                                 pgoff = linear_page_index(vma, addr);
-                                present = mincore_page(vma->vm_file->f_mapping,
-                                                        pgoff);
+                                vec[i] = mincore_page(vma->vm_file->f_mapping,
+                                                      pgoff);
                         } else
-                                present = 0;
-
-                } else if (pte_file(pte)) {
+                                vec[i] = 0;
+                } else if (pte_present(pte))
+                        vec[i] = 1;
+                else if (pte_file(pte)) {
                         pgoff = pte_to_pgoff(pte);
-                        present = mincore_page(vma->vm_file->f_mapping, pgoff);
-
+                        vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
                 } else { /* pte is a swap entry */
                         swp_entry_t entry = pte_to_swp_entry(pte);
+
                         if (is_migration_entry(entry)) {
                                 /* migration entries are always uptodate */
-                                present = 1;
+                                vec[i] = 1;
                         } else {
 #ifdef CONFIG_SWAP
                                 pgoff = entry.val;
-                                present = mincore_page(&swapper_space, pgoff);
+                                vec[i] = mincore_page(&swapper_space, pgoff);
 #else
                                 WARN_ON(1);
-                                present = 1;
+                                vec[i] = 1;
 #endif
                         }
                 }
-
-                vec[i] = present;
         }
-        pte_unmap_unlock(ptep-1, ptl);
+        pte_unmap_unlock(ptep - 1, ptl);
 
         return nr;
@@ -247,7 +225,7 @@ SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
          * the temporary buffer size.
          */
         down_read(&current->mm->mmap_sem);
-        retval = do_mincore(start, tmp, min(pages, PAGE_SIZE));
+        retval = do_mincore(start, min(pages, PAGE_SIZE), tmp);
         up_read(&current->mm->mmap_sem);
 
         if (retval <= 0)
--
cgit v1.2.3-18-g5258

From f488401076c5570130c018e573f450a9a6c43365 Mon Sep 17 00:00:00 2001
From: Johannes Weiner
Date: Mon, 24 May 2010 14:32:10 -0700
Subject: mincore: break do_mincore() into logical pieces

Split out functions to handle hugetlb ranges, pte ranges and unmapped
ranges, to improve readability but also to prepare the file structure
for nested page table walks.

No semantic changes intended.

Signed-off-by: Johannes Weiner
Cc: Andrea Arcangeli
Cc: Naoya Horiguchi
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/mincore.c | 171 +++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 97 insertions(+), 74 deletions(-)

(limited to 'mm/mincore.c')

diff --git a/mm/mincore.c b/mm/mincore.c
index 1f6574c5167..a0c4c10bbab 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -19,6 +19,42 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
+static void mincore_hugetlb_page_range(struct vm_area_struct *vma,
+                                       unsigned long addr, unsigned long nr,
+                                       unsigned char *vec)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+        struct hstate *h;
+        int i;
+
+        i = 0;
+        h = hstate_vma(vma);
+        while (1) {
+                unsigned char present;
+                pte_t *ptep;
+                /*
+                 * Huge pages are always in RAM for now, but
+                 * theoretically it needs to be checked.
+                 */
+                ptep = huge_pte_offset(current->mm,
+                                       addr & huge_page_mask(h));
+                present = ptep && !huge_pte_none(huge_ptep_get(ptep));
+                while (1) {
+                        vec[i++] = present;
+                        addr += PAGE_SIZE;
+                        /* reach buffer limit */
+                        if (i == nr)
+                                return;
+                        /* check hugepage border */
+                        if (!(addr & ~huge_page_mask(h)))
+                                break;
+                }
+        }
+#else
+        BUG();
+#endif
+}
+
 /*
  * Later we can get more picky about what "in core" means precisely.
  * For now, simply check to see if the page is in the page cache,
@@ -49,6 +85,64 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
         return present;
 }
 
+static void mincore_unmapped_range(struct vm_area_struct *vma,
+                                   unsigned long addr, unsigned long nr,
+                                   unsigned char *vec)
+{
+        int i;
+
+        if (vma->vm_file) {
+                pgoff_t pgoff;
+
+                pgoff = linear_page_index(vma, addr);
+                for (i = 0; i < nr; i++, pgoff++)
+                        vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
+        } else {
+                for (i = 0; i < nr; i++)
+                        vec[i] = 0;
+        }
+}
+
+static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+                              unsigned long addr, unsigned long nr,
+                              unsigned char *vec)
+{
+        spinlock_t *ptl;
+        pte_t *ptep;
+        int i;
+
+        ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+        for (i = 0; i < nr; i++, ptep++, addr += PAGE_SIZE) {
+                pte_t pte = *ptep;
+                pgoff_t pgoff;
+
+                if (pte_none(pte))
+                        mincore_unmapped_range(vma, addr, 1, vec);
+                else if (pte_present(pte))
+                        vec[i] = 1;
+                else if (pte_file(pte)) {
+                        pgoff = pte_to_pgoff(pte);
+                        vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
+                } else { /* pte is a swap entry */
+                        swp_entry_t entry = pte_to_swp_entry(pte);
+
+                        if (is_migration_entry(entry)) {
+                                /* migration entries are always uptodate */
+                                vec[i] = 1;
+                        } else {
+#ifdef CONFIG_SWAP
+                                pgoff = entry.val;
+                                vec[i] = mincore_page(&swapper_space, pgoff);
+#else
+                                WARN_ON(1);
+                                vec[i] = 1;
+#endif
+                        }
+                }
+        }
+        pte_unmap_unlock(ptep - 1, ptl);
+}
+
 /*
  * Do a chunk of "sys_mincore()". We've already checked
  * all the arguments, we hold the mmap semaphore: we should
@@ -59,11 +153,7 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v
         pgd_t *pgd;
         pud_t *pud;
         pmd_t *pmd;
-        pte_t *ptep;
-        spinlock_t *ptl;
         unsigned long nr;
-        int i;
-        pgoff_t pgoff;
         struct vm_area_struct *vma;
 
         vma = find_vma(current->mm, addr);
@@ -72,35 +162,10 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v
 
         nr = min(pages, (vma->vm_end - addr) >> PAGE_SHIFT);
 
-#ifdef CONFIG_HUGETLB_PAGE
         if (is_vm_hugetlb_page(vma)) {
-                struct hstate *h;
-
-                i = 0;
-                h = hstate_vma(vma);
-                while (1) {
-                        unsigned char present;
-                        /*
-                         * Huge pages are always in RAM for now, but
-                         * theoretically it needs to be checked.
-                         */
-                        ptep = huge_pte_offset(current->mm,
-                                               addr & huge_page_mask(h));
-                        present = ptep && !huge_pte_none(huge_ptep_get(ptep));
-                        while (1) {
-                                vec[i++] = present;
-                                addr += PAGE_SIZE;
-                                /* reach buffer limit */
-                                if (i == nr)
-                                        return nr;
-                                /* check hugepage border */
-                                if (!(addr & ~huge_page_mask(h)))
-                                        break;
-                        }
-                }
+                mincore_hugetlb_page_range(vma, addr, nr, vec);
                 return nr;
         }
-#endif
 
         /*
          * Calculate how many pages there are left in the last level of the
@@ -118,53 +183,11 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v
         if (pmd_none_or_clear_bad(pmd))
                 goto none_mapped;
 
-        ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-        for (i = 0; i < nr; i++, ptep++, addr += PAGE_SIZE) {
-                pte_t pte = *ptep;
-
-                if (pte_none(pte)) {
-                        if (vma->vm_file) {
-                                pgoff = linear_page_index(vma, addr);
-                                vec[i] = mincore_page(vma->vm_file->f_mapping,
-                                                      pgoff);
-                        } else
-                                vec[i] = 0;
-                } else if (pte_present(pte))
-                        vec[i] = 1;
-                else if (pte_file(pte)) {
-                        pgoff = pte_to_pgoff(pte);
-                        vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
-                } else { /* pte is a swap entry */
-                        swp_entry_t entry = pte_to_swp_entry(pte);
-
-                        if (is_migration_entry(entry)) {
-                                /* migration entries are always uptodate */
-                                vec[i] = 1;
-                        } else {
-#ifdef CONFIG_SWAP
-                                pgoff = entry.val;
-                                vec[i] = mincore_page(&swapper_space, pgoff);
-#else
-                                WARN_ON(1);
-                                vec[i] = 1;
-#endif
-                        }
-                }
-        }
-        pte_unmap_unlock(ptep - 1, ptl);
-
+        mincore_pte_range(vma, pmd, addr, nr, vec);
         return nr;
 
 none_mapped:
-        if (vma->vm_file) {
-                pgoff = linear_page_index(vma, addr);
-                for (i = 0; i < nr; i++, pgoff++)
-                        vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
-        } else {
-                for (i = 0; i < nr; i++)
-                        vec[i] = 0;
-        }
-
+        mincore_unmapped_range(vma, addr, nr, vec);
         return nr;
 }
--
cgit v1.2.3-18-g5258

From 25ef0e50cca790370ad7838e3ad74db6a6a2d829 Mon Sep 17 00:00:00 2001
From: Johannes Weiner
Date: Mon, 24 May 2010 14:32:11 -0700
Subject: mincore: pass ranges as start,end address pairs

Instead of passing a start address and a number of pages into the helper
functions, convert them to use a start and an end address.
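The start,end convention also lines up with the <level>_addr_end() helpers
that page table walkers use to clamp a range to the next level boundary or
to the overall end, whichever comes first. As a rough sketch, this is what
the generic form of such a helper looks like (shown for illustration; not
part of this patch):

        /* illustrative sketch of the generic helper, not from this series */
        #define pmd_addr_end(addr, end)                                         \
        ({      unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;      \
                (__boundary - 1 < (end) - 1) ? __boundary : (end);              \
        })

Subtracting 1 from both sides keeps the comparison correct when a boundary
wraps around to 0 at the very top of the address space.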
Signed-off-by: Johannes Weiner
Cc: Andrea Arcangeli
Cc: Naoya Horiguchi
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/mincore.c | 57 +++++++++++++++++++++++++++------------------------------
 1 file changed, 27 insertions(+), 30 deletions(-)

(limited to 'mm/mincore.c')

diff --git a/mm/mincore.c b/mm/mincore.c
index a0c4c10bbab..211604adc23 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -20,14 +20,12 @@
 #include <asm/pgtable.h>
 
 static void mincore_hugetlb_page_range(struct vm_area_struct *vma,
-                                       unsigned long addr, unsigned long nr,
+                                       unsigned long addr, unsigned long end,
                                        unsigned char *vec)
 {
 #ifdef CONFIG_HUGETLB_PAGE
         struct hstate *h;
-        int i;
 
-        i = 0;
         h = hstate_vma(vma);
         while (1) {
                 unsigned char present;
@@ -40,10 +38,10 @@ static void mincore_hugetlb_page_range(struct vm_area_struct *vma,
                                        addr & huge_page_mask(h));
                 present = ptep && !huge_pte_none(huge_ptep_get(ptep));
                 while (1) {
-                        vec[i++] = present;
+                        *vec = present;
+                        vec++;
                         addr += PAGE_SIZE;
-                        /* reach buffer limit */
-                        if (i == nr)
+                        if (addr == end)
                                 return;
                         /* check hugepage border */
                         if (!(addr & ~huge_page_mask(h)))
@@ -86,9 +84,10 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
 }
 
 static void mincore_unmapped_range(struct vm_area_struct *vma,
-                                   unsigned long addr, unsigned long nr,
+                                   unsigned long addr, unsigned long end,
                                    unsigned char *vec)
 {
+        unsigned long nr = (end - addr) >> PAGE_SHIFT;
         int i;
 
         if (vma->vm_file) {
@@ -104,42 +103,44 @@ static void mincore_unmapped_range(struct vm_area_struct *vma,
 }
 
 static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
-                              unsigned long addr, unsigned long nr,
+                              unsigned long addr, unsigned long end,
                               unsigned char *vec)
 {
+        unsigned long next;
         spinlock_t *ptl;
         pte_t *ptep;
-        int i;
 
         ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-        for (i = 0; i < nr; i++, ptep++, addr += PAGE_SIZE) {
+        do {
                 pte_t pte = *ptep;
                 pgoff_t pgoff;
 
+                next = addr + PAGE_SIZE;
                 if (pte_none(pte))
-                        mincore_unmapped_range(vma, addr, 1, vec);
+                        mincore_unmapped_range(vma, addr, next, vec);
                 else if (pte_present(pte))
-                        vec[i] = 1;
+                        *vec = 1;
                 else if (pte_file(pte)) {
                         pgoff = pte_to_pgoff(pte);
-                        vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
+                        *vec = mincore_page(vma->vm_file->f_mapping, pgoff);
                 } else { /* pte is a swap entry */
                         swp_entry_t entry = pte_to_swp_entry(pte);
 
                         if (is_migration_entry(entry)) {
                                 /* migration entries are always uptodate */
-                                vec[i] = 1;
+                                *vec = 1;
                         } else {
 #ifdef CONFIG_SWAP
                                 pgoff = entry.val;
-                                vec[i] = mincore_page(&swapper_space, pgoff);
+                                *vec = mincore_page(&swapper_space, pgoff);
 #else
                                 WARN_ON(1);
-                                vec[i] = 1;
+                                *vec = 1;
 #endif
                         }
                 }
-        }
+                vec++;
+        } while (ptep++, addr = next, addr != end);
         pte_unmap_unlock(ptep - 1, ptl);
 }
 
@@ -153,25 +154,21 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v
         pgd_t *pgd;
         pud_t *pud;
         pmd_t *pmd;
-        unsigned long nr;
         struct vm_area_struct *vma;
+        unsigned long end;
 
         vma = find_vma(current->mm, addr);
         if (!vma || addr < vma->vm_start)
                 return -ENOMEM;
 
-        nr = min(pages, (vma->vm_end - addr) >> PAGE_SHIFT);
+        end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
 
         if (is_vm_hugetlb_page(vma)) {
-                mincore_hugetlb_page_range(vma, addr, nr, vec);
-                return nr;
+                mincore_hugetlb_page_range(vma, addr, end, vec);
+                return (end - addr) >> PAGE_SHIFT;
         }
 
-        /*
-         * Calculate how many pages there are left in the last level of the
-         * PTE array for our address.
-         */
-        nr = min(nr, PTRS_PER_PTE - ((addr >> PAGE_SHIFT) & (PTRS_PER_PTE-1)));
+        end = pmd_addr_end(addr, end);
 
         pgd = pgd_offset(vma->vm_mm, addr);
         if (pgd_none_or_clear_bad(pgd))
@@ -183,12 +180,12 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v
         if (pmd_none_or_clear_bad(pmd))
                 goto none_mapped;
 
-        mincore_pte_range(vma, pmd, addr, nr, vec);
-        return nr;
+        mincore_pte_range(vma, pmd, addr, end, vec);
+        return (end - addr) >> PAGE_SHIFT;
 
 none_mapped:
-        mincore_unmapped_range(vma, addr, nr, vec);
-        return nr;
+        mincore_unmapped_range(vma, addr, end, vec);
+        return (end - addr) >> PAGE_SHIFT;
 }
 
 /*
--
cgit v1.2.3-18-g5258

From e48293fd75b3aa67f43ad6e3d2ff397caa55d58b Mon Sep 17 00:00:00 2001
From: Johannes Weiner
Date: Mon, 24 May 2010 14:32:11 -0700
Subject: mincore: do nested page table walks

Do page table walks with the well-known nested loops we use in several
other places already.

This avoids doing full page table walks after every pte range and also
allows handling unmapped areas bigger than one pte range in one go.

Signed-off-by: Johannes Weiner
Cc: Andrea Arcangeli
Cc: Naoya Horiguchi
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/mincore.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 58 insertions(+), 17 deletions(-)

(limited to 'mm/mincore.c')

diff --git a/mm/mincore.c b/mm/mincore.c
index 211604adc23..9ac42dc6d7b 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -144,6 +144,60 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
         pte_unmap_unlock(ptep - 1, ptl);
 }
 
+static void mincore_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+                              unsigned long addr, unsigned long end,
+                              unsigned char *vec)
+{
+        unsigned long next;
+        pmd_t *pmd;
+
+        pmd = pmd_offset(pud, addr);
+        do {
+                next = pmd_addr_end(addr, end);
+                if (pmd_none_or_clear_bad(pmd))
+                        mincore_unmapped_range(vma, addr, next, vec);
+                else
+                        mincore_pte_range(vma, pmd, addr, next, vec);
+                vec += (next - addr) >> PAGE_SHIFT;
+        } while (pmd++, addr = next, addr != end);
+}
+
+static void mincore_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+                              unsigned long addr, unsigned long end,
+                              unsigned char *vec)
+{
+        unsigned long next;
+        pud_t *pud;
+
+        pud = pud_offset(pgd, addr);
+        do {
+                next = pud_addr_end(addr, end);
+                if (pud_none_or_clear_bad(pud))
+                        mincore_unmapped_range(vma, addr, next, vec);
+                else
+                        mincore_pmd_range(vma, pud, addr, next, vec);
+                vec += (next - addr) >> PAGE_SHIFT;
+        } while (pud++, addr = next, addr != end);
+}
+
+static void mincore_page_range(struct vm_area_struct *vma,
+                               unsigned long addr, unsigned long end,
+                               unsigned char *vec)
+{
+        unsigned long next;
+        pgd_t *pgd;
+
+        pgd = pgd_offset(vma->vm_mm, addr);
+        do {
+                next = pgd_addr_end(addr, end);
+                if (pgd_none_or_clear_bad(pgd))
+                        mincore_unmapped_range(vma, addr, next, vec);
+                else
+                        mincore_pud_range(vma, pgd, addr, next, vec);
+                vec += (next - addr) >> PAGE_SHIFT;
+        } while (pgd++, addr = next, addr != end);
+}
+
 /*
  * Do a chunk of "sys_mincore()". We've already checked
  * all the arguments, we hold the mmap semaphore: we should
@@ -151,9 +205,6 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
  */
 static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec)
 {
-        pgd_t *pgd;
-        pud_t *pud;
-        pmd_t *pmd;
         struct vm_area_struct *vma;
         unsigned long end;
 
@@ -170,21 +221,11 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v
 
         end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
 
-        if (is_vm_hugetlb_page(vma)) {
-                mincore_hugetlb_page_range(vma, addr, end, vec);
-                return (end - addr) >> PAGE_SHIFT;
-        }
-
-        end = pmd_addr_end(addr, end);
-
-        pgd = pgd_offset(vma->vm_mm, addr);
-        if (pgd_none_or_clear_bad(pgd))
-                goto none_mapped;
-        pud = pud_offset(pgd, addr);
-        if (pud_none_or_clear_bad(pud))
-                goto none_mapped;
-        pmd = pmd_offset(pud, addr);
-        if (pmd_none_or_clear_bad(pmd))
-                goto none_mapped;
-
-        mincore_pte_range(vma, pmd, addr, end, vec);
-        return (end - addr) >> PAGE_SHIFT;
+        if (is_vm_hugetlb_page(vma))
+                mincore_hugetlb_page_range(vma, addr, end, vec);
+        else
+                mincore_page_range(vma, addr, end, vec);
 
-none_mapped:
-        mincore_unmapped_range(vma, addr, end, vec);
         return (end - addr) >> PAGE_SHIFT;
 }
--
cgit v1.2.3-18-g5258
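For completeness, a minimal userspace sketch of the syscall this series
reworks (illustrative only; file name handling and error checking are kept
to a bare minimum):

        /* Map a file and report which of its pages are resident in memory. */
        #include <stdio.h>
        #include <stdlib.h>
        #include <fcntl.h>
        #include <unistd.h>
        #include <sys/mman.h>
        #include <sys/stat.h>

        int main(int argc, char **argv)
        {
                struct stat st;
                long page = sysconf(_SC_PAGESIZE);
                int fd;

                if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
                        return 1;
                if (fstat(fd, &st) < 0 || st.st_size == 0)
                        return 1;

                size_t pages = (st.st_size + page - 1) / page;
                void *map = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
                unsigned char *vec = malloc(pages);

                if (map == MAP_FAILED || !vec || mincore(map, st.st_size, vec) < 0) {
                        perror("mincore");
                        return 1;
                }
                for (size_t i = 0; i < pages; i++)
                        printf("page %zu: %s\n", i,
                               (vec[i] & 1) ? "resident" : "not resident");
                return 0;
        }

Each byte of vec describes one page of the range; bit 0 reports residency
and the remaining bits are reserved.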