diff options
Diffstat (limited to 'arch/powerpc/mm/gup.c')
| -rw-r--r-- | arch/powerpc/mm/gup.c | 227 |
1 files changed, 91 insertions, 136 deletions
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c index 9fdf4d6335e..d8746684f60 100644 --- a/arch/powerpc/mm/gup.c +++ b/arch/powerpc/mm/gup.c @@ -14,6 +14,8 @@ #include <linux/rwsem.h> #include <asm/pgtable.h> +#ifdef __HAVE_ARCH_PTE_SPECIAL + /* * The performance critical leaf functions are made noinline otherwise gcc * inlines everything into a single function which results in too much @@ -32,8 +34,13 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, ptep = pte_offset_kernel(&pmd, addr); do { - pte_t pte = *ptep; + pte_t pte = ACCESS_ONCE(*ptep); struct page *page; + /* + * Similar to the PMD case, NUMA hinting must take slow path + */ + if (pte_numa(pte)) + return 0; if ((pte_val(pte) & mask) != result) return 0; @@ -41,7 +48,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, page = pte_page(pte); if (!page_cache_get_speculative(page)) return 0; - if (unlikely(pte != *ptep)) { + if (unlikely(pte_val(pte) != pte_val(*ptep))) { put_page(page); return 0; } @@ -53,57 +60,6 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, return 1; } -#ifdef CONFIG_HUGETLB_PAGE -static noinline int gup_huge_pte(pte_t *ptep, struct hstate *hstate, - unsigned long *addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long mask; - unsigned long pte_end; - struct page *head, *page; - pte_t pte; - int refs; - - pte_end = (*addr + huge_page_size(hstate)) & huge_page_mask(hstate); - if (pte_end < end) - end = pte_end; - - pte = *ptep; - mask = _PAGE_PRESENT|_PAGE_USER; - if (write) - mask |= _PAGE_RW; - if ((pte_val(pte) & mask) != mask) - return 0; - /* hugepages are never "special" */ - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - - refs = 0; - head = pte_page(pte); - page = head + ((*addr & ~huge_page_mask(hstate)) >> PAGE_SHIFT); - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (*addr += PAGE_SIZE, *addr != end); - - if (!page_cache_add_speculative(head, refs)) { - *nr -= refs; - return 0; - } - if (unlikely(pte != *ptep)) { - /* Could be optimized better */ - while (*nr) { - put_page(page); - (*nr)--; - } - } - - return 1; -} -#endif /* CONFIG_HUGETLB_PAGE */ - static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { @@ -112,12 +68,34 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, pmdp = pmd_offset(&pud, addr); do { - pmd_t pmd = *pmdp; + pmd_t pmd = ACCESS_ONCE(*pmdp); next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) + /* + * If we find a splitting transparent hugepage we + * return zero. That will result in taking the slow + * path which will call wait_split_huge_page() + * if the pmd is still in splitting state + */ + if (pmd_none(pmd) || pmd_trans_splitting(pmd)) return 0; - if (!gup_pte_range(pmd, addr, next, write, pages, nr)) + if (pmd_huge(pmd) || pmd_large(pmd)) { + /* + * NUMA hinting faults need to be handled in the GUP + * slowpath for accounting purposes and so that they + * can be serialised against THP migration. + */ + if (pmd_numa(pmd)) + return 0; + + if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, + write, pages, nr)) + return 0; + } else if (is_hugepd(pmdp)) { + if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT, + addr, next, write, pages, nr)) + return 0; + } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) return 0; } while (pmdp++, addr = next, addr != end); @@ -132,29 +110,37 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, pudp = pud_offset(&pgd, addr); do { - pud_t pud = *pudp; + pud_t pud = ACCESS_ONCE(*pudp); next = pud_addr_end(addr, end); if (pud_none(pud)) return 0; - if (!gup_pmd_range(pud, addr, next, write, pages, nr)) + if (pud_huge(pud)) { + if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next, + write, pages, nr)) + return 0; + } else if (is_hugepd(pudp)) { + if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT, + addr, next, write, pages, nr)) + return 0; + } else if (!gup_pmd_range(pud, addr, next, write, pages, nr)) return 0; } while (pudp++, addr = next, addr != end); return 1; } -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) { struct mm_struct *mm = current->mm; unsigned long addr, len, end; unsigned long next; + unsigned long flags; pgd_t *pgdp; - int psize, nr = 0; - unsigned int shift; + int nr = 0; - pr_debug("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read"); + pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read"); start &= PAGE_MASK; addr = start; @@ -163,28 +149,9 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, start, len))) - goto slow_irqon; - - pr_debug(" aligned: %lx .. %lx\n", start, end); + return 0; -#ifdef CONFIG_HUGETLB_PAGE - /* We bail out on slice boundary crossing when hugetlb is - * enabled in order to not have to deal with two different - * page table formats - */ - if (addr < SLICE_LOW_TOP) { - if (end > SLICE_LOW_TOP) - goto slow_irqon; - - if (unlikely(GET_LOW_SLICE_INDEX(addr) != - GET_LOW_SLICE_INDEX(end - 1))) - goto slow_irqon; - } else { - if (unlikely(GET_HIGH_SLICE_INDEX(addr) != - GET_HIGH_SLICE_INDEX(end - 1))) - goto slow_irqon; - } -#endif /* CONFIG_HUGETLB_PAGE */ + pr_devel(" aligned: %lx .. %lx\n", start, end); /* * XXX: batch / limit 'nr', to avoid large irq off latency @@ -203,60 +170,46 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, * So long as we atomically load page table pointers versus teardown, * we can follow the address down to the the page and take a ref on it. */ - local_irq_disable(); + local_irq_save(flags); - psize = get_slice_psize(mm, addr); - shift = mmu_psize_defs[psize].shift; - -#ifdef CONFIG_HUGETLB_PAGE - if (unlikely(mmu_huge_psizes[psize])) { - pte_t *ptep; - unsigned long a = addr; - unsigned long sz = ((1UL) << shift); - struct hstate *hstate = size_to_hstate(sz); - - BUG_ON(!hstate); - /* - * XXX: could be optimized to avoid hstate - * lookup entirely (just use shift) - */ - - do { - VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, a)].shift); - ptep = huge_pte_offset(mm, a); - pr_debug(" %016lx: huge ptep %p\n", a, ptep); - if (!ptep || !gup_huge_pte(ptep, hstate, &a, end, write, pages, - &nr)) - goto slow; - } while (a != end); - } else -#endif /* CONFIG_HUGETLB_PAGE */ - { - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, addr)].shift); - pr_debug(" %016lx: normal pgd %p\n", addr, (void *)pgd); - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - goto slow; - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - goto slow; - } while (pgdp++, addr = next, addr != end); - } - local_irq_enable(); + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = ACCESS_ONCE(*pgdp); + + pr_devel(" %016lx: normal pgd %p\n", addr, + (void *)pgd_val(pgd)); + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + break; + if (pgd_huge(pgd)) { + if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next, + write, pages, &nr)) + break; + } else if (is_hugepd(pgdp)) { + if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT, + addr, next, write, pages, &nr)) + break; + } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + break; + } while (pgdp++, addr = next, addr != end); + + local_irq_restore(flags); - VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); return nr; +} - { - int ret; +int get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + int nr, ret; -slow: - local_irq_enable(); -slow_irqon: - pr_debug(" slow path ! nr = %d\n", nr); + start &= PAGE_MASK; + nr = __get_user_pages_fast(start, nr_pages, write, pages); + ret = nr; + + if (nr < nr_pages) { + pr_devel(" slow path ! nr = %d\n", nr); /* Try to get the remaining pages with get_user_pages */ start += nr << PAGE_SHIFT; @@ -264,7 +217,7 @@ slow_irqon: down_read(&mm->mmap_sem); ret = get_user_pages(current, mm, start, - (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); + nr_pages - nr, write, 0, pages, NULL); up_read(&mm->mmap_sem); /* Have to be a bit careful with return values */ @@ -274,7 +227,9 @@ slow_irqon: else ret += nr; } - - return ret; } + + return ret; } + +#endif /* __HAVE_ARCH_PTE_SPECIAL */ |
