aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/mm/gup.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm/gup.c')
-rw-r--r--arch/powerpc/mm/gup.c227
1 files changed, 91 insertions, 136 deletions
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
index 9fdf4d6335e..d8746684f60 100644
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -14,6 +14,8 @@
#include <linux/rwsem.h>
#include <asm/pgtable.h>
+#ifdef __HAVE_ARCH_PTE_SPECIAL
+
/*
* The performance critical leaf functions are made noinline otherwise gcc
* inlines everything into a single function which results in too much
@@ -32,8 +34,13 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
ptep = pte_offset_kernel(&pmd, addr);
do {
- pte_t pte = *ptep;
+ pte_t pte = ACCESS_ONCE(*ptep);
struct page *page;
+ /*
+ * Similar to the PMD case, NUMA hinting must take slow path
+ */
+ if (pte_numa(pte))
+ return 0;
if ((pte_val(pte) & mask) != result)
return 0;
@@ -41,7 +48,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
page = pte_page(pte);
if (!page_cache_get_speculative(page))
return 0;
- if (unlikely(pte != *ptep)) {
+ if (unlikely(pte_val(pte) != pte_val(*ptep))) {
put_page(page);
return 0;
}
@@ -53,57 +60,6 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
return 1;
}
-#ifdef CONFIG_HUGETLB_PAGE
-static noinline int gup_huge_pte(pte_t *ptep, struct hstate *hstate,
- unsigned long *addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long mask;
- unsigned long pte_end;
- struct page *head, *page;
- pte_t pte;
- int refs;
-
- pte_end = (*addr + huge_page_size(hstate)) & huge_page_mask(hstate);
- if (pte_end < end)
- end = pte_end;
-
- pte = *ptep;
- mask = _PAGE_PRESENT|_PAGE_USER;
- if (write)
- mask |= _PAGE_RW;
- if ((pte_val(pte) & mask) != mask)
- return 0;
- /* hugepages are never "special" */
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-
- refs = 0;
- head = pte_page(pte);
- page = head + ((*addr & ~huge_page_mask(hstate)) >> PAGE_SHIFT);
- do {
- VM_BUG_ON(compound_head(page) != head);
- pages[*nr] = page;
- (*nr)++;
- page++;
- refs++;
- } while (*addr += PAGE_SIZE, *addr != end);
-
- if (!page_cache_add_speculative(head, refs)) {
- *nr -= refs;
- return 0;
- }
- if (unlikely(pte != *ptep)) {
- /* Could be optimized better */
- while (*nr) {
- put_page(page);
- (*nr)--;
- }
- }
-
- return 1;
-}
-#endif /* CONFIG_HUGETLB_PAGE */
-
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
@@ -112,12 +68,34 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
pmdp = pmd_offset(&pud, addr);
do {
- pmd_t pmd = *pmdp;
+ pmd_t pmd = ACCESS_ONCE(*pmdp);
next = pmd_addr_end(addr, end);
- if (pmd_none(pmd))
+ /*
+ * If we find a splitting transparent hugepage we
+ * return zero. That will result in taking the slow
+ * path which will call wait_split_huge_page()
+ * if the pmd is still in splitting state
+ */
+ if (pmd_none(pmd) || pmd_trans_splitting(pmd))
return 0;
- if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+ if (pmd_huge(pmd) || pmd_large(pmd)) {
+ /*
+ * NUMA hinting faults need to be handled in the GUP
+ * slowpath for accounting purposes and so that they
+ * can be serialised against THP migration.
+ */
+ if (pmd_numa(pmd))
+ return 0;
+
+ if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next,
+ write, pages, nr))
+ return 0;
+ } else if (is_hugepd(pmdp)) {
+ if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT,
+ addr, next, write, pages, nr))
+ return 0;
+ } else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
return 0;
} while (pmdp++, addr = next, addr != end);
@@ -132,29 +110,37 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
pudp = pud_offset(&pgd, addr);
do {
- pud_t pud = *pudp;
+ pud_t pud = ACCESS_ONCE(*pudp);
next = pud_addr_end(addr, end);
if (pud_none(pud))
return 0;
- if (!gup_pmd_range(pud, addr, next, write, pages, nr))
+ if (pud_huge(pud)) {
+ if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next,
+ write, pages, nr))
+ return 0;
+ } else if (is_hugepd(pudp)) {
+ if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT,
+ addr, next, write, pages, nr))
+ return 0;
+ } else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
return 0;
} while (pudp++, addr = next, addr != end);
return 1;
}
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+ struct page **pages)
{
struct mm_struct *mm = current->mm;
unsigned long addr, len, end;
unsigned long next;
+ unsigned long flags;
pgd_t *pgdp;
- int psize, nr = 0;
- unsigned int shift;
+ int nr = 0;
- pr_debug("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");
+ pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");
start &= PAGE_MASK;
addr = start;
@@ -163,28 +149,9 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
start, len)))
- goto slow_irqon;
-
- pr_debug(" aligned: %lx .. %lx\n", start, end);
+ return 0;
-#ifdef CONFIG_HUGETLB_PAGE
- /* We bail out on slice boundary crossing when hugetlb is
- * enabled in order to not have to deal with two different
- * page table formats
- */
- if (addr < SLICE_LOW_TOP) {
- if (end > SLICE_LOW_TOP)
- goto slow_irqon;
-
- if (unlikely(GET_LOW_SLICE_INDEX(addr) !=
- GET_LOW_SLICE_INDEX(end - 1)))
- goto slow_irqon;
- } else {
- if (unlikely(GET_HIGH_SLICE_INDEX(addr) !=
- GET_HIGH_SLICE_INDEX(end - 1)))
- goto slow_irqon;
- }
-#endif /* CONFIG_HUGETLB_PAGE */
+ pr_devel(" aligned: %lx .. %lx\n", start, end);
/*
* XXX: batch / limit 'nr', to avoid large irq off latency
@@ -203,60 +170,46 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
* So long as we atomically load page table pointers versus teardown,
* we can follow the address down to the the page and take a ref on it.
*/
- local_irq_disable();
+ local_irq_save(flags);
- psize = get_slice_psize(mm, addr);
- shift = mmu_psize_defs[psize].shift;
-
-#ifdef CONFIG_HUGETLB_PAGE
- if (unlikely(mmu_huge_psizes[psize])) {
- pte_t *ptep;
- unsigned long a = addr;
- unsigned long sz = ((1UL) << shift);
- struct hstate *hstate = size_to_hstate(sz);
-
- BUG_ON(!hstate);
- /*
- * XXX: could be optimized to avoid hstate
- * lookup entirely (just use shift)
- */
-
- do {
- VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, a)].shift);
- ptep = huge_pte_offset(mm, a);
- pr_debug(" %016lx: huge ptep %p\n", a, ptep);
- if (!ptep || !gup_huge_pte(ptep, hstate, &a, end, write, pages,
- &nr))
- goto slow;
- } while (a != end);
- } else
-#endif /* CONFIG_HUGETLB_PAGE */
- {
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = *pgdp;
-
- VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, addr)].shift);
- pr_debug(" %016lx: normal pgd %p\n", addr, (void *)pgd);
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- goto slow;
- if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
- goto slow;
- } while (pgdp++, addr = next, addr != end);
- }
- local_irq_enable();
+ pgdp = pgd_offset(mm, addr);
+ do {
+ pgd_t pgd = ACCESS_ONCE(*pgdp);
+
+ pr_devel(" %016lx: normal pgd %p\n", addr,
+ (void *)pgd_val(pgd));
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(pgd))
+ break;
+ if (pgd_huge(pgd)) {
+ if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next,
+ write, pages, &nr))
+ break;
+ } else if (is_hugepd(pgdp)) {
+ if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
+ addr, next, write, pages, &nr))
+ break;
+ } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+ break;
+ } while (pgdp++, addr = next, addr != end);
+
+ local_irq_restore(flags);
- VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
return nr;
+}
- {
- int ret;
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+ struct page **pages)
+{
+ struct mm_struct *mm = current->mm;
+ int nr, ret;
-slow:
- local_irq_enable();
-slow_irqon:
- pr_debug(" slow path ! nr = %d\n", nr);
+ start &= PAGE_MASK;
+ nr = __get_user_pages_fast(start, nr_pages, write, pages);
+ ret = nr;
+
+ if (nr < nr_pages) {
+ pr_devel(" slow path ! nr = %d\n", nr);
/* Try to get the remaining pages with get_user_pages */
start += nr << PAGE_SHIFT;
@@ -264,7 +217,7 @@ slow_irqon:
down_read(&mm->mmap_sem);
ret = get_user_pages(current, mm, start,
- (end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
+ nr_pages - nr, write, 0, pages, NULL);
up_read(&mm->mmap_sem);
/* Have to be a bit careful with return values */
@@ -274,7 +227,9 @@ slow_irqon:
else
ret += nr;
}
-
- return ret;
}
+
+ return ret;
}
+
+#endif /* __HAVE_ARCH_PTE_SPECIAL */