diff options
Diffstat (limited to 'arch/powerpc/mm/pgtable.c')
| -rw-r--r-- | arch/powerpc/mm/pgtable.c | 239 |
1 files changed, 179 insertions, 60 deletions
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 6d94116fdea..c695943a513 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -1,5 +1,6 @@ /* * This file contains common routines for dealing with free of page tables + * Along with common page table handling code * * Derived from arch/powerpc/mm/tlb_64.c: * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) @@ -21,97 +22,215 @@ */ #include <linux/kernel.h> +#include <linux/gfp.h> #include <linux/mm.h> -#include <linux/init.h> #include <linux/percpu.h> #include <linux/hardirq.h> +#include <linux/hugetlb.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/tlb.h> -static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); -static unsigned long pte_freelist_forced_free; - -struct pte_freelist_batch +static inline int is_exec_fault(void) { - struct rcu_head rcu; - unsigned int index; - pgtable_free_t tables[0]; -}; - -#define PTE_FREELIST_SIZE \ - ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ - / sizeof(pgtable_free_t)) + return current->thread.regs && TRAP(current->thread.regs) == 0x400; +} -static void pte_free_smp_sync(void *arg) +/* We only try to do i/d cache coherency on stuff that looks like + * reasonably "normal" PTEs. We currently require a PTE to be present + * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE. We also only do that + * on userspace PTEs + */ +static inline int pte_looks_normal(pte_t pte) { - /* Do nothing, just ensure we sync with all CPUs */ + return (pte_val(pte) & + (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) == + (_PAGE_PRESENT | _PAGE_USER); } -/* This is only called when we are critically out of memory - * (and fail to get a page in pte_free_tlb). - */ -static void pgtable_free_now(pgtable_free_t pgf) +struct page * maybe_pte_to_page(pte_t pte) { - pte_freelist_forced_free++; + unsigned long pfn = pte_pfn(pte); + struct page *page; + + if (unlikely(!pfn_valid(pfn))) + return NULL; + page = pfn_to_page(pfn); + if (PageReserved(page)) + return NULL; + return page; +} - smp_call_function(pte_free_smp_sync, NULL, 1); +#if defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 + +/* Server-style MMU handles coherency when hashing if HW exec permission + * is supposed per page (currently 64-bit only). If not, then, we always + * flush the cache for valid PTEs in set_pte. Embedded CPU without HW exec + * support falls into the same category. + */ - pgtable_free(pgf); +static pte_t set_pte_filter(pte_t pte) +{ + pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); + if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) || + cpu_has_feature(CPU_FTR_NOEXECUTE))) { + struct page *pg = maybe_pte_to_page(pte); + if (!pg) + return pte; + if (!test_bit(PG_arch_1, &pg->flags)) { + flush_dcache_icache_page(pg); + set_bit(PG_arch_1, &pg->flags); + } + } + return pte; } -static void pte_free_rcu_callback(struct rcu_head *head) +static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, + int dirty) { - struct pte_freelist_batch *batch = - container_of(head, struct pte_freelist_batch, rcu); - unsigned int i; + return pte; +} - for (i = 0; i < batch->index; i++) - pgtable_free(batch->tables[i]); +#else /* defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 */ + +/* Embedded type MMU with HW exec support. This is a bit more complicated + * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so + * instead we "filter out" the exec permission for non clean pages. + */ +static pte_t set_pte_filter(pte_t pte) +{ + struct page *pg; + + /* No exec permission in the first place, move on */ + if (!(pte_val(pte) & _PAGE_EXEC) || !pte_looks_normal(pte)) + return pte; + + /* If you set _PAGE_EXEC on weird pages you're on your own */ + pg = maybe_pte_to_page(pte); + if (unlikely(!pg)) + return pte; + + /* If the page clean, we move on */ + if (test_bit(PG_arch_1, &pg->flags)) + return pte; + + /* If it's an exec fault, we flush the cache and make it clean */ + if (is_exec_fault()) { + flush_dcache_icache_page(pg); + set_bit(PG_arch_1, &pg->flags); + return pte; + } - free_page((unsigned long)batch); + /* Else, we filter out _PAGE_EXEC */ + return __pte(pte_val(pte) & ~_PAGE_EXEC); } -static void pte_free_submit(struct pte_freelist_batch *batch) +static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, + int dirty) { - INIT_RCU_HEAD(&batch->rcu); - call_rcu(&batch->rcu, pte_free_rcu_callback); + struct page *pg; + + /* So here, we only care about exec faults, as we use them + * to recover lost _PAGE_EXEC and perform I$/D$ coherency + * if necessary. Also if _PAGE_EXEC is already set, same deal, + * we just bail out + */ + if (dirty || (pte_val(pte) & _PAGE_EXEC) || !is_exec_fault()) + return pte; + +#ifdef CONFIG_DEBUG_VM + /* So this is an exec fault, _PAGE_EXEC is not set. If it was + * an error we would have bailed out earlier in do_page_fault() + * but let's make sure of it + */ + if (WARN_ON(!(vma->vm_flags & VM_EXEC))) + return pte; +#endif /* CONFIG_DEBUG_VM */ + + /* If you set _PAGE_EXEC on weird pages you're on your own */ + pg = maybe_pte_to_page(pte); + if (unlikely(!pg)) + goto bail; + + /* If the page is already clean, we move on */ + if (test_bit(PG_arch_1, &pg->flags)) + goto bail; + + /* Clean the page and set PG_arch_1 */ + flush_dcache_icache_page(pg); + set_bit(PG_arch_1, &pg->flags); + + bail: + return __pte(pte_val(pte) | _PAGE_EXEC); } -void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) -{ - /* This is safe since tlb_gather_mmu has disabled preemption */ - cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id()); - struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); +#endif /* !(defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0) */ - if (atomic_read(&tlb->mm->mm_users) < 2 || - cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) { - pgtable_free(pgf); - return; - } +/* + * set_pte stores a linux PTE into the linux page table. + */ +void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, + pte_t pte) +{ +#ifdef CONFIG_DEBUG_VM + WARN_ON(pte_val(*ptep) & _PAGE_PRESENT); +#endif + /* Note: mm->context.id might not yet have been assigned as + * this context might not have been activated yet when this + * is called. + */ + pte = set_pte_filter(pte); + + /* Perform the setting of the PTE */ + __set_pte_at(mm, addr, ptep, pte, 0); +} - if (*batchp == NULL) { - *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); - if (*batchp == NULL) { - pgtable_free_now(pgf); - return; - } - (*batchp)->index = 0; - } - (*batchp)->tables[(*batchp)->index++] = pgf; - if ((*batchp)->index == PTE_FREELIST_SIZE) { - pte_free_submit(*batchp); - *batchp = NULL; +/* + * This is called when relaxing access to a PTE. It's also called in the page + * fault path when we don't hit any of the major fault cases, ie, a minor + * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have + * handled those two for us, we additionally deal with missing execute + * permission here on some processors + */ +int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep, pte_t entry, int dirty) +{ + int changed; + entry = set_access_flags_filter(entry, vma, dirty); + changed = !pte_same(*(ptep), entry); + if (changed) { + if (!is_vm_hugetlb_page(vma)) + assert_pte_locked(vma->vm_mm, address); + __ptep_set_access_flags(ptep, entry); + flush_tlb_page_nohash(vma, address); } + return changed; } -void pte_free_finish(void) +#ifdef CONFIG_DEBUG_VM +void assert_pte_locked(struct mm_struct *mm, unsigned long addr) { - /* This is safe since tlb_gather_mmu has disabled preemption */ - struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; - if (*batchp == NULL) + if (mm == &init_mm) + return; + pgd = mm->pgd + pgd_index(addr); + BUG_ON(pgd_none(*pgd)); + pud = pud_offset(pgd, addr); + BUG_ON(pud_none(*pud)); + pmd = pmd_offset(pud, addr); + /* + * khugepaged to collapse normal pages to hugepage, first set + * pmd to none to force page fault/gup to take mmap_sem. After + * pmd is set to none, we do a pte_clear which does this assertion + * so if we find pmd none, return. + */ + if (pmd_none(*pmd)) return; - pte_free_submit(*batchp); - *batchp = NULL; + BUG_ON(!pmd_present(*pmd)); + assert_spin_locked(pte_lockptr(mm, pmd)); } +#endif /* CONFIG_DEBUG_VM */ + |
