Diffstat (limited to 'arch/arm/include/asm/pgtable.h')
| -rw-r--r-- | arch/arm/include/asm/pgtable.h | 274 |
1 file changed, 63 insertions(+), 211 deletions(-)
```diff
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index ebcb6432f45..5478e5d6ad8 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -11,19 +11,28 @@
 #define _ASMARM_PGTABLE_H
 
 #include <linux/const.h>
-#include <asm-generic/4level-fixup.h>
 #include <asm/proc-fns.h>
 
 #ifndef CONFIG_MMU
 
-#include "pgtable-nommu.h"
+#include <asm-generic/4level-fixup.h>
+#include <asm/pgtable-nommu.h>
 
 #else
 
+#include <asm-generic/pgtable-nopud.h>
 #include <asm/memory.h>
-#include <mach/vmalloc.h>
 #include <asm/pgtable-hwdef.h>
+
+#include <asm/tlbflush.h>
+
+#ifdef CONFIG_ARM_LPAE
+#include <asm/pgtable-3level.h>
+#else
+#include <asm/pgtable-2level.h>
+#endif
+
 /*
  * Just any arbitrary offset to the start of the vmalloc VM area: the
  * current 8MB value just means that there will be a 8MB "hole" after the
@@ -31,88 +40,10 @@
  * any out-of-bounds memory accesses will hopefully be caught.
  * The vmalloc() routines leaves a hole of 4kB between each vmalloced
  * area for the same reason. ;)
- *
- * Note that platforms may override VMALLOC_START, but they must provide
- * VMALLOC_END.  VMALLOC_END defines the (exclusive) limit of this space,
- * which may not overlap IO space.
  */
-#ifndef VMALLOC_START
 #define VMALLOC_OFFSET		(8*1024*1024)
 #define VMALLOC_START		(((unsigned long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
-#endif
-
-/*
- * Hardware-wise, we have a two level page table structure, where the first
- * level has 4096 entries, and the second level has 256 entries.  Each entry
- * is one 32-bit word.  Most of the bits in the second level entry are used
- * by hardware, and there aren't any "accessed" and "dirty" bits.
- *
- * Linux on the other hand has a three level page table structure, which can
- * be wrapped to fit a two level page table structure easily - using the PGD
- * and PTE only.  However, Linux also expects one "PTE" table per page, and
- * at least a "dirty" bit.
- *
- * Therefore, we tweak the implementation slightly - we tell Linux that we
- * have 2048 entries in the first level, each of which is 8 bytes (iow, two
- * hardware pointers to the second level.)  The second level contains two
- * hardware PTE tables arranged contiguously, preceded by Linux versions
- * which contain the state information Linux needs.  We, therefore, end up
- * with 512 entries in the "PTE" level.
- *
- * This leads to the page tables having the following layout:
- *
- *    pgd             pte
- * |        |
- * +--------+
- * |        |       +------------+ +0
- * +- - - - +       | Linux pt 0 |
- * |        |       +------------+ +1024
- * +--------+ +0    | Linux pt 1 |
- * |        |-----> +------------+ +2048
- * +- - - - + +4    |  h/w pt 0  |
- * |        |-----> +------------+ +3072
- * +--------+ +8    |  h/w pt 1  |
- * |        |       +------------+ +4096
- *
- * See L_PTE_xxx below for definitions of bits in the "Linux pt", and
- * PTE_xxx for definitions of bits appearing in the "h/w pt".
- *
- * PMD_xxx definitions refer to bits in the first level page table.
- *
- * The "dirty" bit is emulated by only granting hardware write permission
- * iff the page is marked "writable" and "dirty" in the Linux PTE.  This
- * means that a write to a clean page will cause a permission fault, and
- * the Linux MM layer will mark the page dirty via handle_pte_fault().
- * For the hardware to notice the permission change, the TLB entry must
- * be flushed, and ptep_set_access_flags() does that for us.
- *
- * The "accessed" or "young" bit is emulated by a similar method; we only
- * allow accesses to the page if the "young" bit is set.  Accesses to the
- * page will cause a fault, and handle_pte_fault() will set the young bit
- * for us as long as the page is marked present in the corresponding Linux
- * PTE entry.  Again, ptep_set_access_flags() will ensure that the TLB is
- * up to date.
- *
- * However, when the "young" bit is cleared, we deny access to the page
- * by clearing the hardware PTE.  Currently Linux does not flush the TLB
- * for us in this case, which means the TLB will retain the transation
- * until either the TLB entry is evicted under pressure, or a context
- * switch which changes the user space mapping occurs.
- */
-#define PTRS_PER_PTE		512
-#define PTRS_PER_PMD		1
-#define PTRS_PER_PGD		2048
-
-#define PTE_HWTABLE_PTRS	(PTRS_PER_PTE)
-#define PTE_HWTABLE_OFF		(PTE_HWTABLE_PTRS * sizeof(pte_t))
-#define PTE_HWTABLE_SIZE	(PTRS_PER_PTE * sizeof(u32))
-
-/*
- * PMD_SHIFT determines the size of the area a second-level page table can map
- * PGDIR_SHIFT determines what a third-level page table entry can map
- */
-#define PMD_SHIFT		21
-#define PGDIR_SHIFT		21
+#define VMALLOC_END		0xff000000UL
 
 #define LIBRARY_TEXT_START	0x0c000000
 
@@ -124,73 +55,22 @@ extern void __pgd_error(const char *file, int line, pgd_t);
 #define pte_ERROR(pte)		__pte_error(__FILE__, __LINE__, pte)
 #define pmd_ERROR(pmd)		__pmd_error(__FILE__, __LINE__, pmd)
 #define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd)
-#endif /* !__ASSEMBLY__ */
-
-#define PMD_SIZE		(1UL << PMD_SHIFT)
-#define PMD_MASK		(~(PMD_SIZE-1))
-#define PGDIR_SIZE		(1UL << PGDIR_SHIFT)
-#define PGDIR_MASK		(~(PGDIR_SIZE-1))
 
 /*
  * This is the lowest virtual address we can permit any user space
  * mapping to be mapped at.  This is particularly important for
  * non-high vector CPUs.
  */
-#define FIRST_USER_ADDRESS	PAGE_SIZE
-
-#define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
+#define FIRST_USER_ADDRESS	(PAGE_SIZE * 2)
 
 /*
- * section address mask and size definitions.
+ * Use TASK_SIZE as the ceiling argument for free_pgtables() and
+ * free_pgd_range() to avoid freeing the modules pmd when LPAE is enabled (pmd
+ * page shared between user and kernel).
  */
-#define SECTION_SHIFT		20
-#define SECTION_SIZE		(1UL << SECTION_SHIFT)
-#define SECTION_MASK		(~(SECTION_SIZE-1))
-
-/*
- * ARMv6 supersection address mask and size definitions.
- */
-#define SUPERSECTION_SHIFT	24
-#define SUPERSECTION_SIZE	(1UL << SUPERSECTION_SHIFT)
-#define SUPERSECTION_MASK	(~(SUPERSECTION_SIZE-1))
-
-/*
- * "Linux" PTE definitions.
- *
- * We keep two sets of PTEs - the hardware and the linux version.
- * This allows greater flexibility in the way we map the Linux bits
- * onto the hardware tables, and allows us to have YOUNG and DIRTY
- * bits.
- *
- * The PTE table pointer refers to the hardware entries; the "Linux"
- * entries are stored 1024 bytes below.
- */
-#define L_PTE_PRESENT		(_AT(pteval_t, 1) << 0)
-#define L_PTE_YOUNG		(_AT(pteval_t, 1) << 1)
-#define L_PTE_FILE		(_AT(pteval_t, 1) << 2)	/* only when !PRESENT */
-#define L_PTE_DIRTY		(_AT(pteval_t, 1) << 6)
-#define L_PTE_RDONLY		(_AT(pteval_t, 1) << 7)
-#define L_PTE_USER		(_AT(pteval_t, 1) << 8)
-#define L_PTE_XN		(_AT(pteval_t, 1) << 9)
-#define L_PTE_SHARED		(_AT(pteval_t, 1) << 10)	/* shared(v6), coherent(xsc3) */
-
-/*
- * These are the memory types, defined to be compatible with
- * pre-ARMv6 CPUs cacheable and bufferable bits:   XXCB
- */
-#define L_PTE_MT_UNCACHED	(_AT(pteval_t, 0x00) << 2)	/* 0000 */
-#define L_PTE_MT_BUFFERABLE	(_AT(pteval_t, 0x01) << 2)	/* 0001 */
-#define L_PTE_MT_WRITETHROUGH	(_AT(pteval_t, 0x02) << 2)	/* 0010 */
-#define L_PTE_MT_WRITEBACK	(_AT(pteval_t, 0x03) << 2)	/* 0011 */
-#define L_PTE_MT_MINICACHE	(_AT(pteval_t, 0x06) << 2)	/* 0110 (sa1100, xscale) */
-#define L_PTE_MT_WRITEALLOC	(_AT(pteval_t, 0x07) << 2)	/* 0111 */
-#define L_PTE_MT_DEV_SHARED	(_AT(pteval_t, 0x04) << 2)	/* 0100 */
-#define L_PTE_MT_DEV_NONSHARED	(_AT(pteval_t, 0x0c) << 2)	/* 1100 */
-#define L_PTE_MT_DEV_WC		(_AT(pteval_t, 0x09) << 2)	/* 1001 */
-#define L_PTE_MT_DEV_CACHED	(_AT(pteval_t, 0x0b) << 2)	/* 1011 */
-#define L_PTE_MT_MASK		(_AT(pteval_t, 0x0f) << 2)
-
-#ifndef __ASSEMBLY__
+#ifdef CONFIG_ARM_LPAE
+#define USER_PGTABLES_CEILING	TASK_SIZE
+#endif
 
 /*
  * The pgprot_* and protection_map entries will be fixed up in runtime
@@ -202,10 +82,13 @@ extern void __pgd_error(const char *file, int line, pgd_t);
 
 extern pgprot_t		pgprot_user;
 extern pgprot_t		pgprot_kernel;
+extern pgprot_t		pgprot_hyp_device;
+extern pgprot_t		pgprot_s2;
+extern pgprot_t		pgprot_s2_device;
 
 #define _MOD_PROT(p, b)	__pgprot(pgprot_val(p) | (b))
 
-#define PAGE_NONE		_MOD_PROT(pgprot_user, L_PTE_XN | L_PTE_RDONLY)
+#define PAGE_NONE		_MOD_PROT(pgprot_user, L_PTE_XN | L_PTE_RDONLY | L_PTE_NONE)
 #define PAGE_SHARED		_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_XN)
 #define PAGE_SHARED_EXEC	_MOD_PROT(pgprot_user, L_PTE_USER)
 #define PAGE_COPY		_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY | L_PTE_XN)
@@ -214,8 +97,12 @@ extern pgprot_t		pgprot_kernel;
 #define PAGE_READONLY_EXEC	_MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY)
 #define PAGE_KERNEL		_MOD_PROT(pgprot_kernel, L_PTE_XN)
 #define PAGE_KERNEL_EXEC	pgprot_kernel
+#define PAGE_HYP		_MOD_PROT(pgprot_kernel, L_PTE_HYP)
+#define PAGE_HYP_DEVICE		_MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
+#define PAGE_S2			_MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY)
+#define PAGE_S2_DEVICE		_MOD_PROT(pgprot_s2_device, L_PTE_S2_RDWR)
 
-#define __PAGE_NONE		__pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
+#define __PAGE_NONE		__pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE)
 #define __PAGE_SHARED		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
 #define __PAGE_SHARED_EXEC	__pgprot(_L_PTE_DEFAULT | L_PTE_USER)
 #define __PAGE_COPY		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_RDONLY | L_PTE_XN)
@@ -232,6 +119,9 @@ extern pgprot_t		pgprot_kernel;
 #define pgprot_writecombine(prot) \
 	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE)
 
+#define pgprot_stronglyordered(prot) \
+	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED)
+
 #ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
 #define pgprot_dmacoherent(prot) \
 	__pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE | L_PTE_XN)
@@ -291,49 +181,15 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(addr)	pgd_offset(&init_mm, addr)
 
-/*
- * The "pgd_xxx()" functions here are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
- */
-#define pgd_none(pgd)		(0)
-#define pgd_bad(pgd)		(0)
-#define pgd_present(pgd)	(1)
-#define pgd_clear(pgdp)		do { } while (0)
-#define set_pgd(pgd,pgdp)	do { } while (0)
-
-
-/* Find an entry in the second-level page table.. */
-#define pmd_offset(dir, addr)	((pmd_t *)(dir))
-
 #define pmd_none(pmd)		(!pmd_val(pmd))
 #define pmd_present(pmd)	(pmd_val(pmd))
-#define pmd_bad(pmd)		(pmd_val(pmd) & 2)
-
-#define copy_pmd(pmdpd,pmdps)		\
-	do {				\
-		pmdpd[0] = pmdps[0];	\
-		pmdpd[1] = pmdps[1];	\
-		flush_pmd_entry(pmdpd);	\
-	} while (0)
-
-#define pmd_clear(pmdp)			\
-	do {				\
-		pmdp[0] = __pmd(0);	\
-		pmdp[1] = __pmd(0);	\
-		clean_pmd_entry(pmdp);	\
-	} while (0)
 
 static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 {
-	return __va(pmd_val(pmd) & PAGE_MASK);
+	return __va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK);
}
 
-#define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd)))
-
-/* we don't need complex calculations here as the pmd is folded into the pgd */
-#define pmd_addr_end(addr,end)	(end)
-
+#define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
 
 #ifndef CONFIG_HIGHPTE
 #define __pte_map(pmd)		pmd_page_vaddr(*(pmd))
@@ -350,15 +206,27 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 #define pte_offset_map(pmd,addr)	(__pte_map(pmd) + pte_index(addr))
 #define pte_unmap(pte)			__pte_unmap(pte)
 
-#define pte_pfn(pte)		(pte_val(pte) >> PAGE_SHIFT)
-#define pfn_pte(pfn,prot)	__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
+#define pte_pfn(pte)		((pte_val(pte) & PHYS_MASK) >> PAGE_SHIFT)
+#define pfn_pte(pfn,prot)	__pte(__pfn_to_phys(pfn) | pgprot_val(prot))
 
 #define pte_page(pte)		pfn_to_page(pte_pfn(pte))
 #define mk_pte(page,prot)	pfn_pte(page_to_pfn(page), prot)
 
-#define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext)
 #define pte_clear(mm,addr,ptep)	set_pte_ext(ptep, __pte(0), 0)
 
+#define pte_none(pte)		(!pte_val(pte))
+#define pte_present(pte)	(pte_val(pte) & L_PTE_PRESENT)
+#define pte_valid(pte)		(pte_val(pte) & L_PTE_VALID)
+#define pte_accessible(mm, pte)	(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
+#define pte_write(pte)		(!(pte_val(pte) & L_PTE_RDONLY))
+#define pte_dirty(pte)		(pte_val(pte) & L_PTE_DIRTY)
+#define pte_young(pte)		(pte_val(pte) & L_PTE_YOUNG)
+#define pte_exec(pte)		(!(pte_val(pte) & L_PTE_XN))
+#define pte_special(pte)	(0)
+
+#define pte_valid_user(pte)	\
+	(pte_valid(pte) && (pte_val(pte) & L_PTE_USER) && pte_young(pte))
+
 #if __LINUX_ARM_ARCH__ < 6
 static inline void __sync_icache_dcache(pte_t pteval)
 {
@@ -370,25 +238,15 @@ extern void __sync_icache_dcache(pte_t pteval);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pteval)
 {
-	if (addr >= TASK_SIZE)
-		set_pte_ext(ptep, pteval, 0);
-	else {
+	unsigned long ext = 0;
+
+	if (addr < TASK_SIZE && pte_valid_user(pteval)) {
 		__sync_icache_dcache(pteval);
-		set_pte_ext(ptep, pteval, PTE_EXT_NG);
+		ext |= PTE_EXT_NG;
 	}
-}
-
-#define pte_none(pte)		(!pte_val(pte))
-#define pte_present(pte)	(pte_val(pte) & L_PTE_PRESENT)
-#define pte_write(pte)		(!(pte_val(pte) & L_PTE_RDONLY))
-#define pte_dirty(pte)		(pte_val(pte) & L_PTE_DIRTY)
-#define pte_young(pte)		(pte_val(pte) & L_PTE_YOUNG)
-#define pte_exec(pte)		(!(pte_val(pte) & L_PTE_XN))
-#define pte_special(pte)	(0)
 
-#define pte_present_user(pte) \
-	((pte_val(pte) & (L_PTE_PRESENT | L_PTE_USER)) == \
-	 (L_PTE_PRESENT | L_PTE_USER))
+	set_pte_ext(ptep, pteval, ext);
+}
 
 #define PTE_BIT_FUNC(fn,op) \
 static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
@@ -399,12 +257,15 @@ PTE_BIT_FUNC(mkclean,   &= ~L_PTE_DIRTY);
 PTE_BIT_FUNC(mkdirty,   |= L_PTE_DIRTY);
 PTE_BIT_FUNC(mkold,     &= ~L_PTE_YOUNG);
 PTE_BIT_FUNC(mkyoung,   |= L_PTE_YOUNG);
+PTE_BIT_FUNC(mkexec,   &= ~L_PTE_XN);
+PTE_BIT_FUNC(mknexec,   |= L_PTE_XN);
 
 static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-	const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER;
+	const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER |
+		L_PTE_NONE | L_PTE_VALID;
 	pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
 	return pte;
 }
@@ -415,13 +276,13 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
  *
  *   3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1
  *   1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
- *   <--------------- offset --------------------> <- type --> 0 0 0
+ *   <--------------- offset ----------------------> < type -> 0 0 0
  *
- * This gives us up to 63 swap files and 32GB per swap file.  Note that
+ * This gives us up to 31 swap files and 64GB per swap file.  Note that
  * the offset field is always non-zero.
  */
 #define __SWP_TYPE_SHIFT	3
-#define __SWP_TYPE_BITS		6
+#define __SWP_TYPE_BITS		5
 #define __SWP_TYPE_MASK		((1 << __SWP_TYPE_BITS) - 1)
 #define __SWP_OFFSET_SHIFT	(__SWP_TYPE_BITS + __SWP_TYPE_SHIFT)
 
@@ -463,19 +324,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
  * We provide our own arch_get_unmapped_area to cope with VIPT caches.
  */
 #define HAVE_ARCH_UNMAPPED_AREA
-
-/*
- * remap a physical page `pfn' of size `size' with page protection `prot'
- * into virtual address `from'
- */
-#define io_remap_pfn_range(vma,from,pfn,size,prot) \
-	remap_pfn_range(vma, from, pfn, size, prot)
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 
 #define pgtable_cache_init() do { } while (0)
 
-void identity_mapping_add(pgd_t *, unsigned long, unsigned long);
-void identity_mapping_del(pgd_t *, unsigned long, unsigned long);
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* CONFIG_MMU */
```
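A few worked examples follow for the less obvious arithmetic in this diff; all of them are standalone userspace sketches, not kernel code. First, the deleted layout comment (its text, along with PTRS_PER_* and PTE_HWTABLE_*, moves to the new asm/pgtable-2level.h rather than disappearing) packs several derived numbers into prose. This sketch re-checks them, assuming the classic 2-level configuration with 4-byte ptes:

```c
#include <stdio.h>

/* values from the (moved) 2-level definitions */
#define PTRS_PER_PTE	512
#define PTRS_PER_PGD	2048
#define PTE_SIZE	4	/* sizeof(pte_t) on 2-level ARM */

int main(void)
{
	/* 4GB / 2048 pgd entries = 2MB each, matching PMD_SHIFT == 21 */
	printf("per pgd entry: %llu MB\n",
	       (1ULL << 32) / PTRS_PER_PGD / (1024 * 1024));

	/* one 4K page holds 512 Linux ptes at +0..+2048, then the two
	   256-entry hardware tables at +2048..+4096 */
	printf("PTE_HWTABLE_OFF: %d\n", PTRS_PER_PTE * PTE_SIZE);
	return 0;
}
```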
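The same removed comment documents the dirty-bit emulation that still applies after the move: hardware write permission is granted only when the Linux PTE is both writable and dirty, so the first write to a clean page faults and handle_pte_fault() marks it dirty. A minimal sketch of that predicate, using the L_PTE_DIRTY/L_PTE_RDONLY positions shown in the deleted hunk:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t pteval_t;

#define L_PTE_DIRTY	((pteval_t)1 << 6)
#define L_PTE_RDONLY	((pteval_t)1 << 7)

/* hardware write permission iff writable && dirty */
static bool hw_writable(pteval_t pte)
{
	return !(pte & L_PTE_RDONLY) && (pte & L_PTE_DIRTY);
}

int main(void)
{
	pteval_t pte = 0;	/* writable (RDONLY clear) but clean */

	printf("before fault: %d\n", hw_writable(pte));	/* 0: write faults */
	pte |= L_PTE_DIRTY;	/* handle_pte_fault() marks the page dirty */
	printf("after fault:  %d\n", hw_writable(pte));	/* 1 */
	return 0;
}
```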
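With <mach/vmalloc.h> gone, every platform now gets the same layout: VMALLOC_END is pinned at 0xff000000 and VMALLOC_START is still derived by rounding high_memory up to the next 8MB boundary, which also leaves a guard gap below the vmalloc area. A quick check of that expression (the high_memory value is made up for the example):

```c
#include <stdio.h>

#define VMALLOC_OFFSET	(8*1024*1024)

int main(void)
{
	unsigned long high_memory = 0xc79fe000UL;	/* hypothetical */
	unsigned long start = (high_memory + VMALLOC_OFFSET)
			      & ~(VMALLOC_OFFSET - 1UL);

	/* rounds up to the next 8MB boundary: 0xc8000000 here,
	   leaving a ~6MB hole between lowmem and the vmalloc area */
	printf("VMALLOC_START = %#lx\n", start);
	return 0;
}
```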
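The swap-entry change is pure bit accounting: the type field shrinks from 6 to 5 bits, so the offset grows from 23 to 24 bits, and with 4K pages that trades "63 swap files and 32GB each" for "31 swap files and 64GB each". A userspace re-derivation of the encode/decode (the macro names mirror the header but are redefined here):

```c
#include <stdint.h>
#include <stdio.h>

#define SWP_TYPE_SHIFT		3	/* low 3 bits stay zero */
#define SWP_TYPE_BITS		5
#define SWP_TYPE_MASK		((1u << SWP_TYPE_BITS) - 1)
#define SWP_OFFSET_SHIFT	(SWP_TYPE_BITS + SWP_TYPE_SHIFT)

static uint32_t swp_entry(uint32_t type, uint32_t offset)
{
	return (type << SWP_TYPE_SHIFT) | (offset << SWP_OFFSET_SHIFT);
}

int main(void)
{
	uint32_t e = swp_entry(3, 0x123456);

	printf("type=%u offset=%#x\n",
	       (e >> SWP_TYPE_SHIFT) & SWP_TYPE_MASK,
	       e >> SWP_OFFSET_SHIFT);

	/* 24 offset bits * 4KB pages = 64GB per swap area */
	printf("max pages per area: %u\n",
	       (1u << (32 - SWP_OFFSET_SHIFT)) - 1);
	return 0;
}
```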
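Finally, pte_modify()'s mask now carries L_PTE_NONE and L_PTE_VALID as well, so PROT_NONE transitions can flip validity through the protection bits while the memory type, young, and dirty bits still survive from the old PTE. A sketch of that filtering; the L_PTE_NONE and L_PTE_VALID positions are assumed from the 2-level layout and are not shown in this diff:

```c
#include <stdint.h>
#include <stdio.h>

typedef uint32_t pteval_t;

#define L_PTE_VALID	((pteval_t)1 << 0)	/* assumed position */
#define L_PTE_DIRTY	((pteval_t)1 << 6)
#define L_PTE_RDONLY	((pteval_t)1 << 7)
#define L_PTE_USER	((pteval_t)1 << 8)
#define L_PTE_XN	((pteval_t)1 << 9)
#define L_PTE_NONE	((pteval_t)1 << 11)	/* assumed position */

static pteval_t pte_modify(pteval_t pte, pteval_t newprot)
{
	const pteval_t mask = L_PTE_XN | L_PTE_RDONLY | L_PTE_USER |
			      L_PTE_NONE | L_PTE_VALID;

	/* protection bits come from newprot; the rest is preserved */
	return (pte & ~mask) | (newprot & mask);
}

int main(void)
{
	/* mprotect() to read-only: RDONLY/XN come from newprot,
	   DIRTY is preserved from the old PTE */
	pteval_t pte = L_PTE_VALID | L_PTE_USER | L_PTE_DIRTY;

	pte = pte_modify(pte, L_PTE_VALID | L_PTE_USER |
			      L_PTE_RDONLY | L_PTE_XN);
	printf("rdonly=%d dirty=%d\n",
	       !!(pte & L_PTE_RDONLY), !!(pte & L_PTE_DIRTY));	/* 1 1 */
	return 0;
}
```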
