diff options
Diffstat (limited to 'arch/powerpc/include/asm/mmu-hash64.h')
| -rw-r--r-- | arch/powerpc/include/asm/mmu-hash64.h | 302 |
1 files changed, 209 insertions, 93 deletions
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 412ba493cb9..c2b4dcf23d0 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -16,6 +16,15 @@ #include <asm/page.h> /* + * This is necessary to get the definition of PGTABLE_RANGE which we + * need for various slices related matters. Note that this isn't the + * complete pgtable.h but only a portion of it. + */ +#include <asm/pgtable-ppc64.h> +#include <asm/bug.h> +#include <asm/processor.h> + +/* * Segment table */ @@ -108,17 +117,27 @@ extern char initial_stab[]; #define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000) /* Values for PP (assumes Ks=0, Kp=1) */ -/* pp0 will always be 0 for linux */ #define PP_RWXX 0 /* Supervisor read/write, User none */ #define PP_RWRX 1 /* Supervisor read/write, User read */ #define PP_RWRW 2 /* Supervisor read/write, User read/write */ #define PP_RXRX 3 /* Supervisor read, User read */ +#define PP_RXXX (HPTE_R_PP0 | 2) /* Supervisor read, user none */ + +/* Fields for tlbiel instruction in architecture 2.06 */ +#define TLBIEL_INVAL_SEL_MASK 0xc00 /* invalidation selector */ +#define TLBIEL_INVAL_PAGE 0x000 /* invalidate a single page */ +#define TLBIEL_INVAL_SET_LPID 0x800 /* invalidate a set for current LPID */ +#define TLBIEL_INVAL_SET 0xc00 /* invalidate a set for all LPIDs */ +#define TLBIEL_INVAL_SET_MASK 0xfff000 /* set number to inval. */ +#define TLBIEL_INVAL_SET_SHIFT 12 + +#define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */ #ifndef __ASSEMBLY__ struct hash_pte { - unsigned long v; - unsigned long r; + __be64 v; + __be64 r; }; extern struct hash_pte *htab_address; @@ -137,11 +156,29 @@ extern unsigned long htab_hash_mask; struct mmu_psize_def { unsigned int shift; /* number of bits */ - unsigned int penc; /* HPTE encoding */ + int penc[MMU_PAGE_COUNT]; /* HPTE encoding */ unsigned int tlbiel; /* tlbiel supported for that page size */ unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */ unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */ }; +extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; + +static inline int shift_to_mmu_psize(unsigned int shift) +{ + int psize; + + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) + if (mmu_psize_defs[psize].shift == shift) + return psize; + return -1; +} + +static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) +{ + if (mmu_psize_defs[mmu_psize].shift) + return mmu_psize_defs[mmu_psize].shift; + BUG(); +} #endif /* __ASSEMBLY__ */ @@ -154,13 +191,35 @@ struct mmu_psize_def #define MMU_SEGSIZE_256M 0 #define MMU_SEGSIZE_1T 1 +/* + * encode page number shift. + * in order to fit the 78 bit va in a 64 bit variable we shift the va by + * 12 bits. This enable us to address upto 76 bit va. + * For hpt hash from a va we can ignore the page size bits of va and for + * hpte encoding we ignore up to 23 bits of va. So ignoring lower 12 bits ensure + * we work in all cases including 4k page size. + */ +#define VPN_SHIFT 12 + +/* + * HPTE Large Page (LP) details + */ +#define LP_SHIFT 12 +#define LP_BITS 8 +#define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT) #ifndef __ASSEMBLY__ +static inline int segment_shift(int ssize) +{ + if (ssize == MMU_SEGSIZE_256M) + return SID_SHIFT; + return SID_SHIFT_1T; +} + /* * The current system page and segment sizes */ -extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; extern int mmu_linear_psize; extern int mmu_virtual_psize; extern int mmu_vmalloc_psize; @@ -180,66 +239,92 @@ extern unsigned long tce_alloc_start, tce_alloc_end; extern int mmu_ci_restrictions; /* - * This function sets the AVPN and L fields of the HPTE appropriately - * for the page size + * This computes the AVPN and B fields of the first dword of a HPTE, + * for use when we want to match an existing PTE. The bottom 7 bits + * of the returned value are zero. */ -static inline unsigned long hpte_encode_v(unsigned long va, int psize, - int ssize) +static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize, + int ssize) { unsigned long v; - v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm); + /* + * The AVA field omits the low-order 23 bits of the 78 bits VA. + * These bits are not needed in the PTE, because the + * low-order b of these bits are part of the byte offset + * into the virtual page and, if b < 23, the high-order + * 23-b of these bits are always used in selecting the + * PTEGs to be searched + */ + v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm); v <<= HPTE_V_AVPN_SHIFT; - if (psize != MMU_PAGE_4K) - v |= HPTE_V_LARGE; v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; return v; } /* + * This function sets the AVPN and L fields of the HPTE appropriately + * using the base page size and actual page size. + */ +static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize, + int actual_psize, int ssize) +{ + unsigned long v; + v = hpte_encode_avpn(vpn, base_psize, ssize); + if (actual_psize != MMU_PAGE_4K) + v |= HPTE_V_LARGE; + return v; +} + +/* * This function sets the ARPN, and LP fields of the HPTE appropriately * for the page size. We assume the pa is already "clean" that is properly * aligned for the requested page size */ -static inline unsigned long hpte_encode_r(unsigned long pa, int psize) +static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize, + int actual_psize) { - unsigned long r; - /* A 4K page needs no special encoding */ - if (psize == MMU_PAGE_4K) + if (actual_psize == MMU_PAGE_4K) return pa & HPTE_R_RPN; else { - unsigned int penc = mmu_psize_defs[psize].penc; - unsigned int shift = mmu_psize_defs[psize].shift; - return (pa & ~((1ul << shift) - 1)) | (penc << 12); + unsigned int penc = mmu_psize_defs[base_psize].penc[actual_psize]; + unsigned int shift = mmu_psize_defs[actual_psize].shift; + return (pa & ~((1ul << shift) - 1)) | (penc << LP_SHIFT); } - return r; } /* - * Build a VA given VSID, EA and segment size + * Build a VPN_SHIFT bit shifted va given VSID, EA and segment size. */ -static inline unsigned long hpt_va(unsigned long ea, unsigned long vsid, - int ssize) +static inline unsigned long hpt_vpn(unsigned long ea, + unsigned long vsid, int ssize) { - if (ssize == MMU_SEGSIZE_256M) - return (vsid << 28) | (ea & 0xfffffffUL); - return (vsid << 40) | (ea & 0xffffffffffUL); + unsigned long mask; + int s_shift = segment_shift(ssize); + + mask = (1ul << (s_shift - VPN_SHIFT)) - 1; + return (vsid << (s_shift - VPN_SHIFT)) | ((ea >> VPN_SHIFT) & mask); } /* * This hashes a virtual address */ - -static inline unsigned long hpt_hash(unsigned long va, unsigned int shift, - int ssize) +static inline unsigned long hpt_hash(unsigned long vpn, + unsigned int shift, int ssize) { + int mask; unsigned long hash, vsid; + /* VPN_SHIFT can be atmost 12 */ if (ssize == MMU_SEGSIZE_256M) { - hash = (va >> 28) ^ ((va & 0x0fffffffUL) >> shift); + mask = (1ul << (SID_SHIFT - VPN_SHIFT)) - 1; + hash = (vpn >> (SID_SHIFT - VPN_SHIFT)) ^ + ((vpn & mask) >> (shift - VPN_SHIFT)); } else { - vsid = va >> 40; - hash = vsid ^ (vsid << 25) ^ ((va & 0xffffffffffUL) >> shift); + mask = (1ul << (SID_SHIFT_1T - VPN_SHIFT)) - 1; + vsid = vpn >> (SID_SHIFT_1T - VPN_SHIFT); + hash = vsid ^ (vsid << 25) ^ + ((vpn & mask) >> (shift - VPN_SHIFT)) ; } return hash & 0x7fffffffffUL; } @@ -256,9 +341,24 @@ extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap) int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, int local, int ssize, unsigned int shift, unsigned int mmu_psize); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +extern int __hash_page_thp(unsigned long ea, unsigned long access, + unsigned long vsid, pmd_t *pmdp, unsigned long trap, + int local, int ssize, unsigned int psize); +#else +static inline int __hash_page_thp(unsigned long ea, unsigned long access, + unsigned long vsid, pmd_t *pmdp, + unsigned long trap, int local, + int ssize, unsigned int psize) +{ + BUG(); + return -1; +} +#endif extern void hash_failure_debug(unsigned long ea, unsigned long access, unsigned long vsid, unsigned long trap, - int ssize, int psize, unsigned long pte); + int ssize, int psize, int lpsize, + unsigned long pte); extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, unsigned long pstart, unsigned long prot, int psize, int ssize); @@ -267,7 +367,6 @@ extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); extern void hpte_init_native(void); extern void hpte_init_lpar(void); -extern void hpte_init_iSeries(void); extern void hpte_init_beat(void); extern void hpte_init_beat_v3(void); @@ -281,68 +380,70 @@ extern void slb_set_size(u16 size); #endif /* __ASSEMBLY__ */ /* - * VSID allocation + * VSID allocation (256MB segment) * - * We first generate a 36-bit "proto-VSID". For kernel addresses this - * is equal to the ESID, for user addresses it is: - * (context << 15) | (esid & 0x7fff) + * We first generate a 37-bit "proto-VSID". Proto-VSIDs are generated + * from mmu context id and effective segment id of the address. * - * The two forms are distinguishable because the top bit is 0 for user - * addresses, whereas the top two bits are 1 for kernel addresses. - * Proto-VSIDs with the top two bits equal to 0b10 are reserved for - * now. + * For user processes max context id is limited to ((1ul << 19) - 5) + * for kernel space, we use the top 4 context ids to map address as below + * NOTE: each context only support 64TB now. + * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ] + * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ] + * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ] + * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ] * * The proto-VSIDs are then scrambled into real VSIDs with the * multiplicative hash: * * VSID = (proto-VSID * VSID_MULTIPLIER) % VSID_MODULUS - * where VSID_MULTIPLIER = 268435399 = 0xFFFFFC7 - * VSID_MODULUS = 2^36-1 = 0xFFFFFFFFF * - * This scramble is only well defined for proto-VSIDs below - * 0xFFFFFFFFF, so both proto-VSID and actual VSID 0xFFFFFFFFF are - * reserved. VSID_MULTIPLIER is prime, so in particular it is + * VSID_MULTIPLIER is prime, so in particular it is * co-prime to VSID_MODULUS, making this a 1:1 scrambling function. * Because the modulus is 2^n-1 we can compute it efficiently without - * a divide or extra multiply (see below). + * a divide or extra multiply (see below). The scramble function gives + * robust scattering in the hash table (at least based on some initial + * results). * - * This scheme has several advantages over older methods: + * We also consider VSID 0 special. We use VSID 0 for slb entries mapping + * bad address. This enables us to consolidate bad address handling in + * hash_page. * - * - We have VSIDs allocated for every kernel address - * (i.e. everything above 0xC000000000000000), except the very top - * segment, which simplifies several things. - * - * - We allow for 16 significant bits of ESID and 19 bits of - * context for user addresses. i.e. 16T (44 bits) of address space for - * up to half a million contexts. - * - * - The scramble function gives robust scattering in the hash - * table (at least based on some initial results). The previous - * method was more susceptible to pathological cases giving excessive - * hash collisions. + * We also need to avoid the last segment of the last context, because that + * would give a protovsid of 0x1fffffffff. That will result in a VSID 0 + * because of the modulo operation in vsid scramble. But the vmemmap + * (which is what uses region 0xf) will never be close to 64TB in size + * (it's 56 bytes per page of system memory). */ + +#define CONTEXT_BITS 19 +#define ESID_BITS 18 +#define ESID_BITS_1T 6 + /* - * WARNING - If you change these you must make sure the asm - * implementations in slb_allocate (slb_low.S), do_stab_bolted - * (head.S) and ASM_VSID_SCRAMBLE (below) are changed accordingly. - * - * You'll also need to change the precomputed VSID values in head.S - * which are used by the iSeries firmware. + * 256MB segment + * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments + * available for user + kernel mapping. The top 4 contexts are used for + * kernel mapping. Each segment contains 2^28 bytes. Each + * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts + * (19 == 37 + 28 - 46). */ +#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 5) -#define VSID_MULTIPLIER_256M ASM_CONST(200730139) /* 28-bit prime */ -#define VSID_BITS_256M 36 +/* + * This should be computed such that protovosid * vsid_mulitplier + * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus + */ +#define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */ +#define VSID_BITS_256M (CONTEXT_BITS + ESID_BITS) #define VSID_MODULUS_256M ((1UL<<VSID_BITS_256M)-1) #define VSID_MULTIPLIER_1T ASM_CONST(12538073) /* 24-bit prime */ -#define VSID_BITS_1T 24 +#define VSID_BITS_1T (CONTEXT_BITS + ESID_BITS_1T) #define VSID_MODULUS_1T ((1UL<<VSID_BITS_1T)-1) -#define CONTEXT_BITS 19 -#define USER_ESID_BITS 16 -#define USER_ESID_BITS_1T 4 -#define USER_VSID_RANGE (1UL << (USER_ESID_BITS + SID_SHIFT)) +#define USER_VSID_RANGE (1UL << (ESID_BITS + SID_SHIFT)) /* * This macro generates asm code to compute the VSID scramble @@ -366,7 +467,8 @@ extern void slb_set_size(u16 size); srdi rx,rt,VSID_BITS_##size; \ clrldi rt,rt,(64-VSID_BITS_##size); \ add rt,rt,rx; /* add high and low bits */ \ - /* Now, r3 == VSID (mod 2^36-1), and lies between 0 and \ + /* NOTE: explanation based on VSID_BITS_##size = 36 \ + * Now, r3 == VSID (mod 2^36-1), and lies between 0 and \ * 2^36-1+2^28-1. That in particular means that if r3 >= \ * 2^36-1, then r3+1 has the 2^36 bit set. So, if r3+1 has \ * the bit clear, r3 already has the answer we want, if it \ @@ -376,6 +478,8 @@ extern void slb_set_size(u16 size); srdi rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */ \ add rt,rt,rx +/* 4 bits per slice and we have one slice per 1TB */ +#define SLICE_ARRAY_SIZE (PGTABLE_RANGE >> 41) #ifndef __ASSEMBLY__ @@ -393,7 +497,7 @@ extern void slb_set_size(u16 size); */ struct subpage_prot_table { unsigned long maxaddr; /* only addresses < this are protected */ - unsigned int **protptrs[2]; + unsigned int **protptrs[(TASK_SIZE_USER64 >> 43)]; unsigned int *low_prot[4]; }; @@ -420,7 +524,7 @@ typedef struct { #ifdef CONFIG_PPC_MM_SLICES u64 low_slices_psize; /* SLB page size encodings */ - u64 high_slices_psize; /* 4 bits per slice for now */ + unsigned char high_slices_psize[SLICE_ARRAY_SIZE]; #else u16 sllp; /* SLB page size encoding */ #endif @@ -433,6 +537,10 @@ typedef struct { unsigned long acop; /* mask of enabled coprocessor types */ unsigned int cop_pid; /* pid value used with coprocessors */ #endif /* CONFIG_PPC_ICSWX */ +#ifdef CONFIG_PPC_64K_PAGES + /* for 4K PTE fragment support */ + void *pte_frag; +#endif } mm_context_t; @@ -456,14 +564,6 @@ typedef struct { }) #endif /* 1 */ -/* This is only valid for addresses >= PAGE_OFFSET */ -static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) -{ - if (ssize == MMU_SEGSIZE_256M) - return vsid_scramble(ea >> SID_SHIFT, 256M); - return vsid_scramble(ea >> SID_SHIFT_1T, 1T); -} - /* Returns the segment size indicator for a user address */ static inline int user_segment_size(unsigned long addr) { @@ -473,25 +573,41 @@ static inline int user_segment_size(unsigned long addr) return MMU_SEGSIZE_256M; } -/* This is only valid for user addresses (which are below 2^44) */ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, int ssize) { + /* + * Bad address. We return VSID 0 for that + */ + if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) + return 0; + if (ssize == MMU_SEGSIZE_256M) - return vsid_scramble((context << USER_ESID_BITS) + return vsid_scramble((context << ESID_BITS) | (ea >> SID_SHIFT), 256M); - return vsid_scramble((context << USER_ESID_BITS_1T) + return vsid_scramble((context << ESID_BITS_1T) | (ea >> SID_SHIFT_1T), 1T); } /* - * This is only used on legacy iSeries in lparmap.c, - * hence the 256MB segment assumption. + * This is only valid for addresses >= PAGE_OFFSET + * + * For kernel space, we use the top 4 context ids to map address as below + * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ] + * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ] + * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ] + * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ] */ -#define VSID_SCRAMBLE(pvsid) (((pvsid) * VSID_MULTIPLIER_256M) % \ - VSID_MODULUS_256M) -#define KERNEL_VSID(ea) VSID_SCRAMBLE(GET_ESID(ea)) +static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) +{ + unsigned long context; + /* + * kernel take the top 4 context from the available range + */ + context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1; + return get_vsid(context, ea, ssize); +} #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_MMU_HASH64_H_ */ |
