diff options
Diffstat (limited to 'arch/arc/mm')
| -rw-r--r-- | arch/arc/mm/Makefile | 2 | ||||
| -rw-r--r-- | arch/arc/mm/cache_arc700.c | 676 | ||||
| -rw-r--r-- | arch/arc/mm/extable.c | 4 | ||||
| -rw-r--r-- | arch/arc/mm/fault.c | 36 | ||||
| -rw-r--r-- | arch/arc/mm/init.c | 97 | ||||
| -rw-r--r-- | arch/arc/mm/ioremap.c | 2 | ||||
| -rw-r--r-- | arch/arc/mm/mmap.c | 78 | ||||
| -rw-r--r-- | arch/arc/mm/tlb.c | 333 | ||||
| -rw-r--r-- | arch/arc/mm/tlbex.S | 244 |
9 files changed, 781 insertions, 691 deletions
diff --git a/arch/arc/mm/Makefile b/arch/arc/mm/Makefile index 168dc146a8f..ac95cc239c1 100644 --- a/arch/arc/mm/Makefile +++ b/arch/arc/mm/Makefile @@ -7,4 +7,4 @@ # obj-y := extable.o ioremap.o dma.o fault.o init.o -obj-y += tlb.o tlbex.o cache_arc700.o +obj-y += tlb.o tlbex.o cache_arc700.o mmap.o diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c index 88d617d8423..353b202c37c 100644 --- a/arch/arc/mm/cache_arc700.c +++ b/arch/arc/mm/cache_arc700.c @@ -68,24 +68,14 @@ #include <linux/mmu_context.h> #include <linux/syscalls.h> #include <linux/uaccess.h> +#include <linux/pagemap.h> #include <asm/cacheflush.h> #include <asm/cachectl.h> #include <asm/setup.h> - -#ifdef CONFIG_ARC_HAS_ICACHE -static void __ic_line_inv_no_alias(unsigned long, int); -static void __ic_line_inv_2_alias(unsigned long, int); -static void __ic_line_inv_4_alias(unsigned long, int); - -/* Holds the ptr to flush routine, dependign on size due to aliasing issues */ -static void (*___flush_icache_rtn) (unsigned long, int); -#endif - -char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len) +char *arc_cache_mumbojumbo(int c, char *buf, int len) { int n = 0; - unsigned int c = smp_processor_id(); #define PR_CACHE(p, enb, str) \ { \ @@ -98,8 +88,10 @@ char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len) enb ? "" : "DISABLED (kernel-build)"); \ } - PR_CACHE(&cpuinfo_arc700[c].icache, __CONFIG_ARC_HAS_ICACHE, "I-Cache"); - PR_CACHE(&cpuinfo_arc700[c].dcache, __CONFIG_ARC_HAS_DCACHE, "D-Cache"); + PR_CACHE(&cpuinfo_arc700[c].icache, IS_ENABLED(CONFIG_ARC_HAS_ICACHE), + "I-Cache"); + PR_CACHE(&cpuinfo_arc700[c].dcache, IS_ENABLED(CONFIG_ARC_HAS_DCACHE), + "D-Cache"); return buf; } @@ -109,17 +101,23 @@ char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len) * the cpuinfo structure for later use. * No Validation done here, simply read/convert the BCRs */ -void __init read_decode_cache_bcr(void) +void read_decode_cache_bcr(void) { - struct bcr_cache ibcr, dbcr; struct cpuinfo_arc_cache *p_ic, *p_dc; unsigned int cpu = smp_processor_id(); + struct bcr_cache { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:12, line_len:4, sz:4, config:4, ver:8; +#else + unsigned int ver:8, config:4, sz:4, line_len:4, pad:12; +#endif + } ibcr, dbcr; p_ic = &cpuinfo_arc700[cpu].icache; READ_BCR(ARC_REG_IC_BCR, ibcr); - if (ibcr.config == 0x3) - p_ic->assoc = 2; + BUG_ON(ibcr.config != 3); + p_ic->assoc = 2; /* Fixed to 2w set assoc */ p_ic->line_len = 8 << ibcr.line_len; p_ic->sz = 0x200 << ibcr.sz; p_ic->ver = ibcr.ver; @@ -127,8 +125,8 @@ void __init read_decode_cache_bcr(void) p_dc = &cpuinfo_arc700[cpu].dcache; READ_BCR(ARC_REG_DC_BCR, dbcr); - if (dbcr.config == 0x2) - p_dc->assoc = 4; + BUG_ON(dbcr.config != 2); + p_dc->assoc = 4; /* Fixed to 4w set assoc */ p_dc->line_len = 16 << dbcr.line_len; p_dc->sz = 0x200 << dbcr.sz; p_dc->ver = dbcr.ver; @@ -141,112 +139,116 @@ void __init read_decode_cache_bcr(void) * 3. Enable the Caches, setup default flush mode for D-Cache * 3. Calculate the SHMLBA used by user space */ -void __init arc_cache_init(void) +void arc_cache_init(void) { - unsigned int temp; - unsigned int cpu = smp_processor_id(); - struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; - struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; - int way_pg_ratio = way_pg_ratio; + unsigned int __maybe_unused cpu = smp_processor_id(); + struct cpuinfo_arc_cache __maybe_unused *ic, __maybe_unused *dc; char str[256]; printk(arc_cache_mumbojumbo(0, str, sizeof(str))); - if (!ic->ver) - goto chk_dc; - #ifdef CONFIG_ARC_HAS_ICACHE - /* 1. Confirm some of I-cache params which Linux assumes */ - if ((ic->assoc != ARC_ICACHE_WAYS) || - (ic->line_len != ARC_ICACHE_LINE_LEN)) { - panic("Cache H/W doesn't match kernel Config"); - } -#if (CONFIG_ARC_MMU_VER > 2) - if (ic->ver != 3) { - if (running_on_hw) - panic("Cache ver doesn't match MMU ver\n"); - - /* For ISS - suggest the toggles to use */ - pr_err("Use -prop=icache_version=3,-prop=dcache_version=3\n"); - + ic = &cpuinfo_arc700[cpu].icache; + if (ic->ver) { + if (ic->line_len != L1_CACHE_BYTES) + panic("ICache line [%d] != kernel Config [%d]", + ic->line_len, L1_CACHE_BYTES); + + if (ic->ver != CONFIG_ARC_MMU_VER) + panic("Cache ver [%d] doesn't match MMU ver [%d]\n", + ic->ver, CONFIG_ARC_MMU_VER); } #endif - /* - * if Cache way size is <= page size then no aliasing exhibited - * otherwise ratio determines num of aliases. - * e.g. 32K I$, 2 way set assoc, 8k pg size - * way-sz = 32k/2 = 16k - * way-pg-ratio = 16k/8k = 2, so 2 aliases possible - * (meaning 1 line could be in 2 possible locations). - */ - way_pg_ratio = ic->sz / ARC_ICACHE_WAYS / PAGE_SIZE; - switch (way_pg_ratio) { - case 0: - case 1: - ___flush_icache_rtn = __ic_line_inv_no_alias; - break; - case 2: - ___flush_icache_rtn = __ic_line_inv_2_alias; - break; - case 4: - ___flush_icache_rtn = __ic_line_inv_4_alias; - break; - default: - panic("Unsupported I-Cache Sz\n"); - } -#endif +#ifdef CONFIG_ARC_HAS_DCACHE + dc = &cpuinfo_arc700[cpu].dcache; + if (dc->ver) { + unsigned int dcache_does_alias; - /* Enable/disable I-Cache */ - temp = read_aux_reg(ARC_REG_IC_CTRL); + if (dc->line_len != L1_CACHE_BYTES) + panic("DCache line [%d] != kernel Config [%d]", + dc->line_len, L1_CACHE_BYTES); -#ifdef CONFIG_ARC_HAS_ICACHE - temp &= ~IC_CTRL_CACHE_DISABLE; -#else - temp |= IC_CTRL_CACHE_DISABLE; + /* check for D-Cache aliasing */ + dcache_does_alias = (dc->sz / dc->assoc) > PAGE_SIZE; + + if (dcache_does_alias && !cache_is_vipt_aliasing()) + panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + else if (!dcache_does_alias && cache_is_vipt_aliasing()) + panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + } #endif +} - write_aux_reg(ARC_REG_IC_CTRL, temp); +#define OP_INV 0x1 +#define OP_FLUSH 0x2 +#define OP_FLUSH_N_INV 0x3 +#define OP_INV_IC 0x4 -chk_dc: - if (!dc->ver) - return; +/* + * Common Helper for Line Operations on {I,D}-Cache + */ +static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int cacheop) +{ + unsigned int aux_cmd, aux_tag; + int num_lines; + const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE; -#ifdef CONFIG_ARC_HAS_DCACHE - if ((dc->assoc != ARC_DCACHE_WAYS) || - (dc->line_len != ARC_DCACHE_LINE_LEN)) { - panic("Cache H/W doesn't match kernel Config"); + if (cacheop == OP_INV_IC) { + aux_cmd = ARC_REG_IC_IVIL; +#if (CONFIG_ARC_MMU_VER > 2) + aux_tag = ARC_REG_IC_PTAG; +#endif } - - /* check for D-Cache aliasing */ - if ((dc->sz / ARC_DCACHE_WAYS) > PAGE_SIZE) - panic("D$ aliasing not handled right now\n"); + else { + /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ + aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; +#if (CONFIG_ARC_MMU_VER > 2) + aux_tag = ARC_REG_DC_PTAG; #endif + } - /* Set the default Invalidate Mode to "simpy discard dirty lines" - * as this is more frequent then flush before invalidate - * Ofcourse we toggle this default behviour when desired + /* Ensure we properly floor/ceil the non-line aligned/sized requests + * and have @paddr - aligned to cache line and integral @num_lines. + * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). */ - temp = read_aux_reg(ARC_REG_DC_CTRL); - temp &= ~DC_CTRL_INV_MODE_FLUSH; + if (!full_page_op) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + vaddr &= CACHE_LINE_MASK; + } -#ifdef CONFIG_ARC_HAS_DCACHE - /* Enable D-Cache: Clear Bit 0 */ - write_aux_reg(ARC_REG_DC_CTRL, temp & ~IC_CTRL_CACHE_DISABLE); + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + +#if (CONFIG_ARC_MMU_VER <= 2) + /* MMUv2 and before: paddr contains stuffed vaddrs bits */ + paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; #else - /* Flush D cache */ - write_aux_reg(ARC_REG_DC_FLSH, 0x1); - /* Disable D cache */ - write_aux_reg(ARC_REG_DC_CTRL, temp | IC_CTRL_CACHE_DISABLE); + /* if V-P const for loop, PTAG can be written once outside loop */ + if (full_page_op) + write_aux_reg(aux_tag, paddr); #endif - return; + while (num_lines-- > 0) { +#if (CONFIG_ARC_MMU_VER > 2) + /* MMUv3, cache ops require paddr seperately */ + if (!full_page_op) { + write_aux_reg(aux_tag, paddr); + paddr += L1_CACHE_BYTES; + } + + write_aux_reg(aux_cmd, vaddr); + vaddr += L1_CACHE_BYTES; +#else + write_aux_reg(aux_cmd, paddr); + paddr += L1_CACHE_BYTES; +#endif + } } -#define OP_INV 0x1 -#define OP_FLUSH 0x2 -#define OP_FLUSH_N_INV 0x3 - #ifdef CONFIG_ARC_HAS_DCACHE /*************************************************************** @@ -267,11 +269,9 @@ static inline void wait_for_flush(void) */ static inline void __dc_entire_op(const int cacheop) { - unsigned long flags, tmp = tmp; + unsigned int tmp = tmp; int aux; - local_irq_save(flags); - if (cacheop == OP_FLUSH_N_INV) { /* Dcache provides 2 cmd: FLUSH or INV * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE @@ -295,58 +295,18 @@ static inline void __dc_entire_op(const int cacheop) /* Switch back the DISCARD ONLY Invalidate mode */ if (cacheop == OP_FLUSH_N_INV) write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH); - - local_irq_restore(flags); } -/* - * Per Line Operation on D-Cache - * Doesn't deal with type-of-op/IRQ-disabling/waiting-for-flush-to-complete - * It's sole purpose is to help gcc generate ZOL - */ -static inline void __dc_line_loop(unsigned long start, unsigned long sz, - int aux_reg) -{ - int num_lines, slack; - - /* Ensure we properly floor/ceil the non-line aligned/sized requests - * and have @start - aligned to cache line and integral @num_lines. - * This however can be avoided for page sized since: - * -@start will be cache-line aligned already (being page aligned) - * -@sz will be integral multiple of line size (being page sized). - */ - if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) { - slack = start & ~DCACHE_LINE_MASK; - sz += slack; - start -= slack; - } - - num_lines = DIV_ROUND_UP(sz, ARC_DCACHE_LINE_LEN); - - while (num_lines-- > 0) { -#if (CONFIG_ARC_MMU_VER > 2) - /* - * Just as for I$, in MMU v3, D$ ops also require - * "tag" bits in DC_PTAG, "index" bits in FLDL,IVDL ops - * But we pass phy addr for both. This works since Linux - * doesn't support aliasing configs for D$, yet. - * Thus paddr is enough to provide both tag and index. - */ - write_aux_reg(ARC_REG_DC_PTAG, start); -#endif - write_aux_reg(aux_reg, start); - start += ARC_DCACHE_LINE_LEN; - } -} +/* For kernel mappings cache operation: index is same as paddr */ +#define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op) /* * D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback) */ -static inline void __dc_line_op(unsigned long start, unsigned long sz, - const int cacheop) +static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int cacheop) { unsigned long flags, tmp = tmp; - int aux; local_irq_save(flags); @@ -361,12 +321,7 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz, write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH); } - if (cacheop & OP_INV) /* Inv / flush-n-inv use same cmd reg */ - aux = ARC_REG_DC_IVDL; - else - aux = ARC_REG_DC_FLDL; - - __dc_line_loop(start, sz, aux); + __cache_line_loop(paddr, vaddr, sz, cacheop); if (cacheop & OP_FLUSH) /* flush / flush-n-inv both wait */ wait_for_flush(); @@ -381,7 +336,8 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz, #else #define __dc_entire_op(cacheop) -#define __dc_line_op(start, sz, cacheop) +#define __dc_line_op(paddr, vaddr, sz, cacheop) +#define __dc_line_op_k(paddr, sz, cacheop) #endif /* CONFIG_ARC_HAS_DCACHE */ @@ -391,75 +347,38 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz, /* * I-Cache Aliasing in ARC700 VIPT caches * - * For fetching code from I$, ARC700 uses vaddr (embedded in program code) - * to "index" into SET of cache-line and paddr from MMU to match the TAG - * in the WAYS of SET. - * - * However the CDU iterface (to flush/inv) lines from software, only takes - * paddr (to have simpler hardware interface). For simpler cases, using paddr - * alone suffices. - * e.g. 2-way-set-assoc, 16K I$ (8k MMU pg sz, 32b cache line size): - * way_sz = cache_sz / num_ways = 16k/2 = 8k - * num_sets = way_sz / line_sz = 8k/32 = 256 => 8 bits - * Ignoring the bottom 5 bits corresp to the off within a 32b cacheline, - * bits req for calc set-index = bits 12:5 (0 based). Since this range fits - * inside the bottom 13 bits of paddr, which are same for vaddr and paddr - * (with 8k pg sz), paddr alone can be safely used by CDU to unambigously - * locate a cache-line. + * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag. + * The orig Cache Management Module "CDU" only required paddr to invalidate a + * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry. + * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching + * the exact same line. * - * However for a difft sized cache, say 32k I$, above math yields need - * for 14 bits of vaddr to locate a cache line, which can't be provided by - * paddr, since the bit 13 (0 based) might differ between the two. - * - * This lack of extra bits needed for correct line addressing, defines the - * classical problem of Cache aliasing with VIPT architectures - * num_aliases = 1 << extra_bits - * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz => 2 aliases - * 2-way-set-assoc, 64K I$ with 8k MMU pg sz => 4 aliases - * 2-way-set-assoc, 16K I$ with 8k MMU pg sz => NO aliases + * However for larger Caches (way-size > page-size) - i.e. in Aliasing config, + * paddr alone could not be used to correctly index the cache. * * ------------------ * MMU v1/v2 (Fixed Page Size 8k) * ------------------ * The solution was to provide CDU with these additonal vaddr bits. These - * would be bits [x:13], x would depend on cache-geom. + * would be bits [x:13], x would depend on cache-geometry, 13 comes from + * standard page size of 8k. * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the * orig 5 bits of paddr were anyways ignored by CDU line ops, as they * represent the offset within cache-line. The adv of using this "clumsy" - * interface for additional info was no new reg was needed in CDU. + * interface for additional info was no new reg was needed in CDU programming + * model. * * 17:13 represented the max num of bits passable, actual bits needed were * fewer, based on the num-of-aliases possible. * -for 2 alias possibility, only bit 13 needed (32K cache) * -for 4 alias possibility, bits 14:13 needed (64K cache) * - * Since vaddr was not available for all instances of I$ flush req by core - * kernel, the only safe way (non-optimal though) was to kill all possible - * lines which could represent an alias (even if they didnt represent one - * in execution). - * e.g. for 64K I$, 4 aliases possible, so we did - * flush start - * flush start | 0x01 - * flush start | 0x2 - * flush start | 0x3 - * - * The penalty was invoking the operation itself, since tag match is anyways - * paddr based, a line which didn't represent an alias would not match the - * paddr, hence wont be killed - * - * Note that aliasing concerns are independent of line-sz for a given cache - * geometry (size + set_assoc) because the extra bits required by line-sz are - * reduced from the set calc. - * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz and using math above - * 32b line-sz: 9 bits set-index-calc, 5 bits offset-in-line => 1 extra bit - * 64b line-sz: 8 bits set-index-calc, 6 bits offset-in-line => 1 extra bit - * * ------------------ * MMU v3 * ------------------ - * This ver of MMU supports var page sizes (1k-16k) - Linux will support - * 8k (default), 16k and 4k. + * This ver of MMU supports variable page sizes (1k-16k): although Linux will + * only support 8k (default), 16k and 4k. * However from hardware perspective, smaller page sizes aggrevate aliasing * meaning more vaddr bits needed to disambiguate the cache-line-op ; * the existing scheme of piggybacking won't work for certain configurations. @@ -468,144 +387,44 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz, */ /*********************************************************** - * Machine specific helpers for per line I-Cache invalidate. - * 3 routines to accpunt for 1, 2, 4 aliases possible + * Machine specific helper for per line I-Cache invalidate. */ - -static void __ic_line_inv_no_alias(unsigned long start, int num_lines) -{ - while (num_lines-- > 0) { -#if (CONFIG_ARC_MMU_VER > 2) - write_aux_reg(ARC_REG_IC_PTAG, start); -#endif - write_aux_reg(ARC_REG_IC_IVIL, start); - start += ARC_ICACHE_LINE_LEN; - } -} - -static void __ic_line_inv_2_alias(unsigned long start, int num_lines) -{ - while (num_lines-- > 0) { - -#if (CONFIG_ARC_MMU_VER > 2) - /* - * MMU v3, CDU prog model (for line ops) now uses a new IC_PTAG - * reg to pass the "tag" bits and existing IVIL reg only looks - * at bits relevant for "index" (details above) - * Programming Notes: - * -when writing tag to PTAG reg, bit chopping can be avoided, - * CDU ignores non-tag bits. - * -Ideally "index" must be computed from vaddr, but it is not - * avail in these rtns. So to be safe, we kill the lines in all - * possible indexes corresp to num of aliases possible for - * given cache config. - */ - write_aux_reg(ARC_REG_IC_PTAG, start); - write_aux_reg(ARC_REG_IC_IVIL, - start & ~(0x1 << PAGE_SHIFT)); - write_aux_reg(ARC_REG_IC_IVIL, start | (0x1 << PAGE_SHIFT)); -#else - write_aux_reg(ARC_REG_IC_IVIL, start); - write_aux_reg(ARC_REG_IC_IVIL, start | 0x01); -#endif - start += ARC_ICACHE_LINE_LEN; - } -} - -static void __ic_line_inv_4_alias(unsigned long start, int num_lines) -{ - while (num_lines-- > 0) { - -#if (CONFIG_ARC_MMU_VER > 2) - write_aux_reg(ARC_REG_IC_PTAG, start); - - write_aux_reg(ARC_REG_IC_IVIL, - start & ~(0x3 << PAGE_SHIFT)); - write_aux_reg(ARC_REG_IC_IVIL, - start & ~(0x2 << PAGE_SHIFT)); - write_aux_reg(ARC_REG_IC_IVIL, - start & ~(0x1 << PAGE_SHIFT)); - write_aux_reg(ARC_REG_IC_IVIL, start | (0x3 << PAGE_SHIFT)); -#else - write_aux_reg(ARC_REG_IC_IVIL, start); - write_aux_reg(ARC_REG_IC_IVIL, start | 0x01); - write_aux_reg(ARC_REG_IC_IVIL, start | 0x02); - write_aux_reg(ARC_REG_IC_IVIL, start | 0x03); -#endif - start += ARC_ICACHE_LINE_LEN; - } -} - -static void __ic_line_inv(unsigned long start, unsigned long sz) +static void __ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr, + unsigned long sz) { unsigned long flags; - int num_lines, slack; - - /* - * Ensure we properly floor/ceil the non-line aligned/sized requests - * and have @start - aligned to cache line, and integral @num_lines - * However page sized flushes can be compile time optimised. - * -@start will be cache-line aligned already (being page aligned) - * -@sz will be integral multiple of line size (being page sized). - */ - if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) { - slack = start & ~ICACHE_LINE_MASK; - sz += slack; - start -= slack; - } - - num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN); local_irq_save(flags); - (*___flush_icache_rtn) (start, num_lines); + __cache_line_loop(paddr, vaddr, sz, OP_INV_IC); local_irq_restore(flags); } -/* Unlike routines above, having vaddr for flush op (along with paddr), - * prevents the need to speculatively kill the lines in multiple sets - * based on ratio of way_sz : pg_sz - */ -static void __ic_line_inv_vaddr(unsigned long phy_start, - unsigned long vaddr, unsigned long sz) +static inline void __ic_entire_inv(void) { - unsigned long flags; - int num_lines, slack; - unsigned int addr; - - slack = phy_start & ~ICACHE_LINE_MASK; - sz += slack; - phy_start -= slack; - num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN); - -#if (CONFIG_ARC_MMU_VER > 2) - vaddr &= ~ICACHE_LINE_MASK; - addr = phy_start; -#else - /* bits 17:13 of vaddr go as bits 4:0 of paddr */ - addr = phy_start | ((vaddr >> 13) & 0x1F); -#endif + write_aux_reg(ARC_REG_IC_IVIC, 1); + read_aux_reg(ARC_REG_IC_CTRL); /* blocks */ +} - local_irq_save(flags); - while (num_lines-- > 0) { -#if (CONFIG_ARC_MMU_VER > 2) - /* tag comes from phy addr */ - write_aux_reg(ARC_REG_IC_PTAG, addr); +struct ic_line_inv_vaddr_ipi { + unsigned long paddr, vaddr; + int sz; +}; - /* index bits come from vaddr */ - write_aux_reg(ARC_REG_IC_IVIL, vaddr); - vaddr += ARC_ICACHE_LINE_LEN; -#else - /* this paddr contains vaddrs bits as needed */ - write_aux_reg(ARC_REG_IC_IVIL, addr); -#endif - addr += ARC_ICACHE_LINE_LEN; - } - local_irq_restore(flags); +static void __ic_line_inv_vaddr_helper(void *info) +{ + struct ic_line_inv_vaddr_ipi *ic_inv = (struct ic_line_inv_vaddr_ipi*) info; + __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz); } +static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, + unsigned long sz) +{ + struct ic_line_inv_vaddr_ipi ic_inv = { paddr, vaddr , sz}; + on_each_cpu(__ic_line_inv_vaddr_helper, &ic_inv, 1); +} #else -#define __ic_line_inv(start, sz) +#define __ic_entire_inv() #define __ic_line_inv_vaddr(pstart, vstart, sz) #endif /* CONFIG_ARC_HAS_ICACHE */ @@ -615,35 +434,72 @@ static void __ic_line_inv_vaddr(unsigned long phy_start, * Exported APIs */ -/* TBD: use pg_arch_1 to optimize this */ +/* + * Handle cache congruency of kernel and userspace mappings of page when kernel + * writes-to/reads-from + * + * The idea is to defer flushing of kernel mapping after a WRITE, possible if: + * -dcache is NOT aliasing, hence any U/K-mappings of page are congruent + * -U-mapping doesn't exist yet for page (finalised in update_mmu_cache) + * -In SMP, if hardware caches are coherent + * + * There's a corollary case, where kernel READs from a userspace mapped page. + * If the U-mapping is not congruent to to K-mapping, former needs flushing. + */ void flush_dcache_page(struct page *page) { - __dc_line_op((unsigned long)page_address(page), PAGE_SIZE, OP_FLUSH); + struct address_space *mapping; + + if (!cache_is_vipt_aliasing()) { + clear_bit(PG_dc_clean, &page->flags); + return; + } + + /* don't handle anon pages here */ + mapping = page_mapping(page); + if (!mapping) + return; + + /* + * pagecache page, file not yet mapped to userspace + * Make a note that K-mapping is dirty + */ + if (!mapping_mapped(mapping)) { + clear_bit(PG_dc_clean, &page->flags); + } else if (page_mapped(page)) { + + /* kernel reading from page with U-mapping */ + void *paddr = page_address(page); + unsigned long vaddr = page->index << PAGE_CACHE_SHIFT; + + if (addr_not_cache_congruent(paddr, vaddr)) + __flush_dcache_page(paddr, vaddr); + } } EXPORT_SYMBOL(flush_dcache_page); void dma_cache_wback_inv(unsigned long start, unsigned long sz) { - __dc_line_op(start, sz, OP_FLUSH_N_INV); + __dc_line_op_k(start, sz, OP_FLUSH_N_INV); } EXPORT_SYMBOL(dma_cache_wback_inv); void dma_cache_inv(unsigned long start, unsigned long sz) { - __dc_line_op(start, sz, OP_INV); + __dc_line_op_k(start, sz, OP_INV); } EXPORT_SYMBOL(dma_cache_inv); void dma_cache_wback(unsigned long start, unsigned long sz) { - __dc_line_op(start, sz, OP_FLUSH); + __dc_line_op_k(start, sz, OP_FLUSH); } EXPORT_SYMBOL(dma_cache_wback); /* - * This is API for making I/D Caches consistent when modifying code - * (loadable modules, kprobes, etc) + * This is API for making I/D Caches consistent when modifying + * kernel code (loadable modules, kprobes, kgdb...) * This is called on insmod, with kernel virtual address for CODE of * the module. ARC cache maintenance ops require PHY address thus we * need to convert vmalloc addr to PHY addr @@ -652,7 +508,6 @@ void flush_icache_range(unsigned long kstart, unsigned long kend) { unsigned int tot_sz, off, sz; unsigned long phy, pfn; - unsigned long flags; /* printk("Kernel Cache Cohenercy: %lx to %lx\n",kstart, kend); */ @@ -673,8 +528,13 @@ void flush_icache_range(unsigned long kstart, unsigned long kend) /* Case: Kernel Phy addr (0x8000_0000 onwards) */ if (likely(kstart > PAGE_OFFSET)) { - __ic_line_inv(kstart, kend - kstart); - __dc_line_op(kstart, kend - kstart, OP_FLUSH); + /* + * The 2nd arg despite being paddr will be used to index icache + * This is OK since no alternate virtual mappings will exist + * given the callers for this case: kprobe/kgdb in built-in + * kernel code only. + */ + __sync_icache_dcache(kstart, kstart, kend - kstart); return; } @@ -692,70 +552,144 @@ void flush_icache_range(unsigned long kstart, unsigned long kend) pfn = vmalloc_to_pfn((void *)kstart); phy = (pfn << PAGE_SHIFT) + off; sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off); - local_irq_save(flags); - __dc_line_op(phy, sz, OP_FLUSH); - __ic_line_inv(phy, sz); - local_irq_restore(flags); + __sync_icache_dcache(phy, kstart, sz); kstart += sz; tot_sz -= sz; } } /* - * Optimised ver of flush_icache_range() with spec callers: ptrace/signals - * where vaddr is also available. This allows passing both vaddr and paddr - * bits to CDU for cache flush, short-circuting the current pessimistic algo - * which kills all possible aliases. - * An added adv of knowing that vaddr is user-vaddr avoids various checks - * and handling for k-vaddr, k-paddr as done in orig ver above + * General purpose helper to make I and D cache lines consistent. + * @paddr is phy addr of region + * @vaddr is typically user vaddr (breakpoint) or kernel vaddr (vmalloc) + * However in one instance, when called by kprobe (for a breakpt in + * builtin kernel code) @vaddr will be paddr only, meaning CDU operation will + * use a paddr to index the cache (despite VIPT). This is fine since since a + * builtin kernel page will not have any virtual mappings. + * kprobe on loadable module will be kernel vaddr. */ -void flush_icache_range_vaddr(unsigned long paddr, unsigned long u_vaddr, - int len) +void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len) { - __ic_line_inv_vaddr(paddr, u_vaddr, len); - __dc_line_op(paddr, len, OP_FLUSH); + __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV); + __ic_line_inv_vaddr(paddr, vaddr, len); +} + +/* wrapper to compile time eliminate alignment checks in flush loop */ +void __inv_icache_page(unsigned long paddr, unsigned long vaddr) +{ + __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE); } /* - * XXX: This also needs to be optim using pg_arch_1 - * This is called when a page-cache page is about to be mapped into a - * user process' address space. It offers an opportunity for a - * port to ensure d-cache/i-cache coherency if necessary. + * wrapper to clearout kernel or userspace mappings of a page + * For kernel mappings @vaddr == @paddr */ -void flush_icache_page(struct vm_area_struct *vma, struct page *page) +void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr) { - if (!(vma->vm_flags & VM_EXEC)) - return; - - __ic_line_inv((unsigned long)page_address(page), PAGE_SIZE); + __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV); } -void flush_icache_all(void) +noinline void flush_cache_all(void) { unsigned long flags; local_irq_save(flags); - write_aux_reg(ARC_REG_IC_IVIC, 1); + __ic_entire_inv(); + __dc_entire_op(OP_FLUSH_N_INV); - /* lr will not complete till the icache inv operation is not over */ - read_aux_reg(ARC_REG_IC_CTRL); local_irq_restore(flags); + } -noinline void flush_cache_all(void) +#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING + +void flush_cache_mm(struct mm_struct *mm) { - unsigned long flags; + flush_cache_all(); +} - local_irq_save(flags); +void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, + unsigned long pfn) +{ + unsigned int paddr = pfn << PAGE_SHIFT; - flush_icache_all(); - __dc_entire_op(OP_FLUSH_N_INV); + u_vaddr &= PAGE_MASK; - local_irq_restore(flags); + ___flush_dcache_page(paddr, u_vaddr); + + if (vma->vm_flags & VM_EXEC) + __inv_icache_page(paddr, u_vaddr); +} + +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + flush_cache_all(); +} + +void flush_anon_page(struct vm_area_struct *vma, struct page *page, + unsigned long u_vaddr) +{ + /* TBD: do we really need to clear the kernel mapping */ + __flush_dcache_page(page_address(page), u_vaddr); + __flush_dcache_page(page_address(page), page_address(page)); } +#endif + +void copy_user_highpage(struct page *to, struct page *from, + unsigned long u_vaddr, struct vm_area_struct *vma) +{ + void *kfrom = page_address(from); + void *kto = page_address(to); + int clean_src_k_mappings = 0; + + /* + * If SRC page was already mapped in userspace AND it's U-mapping is + * not congruent with K-mapping, sync former to physical page so that + * K-mapping in memcpy below, sees the right data + * + * Note that while @u_vaddr refers to DST page's userspace vaddr, it is + * equally valid for SRC page as well + */ + if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) { + __flush_dcache_page(kfrom, u_vaddr); + clean_src_k_mappings = 1; + } + + copy_page(kto, kfrom); + + /* + * Mark DST page K-mapping as dirty for a later finalization by + * update_mmu_cache(). Although the finalization could have been done + * here as well (given that both vaddr/paddr are available). + * But update_mmu_cache() already has code to do that for other + * non copied user pages (e.g. read faults which wire in pagecache page + * directly). + */ + clear_bit(PG_dc_clean, &to->flags); + + /* + * if SRC was already usermapped and non-congruent to kernel mapping + * sync the kernel mapping back to physical page + */ + if (clean_src_k_mappings) { + __flush_dcache_page(kfrom, kfrom); + set_bit(PG_dc_clean, &from->flags); + } else { + clear_bit(PG_dc_clean, &from->flags); + } +} + +void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) +{ + clear_page(to); + clear_bit(PG_dc_clean, &page->flags); +} + + /********************************************************************** * Explicit Cache flush request from user space via syscall * Needed for JITs which generate code on the fly diff --git a/arch/arc/mm/extable.c b/arch/arc/mm/extable.c index 014172ba843..aa652e28132 100644 --- a/arch/arc/mm/extable.c +++ b/arch/arc/mm/extable.c @@ -27,7 +27,7 @@ int fixup_exception(struct pt_regs *regs) #ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE -long arc_copy_from_user_noinline(void *to, const void __user * from, +long arc_copy_from_user_noinline(void *to, const void __user *from, unsigned long n) { return __arc_copy_from_user(to, from, n); @@ -48,7 +48,7 @@ unsigned long arc_clear_user_noinline(void __user *to, } EXPORT_SYMBOL(arc_clear_user_noinline); -long arc_strncpy_from_user_noinline (char *dst, const char __user *src, +long arc_strncpy_from_user_noinline(char *dst, const char __user *src, long count) { return __arc_strncpy_from_user(dst, src, count); diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index af55aab803d..9c69552350c 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -12,12 +12,12 @@ #include <linux/sched.h> #include <linux/errno.h> #include <linux/ptrace.h> -#include <linux/version.h> #include <linux/uaccess.h> #include <linux/kdebug.h> #include <asm/pgalloc.h> +#include <asm/mmu.h> -static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address) +static int handle_vmalloc_fault(unsigned long address) { /* * Synchronize this task's top level page-table @@ -27,7 +27,7 @@ static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address) pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; - pgd = pgd_offset_fast(mm, address); + pgd = pgd_offset_fast(current->active_mm, address); pgd_k = pgd_offset_k(address); if (!pgd_present(*pgd_k)) @@ -52,16 +52,15 @@ bad_area: return 1; } -void do_page_fault(struct pt_regs *regs, int write, unsigned long address, - unsigned long cause_code) +void do_page_fault(unsigned long address, struct pt_regs *regs) { struct vm_area_struct *vma = NULL; struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; siginfo_t info; int fault, ret; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0); + int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */ + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; /* * We fault-in kernel-space virtual memory on-demand. The @@ -73,7 +72,7 @@ void do_page_fault(struct pt_regs *regs, int write, unsigned long address, * nothing more. */ if (address >= VMALLOC_START && address <= VMALLOC_END) { - ret = handle_vmalloc_fault(mm, address); + ret = handle_vmalloc_fault(address); if (unlikely(ret)) goto bad_area_nosemaphore; else @@ -89,6 +88,8 @@ void do_page_fault(struct pt_regs *regs, int write, unsigned long address, if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); @@ -110,18 +111,19 @@ good_area: /* Handle protection violation, execute on heap or stack */ - if (cause_code == ((ECR_V_PROTV << 16) | ECR_C_PROTV_INST_FETCH)) + if ((regs->ecr_vec == ECR_V_PROTV) && + (regs->ecr_cause == ECR_C_PROTV_INST_FETCH)) goto bad_area; if (write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } -survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo @@ -177,7 +179,6 @@ bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { tsk->thread.fault_address = address; - tsk->thread.cause_code = cause_code; info.si_signo = SIGSEGV; info.si_errno = 0; /* info.si_code has been set above */ @@ -198,17 +199,15 @@ no_context: if (fixup_exception(regs)) return; - die("Oops", regs, address, cause_code); + die("Oops", regs, address); out_of_memory: - if (is_global_init(tsk)) { - yield(); - goto survive; - } up_read(&mm->mmap_sem); - if (user_mode(regs)) - do_group_exit(SIGKILL); /* This will never return */ + if (user_mode(regs)) { + pagefault_out_of_memory(); + return; + } goto no_context; @@ -219,7 +218,6 @@ do_sigbus: goto no_context; tsk->thread.fault_address = address; - tsk->thread.cause_code = cause_code; info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRERR; diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index caf797de23f..523412369f7 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -10,8 +10,8 @@ #include <linux/mm.h> #include <linux/bootmem.h> #include <linux/memblock.h> -#ifdef CONFIG_BLOCK_DEV_RAM -#include <linux/blk.h> +#ifdef CONFIG_BLK_DEV_INITRD +#include <linux/initrd.h> #endif #include <linux/swap.h> #include <linux/module.h> @@ -45,6 +45,24 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) pr_info("Memory size set via devicetree %ldM\n", TO_MB(arc_mem_sz)); } +#ifdef CONFIG_BLK_DEV_INITRD +static int __init early_initrd(char *p) +{ + unsigned long start, size; + char *endp; + + start = memparse(p, &endp); + if (*endp == ',') { + size = memparse(endp + 1, NULL); + + initrd_start = (unsigned long)__va(start); + initrd_end = (unsigned long)__va(start + size); + } + return 0; +} +early_param("initrd", early_initrd); +#endif + /* * First memory setup routine called from setup_arch() * 1. setup swapper's mm @init_mm @@ -77,17 +95,23 @@ void __init setup_arch_memory(void) /* Last usable page of low mem (no HIGHMEM yet for ARC port) */ max_low_pfn = max_pfn = PFN_DOWN(end_mem); - max_mapnr = num_physpages = max_low_pfn - min_low_pfn; + max_mapnr = max_low_pfn - min_low_pfn; /*------------- reserve kernel image -----------------------*/ memblock_reserve(CONFIG_LINUX_LINK_BASE, __pa(_end) - CONFIG_LINUX_LINK_BASE); +#ifdef CONFIG_BLK_DEV_INITRD + /*------------- reserve initrd image -----------------------*/ + if (initrd_start) + memblock_reserve(__pa(initrd_start), initrd_end - initrd_start); +#endif + memblock_dump_all(); /*-------------- node setup --------------------------------*/ memset(zones_size, 0, sizeof(zones_size)); - zones_size[ZONE_NORMAL] = num_physpages; + zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn; /* * We can't use the helper free_area_init(zones[]) because it uses @@ -109,56 +133,9 @@ void __init setup_arch_memory(void) */ void __init mem_init(void) { - int codesize, datasize, initsize, reserved_pages, free_pages; - int tmp; - high_memory = (void *)(CONFIG_LINUX_LINK_BASE + arc_mem_sz); - - totalram_pages = free_all_bootmem(); - - /* count all reserved pages [kernel code/data/mem_map..] */ - reserved_pages = 0; - for (tmp = 0; tmp < max_mapnr; tmp++) - if (PageReserved(mem_map + tmp)) - reserved_pages++; - - /* XXX: nr_free_pages() is equivalent */ - free_pages = max_mapnr - reserved_pages; - - /* - * For the purpose of display below, split the "reserve mem" - * kernel code/data is already shown explicitly, - * Show any other reservations (mem_map[ ] et al) - */ - reserved_pages -= (((unsigned int)_end - CONFIG_LINUX_LINK_BASE) >> - PAGE_SHIFT); - - codesize = _etext - _text; - datasize = _end - _etext; - initsize = __init_end - __init_begin; - - pr_info("Memory Available: %dM / %ldM (%dK code, %dK data, %dK init, %dK reserv)\n", - PAGES_TO_MB(free_pages), - TO_MB(arc_mem_sz), - TO_KB(codesize), TO_KB(datasize), TO_KB(initsize), - PAGES_TO_KB(reserved_pages)); -} - -static void __init free_init_pages(const char *what, unsigned long begin, - unsigned long end) -{ - unsigned long addr; - - pr_info("Freeing %s: %ldk [%lx] to [%lx]\n", - what, TO_KB(end - begin), begin, end); - - /* need to check that the page we free is not a partial page */ - for (addr = begin; addr + PAGE_SIZE <= end; addr += PAGE_SIZE) { - ClearPageReserved(virt_to_page(addr)); - init_page_count(virt_to_page(addr)); - free_page(addr); - totalram_pages++; - } + free_all_bootmem(); + mem_init_print_info(NULL); } /* @@ -166,22 +143,12 @@ static void __init free_init_pages(const char *what, unsigned long begin, */ void __init_refok free_initmem(void) { - free_init_pages("unused kernel memory", - (unsigned long)__init_begin, - (unsigned long)__init_end); + free_initmem_default(-1); } #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { - free_init_pages("initrd memory", start, end); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif - -#ifdef CONFIG_OF_FLATTREE -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) -{ - pr_err("%s(%lx, %lx)\n", __func__, start, end); -} -#endif /* CONFIG_OF_FLATTREE */ diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c index 3e5c92c7993..739e65f355d 100644 --- a/arch/arc/mm/ioremap.c +++ b/arch/arc/mm/ioremap.c @@ -12,7 +12,7 @@ #include <linux/io.h> #include <linux/mm.h> #include <linux/slab.h> -#include <asm/cache.h> +#include <linux/cache.h> void __iomem *ioremap(unsigned long paddr, unsigned long size) { diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c new file mode 100644 index 00000000000..2e06d56e987 --- /dev/null +++ b/arch/arc/mm/mmap.c @@ -0,0 +1,78 @@ +/* + * ARC700 mmap + * + * (started from arm version - for VIPT alias handling) + * + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/sched.h> +#include <asm/cacheflush.h> + +#define COLOUR_ALIGN(addr, pgoff) \ + ((((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) + \ + (((pgoff) << PAGE_SHIFT) & (SHMLBA - 1))) + +/* + * Ensure that shared mappings are correctly aligned to + * avoid aliasing issues with VIPT caches. + * We need to ensure that + * a specific page of an object is always mapped at a multiple of + * SHMLBA bytes. + */ +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int do_align = 0; + int aliasing = cache_is_vipt_aliasing(); + struct vm_unmapped_area_info info; + + /* + * We only need to do colour alignment if D cache aliases. + */ + if (aliasing) + do_align = filp || (flags & MAP_SHARED); + + /* + * We enforce the MAP_FIXED case. + */ + if (flags & MAP_FIXED) { + if (aliasing && flags & MAP_SHARED && + (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) + return -EINVAL; + return addr; + } + + if (len > TASK_SIZE) + return -ENOMEM; + + if (addr) { + if (do_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + + info.flags = 0; + info.length = len; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + return vm_unmapped_area(&info); +} diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 9b9ce23f4ec..e1acf0ce564 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -52,10 +52,11 @@ */ #include <linux/module.h> +#include <linux/bug.h> #include <asm/arcregs.h> #include <asm/setup.h> #include <asm/mmu_context.h> -#include <asm/tlb.h> +#include <asm/mmu.h> /* Need for ARC MMU v2 * @@ -97,49 +98,47 @@ * J-TLB entry got evicted/replaced. */ -/* A copy of the ASID from the PID reg is kept in asid_cache */ -int asid_cache = FIRST_ASID; -/* ASID to mm struct mapping. We have one extra entry corresponding to - * NO_ASID to save us a compare when clearing the mm entry for old asid - * see get_new_mmu_context (asm-arc/mmu_context.h) - */ -struct mm_struct *asid_mm_map[NUM_ASID + 1]; +/* A copy of the ASID from the PID reg is kept in asid_cache */ +DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; /* * Utility Routine to erase a J-TLB entry - * The procedure is to look it up in the MMU. If found, ERASE it by - * issuing a TlbWrite CMD with PD0 = PD1 = 0 + * Caller needs to setup Index Reg (manually or via getIndex) */ - -static void __tlb_entry_erase(void) +static inline void __tlb_entry_erase(void) { write_aux_reg(ARC_REG_TLBPD1, 0); write_aux_reg(ARC_REG_TLBPD0, 0); write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } -static void tlb_entry_erase(unsigned int vaddr_n_asid) +static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid) { unsigned int idx; - /* Locate the TLB entry for this vaddr + ASID */ write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe); idx = read_aux_reg(ARC_REG_TLBINDEX); + return idx; +} + +static void tlb_entry_erase(unsigned int vaddr_n_asid) +{ + unsigned int idx; + + /* Locate the TLB entry for this vaddr + ASID */ + idx = tlb_entry_lkup(vaddr_n_asid); + /* No error means entry found, zero it out */ if (likely(!(idx & TLB_LKUP_ERR))) { __tlb_entry_erase(); - } else { /* Some sort of Error */ - + } else { /* Duplicate entry error */ - if (idx & 0x1) { - /* TODO we need to handle this case too */ - pr_emerg("unhandled Duplicate flush for %x\n", - vaddr_n_asid); - } - /* else entry not found so nothing to do */ + WARN(idx == TLB_DUP_ERR, "Probe returned Dup PD for %x\n", + vaddr_n_asid); } } @@ -158,7 +157,7 @@ static void utlb_invalidate(void) { #if (CONFIG_ARC_MMU_VER >= 2) -#if (CONFIG_ARC_MMU_VER < 3) +#if (CONFIG_ARC_MMU_VER == 2) /* MMU v2 introduced the uTLB Flush command. * There was however an obscure hardware bug, where uTLB flush would * fail when a prior probe for J-TLB (both totally unrelated) would @@ -181,6 +180,36 @@ static void utlb_invalidate(void) } +static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) +{ + unsigned int idx; + + /* + * First verify if entry for this vaddr+ASID already exists + * This also sets up PD0 (vaddr, ASID..) for final commit + */ + idx = tlb_entry_lkup(pd0); + + /* + * If Not already present get a free slot from MMU. + * Otherwise, Probe would have located the entry and set INDEX Reg + * with existing location. This will cause Write CMD to over-write + * existing entry with new PD0 and PD1 + */ + if (likely(idx & TLB_LKUP_ERR)) + write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex); + + /* setup the other half of TLB entry (pfn, rwx..) */ + write_aux_reg(ARC_REG_TLBPD1, pd1); + + /* + * Commit the Entry to MMU + * It doesnt sound safe to use the TLBWriteNI cmd here + * which doesn't flush uTLBs. I'd rather be safe than sorry. + */ + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); +} + /* * Un-conditionally (without lookup) erase the entire MMU contents */ @@ -223,13 +252,14 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm) return; /* - * Workaround for Android weirdism: - * A binder VMA could end up in a task such that vma->mm != tsk->mm - * old code would cause h/w - s/w ASID to get out of sync + * - Move to a new ASID, but only if the mm is still wired in + * (Android Binder ended up calling this for vma->mm != tsk->mm, + * causing h/w - s/w ASID to get out of sync) + * - Also get_new_mmu_context() new implementation allocates a new + * ASID only if it is not allocated already - so unallocate first */ - if (current->mm != mm) - destroy_context(mm); - else + destroy_context(mm); + if (current->mm == mm) get_new_mmu_context(mm); } @@ -244,8 +274,8 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm) void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + const unsigned int cpu = smp_processor_id(); unsigned long flags; - unsigned int asid; /* If range @start to @end is more than 32 TLB entries deep, * its better to move to a new ASID rather than searching for @@ -267,11 +297,10 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, start &= PAGE_MASK; local_irq_save(flags); - asid = vma->vm_mm->context.asid; - if (asid != NO_ASID) { + if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) { while (start < end) { - tlb_entry_erase(start | (asid & 0xff)); + tlb_entry_erase(start | hw_pid(vma->vm_mm, cpu)); start += PAGE_SIZE; } } @@ -318,6 +347,7 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) { + const unsigned int cpu = smp_processor_id(); unsigned long flags; /* Note that it is critical that interrupts are DISABLED between @@ -325,23 +355,95 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) */ local_irq_save(flags); - if (vma->vm_mm->context.asid != NO_ASID) { - tlb_entry_erase((page & PAGE_MASK) | - (vma->vm_mm->context.asid & 0xff)); + if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) { + tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu)); utlb_invalidate(); } local_irq_restore(flags); } +#ifdef CONFIG_SMP + +struct tlb_args { + struct vm_area_struct *ta_vma; + unsigned long ta_start; + unsigned long ta_end; +}; + +static inline void ipi_flush_tlb_page(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_tlb_page(ta->ta_vma, ta->ta_start); +} + +static inline void ipi_flush_tlb_range(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); +} + +static inline void ipi_flush_tlb_kernel_range(void *arg) +{ + struct tlb_args *ta = (struct tlb_args *)arg; + + local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end); +} + +void flush_tlb_all(void) +{ + on_each_cpu((smp_call_func_t)local_flush_tlb_all, NULL, 1); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + on_each_cpu_mask(mm_cpumask(mm), (smp_call_func_t)local_flush_tlb_mm, + mm, 1); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = uaddr + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page, &ta, 1); +} + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = start, + .ta_end = end + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1); +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + struct tlb_args ta = { + .ta_start = start, + .ta_end = end + }; + + on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1); +} +#endif + /* * Routine to create a TLB entry */ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { unsigned long flags; - unsigned int idx, asid_or_sasid; - unsigned long pd0_flags; + unsigned int asid_or_sasid, rwx; + unsigned long pd0, pd1; /* * create_tlb() assumes that current->mm == vma->mm, since @@ -373,73 +475,112 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) local_irq_save(flags); - tlb_paranoid_check(vma->vm_mm->context.asid, address); + tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), address); address &= PAGE_MASK; /* update this PTE credentials */ pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED); - /* Create HW TLB entry Flags (in PD0) from PTE Flags */ -#if (CONFIG_ARC_MMU_VER <= 2) - pd0_flags = ((pte_val(*ptep) & PTE_BITS_IN_PD0) >> 1); -#else - pd0_flags = ((pte_val(*ptep) & PTE_BITS_IN_PD0)); -#endif + /* Create HW TLB(PD0,PD1) from PTE */ /* ASID for this task */ asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff; - write_aux_reg(ARC_REG_TLBPD0, address | pd0_flags | asid_or_sasid); - - /* Load remaining info in PD1 (Page Frame Addr and Kx/Kw/Kr Flags) */ - write_aux_reg(ARC_REG_TLBPD1, (pte_val(*ptep) & PTE_BITS_IN_PD1)); - - /* First verify if entry for this vaddr+ASID already exists */ - write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe); - idx = read_aux_reg(ARC_REG_TLBINDEX); + pd0 = address | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0); /* - * If Not already present get a free slot from MMU. - * Otherwise, Probe would have located the entry and set INDEX Reg - * with existing location. This will cause Write CMD to over-write - * existing entry with new PD0 and PD1 + * ARC MMU provides fully orthogonal access bits for K/U mode, + * however Linux only saves 1 set to save PTE real-estate + * Here we convert 3 PTE bits into 6 MMU bits: + * -Kernel only entries have Kr Kw Kx 0 0 0 + * -User entries have mirrored K and U bits */ - if (likely(idx & TLB_LKUP_ERR)) - write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex); + rwx = pte_val(*ptep) & PTE_BITS_RWX; - /* - * Commit the Entry to MMU - * It doesnt sound safe to use the TLBWriteNI cmd here - * which doesn't flush uTLBs. I'd rather be safe than sorry. - */ - write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); + if (pte_val(*ptep) & _PAGE_GLOBAL) + rwx <<= 3; /* r w x => Kr Kw Kx 0 0 0 */ + else + rwx |= (rwx << 3); /* r w x => Kr Kw Kx Ur Uw Ux */ + + pd1 = rwx | (pte_val(*ptep) & PTE_BITS_NON_RWX_IN_PD1); + + tlb_entry_insert(pd0, pd1); local_irq_restore(flags); } -/* arch hook called by core VM at the end of handle_mm_fault( ), - * when a new PTE is entered in Page Tables or an existing one - * is modified. We aggresively pre-install a TLB entry +/* + * Called at the end of pagefault, for a userspace mapped page + * -pre-install the corresponding TLB entry into MMU + * -Finalize the delayed D-cache flush of kernel mapping of page due to + * flush_dcache_page(), copy_user_page() + * + * Note that flush (when done) involves both WBACK - so physical page is + * in sync as well as INV - so any non-congruent aliases don't remain */ - -void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddress, +void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, pte_t *ptep) { + unsigned long vaddr = vaddr_unaligned & PAGE_MASK; + unsigned long paddr = pte_val(*ptep) & PAGE_MASK; + struct page *page = pfn_to_page(pte_pfn(*ptep)); + + create_tlb(vma, vaddr, ptep); - create_tlb(vma, vaddress, ptep); + if (page == ZERO_PAGE(0)) { + return; + } + + /* + * Exec page : Independent of aliasing/page-color considerations, + * since icache doesn't snoop dcache on ARC, any dirty + * K-mapping of a code page needs to be wback+inv so that + * icache fetch by userspace sees code correctly. + * !EXEC page: If K-mapping is NOT congruent to U-mapping, flush it + * so userspace sees the right data. + * (Avoids the flush for Non-exec + congruent mapping case) + */ + if ((vma->vm_flags & VM_EXEC) || + addr_not_cache_congruent(paddr, vaddr)) { + + int dirty = !test_and_set_bit(PG_dc_clean, &page->flags); + if (dirty) { + /* wback + inv dcache lines */ + __flush_dcache_page(paddr, paddr); + + /* invalidate any existing icache lines */ + if (vma->vm_flags & VM_EXEC) + __inv_icache_page(paddr, vaddr); + } + } } /* Read the Cache Build Confuration Registers, Decode them and save into * the cpuinfo structure for later use. * No Validation is done here, simply read/convert the BCRs */ -void __init read_decode_mmu_bcr(void) +void read_decode_mmu_bcr(void) { - unsigned int tmp; - struct bcr_mmu_1_2 *mmu2; /* encoded MMU2 attr */ - struct bcr_mmu_3 *mmu3; /* encoded MMU3 attr */ struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + unsigned int tmp; + struct bcr_mmu_1_2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, ways:4, sets:4, u_itlb:8, u_dtlb:8; +#else + unsigned int u_dtlb:8, u_itlb:8, sets:4, ways:4, ver:8; +#endif + } *mmu2; + + struct bcr_mmu_3 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, ways:4, sets:4, osm:1, reserv:3, pg_sz:4, + u_itlb:4, u_dtlb:4; +#else + unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, reserv:3, osm:1, sets:4, + ways:4, ver:8; +#endif + } *mmu3; tmp = read_aux_reg(ARC_REG_MMU_BCR); mmu->ver = (tmp >> 24); @@ -466,7 +607,7 @@ void __init read_decode_mmu_bcr(void) char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) { int n = 0; - struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu; n += scnprintf(buf + n, len - n, "ARC700 MMU [v%x]\t: %dk PAGE, ", p_mmu->ver, TO_KB(p_mmu->pg_sz)); @@ -475,12 +616,12 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) "J-TLB %d (%dx%d), uDTLB %d, uITLB %d, %s\n", p_mmu->num_tlb, p_mmu->sets, p_mmu->ways, p_mmu->u_dtlb, p_mmu->u_itlb, - __CONFIG_ARC_MMU_SASID_VAL ? "SASID" : ""); + IS_ENABLED(CONFIG_ARC_MMU_SASID) ? "SASID" : ""); return buf; } -void __init arc_mmu_init(void) +void arc_mmu_init(void) { char str[256]; struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; @@ -503,13 +644,6 @@ void __init arc_mmu_init(void) if (mmu->pg_sz != PAGE_SIZE) panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE)); - /* - * ASID mgmt data structures are compile time init - * asid_cache = FIRST_ASID and asid_mm_map[] all zeroes - */ - - local_flush_tlb_all(); - /* Enable the MMU */ write_aux_reg(ARC_REG_PID, MMU_ENABLE); @@ -551,9 +685,9 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, struct pt_regs *regs) { int set, way, n; - unsigned int pd0[4], pd1[4]; /* assume max 4 ways */ unsigned long flags, is_valid; struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + unsigned int pd0[mmu->ways], pd1[mmu->ways]; local_irq_save(flags); @@ -578,7 +712,7 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, continue; /* Scan the set for duplicate ways: needs a nested loop */ - for (way = 0; way < mmu->ways; way++) { + for (way = 0; way < mmu->ways - 1; way++) { if (!pd0[way]) continue; @@ -621,25 +755,28 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, * Low Level ASM TLB handler calls this if it finds that HW and SW ASIDS * don't match */ -void print_asid_mismatch(int is_fast_path) +void print_asid_mismatch(int mm_asid, int mmu_asid, int is_fast_path) { - int pid_sw, pid_hw; - pid_sw = current->active_mm->context.asid; - pid_hw = read_aux_reg(ARC_REG_PID) & 0xff; - pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n", - is_fast_path ? "Fast" : "Slow", pid_sw, pid_hw); + is_fast_path ? "Fast" : "Slow", mm_asid, mmu_asid); __asm__ __volatile__("flag 1"); } -void tlb_paranoid_check(unsigned int pid_sw, unsigned long addr) +void tlb_paranoid_check(unsigned int mm_asid, unsigned long addr) { - unsigned int pid_hw; + unsigned int mmu_asid; - pid_hw = read_aux_reg(ARC_REG_PID) & 0xff; + mmu_asid = read_aux_reg(ARC_REG_PID) & 0xff; - if (addr < 0x70000000 && ((pid_hw != pid_sw) || (pid_sw == NO_ASID))) - print_asid_mismatch(0); + /* + * At the time of a TLB miss/installation + * - HW version needs to match SW version + * - SW needs to have a valid ASID + */ + if (addr < 0x70000000 && + ((mm_asid == MM_CTXT_NO_ASID) || + (mmu_asid != (mm_asid & MM_CTXT_ASID_MASK)))) + print_asid_mismatch(mm_asid, mmu_asid, 0); } #endif diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 9df765dc7c3..79bfc81358c 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -39,22 +39,41 @@ #include <linux/linkage.h> #include <asm/entry.h> -#include <asm/tlb.h> +#include <asm/mmu.h> #include <asm/pgtable.h> #include <asm/arcregs.h> #include <asm/cache.h> #include <asm/processor.h> -#if (CONFIG_ARC_MMU_VER == 1) #include <asm/tlb-mmu1.h> -#endif -;-------------------------------------------------------------------------- -; scratch memory to save the registers (r0-r3) used to code TLB refill Handler -; For details refer to comments before TLBMISS_FREEUP_REGS below +;----------------------------------------------------------------- +; ARC700 Exception Handling doesn't auto-switch stack and it only provides +; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0" +; +; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a +; "global" is used to free-up FIRST core reg to be able to code the rest of +; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe). +; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3 +; need to be saved as well by extending the "global" to be 4 words. Hence +; ".size ex_saved_reg1, 16" +; [All of this dance is to avoid stack switching for each TLB Miss, since we +; only need to save only a handful of regs, as opposed to complete reg file] +; +; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST +; core reg as it will not be SMP safe. +; Thus scratch AUX reg is used (and no longer used to cache task PGD). +; To save the rest of 3 regs - per cpu, the global is made "per-cpu". +; Epilogue thus has to locate the "per-cpu" storage for regs. +; To avoid cache line bouncing the per-cpu global is aligned/sized per +; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence +; ".size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)" + +; As simple as that.... ;-------------------------------------------------------------------------- +; scratch memory to save [r0-r3] used to code TLB refill Handler ARCFP_DATA ex_saved_reg1 - .align 1 << L1_CACHE_SHIFT ; IMP: Must be Cache Line aligned + .align 1 << L1_CACHE_SHIFT .type ex_saved_reg1, @object #ifdef CONFIG_SMP .size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT) @@ -66,6 +85,44 @@ ex_saved_reg1: .zero 16 #endif +.macro TLBMISS_FREEUP_REGS +#ifdef CONFIG_SMP + sr r0, [ARC_REG_SCRATCH_DATA0] ; freeup r0 to code with + GET_CPU_ID r0 ; get to per cpu scratch mem, + lsl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu + add r0, @ex_saved_reg1, r0 +#else + st r0, [@ex_saved_reg1] + mov_s r0, @ex_saved_reg1 +#endif + st_s r1, [r0, 4] + st_s r2, [r0, 8] + st_s r3, [r0, 12] + + ; VERIFY if the ASID in MMU-PID Reg is same as + ; one in Linux data structures + + tlb_paranoid_check_asm +.endm + +.macro TLBMISS_RESTORE_REGS +#ifdef CONFIG_SMP + GET_CPU_ID r0 ; get to per cpu scratch mem + lsl r0, r0, L1_CACHE_SHIFT ; each is cache line wide + add r0, @ex_saved_reg1, r0 + ld_s r3, [r0,12] + ld_s r2, [r0, 8] + ld_s r1, [r0, 4] + lr r0, [ARC_REG_SCRATCH_DATA0] +#else + mov_s r0, @ex_saved_reg1 + ld_s r3, [r0,12] + ld_s r2, [r0, 8] + ld_s r1, [r0, 4] + ld_s r0, [r0] +#endif +.endm + ;============================================================================ ; Troubleshooting Stuff ;============================================================================ @@ -76,34 +133,35 @@ ex_saved_reg1: ; In bizzare scenrios SW and HW ASID can get out-of-sync which is trouble. ; So we try to detect this in TLB Mis shandler - -.macro DBG_ASID_MISMATCH +.macro tlb_paranoid_check_asm #ifdef CONFIG_ARC_DBG_TLB_PARANOIA - ; make sure h/w ASID is same as s/w ASID - GET_CURR_TASK_ON_CPU r3 ld r0, [r3, TASK_ACT_MM] ld r0, [r0, MM_CTXT+MM_CTXT_ASID] + breq r0, 0, 55f ; Error if no ASID allocated lr r1, [ARC_REG_PID] and r1, r1, 0xFF - breq r1, r0, 5f + and r2, r0, 0xFF ; MMU PID bits only for comparison + breq r1, r2, 5f + +55: ; Error if H/w and S/w ASID don't match, but NOT if in kernel mode - lr r0, [erstatus] - bbit0 r0, STATUS_U_BIT, 5f + lr r2, [erstatus] + bbit0 r2, STATUS_U_BIT, 5f ; We sure are in troubled waters, Flag the error, but to do so ; need to switch to kernel mode stack to call error routine GET_TSK_STACK_BASE r3, sp ; Call printk to shoutout aloud - mov r0, 1 + mov r2, 1 j print_asid_mismatch -5: ; ASIDs match so proceed normally +5: ; ASIDs match so proceed normally nop #endif @@ -147,9 +205,9 @@ ex_saved_reg1: #ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT and.f 0, r0, _PAGE_PRESENT bz 1f - ld r2, [num_pte_not_present] - add r2, r2, 1 - st r2, [num_pte_not_present] + ld r3, [num_pte_not_present] + add r3, r3, 1 + st r3, [num_pte_not_present] 1: #endif @@ -161,13 +219,17 @@ ex_saved_reg1: ; IN: r0 = PTE, r1 = ptr to PTE .macro CONV_PTE_TO_TLB - and r3, r0, PTE_BITS_IN_PD1 ; Extract permission flags+PFN from PTE - sr r3, [ARC_REG_TLBPD1] ; these go in PD1 + and r3, r0, PTE_BITS_RWX ; r w x + lsl r2, r3, 3 ; r w x 0 0 0 + and.f 0, r0, _PAGE_GLOBAL + or.z r2, r2, r3 ; r w x r w x + + and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE + or r3, r3, r2 + + sr r3, [ARC_REG_TLBPD1] ; these go in PD1 and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb -#if (CONFIG_ARC_MMU_VER <= 2) /* Neednot be done with v3 onwards */ - lsr r2, r2 ; shift PTE flags to match layout in PD0 -#endif lr r3,[ARC_REG_TLBPD0] ; MMU prepares PD0 with vaddr and asid @@ -191,68 +253,6 @@ ex_saved_reg1: #endif .endm -;----------------------------------------------------------------- -; ARC700 Exception Handling doesn't auto-switch stack and it only provides -; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0" -; -; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a -; "global" is used to free-up FIRST core reg to be able to code the rest of -; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe). -; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3 -; need to be saved as well by extending the "global" to be 4 words. Hence -; ".size ex_saved_reg1, 16" -; [All of this dance is to avoid stack switching for each TLB Miss, since we -; only need to save only a handful of regs, as opposed to complete reg file] -; -; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST -; core reg as it will not be SMP safe. -; Thus scratch AUX reg is used (and no longer used to cache task PGD). -; To save the rest of 3 regs - per cpu, the global is made "per-cpu". -; Epilogue thus has to locate the "per-cpu" storage for regs. -; To avoid cache line bouncing the per-cpu global is aligned/sized per -; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence -; ".size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)" - -; As simple as that.... - -.macro TLBMISS_FREEUP_REGS -#ifdef CONFIG_SMP - sr r0, [ARC_REG_SCRATCH_DATA0] ; freeup r0 to code with - GET_CPU_ID r0 ; get to per cpu scratch mem, - lsl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu - add r0, @ex_saved_reg1, r0 -#else - st r0, [@ex_saved_reg1] - mov_s r0, @ex_saved_reg1 -#endif - st_s r1, [r0, 4] - st_s r2, [r0, 8] - st_s r3, [r0, 12] - - ; VERIFY if the ASID in MMU-PID Reg is same as - ; one in Linux data structures - - DBG_ASID_MISMATCH -.endm - -;----------------------------------------------------------------- -.macro TLBMISS_RESTORE_REGS -#ifdef CONFIG_SMP - GET_CPU_ID r0 ; get to per cpu scratch mem - lsl r0, r0, L1_CACHE_SHIFT ; each is cache line wide - add r0, @ex_saved_reg1, r0 - ld_s r3, [r0,12] - ld_s r2, [r0, 8] - ld_s r1, [r0, 4] - lr r0, [ARC_REG_SCRATCH_DATA0] -#else - mov_s r0, @ex_saved_reg1 - ld_s r3, [r0,12] - ld_s r2, [r0, 8] - ld_s r1, [r0, 4] - ld_s r0, [r0] -#endif -.endm ARCFP_CODE ;Fast Path Code, candidate for ICCM @@ -260,7 +260,7 @@ ARCFP_CODE ;Fast Path Code, candidate for ICCM ; I-TLB Miss Exception Handler ;----------------------------------------------------------------------------- -ARC_ENTRY EV_TLBMissI +ENTRY(EV_TLBMissI) TLBMISS_FREEUP_REGS @@ -271,35 +271,35 @@ ARC_ENTRY EV_TLBMissI #endif ;---------------------------------------------------------------- - ; Get the PTE corresponding to V-addr accessed + ; Get the PTE corresponding to V-addr accessed, r2 is setup with EFA LOAD_FAULT_PTE ;---------------------------------------------------------------- ; VERIFY_PTE: Check if PTE permissions approp for executing code cmp_s r2, VMALLOC_START - mov.lo r2, (_PAGE_PRESENT | _PAGE_READ | _PAGE_EXECUTE) - mov.hs r2, (_PAGE_PRESENT | _PAGE_K_READ | _PAGE_K_EXECUTE) + mov_s r2, (_PAGE_PRESENT | _PAGE_EXECUTE) + or.hs r2, r2, _PAGE_GLOBAL and r3, r0, r2 ; Mask out NON Flag bits from PTE xor.f r3, r3, r2 ; check ( ( pte & flags_test ) == flags_test ) bnz do_slow_path_pf ; Let Linux VM know that the page was accessed - or r0, r0, (_PAGE_PRESENT | _PAGE_ACCESSED) ; set Accessed Bit - st_s r0, [r1] ; Write back PTE + or r0, r0, _PAGE_ACCESSED ; set Accessed Bit + st_s r0, [r1] ; Write back PTE CONV_PTE_TO_TLB COMMIT_ENTRY_TO_MMU TLBMISS_RESTORE_REGS rtie -ARC_EXIT EV_TLBMissI +END(EV_TLBMissI) ;----------------------------------------------------------------------------- ; D-TLB Miss Exception Handler ;----------------------------------------------------------------------------- -ARC_ENTRY EV_TLBMissD +ENTRY(EV_TLBMissD) TLBMISS_FREEUP_REGS @@ -311,32 +311,27 @@ ARC_ENTRY EV_TLBMissD ;---------------------------------------------------------------- ; Get the PTE corresponding to V-addr accessed - ; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE + ; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE, r2 = EFA LOAD_FAULT_PTE ;---------------------------------------------------------------- ; VERIFY_PTE: Chk if PTE permissions approp for data access (R/W/R+W) - mov_s r2, 0 + cmp_s r2, VMALLOC_START + mov_s r2, _PAGE_PRESENT ; common bit for K/U PTE + or.hs r2, r2, _PAGE_GLOBAL ; kernel PTE only + + ; Linux PTE [RWX] bits are semantically overloaded: + ; -If PAGE_GLOBAL set, they refer to kernel-only flags (vmalloc) + ; -Otherwise they are user-mode permissions, and those are exactly + ; same for kernel mode as well (e.g. copy_(to|from)_user) + lr r3, [ecr] btst_s r3, ECR_C_BIT_DTLB_LD_MISS ; Read Access or.nz r2, r2, _PAGE_READ ; chk for Read flag in PTE btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; Write Access or.nz r2, r2, _PAGE_WRITE ; chk for Write flag in PTE - ; Above laddering takes care of XCHG access - ; which is both Read and Write - - ; If kernel mode access, ; make _PAGE_xx flags as _PAGE_K_xx - ; For copy_(to|from)_user, despite exception taken in kernel mode, - ; this code is not hit, because EFA would still be the user mode - ; address (EFA < 0x6000_0000). - ; This code is for legit kernel mode faults, vmalloc specifically - ; (EFA: 0x7000_0000 to 0x7FFF_FFFF) - - lr r3, [efa] - cmp r3, VMALLOC_START - 1 ; If kernel mode access - asl.hi r2, r2, 3 ; make _PAGE_xx flags as _PAGE_K_xx - or r2, r2, _PAGE_PRESENT ; Common flag for K/U mode + ; Above laddering takes care of XCHG access (both R and W) ; By now, r2 setup with all the Flags we need to check in PTE and r3, r0, r2 ; Mask out NON Flag bits from PTE @@ -345,7 +340,7 @@ ARC_ENTRY EV_TLBMissD ;---------------------------------------------------------------- ; UPDATE_PTE: Let Linux VM know that page was accessed/dirty lr r3, [ecr] - or r0, r0, (_PAGE_PRESENT | _PAGE_ACCESSED) ; Accessed bit always + or r0, r0, _PAGE_ACCESSED ; Accessed bit always btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; See if it was a Write Access ? or.nz r0, r0, _PAGE_MODIFIED ; if Write, set Dirty bit as well st_s r0, [r1] ; Write back PTE @@ -371,28 +366,11 @@ do_slow_path_pf: ; Slow path TLB Miss handled as a regular ARC Exception ; (stack switching / save the complete reg-file). - ; That requires freeing up r9 - EXCPN_PROLOG_FREEUP_REG r9 - - lr r9, [erstatus] - - SWITCH_TO_KERNEL_STK - SAVE_ALL_SYS + EXCEPTION_PROLOGUE ; ------- setup args for Linux Page fault Hanlder --------- - mov_s r0, sp - lr r2, [efa] - lr r3, [ecr] - - ; Both st and ex imply WRITE access of some sort, hence do_page_fault( ) - ; invoked with write=1 for DTLB-st/ex Miss and write=0 for ITLB miss or - ; DTLB-ld Miss - ; DTLB Miss Cause code is ld = 0x01 , st = 0x02, ex = 0x03 - ; Following code uses that fact that st/ex have one bit in common - - btst_s r3, ECR_C_BIT_DTLB_ST_MISS - mov.z r1, 0 - mov.nz r1, 1 + mov_s r1, sp + lr r0, [efa] ; We don't want exceptions to be disabled while the fault is handled. ; Now that we have saved the context we return from exception hence @@ -403,6 +381,4 @@ do_slow_path_pf: bl do_page_fault b ret_from_exception -ARC_EXIT EV_TLBMissD - -ARC_ENTRY EV_TLBMissB ; Bogus entry to measure sz of DTLBMiss hdlr +END(EV_TLBMissD) |
