diff options
-rw-r--r-- | arch/sparc/include/asm/hugetlb.h | 1 | ||||
-rw-r--r-- | arch/sparc/include/asm/page_64.h | 4 | ||||
-rw-r--r-- | arch/sparc/include/asm/tsb.h | 28 | ||||
-rw-r--r-- | arch/sparc/kernel/kernel.h | 12 | ||||
-rw-r--r-- | arch/sparc/kernel/leon_smp.c | 33 | ||||
-rw-r--r-- | arch/sparc/kernel/smp_32.c | 86 | ||||
-rw-r--r-- | arch/sparc/kernel/sun4d_smp.c | 29 | ||||
-rw-r--r-- | arch/sparc/kernel/sun4m_smp.c | 33 | ||||
-rw-r--r-- | arch/sparc/kernel/trampoline_32.S | 17 | ||||
-rw-r--r-- | arch/sparc/kernel/tsb.S | 39 | ||||
-rw-r--r-- | arch/sparc/mm/fault_64.c | 9 | ||||
-rw-r--r-- | arch/sparc/mm/init_64.c | 62 | ||||
-rw-r--r-- | arch/sparc/mm/tlb.c | 11 | ||||
-rw-r--r-- | arch/sparc/mm/tsb.c | 2 |
14 files changed, 246 insertions, 120 deletions
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index 9661e9bc7bb..7eb57d24504 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -12,7 +12,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) { - hugetlb_setup(mm); } static inline int is_hugepage_only_range(struct mm_struct *mm, diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index 4b39f74d6ca..e15538899f3 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -27,8 +27,8 @@ #ifndef __ASSEMBLY__ #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) -struct mm_struct; -extern void hugetlb_setup(struct mm_struct *mm); +struct pt_regs; +extern void hugetlb_setup(struct pt_regs *regs); #endif #define WANT_PAGE_VIRTUAL diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h index b4c258de444..e696432b950 100644 --- a/arch/sparc/include/asm/tsb.h +++ b/arch/sparc/include/asm/tsb.h @@ -157,17 +157,26 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; andn REG2, 0x7, REG2; \ add REG1, REG2, REG1; - /* This macro exists only to make the PMD translator below easier - * to read. It hides the ELF section switch for the sun4v code - * patching. + /* These macros exists only to make the PMD translator below + * easier to read. It hides the ELF section switch for the + * sun4v code patching. */ -#define OR_PTE_BIT(REG, NAME) \ +#define OR_PTE_BIT_1INSN(REG, NAME) \ 661: or REG, _PAGE_##NAME##_4U, REG; \ .section .sun4v_1insn_patch, "ax"; \ .word 661b; \ or REG, _PAGE_##NAME##_4V, REG; \ .previous; +#define OR_PTE_BIT_2INSN(REG, TMP, NAME) \ +661: sethi %hi(_PAGE_##NAME##_4U), TMP; \ + or REG, TMP, REG; \ + .section .sun4v_2insn_patch, "ax"; \ + .word 661b; \ + mov -1, TMP; \ + or REG, _PAGE_##NAME##_4V, REG; \ + .previous; + /* Load into REG the PTE value for VALID, CACHE, and SZHUGE. */ #define BUILD_PTE_VALID_SZHUGE_CACHE(REG) \ 661: sethi %uhi(_PAGE_VALID|_PAGE_SZHUGE_4U), REG; \ @@ -214,12 +223,13 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; andn REG1, PMD_HUGE_PROTBITS, REG2; \ sllx REG2, PMD_PADDR_SHIFT, REG2; \ /* REG2 now holds PFN << PAGE_SHIFT */ \ - andcc REG1, PMD_HUGE_EXEC, %g0; \ - bne,a,pt %xcc, 1f; \ - OR_PTE_BIT(REG2, EXEC); \ -1: andcc REG1, PMD_HUGE_WRITE, %g0; \ + andcc REG1, PMD_HUGE_WRITE, %g0; \ bne,a,pt %xcc, 1f; \ - OR_PTE_BIT(REG2, W); \ + OR_PTE_BIT_1INSN(REG2, W); \ +1: andcc REG1, PMD_HUGE_EXEC, %g0; \ + be,pt %xcc, 1f; \ + nop; \ + OR_PTE_BIT_2INSN(REG2, REG1, EXEC); \ /* REG1 can now be clobbered, build final PTE */ \ 1: BUILD_PTE_VALID_SZHUGE_CACHE(REG1); \ ba,pt %xcc, PTE_LABEL; \ diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index 291bb5de9ce..a702d9ab019 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -48,6 +48,10 @@ extern void sun4m_init_IRQ(void); extern void sun4m_unmask_profile_irq(void); extern void sun4m_clear_profile_irq(int cpu); +/* sun4m_smp.c */ +void sun4m_cpu_pre_starting(void *arg); +void sun4m_cpu_pre_online(void *arg); + /* sun4d_irq.c */ extern spinlock_t sun4d_imsk_lock; @@ -60,6 +64,14 @@ extern int show_sun4d_interrupts(struct seq_file *, void *); extern void sun4d_distribute_irqs(void); extern void sun4d_free_irq(unsigned int irq, void *dev_id); +/* sun4d_smp.c */ +void sun4d_cpu_pre_starting(void *arg); +void sun4d_cpu_pre_online(void *arg); + +/* leon_smp.c */ +void leon_cpu_pre_starting(void *arg); +void leon_cpu_pre_online(void *arg); + /* head_32.S */ extern unsigned int t_nmi[]; extern unsigned int linux_trap_ipi15_sun4d[]; diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index 0f3fb6d9c8e..9b40c9c12a0 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c @@ -69,31 +69,19 @@ static inline unsigned long do_swap(volatile unsigned long *ptr, return val; } -void __cpuinit leon_callin(void) +void __cpuinit leon_cpu_pre_starting(void *arg) { - int cpuid = hard_smp_processor_id(); - - local_ops->cache_all(); - local_ops->tlb_all(); leon_configure_cache_smp(); +} - notify_cpu_starting(cpuid); - - /* Get our local ticker going. */ - register_percpu_ce(cpuid); - - calibrate_delay(); - smp_store_cpu_info(cpuid); - - local_ops->cache_all(); - local_ops->tlb_all(); +void __cpuinit leon_cpu_pre_online(void *arg) +{ + int cpuid = hard_smp_processor_id(); - /* - * Unblock the master CPU _only_ when the scheduler state - * of all secondary CPUs will be up-to-date, so after - * the SMP initialization the master will be just allowed - * to call the scheduler code. - * Allow master to continue. + /* Allow master to continue. The master will then give us the + * go-ahead by setting the smp_commenced_mask and will wait without + * timeouts until our setup is completed fully (signified by + * our bit being set in the cpu_online_mask). */ do_swap(&cpu_callin_map[cpuid], 1); @@ -110,9 +98,6 @@ void __cpuinit leon_callin(void) while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) mb(); - - local_irq_enable(); - set_cpu_online(cpuid, true); } /* diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index 79db45e5134..9e7e6d71836 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -20,6 +20,7 @@ #include <linux/seq_file.h> #include <linux/cache.h> #include <linux/delay.h> +#include <linux/cpu.h> #include <asm/ptrace.h> #include <linux/atomic.h> @@ -32,8 +33,10 @@ #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/cpudata.h> +#include <asm/timer.h> #include <asm/leon.h> +#include "kernel.h" #include "irq.h" volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,}; @@ -294,6 +297,89 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle) return ret; } +void __cpuinit arch_cpu_pre_starting(void *arg) +{ + local_ops->cache_all(); + local_ops->tlb_all(); + + switch(sparc_cpu_model) { + case sun4m: + sun4m_cpu_pre_starting(arg); + break; + case sun4d: + sun4d_cpu_pre_starting(arg); + break; + case sparc_leon: + leon_cpu_pre_starting(arg); + break; + default: + BUG(); + } +} + +void __cpuinit arch_cpu_pre_online(void *arg) +{ + unsigned int cpuid = hard_smp_processor_id(); + + register_percpu_ce(cpuid); + + calibrate_delay(); + smp_store_cpu_info(cpuid); + + local_ops->cache_all(); + local_ops->tlb_all(); + + switch(sparc_cpu_model) { + case sun4m: + sun4m_cpu_pre_online(arg); + break; + case sun4d: + sun4d_cpu_pre_online(arg); + break; + case sparc_leon: + leon_cpu_pre_online(arg); + break; + default: + BUG(); + } +} + +void __cpuinit sparc_start_secondary(void *arg) +{ + unsigned int cpu; + + /* + * SMP booting is extremely fragile in some architectures. So run + * the cpu initialization code first before anything else. + */ + arch_cpu_pre_starting(arg); + + preempt_disable(); + cpu = smp_processor_id(); + + /* Invoke the CPU_STARTING notifier callbacks */ + notify_cpu_starting(cpu); + + arch_cpu_pre_online(arg); + + /* Set the CPU in the cpu_online_mask */ + set_cpu_online(cpu, true); + + /* Enable local interrupts now */ + local_irq_enable(); + + wmb(); + cpu_idle(); + + /* We should never reach here! */ + BUG(); +} + +void __cpuinit smp_callin(void) +{ + sparc_start_secondary(NULL); +} + void smp_bogo(struct seq_file *m) { int i; diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index ddaea31de58..c9eb82f23d9 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c @@ -50,10 +50,9 @@ static inline void show_leds(int cpuid) "i" (ASI_M_CTL)); } -void __cpuinit smp4d_callin(void) +void __cpuinit sun4d_cpu_pre_starting(void *arg) { int cpuid = hard_smp_processor_id(); - unsigned long flags; /* Show we are alive */ cpu_leds[cpuid] = 0x6; @@ -61,26 +60,20 @@ void __cpuinit smp4d_callin(void) /* Enable level15 interrupt, disable level14 interrupt for now */ cc_set_imsk((cc_get_imsk() & ~0x8000) | 0x4000); +} - local_ops->cache_all(); - local_ops->tlb_all(); +void __cpuinit sun4d_cpu_pre_online(void *arg) +{ + unsigned long flags; + int cpuid; - notify_cpu_starting(cpuid); - /* - * Unblock the master CPU _only_ when the scheduler state + cpuid = hard_smp_processor_id(); + + /* Unblock the master CPU _only_ when the scheduler state * of all secondary CPUs will be up-to-date, so after * the SMP initialization the master will be just allowed * to call the scheduler code. */ - /* Get our local ticker going. */ - register_percpu_ce(cpuid); - - calibrate_delay(); - smp_store_cpu_info(cpuid); - local_ops->cache_all(); - local_ops->tlb_all(); - - /* Allow master to continue. */ sun4d_swap((unsigned long *)&cpu_callin_map[cpuid], 1); local_ops->cache_all(); local_ops->tlb_all(); @@ -106,16 +99,12 @@ void __cpuinit smp4d_callin(void) local_ops->cache_all(); local_ops->tlb_all(); - local_irq_enable(); /* We don't allow PIL 14 yet */ - while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) barrier(); spin_lock_irqsave(&sun4d_imsk_lock, flags); cc_set_imsk(cc_get_imsk() & ~0x4000); /* Allow PIL 14 as well */ spin_unlock_irqrestore(&sun4d_imsk_lock, flags); - set_cpu_online(cpuid, true); - } /* diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c index 128af730428..8a65f158153 100644 --- a/arch/sparc/kernel/sun4m_smp.c +++ b/arch/sparc/kernel/sun4m_smp.c @@ -34,30 +34,19 @@ swap_ulong(volatile unsigned long *ptr, unsigned long val) return val; } -void __cpuinit smp4m_callin(void) +void __cpuinit sun4m_cpu_pre_starting(void *arg) { - int cpuid = hard_smp_processor_id(); - - local_ops->cache_all(); - local_ops->tlb_all(); - - notify_cpu_starting(cpuid); - - register_percpu_ce(cpuid); - - calibrate_delay(); - smp_store_cpu_info(cpuid); +} - local_ops->cache_all(); - local_ops->tlb_all(); +void __cpuinit sun4m_cpu_pre_online(void *arg) +{ + int cpuid = hard_smp_processor_id(); - /* - * Unblock the master CPU _only_ when the scheduler state - * of all secondary CPUs will be up-to-date, so after - * the SMP initialization the master will be just allowed - * to call the scheduler code. + /* Allow master to continue. The master will then give us the + * go-ahead by setting the smp_commenced_mask and will wait without + * timeouts until our setup is completed fully (signified by + * our bit being set in the cpu_online_mask). */ - /* Allow master to continue. */ swap_ulong(&cpu_callin_map[cpuid], 1); /* XXX: What's up with all the flushes? */ @@ -75,10 +64,6 @@ void __cpuinit smp4m_callin(void) while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) mb(); - - local_irq_enable(); - - set_cpu_online(cpuid, true); } /* diff --git a/arch/sparc/kernel/trampoline_32.S b/arch/sparc/kernel/trampoline_32.S index af27acab448..6cdb08cdabf 100644 --- a/arch/sparc/kernel/trampoline_32.S +++ b/arch/sparc/kernel/trampoline_32.S @@ -79,18 +79,15 @@ cpu3_startup: nop /* Start this processor. */ - call smp4m_callin + call smp_callin nop - b,a smp_do_cpu_idle + b,a smp_panic .text .align 4 -smp_do_cpu_idle: - call cpu_idle - mov 0, %o0 - +smp_panic: call cpu_panic nop @@ -144,10 +141,10 @@ sun4d_cpu_startup: nop /* Start this processor. */ - call smp4d_callin + call smp_callin nop - b,a smp_do_cpu_idle + b,a smp_panic __CPUINIT .align 4 @@ -201,7 +198,7 @@ leon_smp_cpu_startup: nop /* Start this processor. */ - call leon_callin + call smp_callin nop - b,a smp_do_cpu_idle + b,a smp_panic diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index d4bdc7a6237..a313e4a9399 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S @@ -136,12 +136,43 @@ tsb_miss_page_table_walk_sun4v_fastpath: nop /* It is a huge page, use huge page TSB entry address we - * calculated above. + * calculated above. If the huge page TSB has not been + * allocated, setup a trap stack and call hugetlb_setup() + * to do so, then return from the trap to replay the TLB + * miss. + * + * This is necessary to handle the case of transparent huge + * pages where we don't really have a non-atomic context + * in which to allocate the hugepage TSB hash table. When + * the 'mm' faults in the hugepage for the first time, we + * thus handle it here. This also makes sure that we can + * allocate the TSB hash table on the correct NUMA node. */ TRAP_LOAD_TRAP_BLOCK(%g7, %g2) - ldx [%g7 + TRAP_PER_CPU_TSB_HUGE_TEMP], %g2 - cmp %g2, -1 - movne %xcc, %g2, %g1 + ldx [%g7 + TRAP_PER_CPU_TSB_HUGE_TEMP], %g1 + cmp %g1, -1 + bne,pt %xcc, 60f + nop + +661: rdpr %pstate, %g5 + wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate + .section .sun4v_2insn_patch, "ax" + .word 661b + SET_GL(1) + nop + .previous + + rdpr %tl, %g3 + cmp %g3, 1 + bne,pn %xcc, winfix_trampoline + nop + ba,pt %xcc, etrap + rd %pc, %g7 + call hugetlb_setup + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + nop + 60: #endif diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 097aee763af..5062ff389e8 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -472,8 +472,13 @@ good_area: #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) mm_rss = mm->context.huge_pte_count; if (unlikely(mm_rss > - mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) - tsb_grow(mm, MM_TSB_HUGE, mm_rss); + mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) { + if (mm->context.tsb_block[MM_TSB_HUGE].tsb) + tsb_grow(mm, MM_TSB_HUGE, mm_rss); + else + hugetlb_setup(regs); + + } #endif return; diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index c3b72423c84..82bbf048a5b 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -314,16 +314,31 @@ static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_inde struct tsb *tsb = mm->context.tsb_block[tsb_index].tsb; unsigned long tag; + if (unlikely(!tsb)) + return; + tsb += ((address >> tsb_hash_shift) & (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL)); tag = (address >> 22UL); tsb_insert(tsb, tag, tte); } +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) +static inline bool is_hugetlb_pte(pte_t pte) +{ + if ((tlb_type == hypervisor && + (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) || + (tlb_type != hypervisor && + (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U)) + return true; + return false; +} +#endif + void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - unsigned long tsb_index, tsb_hash_shift, flags; struct mm_struct *mm; + unsigned long flags; pte_t pte = *ptep; if (tlb_type != hypervisor) { @@ -335,25 +350,16 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * mm = vma->vm_mm; - tsb_index = MM_TSB_BASE; - tsb_hash_shift = PAGE_SHIFT; - spin_lock_irqsave(&mm->context.lock, flags); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) { - if ((tlb_type == hypervisor && - (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) || - (tlb_type != hypervisor && - (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U)) { - tsb_index = MM_TSB_HUGE; - tsb_hash_shift = HPAGE_SHIFT; - } - } + if (mm->context.huge_pte_count && is_hugetlb_pte(pte)) + __update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT, + address, pte_val(pte)); + else #endif - - __update_mmu_tsb_insert(mm, tsb_index, tsb_hash_shift, - address, pte_val(pte)); + __update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT, + address, pte_val(pte)); spin_unlock_irqrestore(&mm->context.lock, flags); } @@ -2712,14 +2718,28 @@ static void context_reload(void *__data) load_secondary_context(mm); } -void hugetlb_setup(struct mm_struct *mm) +void hugetlb_setup(struct pt_regs *regs) { - struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE]; + struct mm_struct *mm = current->mm; + struct tsb_config *tp; - if (likely(tp->tsb != NULL)) - return; + if (in_atomic() || !mm) { + const struct exception_table_entry *entry; + + entry = search_exception_tables(regs->tpc); + if (entry) { + regs->tpc = entry->fixup; + regs->tnpc = regs->tpc + 4; + return; + } + pr_alert("Unexpected HugeTLB setup in atomic context.\n"); + die_if_kernel("HugeTSB in atomic", regs); + } + + tp = &mm->context.tsb_block[MM_TSB_HUGE]; + if (likely(tp->tsb == NULL)) + tsb_grow(mm, MM_TSB_HUGE, 0); - tsb_grow(mm, MM_TSB_HUGE, 0); tsb_context_switch(mm); smp_tsb_sync(mm); diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 3e8fec391fe..ba6ae7ffdc2 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -135,8 +135,15 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, mm->context.huge_pte_count++; else mm->context.huge_pte_count--; - if (mm->context.huge_pte_count == 1) - hugetlb_setup(mm); + + /* Do not try to allocate the TSB hash table if we + * don't have one already. We have various locks held + * and thus we'll end up doing a GFP_KERNEL allocation + * in an atomic context. + * + * Instead, we let the first TLB miss on a hugepage + * take care of this. + */ } if (!pmd_none(orig)) { diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 7f647434749..428982b9bec 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -314,7 +314,7 @@ void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss) retry_tsb_alloc: gfp_flags = GFP_KERNEL; if (new_size > (PAGE_SIZE * 2)) - gfp_flags = __GFP_NOWARN | __GFP_NORETRY; + gfp_flags |= __GFP_NOWARN | __GFP_NORETRY; new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index], gfp_flags, numa_node_id()); |