diff options
Diffstat (limited to 'arch/powerpc/kernel/setup_64.c')
| -rw-r--r-- | arch/powerpc/kernel/setup_64.c | 375 |
1 files changed, 263 insertions, 112 deletions
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6568406b2a3..ee082d77117 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -10,9 +10,9 @@ * 2 of the License, or (at your option) any later version. */ -#undef DEBUG +#define DEBUG -#include <linux/module.h> +#include <linux/export.h> #include <linux/string.h> #include <linux/sched.h> #include <linux/init.h> @@ -34,7 +34,10 @@ #include <linux/bootmem.h> #include <linux/pci.h> #include <linux/lockdep.h> -#include <linux/lmb.h> +#include <linux/memblock.h> +#include <linux/hugetlb.h> +#include <linux/memory.h> + #include <asm/io.h> #include <asm/kdump.h> #include <asm/prom.h> @@ -50,7 +53,6 @@ #include <asm/btext.h> #include <asm/nvram.h> #include <asm/setup.h> -#include <asm/system.h> #include <asm/rtas.h> #include <asm/iommu.h> #include <asm/serial.h> @@ -61,10 +63,11 @@ #include <asm/xmon.h> #include <asm/udbg.h> #include <asm/kexec.h> -#include <asm/swiotlb.h> #include <asm/mmu_context.h> - -#include "setup.h" +#include <asm/code-patching.h> +#include <asm/kvm_ppc.h> +#include <asm/hugetlb.h> +#include <asm/epapr_hcalls.h> #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -72,7 +75,7 @@ #define DBG(fmt...) #endif -int boot_cpuid = 0; +int spinning_secondaries; u64 ppc64_pft_size; /* Pick defaults since we might want to patch instructions @@ -94,9 +97,41 @@ int dcache_bsize; int icache_bsize; int ucache_bsize; +#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) +static void setup_tlb_core_data(void) +{ + int cpu; + + BUILD_BUG_ON(offsetof(struct tlb_core_data, lock) != 0); + + for_each_possible_cpu(cpu) { + int first = cpu_first_thread_sibling(cpu); + + paca[cpu].tcd_ptr = &paca[first].tcd; + + /* + * If we have threads, we need either tlbsrx. + * or e6500 tablewalk mode, or else TLB handlers + * will be racy and could produce duplicate entries. + */ + if (smt_enabled_at_boot >= 2 && + !mmu_has_feature(MMU_FTR_USE_TLBRSRV) && + book3e_htw_mode != PPC_HTW_E6500) { + /* Should we panic instead? */ + WARN_ONCE("%s: unsupported MMU configuration -- expect problems\n", + __func__); + } + } +} +#else +static void setup_tlb_core_data(void) +{ +} +#endif + #ifdef CONFIG_SMP -static int smt_enabled_cmdline; +static char *smt_enabled_cmdline; /* Look for ibm,smt-enabled OF option */ static void check_smt_enabled(void) @@ -104,37 +139,46 @@ static void check_smt_enabled(void) struct device_node *dn; const char *smt_option; - /* Allow the command line to overrule the OF option */ - if (smt_enabled_cmdline) - return; + /* Default to enabling all threads */ + smt_enabled_at_boot = threads_per_core; - dn = of_find_node_by_path("/options"); - - if (dn) { - smt_option = of_get_property(dn, "ibm,smt-enabled", NULL); - - if (smt_option) { - if (!strcmp(smt_option, "on")) - smt_enabled_at_boot = 1; - else if (!strcmp(smt_option, "off")) - smt_enabled_at_boot = 0; - } - } + /* Allow the command line to overrule the OF option */ + if (smt_enabled_cmdline) { + if (!strcmp(smt_enabled_cmdline, "on")) + smt_enabled_at_boot = threads_per_core; + else if (!strcmp(smt_enabled_cmdline, "off")) + smt_enabled_at_boot = 0; + else { + long smt; + int rc; + + rc = strict_strtol(smt_enabled_cmdline, 10, &smt); + if (!rc) + smt_enabled_at_boot = + min(threads_per_core, (int)smt); + } + } else { + dn = of_find_node_by_path("/options"); + if (dn) { + smt_option = of_get_property(dn, "ibm,smt-enabled", + NULL); + + if (smt_option) { + if (!strcmp(smt_option, "on")) + smt_enabled_at_boot = threads_per_core; + else if (!strcmp(smt_option, "off")) + smt_enabled_at_boot = 0; + } + + of_node_put(dn); + } + } } /* Look for smt-enabled= cmdline option */ static int __init early_smt_enabled(char *p) { - smt_enabled_cmdline = 1; - - if (!p) - return 0; - - if (!strcmp(p, "on") || !strcmp(p, "1")) - smt_enabled_at_boot = 1; - else if (!strcmp(p, "off") || !strcmp(p, "0")) - smt_enabled_at_boot = 0; - + smt_enabled_cmdline = p; return 0; } early_param("smt-enabled", early_smt_enabled); @@ -143,14 +187,26 @@ early_param("smt-enabled", early_smt_enabled); #define check_smt_enabled() #endif /* CONFIG_SMP */ -/* Put the paca pointer into r13 and SPRG_PACA */ -void __init setup_paca(int cpu) +/** Fix up paca fields required for the boot cpu */ +static void fixup_boot_paca(void) { - local_paca = &paca[cpu]; - mtspr(SPRN_SPRG_PACA, local_paca); -#ifdef CONFIG_PPC_BOOK3E - mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); -#endif + /* The boot cpu is started */ + get_paca()->cpu_start = 1; + /* Allow percpu accesses to work until we setup percpu data */ + get_paca()->data_offset = 0; +} + +static void cpu_ready_for_interrupts(void) +{ + /* Set IR and DR in PACA MSR */ + get_paca()->kernel_msr = MSR_KERNEL; + + /* Enable AIL if supported */ + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_207S)) { + unsigned long lpcr = mfspr(SPRN_LPCR); + mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + } } /* @@ -159,7 +215,7 @@ void __init setup_paca(int cpu) * the CPU that ignores the top 2 bits of the address in real * mode so we can access kernel globals normally provided we * only toy with things in the RMO region. From here, we do - * some early parsing of the device-tree to setup out LMB + * some early parsing of the device-tree to setup out MEMBLOCK * data structures, and allocate & initialize the hash table * and segment tables so we can start running with translation * enabled. @@ -174,16 +230,17 @@ void __init setup_paca(int cpu) void __init early_setup(unsigned long dt_ptr) { - /* -------- printk is _NOT_ safe to use here ! ------- */ + static __initdata struct paca_struct boot_paca; - /* Fill in any unititialised pacas */ - initialise_pacas(); + /* -------- printk is _NOT_ safe to use here ! ------- */ /* Identify CPU type */ identify_cpu(0, mfspr(SPRN_PVR)); /* Assume we're on cpu 0 for now. Don't write to the paca yet! */ - setup_paca(0); + initialise_paca(&boot_paca, 0); + setup_paca(&boot_paca); + fixup_boot_paca(); /* Initialize lockdep early or else spinlocks will blow */ lockdep_init(); @@ -202,11 +259,11 @@ void __init early_setup(unsigned long dt_ptr) */ early_init_devtree(__va(dt_ptr)); - /* Now we know the logical id of our boot cpu, setup the paca. */ - setup_paca(boot_cpuid); + epapr_paravirt_early_init(); - /* Fix up paca fields required for the boot cpu */ - get_paca()->cpu_start = 1; + /* Now we know the logical id of our boot cpu, setup the paca. */ + setup_paca(&paca[boot_cpuid]); + fixup_boot_paca(); /* Probe the machine type */ probe_machine(); @@ -218,7 +275,36 @@ void __init early_setup(unsigned long dt_ptr) /* Initialize the hash table or TLB handling */ early_init_mmu(); + /* + * At this point, we can let interrupts switch to virtual mode + * (the MMU has been setup), so adjust the MSR in the PACA to + * have IR and DR set and enable AIL if it exists + */ + cpu_ready_for_interrupts(); + + /* Reserve large chunks of memory for use by CMA for KVM */ + kvm_cma_reserve(); + + /* + * Reserve any gigantic pages requested on the command line. + * memblock needs to have been initialized by the time this is + * called since this will reserve memory. + */ + reserve_hugetlb_gpages(); + DBG(" <- early_setup()\n"); + +#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX + /* + * This needs to be done *last* (after the above DBG() even) + * + * Right after we return from this function, we turn on the MMU + * which means the real-mode access trick that btext does will + * no longer work, it needs to switch to using a real MMU + * mapping. This call will ensure that it does + */ + btext_map(); +#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */ } #ifdef CONFIG_SMP @@ -229,6 +315,13 @@ void early_setup_secondary(void) /* Initialize the hash table or TLB handling */ early_init_mmu_secondary(); + + /* + * At this point, we can let interrupts switch to virtual mode + * (the MMU has been setup), so adjust the MSR in the PACA to + * have IR and DR set. + */ + cpu_ready_for_interrupts(); } #endif /* CONFIG_SMP */ @@ -237,6 +330,7 @@ void early_setup_secondary(void) void smp_release_cpus(void) { unsigned long *ptr; + int i; DBG(" -> smp_release_cpus()\n"); @@ -248,8 +342,17 @@ void smp_release_cpus(void) ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop - PHYSICAL_START); - *ptr = __pa(generic_secondary_smp_init); - mb(); + *ptr = ppc_function_entry(generic_secondary_smp_init); + + /* And wait a bit for them to catch up */ + for (i = 0; i < 100000; i++) { + mb(); + HMT_low(); + if (spinning_secondaries == 0) + break; + udelay(1); + } + DBG("spinning_secondaries = %d\n", spinning_secondaries); DBG(" <- smp_release_cpus()\n"); } @@ -269,29 +372,32 @@ static void __init initialize_cache_info(void) DBG(" -> initialize_cache_info()\n"); - for (np = NULL; (np = of_find_node_by_type(np, "cpu"));) { + for_each_node_by_type(np, "cpu") { num_cpus += 1; - /* We're assuming *all* of the CPUs have the same + /* + * We're assuming *all* of the CPUs have the same * d-cache and i-cache sizes... -Peter */ - - if ( num_cpus == 1 ) { - const u32 *sizep, *lsizep; + if (num_cpus == 1) { + const __be32 *sizep, *lsizep; u32 size, lsize; size = 0; lsize = cur_cpu_spec->dcache_bsize; sizep = of_get_property(np, "d-cache-size", NULL); if (sizep != NULL) - size = *sizep; - lsizep = of_get_property(np, "d-cache-block-size", NULL); + size = be32_to_cpu(*sizep); + lsizep = of_get_property(np, "d-cache-block-size", + NULL); /* fallback if block size missing */ if (lsizep == NULL) - lsizep = of_get_property(np, "d-cache-line-size", NULL); + lsizep = of_get_property(np, + "d-cache-line-size", + NULL); if (lsizep != NULL) - lsize = *lsizep; - if (sizep == 0 || lsizep == 0) + lsize = be32_to_cpu(*lsizep); + if (sizep == NULL || lsizep == NULL) DBG("Argh, can't find dcache properties ! " "sizep: %p, lsizep: %p\n", sizep, lsizep); @@ -304,13 +410,16 @@ static void __init initialize_cache_info(void) lsize = cur_cpu_spec->icache_bsize; sizep = of_get_property(np, "i-cache-size", NULL); if (sizep != NULL) - size = *sizep; - lsizep = of_get_property(np, "i-cache-block-size", NULL); + size = be32_to_cpu(*sizep); + lsizep = of_get_property(np, "i-cache-block-size", + NULL); if (lsizep == NULL) - lsizep = of_get_property(np, "i-cache-line-size", NULL); + lsizep = of_get_property(np, + "i-cache-line-size", + NULL); if (lsizep != NULL) - lsize = *lsizep; - if (sizep == 0 || lsizep == 0) + lsize = be32_to_cpu(*lsizep); + if (sizep == NULL || lsizep == NULL) DBG("Argh, can't find icache properties ! " "sizep: %p, lsizep: %p\n", sizep, lsizep); @@ -344,6 +453,7 @@ void __init setup_system(void) &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); do_lwsync_fixups(cur_cpu_spec->cpu_features, &__start___lwsync_fixup, &__stop___lwsync_fixup); + do_final_fixups(); /* * Unflatten the device-tree passed by prom_init or kexec @@ -393,8 +503,9 @@ void __init setup_system(void) */ xmon_setup(); - check_smt_enabled(); smp_setup_cpu_maps(); + check_smt_enabled(); + setup_tlb_core_data(); #ifdef CONFIG_SMP /* Release secondary cpus out of their spinloops at 0x60 now that @@ -407,7 +518,7 @@ void __init setup_system(void) printk("-----------------------------------------------------\n"); printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); - printk("physicalMemorySize = 0x%llx\n", lmb_phys_mem_size()); + printk("physicalMemorySize = 0x%llx\n", memblock_phys_mem_size()); if (ppc64_caches.dline_size != 0x80) printk("ppc64_caches.dcache_line_size = 0x%x\n", ppc64_caches.dline_size); @@ -427,41 +538,68 @@ void __init setup_system(void) DBG(" <- setup_system()\n"); } -#ifdef CONFIG_IRQSTACKS +/* This returns the limit below which memory accesses to the linear + * mapping are guarnateed not to cause a TLB or SLB miss. This is + * used to allocate interrupt or emergency stacks for which our + * exception entry path doesn't deal with being interrupted. + */ +static u64 safe_stack_limit(void) +{ +#ifdef CONFIG_PPC_BOOK3E + /* Freescale BookE bolts the entire linear mapping */ + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) + return linear_map_top; + /* Other BookE, we assume the first GB is bolted */ + return 1ul << 30; +#else + /* BookS, the first segment is bolted */ + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) + return 1UL << SID_SHIFT_1T; + return 1UL << SID_SHIFT; +#endif +} + static void __init irqstack_early_init(void) { + u64 limit = safe_stack_limit(); unsigned int i; /* - * interrupt stacks must be under 256MB, we cannot afford to take - * SLB misses on them. + * Interrupt stacks must be in the first segment since we + * cannot afford to take SLB misses on them. */ for_each_possible_cpu(i) { softirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc_base(THREAD_SIZE, - THREAD_SIZE, 0x10000000)); + __va(memblock_alloc_base(THREAD_SIZE, + THREAD_SIZE, limit)); hardirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc_base(THREAD_SIZE, - THREAD_SIZE, 0x10000000)); + __va(memblock_alloc_base(THREAD_SIZE, + THREAD_SIZE, limit)); } } -#else -#define irqstack_early_init() -#endif #ifdef CONFIG_PPC_BOOK3E static void __init exc_lvl_early_init(void) { unsigned int i; + unsigned long sp; for_each_possible_cpu(i) { - critirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); - dbgirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); - mcheckirq_ctx[i] = (struct thread_info *) - __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); + critirq_ctx[i] = (struct thread_info *)__va(sp); + paca[i].crit_kstack = __va(sp + THREAD_SIZE); + + sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); + dbgirq_ctx[i] = (struct thread_info *)__va(sp); + paca[i].dbg_kstack = __va(sp + THREAD_SIZE); + + sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE); + mcheckirq_ctx[i] = (struct thread_info *)__va(sp); + paca[i].mc_kstack = __va(sp + THREAD_SIZE); } + + if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) + patch_exception(0x040, exc_debug_debug_book3e); } #else #define exc_lvl_early_init() @@ -469,11 +607,12 @@ static void __init exc_lvl_early_init(void) /* * Stack space used when we detect a bad kernel stack pointer, and - * early in SMP boots before relocation is enabled. + * early in SMP boots before relocation is enabled. Exclusive emergency + * stack for machine checks. */ static void __init emergency_stack_init(void) { - unsigned long limit; + u64 limit; unsigned int i; /* @@ -485,20 +624,26 @@ static void __init emergency_stack_init(void) * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. */ - limit = min(0x10000000ULL, lmb.rmo_size); + limit = min(safe_stack_limit(), ppc64_rma_size); for_each_possible_cpu(i) { unsigned long sp; - sp = lmb_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); + sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); sp += THREAD_SIZE; paca[i].emergency_sp = __va(sp); + +#ifdef CONFIG_PPC_BOOK3S_64 + /* emergency stack for machine check exception handling. */ + sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); + sp += THREAD_SIZE; + paca[i].mc_emergency_sp = __va(sp); +#endif } } /* - * Called into from start_kernel, after lock_kernel has been called. - * Initializes bootmem, which is unsed to manage page allocation until - * mem_init is called. + * Called into from start_kernel this initializes bootmem, which is used + * to manage page allocation until mem_init is called. */ void __init setup_arch(char **cmdline_p) { @@ -514,9 +659,6 @@ void __init setup_arch(char **cmdline_p) dcache_bsize = ppc64_caches.dline_size; icache_bsize = ppc64_caches.iline_size; - /* reboot on panic */ - panic_timeout = 180; - if (ppc_md.panic) setup_panic(); @@ -524,7 +666,9 @@ void __init setup_arch(char **cmdline_p) init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; init_mm.brk = klimit; - +#ifdef CONFIG_PPC_64K_PAGES + init_mm.context.pte_frag = NULL; +#endif irqstack_early_init(); exc_lvl_early_init(); emergency_stack_init(); @@ -543,16 +687,16 @@ void __init setup_arch(char **cmdline_p) if (ppc_md.setup_arch) ppc_md.setup_arch(); -#ifdef CONFIG_SWIOTLB - if (ppc_swiotlb_enable) - swiotlb_init(1); -#endif - paging_init(); /* Initialize the MMU context management stuff */ mmu_context_init(); + /* Interrupt code needs to be 64K-aligned */ + if ((unsigned long)_stext & 0xffff) + panic("Kernelbase not 64K-aligned (0x%lx)!\n", + (unsigned long)_stext); + ppc64_boot_msg(0x15, "Setup Done"); } @@ -581,12 +725,6 @@ void ppc64_boot_msg(unsigned int src, const char *msg) printk("[boot]%04x %s\n", src, msg); } -void cpu_die(void) -{ - if (ppc_md.cpu_die) - ppc_md.cpu_die(); -} - #ifdef CONFIG_SMP #define PCPU_DYN_SIZE () @@ -609,6 +747,9 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to) return REMOTE_DISTANCE; } +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); + void __init setup_per_cpu_areas(void) { const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; @@ -633,14 +774,24 @@ void __init setup_per_cpu_areas(void) panic("cannot initialize percpu area (err=%d)", rc); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; - for_each_possible_cpu(cpu) - paca[cpu].data_offset = delta + pcpu_unit_offsets[cpu]; + for_each_possible_cpu(cpu) { + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; + paca[cpu].data_offset = __per_cpu_offset[cpu]; + } } #endif +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +unsigned long memory_block_size_bytes(void) +{ + if (ppc_md.memory_block_size) + return ppc_md.memory_block_size(); + + return MIN_MEMORY_BLOCK_SIZE; +} +#endif -#ifdef CONFIG_PPC_INDIRECT_IO +#if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO) struct ppc_pci_io ppc_pci_io; EXPORT_SYMBOL(ppc_pci_io); -#endif /* CONFIG_PPC_INDIRECT_IO */ - +#endif |
