Diffstat (limited to 'arch/powerpc/kernel/setup_64.c')
-rw-r--r--  arch/powerpc/kernel/setup_64.c | 972
1 file changed, 420 insertions, 552 deletions
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index b0994050024..ee082d77117 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -10,10 +10,9 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#undef DEBUG
+#define DEBUG
 
-#include <linux/config.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/string.h>
 #include <linux/sched.h>
 #include <linux/init.h>
@@ -21,7 +20,6 @@
 #include <linux/reboot.h>
 #include <linux/delay.h>
 #include <linux/initrd.h>
-#include <linux/ide.h>
 #include <linux/seq_file.h>
 #include <linux/ioport.h>
 #include <linux/console.h>
@@ -33,7 +31,15 @@
 #include <linux/unistd.h>
 #include <linux/serial.h>
 #include <linux/serial_8250.h>
+#include <linux/bootmem.h>
+#include <linux/pci.h>
+#include <linux/lockdep.h>
+#include <linux/memblock.h>
+#include <linux/hugetlb.h>
+#include <linux/memory.h>
+
 #include <asm/io.h>
+#include <asm/kdump.h>
 #include <asm/prom.h>
 #include <asm/processor.h>
 #include <asm/pgtable.h>
@@ -41,25 +47,27 @@
 #include <asm/elf.h>
 #include <asm/machdep.h>
 #include <asm/paca.h>
-#include <asm/ppcdebug.h>
 #include <asm/time.h>
 #include <asm/cputable.h>
 #include <asm/sections.h>
 #include <asm/btext.h>
 #include <asm/nvram.h>
 #include <asm/setup.h>
-#include <asm/system.h>
 #include <asm/rtas.h>
 #include <asm/iommu.h>
 #include <asm/serial.h>
 #include <asm/cache.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
-#include <asm/lmb.h>
-#include <asm/iseries/it_lp_naca.h>
 #include <asm/firmware.h>
-#include <asm/systemcfg.h>
 #include <asm/xmon.h>
+#include <asm/udbg.h>
+#include <asm/kexec.h>
+#include <asm/mmu_context.h>
+#include <asm/code-patching.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hugetlb.h>
+#include <asm/epapr_hcalls.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -67,49 +75,18 @@
 #define DBG(fmt...)
 #endif
 
-/*
- * Here are some early debugging facilities. You can enable one
- * but your kernel will not boot on anything else if you do so
- */
-
-/* This one is for use on LPAR machines that support an HVC console
- * on vterm 0
- */
-extern void udbg_init_debug_lpar(void);
-/* This one is for use on Apple G5 machines
- */
-extern void udbg_init_pmac_realmode(void);
-/* That's RTAS panel debug */
-extern void call_rtas_display_status_delay(unsigned char c);
-/* Here's maple real mode debug */
-extern void udbg_init_maple_realmode(void);
-
-#define EARLY_DEBUG_INIT() do {} while(0)
-
-#if 0
-#define EARLY_DEBUG_INIT() udbg_init_debug_lpar()
-#define EARLY_DEBUG_INIT() udbg_init_maple_realmode()
-#define EARLY_DEBUG_INIT() udbg_init_pmac_realmode()
-#define EARLY_DEBUG_INIT() \
-        do { udbg_putc = call_rtas_display_status_delay; } while(0)
-#endif
-
-/* extern void *stab; */
-extern unsigned long klimit;
-
-extern void mm_init_ppc64(void);
-extern void stab_initialize(unsigned long stab);
-extern void htab_initialize(void);
-extern void early_init_devtree(void *flat_dt);
-extern void unflatten_device_tree(void);
-
-int have_of = 1;
-int boot_cpuid = 0;
-int boot_cpuid_phys = 0;
-dev_t boot_dev;
+int spinning_secondaries;
 u64 ppc64_pft_size;
 
-struct ppc64_caches ppc64_caches;
+/* Pick defaults since we might want to patch instructions
+ * before we've read this from the device tree.
+ */
+struct ppc64_caches ppc64_caches = {
+        .dline_size = 0x40,
+        .log_dline_size = 6,
+        .iline_size = 0x40,
+        .log_iline_size = 6
+};
 EXPORT_SYMBOL_GPL(ppc64_caches);
 
 /*
@@ -120,63 +97,88 @@ int dcache_bsize;
 int icache_bsize;
 int ucache_bsize;
 
-/* The main machine-dep calls structure
- */
-struct machdep_calls ppc_md;
-EXPORT_SYMBOL(ppc_md);
+#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
+static void setup_tlb_core_data(void)
+{
+        int cpu;
 
-#ifdef CONFIG_MAGIC_SYSRQ
-unsigned long SYSRQ_KEY;
-#endif /* CONFIG_MAGIC_SYSRQ */
+        BUILD_BUG_ON(offsetof(struct tlb_core_data, lock) != 0);
 
+        for_each_possible_cpu(cpu) {
+                int first = cpu_first_thread_sibling(cpu);
 
-static int ppc64_panic_event(struct notifier_block *, unsigned long, void *);
-static struct notifier_block ppc64_panic_block = {
-        .notifier_call = ppc64_panic_event,
-        .priority = INT_MIN /* may not return; must be done last */
-};
+                paca[cpu].tcd_ptr = &paca[first].tcd;
+
+                /*
+                 * If we have threads, we need either tlbsrx.
+                 * or e6500 tablewalk mode, or else TLB handlers
+                 * will be racy and could produce duplicate entries.
+                 */
+                if (smt_enabled_at_boot >= 2 &&
+                    !mmu_has_feature(MMU_FTR_USE_TLBRSRV) &&
+                    book3e_htw_mode != PPC_HTW_E6500) {
+                        /* Should we panic instead? */
+                        WARN_ONCE("%s: unsupported MMU configuration -- expect problems\n",
+                                  __func__);
+                }
+        }
+}
+#else
+static void setup_tlb_core_data(void)
+{
+}
+#endif
 
 #ifdef CONFIG_SMP
 
-static int smt_enabled_cmdline;
+static char *smt_enabled_cmdline;
 
 /* Look for ibm,smt-enabled OF option */
 static void check_smt_enabled(void)
 {
         struct device_node *dn;
-        char *smt_option;
-
-        /* Allow the command line to overrule the OF option */
-        if (smt_enabled_cmdline)
-                return;
+        const char *smt_option;
 
-        dn = of_find_node_by_path("/options");
+        /* Default to enabling all threads */
+        smt_enabled_at_boot = threads_per_core;
 
-        if (dn) {
-                smt_option = (char *)get_property(dn, "ibm,smt-enabled", NULL);
+        /* Allow the command line to overrule the OF option */
+        if (smt_enabled_cmdline) {
+                if (!strcmp(smt_enabled_cmdline, "on"))
+                        smt_enabled_at_boot = threads_per_core;
+                else if (!strcmp(smt_enabled_cmdline, "off"))
+                        smt_enabled_at_boot = 0;
+                else {
+                        long smt;
+                        int rc;
+
+                        rc = strict_strtol(smt_enabled_cmdline, 10, &smt);
+                        if (!rc)
+                                smt_enabled_at_boot =
+                                        min(threads_per_core, (int)smt);
+                }
+        } else {
+                dn = of_find_node_by_path("/options");
+                if (dn) {
+                        smt_option = of_get_property(dn, "ibm,smt-enabled",
+                                                     NULL);
+
+                        if (smt_option) {
+                                if (!strcmp(smt_option, "on"))
+                                        smt_enabled_at_boot = threads_per_core;
+                                else if (!strcmp(smt_option, "off"))
+                                        smt_enabled_at_boot = 0;
+                        }
 
-                if (smt_option) {
-                        if (!strcmp(smt_option, "on"))
-                                smt_enabled_at_boot = 1;
-                        else if (!strcmp(smt_option, "off"))
-                                smt_enabled_at_boot = 0;
-                }
-        }
+                        of_node_put(dn);
+                }
+        }
 }
 
 /* Look for smt-enabled= cmdline option */
 static int __init early_smt_enabled(char *p)
 {
-        smt_enabled_cmdline = 1;
-
-        if (!p)
-                return 0;
-
-        if (!strcmp(p, "on") || !strcmp(p, "1"))
-                smt_enabled_at_boot = 1;
-        else if (!strcmp(p, "off") || !strcmp(p, "0"))
-                smt_enabled_at_boot = 0;
-
+        smt_enabled_cmdline = p;
         return 0;
 }
 early_param("smt-enabled", early_smt_enabled);
@@ -185,31 +187,27 @@ early_param("smt-enabled", early_smt_enabled);
 #define check_smt_enabled()
 #endif /* CONFIG_SMP */
 
-extern struct machdep_calls pSeries_md;
-extern struct machdep_calls pmac_md;
-extern struct machdep_calls maple_md;
-extern struct machdep_calls cell_md;
-extern struct machdep_calls iseries_md;
-
-/* Ultimately, stuff them in an elf section like initcalls... */
-static struct machdep_calls __initdata *machines[] = {
-#ifdef CONFIG_PPC_PSERIES
-        &pSeries_md,
-#endif /* CONFIG_PPC_PSERIES */
-#ifdef CONFIG_PPC_PMAC
-        &pmac_md,
-#endif /* CONFIG_PPC_PMAC */
-#ifdef CONFIG_PPC_MAPLE
-        &maple_md,
-#endif /* CONFIG_PPC_MAPLE */
-#ifdef CONFIG_PPC_CELL
-        &cell_md,
-#endif
-#ifdef CONFIG_PPC_ISERIES
-        &iseries_md,
-#endif
-        NULL
-};
+/** Fix up paca fields required for the boot cpu */
+static void fixup_boot_paca(void)
+{
+        /* The boot cpu is started */
+        get_paca()->cpu_start = 1;
+        /* Allow percpu accesses to work until we setup percpu data */
+        get_paca()->data_offset = 0;
+}
+
+static void cpu_ready_for_interrupts(void)
+{
+        /* Set IR and DR in PACA MSR */
+        get_paca()->kernel_msr = MSR_KERNEL;
+
+        /* Enable AIL if supported */
+        if (cpu_has_feature(CPU_FTR_HVMODE) &&
+            cpu_has_feature(CPU_FTR_ARCH_207S)) {
+                unsigned long lpcr = mfspr(SPRN_LPCR);
+                mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
+        }
+}
 
 /*
  * Early initialization entry point. This is called by head.S
@@ -217,7 +215,7 @@ static struct machdep_calls __initdata *machines[] = {
  * the CPU that ignores the top 2 bits of the address in real
  * mode so we can access kernel globals normally provided we
  * only toy with things in the RMO region. From here, we do
- * some early parsing of the device-tree to setup out LMB
+ * some early parsing of the device-tree to setup out MEMBLOCK
  * data structures, and allocate & initialize the hash table
 * and segment tables so we can start running with translation
 * enabled.
@@ -232,75 +230,107 @@ static struct machdep_calls __initdata *machines[] = {
 
 void __init early_setup(unsigned long dt_ptr)
 {
-        struct paca_struct *lpaca = get_paca();
-        static struct machdep_calls **mach;
+        static __initdata struct paca_struct boot_paca;
 
-        /*
-         * Enable early debugging if any specified (see top of
-         * this file)
-         */
-        EARLY_DEBUG_INIT();
+        /* -------- printk is _NOT_ safe to use here ! ------- */
 
-        DBG(" -> early_setup()\n");
+        /* Identify CPU type */
+        identify_cpu(0, mfspr(SPRN_PVR));
 
-        /*
-         * Fill the default DBG level (do we want to keep
-         * that old mecanism around forever ?)
-         */
-        ppcdbg_initialize();
+        /* Assume we're on cpu 0 for now. Don't write to the paca yet! */
+        initialise_paca(&boot_paca, 0);
+        setup_paca(&boot_paca);
+        fixup_boot_paca();
+
+        /* Initialize lockdep early or else spinlocks will blow */
+        lockdep_init();
+
+        /* -------- printk is now safe to use ------- */
+
+        /* Enable early debugging if any specified (see udbg.h) */
+        udbg_early_init();
+
+        DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr);
 
         /*
-         * Do early initializations using the flattened device
-         * tree, like retreiving the physical memory map or
-         * calculating/retreiving the hash table size
+         * Do early initialization using the flattened device
+         * tree, such as retrieving the physical memory map or
+         * calculating/retrieving the hash table size.
         */
         early_init_devtree(__va(dt_ptr));
 
-        /*
-         * Iterate all ppc_md structures until we find the proper
-         * one for the current machine type
-         */
-        DBG("Probing machine type for platform %x...\n",
-            systemcfg->platform);
+        epapr_paravirt_early_init();
 
-        for (mach = machines; *mach; mach++) {
-                if ((*mach)->probe(systemcfg->platform))
-                        break;
-        }
-        /* What can we do if we didn't find ? */
-        if (*mach == NULL) {
-                DBG("No suitable machine found !\n");
-                for (;;);
-        }
-        ppc_md = **mach;
+        /* Now we know the logical id of our boot cpu, setup the paca. */
+        setup_paca(&paca[boot_cpuid]);
+        fixup_boot_paca();
+
+        /* Probe the machine type */
+        probe_machine();
+
+        setup_kdump_trampoline();
 
         DBG("Found, Initializing memory management...\n");
 
+        /* Initialize the hash table or TLB handling */
+        early_init_mmu();
+
         /*
-         * Initialize the MMU Hash table and create the linear mapping
-         * of memory. Has to be done before stab/slb initialization as
-         * this is currently where the page size encoding is obtained
+         * At this point, we can let interrupts switch to virtual mode
+         * (the MMU has been setup), so adjust the MSR in the PACA to
+         * have IR and DR set and enable AIL if it exists
         */
-        htab_initialize();
+        cpu_ready_for_interrupts();
+
+        /* Reserve large chunks of memory for use by CMA for KVM */
+        kvm_cma_reserve();
 
         /*
-         * Initialize stab / SLB management except on iSeries
+         * Reserve any gigantic pages requested on the command line.
+         * memblock needs to have been initialized by the time this is
+         * called since this will reserve memory.
         */
-        if (!firmware_has_feature(FW_FEATURE_ISERIES)) {
-                if (cpu_has_feature(CPU_FTR_SLB))
-                        slb_initialize();
-                else
-                        stab_initialize(lpaca->stab_real);
-        }
+        reserve_hugetlb_gpages();
 
         DBG(" <- early_setup()\n");
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
+        /*
+         * This needs to be done *last* (after the above DBG() even)
+         *
+         * Right after we return from this function, we turn on the MMU
+         * which means the real-mode access trick that btext does will
+         * no longer work, it needs to switch to using a real MMU
+         * mapping. This call will ensure that it does
+         */
+        btext_map();
+#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
+}
+
+#ifdef CONFIG_SMP
+void early_setup_secondary(void)
+{
+        /* Mark interrupts enabled in PACA */
+        get_paca()->soft_enabled = 0;
+
+        /* Initialize the hash table or TLB handling */
+        early_init_mmu_secondary();
+
+        /*
+         * At this point, we can let interrupts switch to virtual mode
+         * (the MMU has been setup), so adjust the MSR in the PACA to
+         * have IR and DR set.
+         */
+        cpu_ready_for_interrupts();
 }
+#endif /* CONFIG_SMP */
 
 #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
 void smp_release_cpus(void)
 {
-        extern unsigned long __secondary_hold_spinloop;
+        unsigned long *ptr;
+        int i;
 
         DBG(" -> smp_release_cpus()\n");
 
@@ -308,20 +338,29 @@ void smp_release_cpus(void)
         * all now so they can start to spin on their individual paca
         * spinloops. For non SMP kernels, the secondary cpus never get out
         * of the common spinloop.
-        * This is useless but harmless on iSeries, secondaries are already
-        * waiting on their paca spinloops. */
+        */
+
+        ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
+                        - PHYSICAL_START);
+        *ptr = ppc_function_entry(generic_secondary_smp_init);
 
-        __secondary_hold_spinloop = 1;
-        mb();
+        /* And wait a bit for them to catch up */
+        for (i = 0; i < 100000; i++) {
+                mb();
+                HMT_low();
+                if (spinning_secondaries == 0)
+                        break;
+                udelay(1);
+        }
+        DBG("spinning_secondaries = %d\n", spinning_secondaries);
 
         DBG(" <- smp_release_cpus()\n");
 }
-#else
-#define smp_release_cpus()
 #endif /* CONFIG_SMP || CONFIG_KEXEC */
 
 /*
- * Initialize some remaining members of the ppc64_caches and systemcfg structures
+ * Initialize some remaining members of the ppc64_caches and systemcfg
+ * structures
 * (at least until we get rid of them completely). This is mostly some
 * cache informations about the CPU that will be used by cache flush
 * routines and/or provided to userland
@@ -333,111 +372,67 @@ static void __init initialize_cache_info(void)
 
         DBG(" -> initialize_cache_info()\n");
 
-        for (np = NULL; (np = of_find_node_by_type(np, "cpu"));) {
+        for_each_node_by_type(np, "cpu") {
                 num_cpus += 1;
 
-                /* We're assuming *all* of the CPUs have the same
+                /*
+                 * We're assuming *all* of the CPUs have the same
                 * d-cache and i-cache sizes... -Peter
                 */
-
-                if ( num_cpus == 1 ) {
-                        u32 *sizep, *lsizep;
+                if (num_cpus == 1) {
+                        const __be32 *sizep, *lsizep;
                         u32 size, lsize;
-                        const char *dc, *ic;
-
-                        /* Then read cache informations */
-                        if (systemcfg->platform == PLATFORM_POWERMAC) {
-                                dc = "d-cache-block-size";
-                                ic = "i-cache-block-size";
-                        } else {
-                                dc = "d-cache-line-size";
-                                ic = "i-cache-line-size";
-                        }
 
                         size = 0;
                         lsize = cur_cpu_spec->dcache_bsize;
-                        sizep = (u32 *)get_property(np, "d-cache-size", NULL);
+                        sizep = of_get_property(np, "d-cache-size", NULL);
                         if (sizep != NULL)
-                                size = *sizep;
-                        lsizep = (u32 *) get_property(np, dc, NULL);
+                                size = be32_to_cpu(*sizep);
+                        lsizep = of_get_property(np, "d-cache-block-size",
+                                                 NULL);
+                        /* fallback if block size missing */
+                        if (lsizep == NULL)
+                                lsizep = of_get_property(np,
+                                                         "d-cache-line-size",
+                                                         NULL);
                         if (lsizep != NULL)
-                                lsize = *lsizep;
-                        if (sizep == 0 || lsizep == 0)
+                                lsize = be32_to_cpu(*lsizep);
+                        if (sizep == NULL || lsizep == NULL)
                                 DBG("Argh, can't find dcache properties ! "
                                     "sizep: %p, lsizep: %p\n", sizep, lsizep);
 
-                        systemcfg->dcache_size = ppc64_caches.dsize = size;
-                        systemcfg->dcache_line_size =
-                                ppc64_caches.dline_size = lsize;
+                        ppc64_caches.dsize = size;
+                        ppc64_caches.dline_size = lsize;
                         ppc64_caches.log_dline_size = __ilog2(lsize);
                         ppc64_caches.dlines_per_page = PAGE_SIZE / lsize;
 
                         size = 0;
                         lsize = cur_cpu_spec->icache_bsize;
-                        sizep = (u32 *)get_property(np, "i-cache-size", NULL);
+                        sizep = of_get_property(np, "i-cache-size", NULL);
                         if (sizep != NULL)
-                                size = *sizep;
-                        lsizep = (u32 *)get_property(np, ic, NULL);
+                                size = be32_to_cpu(*sizep);
+                        lsizep = of_get_property(np, "i-cache-block-size",
+                                                 NULL);
+                        if (lsizep == NULL)
+                                lsizep = of_get_property(np,
+                                                         "i-cache-line-size",
+                                                         NULL);
                        if (lsizep != NULL)
-                                lsize = *lsizep;
-                        if (sizep == 0 || lsizep == 0)
+                                lsize = be32_to_cpu(*lsizep);
+                        if (sizep == NULL || lsizep == NULL)
                                 DBG("Argh, can't find icache properties ! "
                                     "sizep: %p, lsizep: %p\n", sizep, lsizep);
 
-                        systemcfg->icache_size = ppc64_caches.isize = size;
-                        systemcfg->icache_line_size =
-                                ppc64_caches.iline_size = lsize;
+                        ppc64_caches.isize = size;
+                        ppc64_caches.iline_size = lsize;
                         ppc64_caches.log_iline_size = __ilog2(lsize);
                         ppc64_caches.ilines_per_page = PAGE_SIZE / lsize;
                 }
         }
 
-        /* Add an eye catcher and the systemcfg layout version number */
-        strcpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64");
-        systemcfg->version.major = SYSTEMCFG_MAJOR;
-        systemcfg->version.minor = SYSTEMCFG_MINOR;
-        systemcfg->processor = mfspr(SPRN_PVR);
-
         DBG(" <- initialize_cache_info()\n");
 }
 
-static void __init check_for_initrd(void)
-{
-#ifdef CONFIG_BLK_DEV_INITRD
-        u64 *prop;
-
-        DBG(" -> check_for_initrd()\n");
-
-        if (of_chosen) {
-                prop = (u64 *)get_property(of_chosen,
-                                "linux,initrd-start", NULL);
-                if (prop != NULL) {
-                        initrd_start = (unsigned long)__va(*prop);
-                        prop = (u64 *)get_property(of_chosen,
-                                        "linux,initrd-end", NULL);
-                        if (prop != NULL) {
-                                initrd_end = (unsigned long)__va(*prop);
-                                initrd_below_start_ok = 1;
-                        } else
-                                initrd_start = 0;
-                }
-        }
-
-        /* If we were passed an initrd, set the ROOT_DEV properly if the values
-         * look sensible. If not, clear initrd reference.
-         */
-        if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE &&
-            initrd_end > initrd_start)
-                ROOT_DEV = Root_RAM0;
-        else
-                initrd_start = initrd_end = 0;
-
-        if (initrd_start)
-                printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end);
-
-        DBG(" <- check_for_initrd()\n");
-#endif /* CONFIG_BLK_DEV_INITRD */
-}
 
 /*
 * Do some initial setup of the system. The parameters are those which
@@ -447,6 +442,19 @@ void __init setup_system(void)
 {
         DBG(" -> setup_system()\n");
 
+        /* Apply the CPUs-specific and firmware specific fixups to kernel
+         * text (nop out sections not relevant to this CPU or this firmware)
+         */
+        do_feature_fixups(cur_cpu_spec->cpu_features,
+                          &__start___ftr_fixup, &__stop___ftr_fixup);
+        do_feature_fixups(cur_cpu_spec->mmu_features,
+                          &__start___mmu_ftr_fixup, &__stop___mmu_ftr_fixup);
+        do_feature_fixups(powerpc_firmware_features,
+                          &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
+        do_lwsync_fixups(cur_cpu_spec->cpu_features,
+                         &__start___lwsync_fixup, &__stop___lwsync_fixup);
+        do_final_fixups();
+
         /*
         * Unflatten the device-tree passed by prom_init or kexec
         */
@@ -454,13 +462,7 @@ void __init setup_system(void)
 
         /*
         * Fill the ppc64_caches & systemcfg structures with informations
-         * retreived from the device-tree. Need to be called before
-         * finish_device_tree() since the later requires some of the
-         * informations filled up here to properly parse the interrupt
-         * tree.
-         * It also sets up the cache line sizes which allows to call
-         * routines like flush_icache_range (used by the hash init
-         * later on).
+         * retrieved from the device-tree.
         */
         initialize_cache_info();
 
@@ -481,101 +483,136 @@ void __init setup_system(void)
         * setting up the hash table pointers. It also sets up some interrupt-mapping
         * related options that will be used by finish_device_tree()
         */
-        ppc_md.init_early();
+        if (ppc_md.init_early)
+                ppc_md.init_early();
 
-        /*
-         * "Finish" the device-tree, that is do the actual parsing of
-         * some of the properties like the interrupt map
+        /*
+         * We can discover serial ports now since the above did setup the
+         * hash table management for us, thus ioremap works. We do that early
+         * so that further code can be debugged
         */
-        finish_device_tree();
+        find_legacy_serial_ports();
 
-#ifdef CONFIG_BOOTX_TEXT
-        init_boot_display();
-#endif
-
-        /*
-         * Initialize xmon
-         */
-#ifdef CONFIG_XMON_DEFAULT
-        xmon_init(1);
-#endif
         /*
         * Register early console
         */
         register_early_udbg_console();
 
-        /* Save unparsed command line copy for /proc/cmdline */
-        strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE);
-
-        parse_early_param();
+        /*
+         * Initialize xmon
+         */
+        xmon_setup();
 
-        check_smt_enabled();
         smp_setup_cpu_maps();
+        check_smt_enabled();
+        setup_tlb_core_data();
 
+#ifdef CONFIG_SMP
         /* Release secondary cpus out of their spinloops at 0x60 now that
         * we can map physical -> logical CPU ids
         */
         smp_release_cpus();
+#endif
 
-        printk("Starting Linux PPC64 %s\n", system_utsname.version);
+        printk("Starting Linux PPC64 %s\n", init_utsname()->version);
 
         printk("-----------------------------------------------------\n");
-        printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size);
-        printk("ppc64_debug_switch = 0x%lx\n", ppc64_debug_switch);
-        printk("ppc64_interrupt_controller = 0x%ld\n", ppc64_interrupt_controller);
-        printk("systemcfg = 0x%p\n", systemcfg);
-        printk("systemcfg->platform = 0x%x\n", systemcfg->platform);
-        printk("systemcfg->processorCount = 0x%lx\n", systemcfg->processorCount);
-        printk("systemcfg->physicalMemorySize = 0x%lx\n", systemcfg->physicalMemorySize);
-        printk("ppc64_caches.dcache_line_size = 0x%x\n",
-               ppc64_caches.dline_size);
-        printk("ppc64_caches.icache_line_size = 0x%x\n",
-               ppc64_caches.iline_size);
-        printk("htab_address = 0x%p\n", htab_address);
+        printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
+        printk("physicalMemorySize = 0x%llx\n", memblock_phys_mem_size());
+        if (ppc64_caches.dline_size != 0x80)
+                printk("ppc64_caches.dcache_line_size = 0x%x\n",
+                       ppc64_caches.dline_size);
+        if (ppc64_caches.iline_size != 0x80)
+                printk("ppc64_caches.icache_line_size = 0x%x\n",
+                       ppc64_caches.iline_size);
#ifdef CONFIG_PPC_STD_MMU_64
+        if (htab_address)
+                printk("htab_address = 0x%p\n", htab_address);
         printk("htab_hash_mask = 0x%lx\n", htab_hash_mask);
+#endif /* CONFIG_PPC_STD_MMU_64 */
+        if (PHYSICAL_START > 0)
+                printk("physical_start = 0x%llx\n",
+                       (unsigned long long)PHYSICAL_START);
         printk("-----------------------------------------------------\n");
 
-        mm_init_ppc64();
-
         DBG(" <- setup_system()\n");
 }
 
-static int ppc64_panic_event(struct notifier_block *this,
-                             unsigned long event, void *ptr)
+/* This returns the limit below which memory accesses to the linear
+ * mapping are guarnateed not to cause a TLB or SLB miss. This is
+ * used to allocate interrupt or emergency stacks for which our
+ * exception entry path doesn't deal with being interrupted.
+ */
+static u64 safe_stack_limit(void)
 {
-        ppc_md.panic((char *)ptr);  /* May not return */
-        return NOTIFY_DONE;
+#ifdef CONFIG_PPC_BOOK3E
+        /* Freescale BookE bolts the entire linear mapping */
+        if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+                return linear_map_top;
+        /* Other BookE, we assume the first GB is bolted */
+        return 1ul << 30;
+#else
+        /* BookS, the first segment is bolted */
+        if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+                return 1UL << SID_SHIFT_1T;
+        return 1UL << SID_SHIFT;
+#endif
 }
 
-#ifdef CONFIG_IRQSTACKS
 static void __init irqstack_early_init(void)
 {
+        u64 limit = safe_stack_limit();
         unsigned int i;
 
         /*
-         * interrupt stacks must be under 256MB, we cannot afford to take
-         * SLB misses on them.
+         * Interrupt stacks must be in the first segment since we
+         * cannot afford to take SLB misses on them.
         */
-        for_each_cpu(i) {
+        for_each_possible_cpu(i) {
                 softirq_ctx[i] = (struct thread_info *)
-                        __va(lmb_alloc_base(THREAD_SIZE,
-                                            THREAD_SIZE, 0x10000000));
+                        __va(memblock_alloc_base(THREAD_SIZE,
+                                                 THREAD_SIZE, limit));
                 hardirq_ctx[i] = (struct thread_info *)
-                        __va(lmb_alloc_base(THREAD_SIZE,
-                                            THREAD_SIZE, 0x10000000));
+                        __va(memblock_alloc_base(THREAD_SIZE,
+                                                 THREAD_SIZE, limit));
         }
 }
+
+#ifdef CONFIG_PPC_BOOK3E
+static void __init exc_lvl_early_init(void)
+{
+        unsigned int i;
+        unsigned long sp;
+
+        for_each_possible_cpu(i) {
+                sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
+                critirq_ctx[i] = (struct thread_info *)__va(sp);
+                paca[i].crit_kstack = __va(sp + THREAD_SIZE);
+
+                sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
+                dbgirq_ctx[i] = (struct thread_info *)__va(sp);
+                paca[i].dbg_kstack = __va(sp + THREAD_SIZE);
+
+                sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
+                mcheckirq_ctx[i] = (struct thread_info *)__va(sp);
+                paca[i].mc_kstack = __va(sp + THREAD_SIZE);
+        }
+
+        if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
+                patch_exception(0x040, exc_debug_debug_book3e);
+}
 #else
-#define irqstack_early_init()
+#define exc_lvl_early_init()
 #endif
 
 /*
 * Stack space used when we detect a bad kernel stack pointer, and
- * early in SMP boots before relocation is enabled.
+ * early in SMP boots before relocation is enabled. Exclusive emergency
+ * stack for machine checks.
 */
 static void __init emergency_stack_init(void)
 {
-        unsigned long limit;
+        u64 limit;
         unsigned int i;
 
         /*
@@ -587,49 +624,29 @@ static void __init emergency_stack_init(void)
         * bringup, we need to get at them in real mode. This means they
         * must also be within the RMO region.
         */
-        limit = min(0x10000000UL, lmb.rmo_size);
-
-        for_each_cpu(i)
-                paca[i].emergency_sp =
-                __va(lmb_alloc_base(HW_PAGE_SIZE, 128, limit)) + HW_PAGE_SIZE;
-}
-
-/*
- * Called from setup_arch to initialize the bitmap of available
- * syscalls in the systemcfg page
- */
-void __init setup_syscall_map(void)
-{
-        unsigned int i, count64 = 0, count32 = 0;
-        extern unsigned long *sys_call_table;
-        extern unsigned long sys_ni_syscall;
-
-
-        for (i = 0; i < __NR_syscalls; i++) {
-                if (sys_call_table[i*2] != sys_ni_syscall) {
-                        count64++;
-                        systemcfg->syscall_map_64[i >> 5] |=
-                                0x80000000UL >> (i & 0x1f);
-                }
-                if (sys_call_table[i*2+1] != sys_ni_syscall) {
-                        count32++;
-                        systemcfg->syscall_map_32[i >> 5] |=
-                                0x80000000UL >> (i & 0x1f);
-                }
+        limit = min(safe_stack_limit(), ppc64_rma_size);
+
+        for_each_possible_cpu(i) {
+                unsigned long sp;
+                sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
+                sp += THREAD_SIZE;
+                paca[i].emergency_sp = __va(sp);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+                /* emergency stack for machine check exception handling. */
+                sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
+                sp += THREAD_SIZE;
+                paca[i].mc_emergency_sp = __va(sp);
+#endif
         }
-        printk(KERN_INFO "Syscall map setup, %d 32-bit and %d 64-bit syscalls\n",
-               count32, count64);
 }
 
 /*
- * Called into from start_kernel, after lock_kernel has been called.
- * Initializes bootmem, which is unsed to manage page allocation until
- * mem_init is called.
+ * Called into from start_kernel this initializes bootmem, which is used
+ * to manage page allocation until mem_init is called.
 */
 void __init setup_arch(char **cmdline_p)
 {
-        extern void do_init_bootmem(void);
-
         ppc64_boot_msg(0x12, "Setup Arch");
 
         *cmdline_p = cmd_line;
@@ -642,42 +659,44 @@ void __init setup_arch(char **cmdline_p)
         dcache_bsize = ppc64_caches.dline_size;
         icache_bsize = ppc64_caches.iline_size;
 
-        /* reboot on panic */
-        panic_timeout = 180;
-        if (ppc_md.panic)
-                notifier_chain_register(&panic_notifier_list, &ppc64_panic_block);
+        setup_panic();
 
-        init_mm.start_code = PAGE_OFFSET;
+        init_mm.start_code = (unsigned long)_stext;
         init_mm.end_code = (unsigned long) _etext;
         init_mm.end_data = (unsigned long) _edata;
         init_mm.brk = klimit;
-
+#ifdef CONFIG_PPC_64K_PAGES
+        init_mm.context.pte_frag = NULL;
+#endif
         irqstack_early_init();
+        exc_lvl_early_init();
         emergency_stack_init();
 
+#ifdef CONFIG_PPC_STD_MMU_64
         stabs_alloc();
-
+#endif
         /* set up the bootmem stuff with available memory */
         do_init_bootmem();
         sparse_init();
 
-        /* initialize the syscall map in systemcfg */
-        setup_syscall_map();
-
 #ifdef CONFIG_DUMMY_CONSOLE
         conswitchp = &dummy_con;
 #endif
 
-        ppc_md.setup_arch();
-
-        /* Use the default idle loop if the platform hasn't provided one. */
-        if (NULL == ppc_md.idle_loop) {
-                ppc_md.idle_loop = default_idle;
-                printk(KERN_INFO "Using default idle loop\n");
-        }
+        if (ppc_md.setup_arch)
+                ppc_md.setup_arch();
 
         paging_init();
+
+        /* Initialize the MMU context management stuff */
+        mmu_context_init();
+
+        /* Interrupt code needs to be 64K-aligned */
+        if ((unsigned long)_stext & 0xffff)
+                panic("Kernelbase not 64K-aligned (0x%lx)!\n",
+                      (unsigned long)_stext);
+
         ppc64_boot_msg(0x15, "Setup Done");
 }
@@ -706,224 +725,73 @@ void ppc64_boot_msg(unsigned int src, const char *msg)
         printk("[boot]%04x %s\n", src, msg);
 }
 
-/* Print a termination message (print only -- does not stop the kernel) */
-void ppc64_terminate_msg(unsigned int src, const char *msg)
+#ifdef CONFIG_SMP
+#define PCPU_DYN_SIZE           ()
+
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
 {
-        ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_TERM_MESSAGE|src, msg);
-        printk("[terminate]%04x %s\n", src, msg);
+        return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align,
+                                    __pa(MAX_DMA_ADDRESS));
 }
 
-#ifndef CONFIG_PPC_ISERIES
-/*
- * This function can be used by platforms to "find" legacy serial ports.
- * It works for "serial" nodes under an "isa" node, and will try to
- * respect the "ibm,aix-loc" property if any. It works with up to 8
- * ports.
- */
-
-#define MAX_LEGACY_SERIAL_PORTS 8
-static struct plat_serial8250_port serial_ports[MAX_LEGACY_SERIAL_PORTS+1];
-static unsigned int old_serial_count;
-
-void __init generic_find_legacy_serial_ports(u64 *physport,
-                unsigned int *default_speed)
+static void __init pcpu_fc_free(void *ptr, size_t size)
 {
-        struct device_node *np;
-        u32 *sizeprop;
-
-        struct isa_reg_property {
-                u32 space;
-                u32 address;
-                u32 size;
-        };
-        struct pci_reg_property {
-                struct pci_address addr;
-                u32 size_hi;
-                u32 size_lo;
-        };
-
-        DBG(" -> generic_find_legacy_serial_port()\n");
-
-        *physport = 0;
-        if (default_speed)
-                *default_speed = 0;
-
-        np = of_find_node_by_path("/");
-        if (!np)
-                return;
-
-        /* First fill our array */
-        for (np = NULL; (np = of_find_node_by_type(np, "serial"));) {
-                struct device_node *isa, *pci;
-                struct isa_reg_property *reg;
-                unsigned long phys_size, addr_size, io_base;
-                u32 *rangesp;
-                u32 *interrupts, *clk, *spd;
-                char *typep;
-                int index, rlen, rentsize;
-
-                /* Ok, first check if it's under an "isa" parent */
-                isa = of_get_parent(np);
-                if (!isa || strcmp(isa->name, "isa")) {
-                        DBG("%s: no isa parent found\n", np->full_name);
-                        continue;
-                }
-
-                /* Now look for an "ibm,aix-loc" property that gives us ordering
-                 * if any...
-                 */
-                typep = (char *)get_property(np, "ibm,aix-loc", NULL);
-
-                /* Get the ISA port number */
-                reg = (struct isa_reg_property *)get_property(np, "reg", NULL);
-                if (reg == NULL)
-                        goto next_port;
-                /* We assume the interrupt number isn't translated ... */
-                interrupts = (u32 *)get_property(np, "interrupts", NULL);
-                /* get clock freq. if present */
-                clk = (u32 *)get_property(np, "clock-frequency", NULL);
-                /* get default speed if present */
-                spd = (u32 *)get_property(np, "current-speed", NULL);
-                /* Default to locate at end of array */
-                index = old_serial_count; /* end of the array by default */
-
-                /* If we have a location index, then use it */
-                if (typep && *typep == 'S') {
-                        index = simple_strtol(typep+1, NULL, 0) - 1;
-                        /* if index is out of range, use end of array instead */
-                        if (index >= MAX_LEGACY_SERIAL_PORTS)
-                                index = old_serial_count;
-                        /* if our index is still out of range, that mean that
-                         * array is full, we could scan for a free slot but that
-                         * make little sense to bother, just skip the port
-                         */
-                        if (index >= MAX_LEGACY_SERIAL_PORTS)
-                                goto next_port;
-                        if (index >= old_serial_count)
-                                old_serial_count = index + 1;
-                        /* Check if there is a port who already claimed our slot */
-                        if (serial_ports[index].iobase != 0) {
-                                /* if we still have some room, move it, else override */
-                                if (old_serial_count < MAX_LEGACY_SERIAL_PORTS) {
-                                        DBG("Moved legacy port %d -> %d\n", index,
-                                            old_serial_count);
-                                        serial_ports[old_serial_count++] =
-                                                serial_ports[index];
-                                } else {
-                                        DBG("Replacing legacy port %d\n", index);
-                                }
-                        }
-                }
-                if (index >= MAX_LEGACY_SERIAL_PORTS)
-                        goto next_port;
-                if (index >= old_serial_count)
-                        old_serial_count = index + 1;
-
-                /* Now fill the entry */
-                memset(&serial_ports[index], 0, sizeof(struct plat_serial8250_port));
-                serial_ports[index].uartclk = clk ? *clk : BASE_BAUD * 16;
-                serial_ports[index].iobase = reg->address;
-                serial_ports[index].irq = interrupts ? interrupts[0] : 0;
-                serial_ports[index].flags = ASYNC_BOOT_AUTOCONF;
-
-                DBG("Added legacy port, index: %d, port: %x, irq: %d, clk: %d\n",
-                    index,
-                    serial_ports[index].iobase,
-                    serial_ports[index].irq,
-                    serial_ports[index].uartclk);
-
-                /* Get phys address of IO reg for port 1 */
-                if (index != 0)
-                        goto next_port;
-
-                pci = of_get_parent(isa);
-                if (!pci) {
-                        DBG("%s: no pci parent found\n", np->full_name);
-                        goto next_port;
-                }
-
-                rangesp = (u32 *)get_property(pci, "ranges", &rlen);
-                if (rangesp == NULL) {
-                        of_node_put(pci);
-                        goto next_port;
-                }
-                rlen /= 4;
-
-                /* we need the #size-cells of the PCI bridge node itself */
-                phys_size = 1;
-                sizeprop = (u32 *)get_property(pci, "#size-cells", NULL);
-                if (sizeprop != NULL)
-                        phys_size = *sizeprop;
-                /* we need the parent #addr-cells */
-                addr_size = prom_n_addr_cells(pci);
-                rentsize = 3 + addr_size + phys_size;
-                io_base = 0;
-                for (;rlen >= rentsize; rlen -= rentsize,rangesp += rentsize) {
-                        if (((rangesp[0] >> 24) & 0x3) != 1)
-                                continue; /* not IO space */
-                        io_base = rangesp[3];
-                        if (addr_size == 2)
-                                io_base = (io_base << 32) | rangesp[4];
-                }
-                if (io_base != 0) {
-                        *physport = io_base + reg->address;
-                        if (default_speed && spd)
-                                *default_speed = *spd;
-                }
-                of_node_put(pci);
-        next_port:
-                of_node_put(isa);
-        }
-
-        DBG(" <- generic_find_legacy_serial_port()\n");
+        free_bootmem(__pa(ptr), size);
 }
 
-static struct platform_device serial_device = {
-        .name   = "serial8250",
-        .id     = PLAT8250_DEV_PLATFORM,
-        .dev    = {
-                .platform_data = serial_ports,
-        },
-};
-
-static int __init serial_dev_init(void)
+static int pcpu_cpu_distance(unsigned int from, unsigned int to)
 {
-        return platform_device_register(&serial_device);
+        if (cpu_to_node(from) == cpu_to_node(to))
+                return LOCAL_DISTANCE;
+        else
+                return REMOTE_DISTANCE;
 }
-arch_initcall(serial_dev_init);
-
-#endif /* CONFIG_PPC_ISERIES */
+
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
 
-int check_legacy_ioport(unsigned long base_port)
+void __init setup_per_cpu_areas(void)
 {
-        if (ppc_md.check_legacy_ioport == NULL)
-                return 0;
-        return ppc_md.check_legacy_ioport(base_port);
-}
-EXPORT_SYMBOL(check_legacy_ioport);
+        const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
+        size_t atom_size;
+        unsigned long delta;
+        unsigned int cpu;
+        int rc;
 
-#ifdef CONFIG_XMON
-static int __init early_xmon(char *p)
-{
-        /* ensure xmon is enabled */
-        if (p) {
-                if (strncmp(p, "on", 2) == 0)
-                        xmon_init(1);
-                if (strncmp(p, "off", 3) == 0)
-                        xmon_init(0);
-                if (strncmp(p, "early", 5) != 0)
-                        return 0;
-        }
-        xmon_init(1);
-        debugger(NULL);
+        /*
+         * Linear mapping is one of 4K, 1M and 16M.  For 4K, no need
+         * to group units.  For larger mappings, use 1M atom which
+         * should be large enough to contain a number of units.
+         */
+        if (mmu_linear_psize == MMU_PAGE_4K)
+                atom_size = PAGE_SIZE;
+        else
+                atom_size = 1 << 20;
 
-        return 0;
+        rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
+                                    pcpu_fc_alloc, pcpu_fc_free);
+        if (rc < 0)
+                panic("cannot initialize percpu area (err=%d)", rc);
+
+        delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+        for_each_possible_cpu(cpu) {
+                __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
+                paca[cpu].data_offset = __per_cpu_offset[cpu];
+        }
 }
-early_param("xmon", early_xmon);
 #endif
 
-void cpu_die(void)
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+unsigned long memory_block_size_bytes(void)
 {
-        if (ppc_md.cpu_die)
-                ppc_md.cpu_die();
+        if (ppc_md.memory_block_size)
+                return ppc_md.memory_block_size();
+
+        return MIN_MEMORY_BLOCK_SIZE;
 }
+#endif
+
+#if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO)
+struct ppc_pci_io ppc_pci_io;
+EXPORT_SYMBOL(ppc_pci_io);
+#endif
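A note on the smt-enabled= change above: the new code merely records the option string in early_smt_enabled() and defers parsing to check_smt_enabled(), once threads_per_core is known from the device tree; "on" selects all threads, "off" selects none, and a number is clamped with min(). The following standalone C sketch models that clamping logic for illustration only -- the helper name smt_threads_at_boot and the test harness are hypothetical, not kernel code, and unparseable input simply keeps the default here:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-in for the kernel's deferred parse:
 * "on" -> all threads, "off" -> 0, a number -> min(n, threads_per_core). */
static int smt_threads_at_boot(const char *arg, int threads_per_core)
{
        char *end;
        long smt;

        if (!arg || !strcmp(arg, "on"))
                return threads_per_core;
        if (!strcmp(arg, "off"))
                return 0;

        smt = strtol(arg, &end, 10);
        if (end == arg || *end != '\0' || smt < 0)
                return threads_per_core; /* keep the default on bad input */
        return smt < threads_per_core ? (int)smt : threads_per_core;
}

int main(void)
{
        const char *cases[] = { "on", "off", "2", "16", "bogus" };
        unsigned int i;

        for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
                printf("smt-enabled=%s -> %d threads\n", cases[i],
                       smt_threads_at_boot(cases[i], 8));
        return 0;
}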

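The new setup_per_cpu_areas() in the diff computes each CPU's per-cpu offset as the delta between the allocated first chunk and the linker-provided __per_cpu_start template, plus that CPU's unit offset, and stashes it in the paca. The toy program below models only that address arithmetic under simplified assumptions (one fixed-size unit per CPU, no NUMA grouping); all names in it are illustrative, not the kernel's:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS   4
#define UNIT_SIZE 4096                  /* toy per-cpu unit size */

/* Stand-in for the linker-provided .data..percpu template section. */
static char percpu_template[UNIT_SIZE];
static uintptr_t per_cpu_offset[NR_CPUS];

/* A per-cpu pointer is the template address plus this CPU's offset. */
static void *per_cpu_ptr(void *tmpl_addr, int cpu)
{
        return (char *)tmpl_addr + per_cpu_offset[cpu];
}

int main(void)
{
        /* Stand-in for pcpu_embed_first_chunk(): one unit per CPU,
         * carved out of a single allocation. */
        char *base = malloc(NR_CPUS * UNIT_SIZE);
        uintptr_t delta = (uintptr_t)base - (uintptr_t)percpu_template;
        int *counter = (int *)percpu_template; /* "variable" at offset 0 */
        int cpu;

        /* Mirrors: __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; */
        for (cpu = 0; cpu < NR_CPUS; cpu++)
                per_cpu_offset[cpu] = delta + (uintptr_t)cpu * UNIT_SIZE;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                *(int *)per_cpu_ptr(counter, cpu) = cpu * 10;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                printf("cpu%d counter = %d\n", cpu,
                       *(int *)per_cpu_ptr(counter, cpu));

        free(base);
        return 0;
}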