diff options
Diffstat (limited to 'arch/powerpc/platforms/pseries/setup.c')
| -rw-r--r-- | arch/powerpc/platforms/pseries/setup.c | 456 |
1 files changed, 320 insertions, 136 deletions
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index ca5f2e10972..f2f40e64658 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -23,7 +23,6 @@ #include <linux/mm.h> #include <linux/stddef.h> #include <linux/unistd.h> -#include <linux/slab.h> #include <linux/user.h> #include <linux/tty.h> #include <linux/major.h> @@ -35,11 +34,13 @@ #include <linux/pci.h> #include <linux/utsname.h> #include <linux/adb.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/delay.h> #include <linux/irq.h> #include <linux/seq_file.h> #include <linux/root_dev.h> +#include <linux/of.h> +#include <linux/kexec.h> #include <asm/mmu.h> #include <asm/processor.h> @@ -54,30 +55,27 @@ #include <asm/irq.h> #include <asm/time.h> #include <asm/nvram.h> -#include "xics.h" #include <asm/pmc.h> #include <asm/mpic.h> +#include <asm/xics.h> #include <asm/ppc-pci.h> #include <asm/i8259.h> #include <asm/udbg.h> #include <asm/smp.h> #include <asm/firmware.h> #include <asm/eeh.h> -#include <asm/pSeries_reconfig.h> +#include <asm/reg.h> +#include <asm/plpar_wrappers.h> -#include "plpar_wrappers.h" #include "pseries.h" int CMO_PrPSP = -1; int CMO_SecPSP = -1; -unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT); +unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K); EXPORT_SYMBOL(CMO_PageSize); int fwnmi_active; /* TRUE if an FWNMI handler is present */ -static void pseries_shared_idle_sleep(void); -static void pseries_dedicated_idle_sleep(void); - static struct device_node *pSeries_mpic_node; static void pSeries_show_cpuinfo(struct seq_file *m) @@ -115,10 +113,13 @@ static void __init fwnmi_init(void) static void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc) { + struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); + if (cascade_irq != NO_IRQ) generic_handle_irq(cascade_irq); - desc->chip->eoi(irq); + + chip->irq_eoi(&desc->irq_data); } static void __init pseries_setup_i8259_cascade(void) @@ -167,7 +168,7 @@ static void __init pseries_setup_i8259_cascade(void) printk(KERN_DEBUG "pic: PCI 8259 intack at 0x%016lx\n", intack); i8259_init(found, intack); of_node_put(found); - set_irq_chained_handler(cascade, pseries_8259_cascade); + irq_set_chained_handler(cascade, pseries_8259_cascade); } static void __init pseries_mpic_init_IRQ(void) @@ -181,7 +182,7 @@ static void __init pseries_mpic_init_IRQ(void) np = of_find_node_by_path("/"); naddr = of_n_addr_cells(np); opprop = of_get_property(np, "platform-open-pic", &opplen); - if (opprop != 0) { + if (opprop != NULL) { openpic_addr = of_read_number(opprop, naddr); printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr); } @@ -191,9 +192,7 @@ static void __init pseries_mpic_init_IRQ(void) /* Setup the openpic driver */ mpic = mpic_alloc(pSeries_mpic_node, openpic_addr, - MPIC_PRIMARY, - 16, 250, /* isu size, irq count */ - " MPIC "); + MPIC_NO_RESET, 16, 0, " MPIC "); BUG_ON(mpic == NULL); /* Add ISUs */ @@ -203,6 +202,9 @@ static void __init pseries_mpic_init_IRQ(void) mpic_assign_isu(mpic, n, isuaddr); } + /* Setup top-level get_irq */ + ppc_md.get_irq = mpic_get_irq; + /* All ISUs are setup, complete initialization */ mpic_init(mpic); @@ -212,7 +214,7 @@ static void __init pseries_mpic_init_IRQ(void) static void __init pseries_xics_init_IRQ(void) { - xics_init_IRQ(); + xics_init(); pseries_setup_i8259_cascade(); } @@ -236,7 +238,6 @@ static void __init pseries_discover_pic(void) if (strstr(typep, "open-pic")) { pSeries_mpic_node = of_node_get(np); ppc_md.init_IRQ = pseries_mpic_init_IRQ; - ppc_md.get_irq = mpic_get_irq; setup_kexec_cpu_down_mpic(); smp_init_pseries_mpic(); return; @@ -258,10 +259,14 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act int err = NOTIFY_OK; switch (action) { - case PSERIES_RECONFIG_ADD: + case OF_RECONFIG_ATTACH_NODE: pci = np->parent->data; - if (pci) + if (pci) { update_dn_pci_info(np, pci->phb); + + /* Create EEH device for the OF node */ + eeh_dev_init(np, pci->phb); + } break; default: err = NOTIFY_DONE; @@ -274,8 +279,192 @@ static struct notifier_block pci_dn_reconfig_nb = { .notifier_call = pci_dn_reconfig_notifier, }; +struct kmem_cache *dtl_cache; + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +/* + * Allocate space for the dispatch trace log for all possible cpus + * and register the buffers with the hypervisor. This is used for + * computing time stolen by the hypervisor. + */ +static int alloc_dispatch_logs(void) +{ + int cpu, ret; + struct paca_struct *pp; + struct dtl_entry *dtl; + + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return 0; + + if (!dtl_cache) + return 0; + + for_each_possible_cpu(cpu) { + pp = &paca[cpu]; + dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL); + if (!dtl) { + pr_warn("Failed to allocate dispatch trace log for cpu %d\n", + cpu); + pr_warn("Stolen time statistics will be unreliable\n"); + break; + } + + pp->dtl_ridx = 0; + pp->dispatch_log = dtl; + pp->dispatch_log_end = dtl + N_DISPATCH_LOG; + pp->dtl_curr = dtl; + } + + /* Register the DTL for the current (boot) cpu */ + dtl = get_paca()->dispatch_log; + get_paca()->dtl_ridx = 0; + get_paca()->dtl_curr = dtl; + get_paca()->lppaca_ptr->dtl_idx = 0; + + /* hypervisor reads buffer length from this field */ + dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES); + ret = register_dtl(hard_smp_processor_id(), __pa(dtl)); + if (ret) + pr_err("WARNING: DTL registration of cpu %d (hw %d) failed " + "with %d\n", smp_processor_id(), + hard_smp_processor_id(), ret); + get_paca()->lppaca_ptr->dtl_enable_mask = 2; + + return 0; +} +#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ +static inline int alloc_dispatch_logs(void) +{ + return 0; +} +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +static int alloc_dispatch_log_kmem_cache(void) +{ + dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES, + DISPATCH_LOG_BYTES, 0, NULL); + if (!dtl_cache) { + pr_warn("Failed to create dispatch trace log buffer cache\n"); + pr_warn("Stolen time statistics will be unreliable\n"); + return 0; + } + + return alloc_dispatch_logs(); +} +early_initcall(alloc_dispatch_log_kmem_cache); + +static void pseries_lpar_idle(void) +{ + /* + * Default handler to go into low thread priority and possibly + * low power mode by cedeing processor to hypervisor + */ + + /* Indicate to hypervisor that we are idle. */ + get_lppaca()->idle = 1; + + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. When returning here, external interrupts + * are enabled. + */ + cede_processor(); + + get_lppaca()->idle = 0; +} + +/* + * Enable relocation on during exceptions. This has partition wide scope and + * may take a while to complete, if it takes longer than one second we will + * just give up rather than wasting any more time on this - if that turns out + * to ever be a problem in practice we can move this into a kernel thread to + * finish off the process later in boot. + */ +long pSeries_enable_reloc_on_exc(void) +{ + long rc; + unsigned int delay, total_delay = 0; + + while (1) { + rc = enable_reloc_on_exceptions(); + if (!H_IS_LONG_BUSY(rc)) + return rc; + + delay = get_longbusy_msecs(rc); + total_delay += delay; + if (total_delay > 1000) { + pr_warn("Warning: Giving up waiting to enable " + "relocation on exceptions (%u msec)!\n", + total_delay); + return rc; + } + + mdelay(delay); + } +} +EXPORT_SYMBOL(pSeries_enable_reloc_on_exc); + +long pSeries_disable_reloc_on_exc(void) +{ + long rc; + + while (1) { + rc = disable_reloc_on_exceptions(); + if (!H_IS_LONG_BUSY(rc)) + return rc; + mdelay(get_longbusy_msecs(rc)); + } +} +EXPORT_SYMBOL(pSeries_disable_reloc_on_exc); + +#ifdef CONFIG_KEXEC +static void pSeries_machine_kexec(struct kimage *image) +{ + long rc; + + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { + rc = pSeries_disable_reloc_on_exc(); + if (rc != H_SUCCESS) + pr_warning("Warning: Failed to disable relocation on " + "exceptions: %ld\n", rc); + } + + default_machine_kexec(image); +} +#endif + +#ifdef __LITTLE_ENDIAN__ +long pseries_big_endian_exceptions(void) +{ + long rc; + + while (1) { + rc = enable_big_endian_exceptions(); + if (!H_IS_LONG_BUSY(rc)) + return rc; + mdelay(get_longbusy_msecs(rc)); + } +} + +static long pseries_little_endian_exceptions(void) +{ + long rc; + + while (1) { + rc = enable_little_endian_exceptions(); + if (!H_IS_LONG_BUSY(rc)) + return rc; + mdelay(get_longbusy_msecs(rc)); + } +} +#endif + static void __init pSeries_setup_arch(void) { + set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); + /* Discover PIC type and setup ppc_md accordingly */ pseries_discover_pic(); @@ -288,59 +477,87 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); + /* By default, only probe PCI (can be overriden by rtas_pci) */ + pci_add_flags(PCI_PROBE_ONLY); + /* Find and initialize PCI host bridges */ init_pci_config_tokens(); find_and_init_phbs(); - pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); - eeh_init(); + of_reconfig_notifier_register(&pci_dn_reconfig_nb); pSeries_nvram_init(); - /* Choose an idle loop */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + if (firmware_has_feature(FW_FEATURE_LPAR)) { vpa_init(boot_cpuid); - if (get_lppaca()->shared_proc) { - printk(KERN_DEBUG "Using shared processor idle loop\n"); - ppc_md.power_save = pseries_shared_idle_sleep; - } else { - printk(KERN_DEBUG "Using dedicated idle loop\n"); - ppc_md.power_save = pseries_dedicated_idle_sleep; - } + ppc_md.power_save = pseries_lpar_idle; + ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; } else { - printk(KERN_DEBUG "Using default idle loop\n"); + /* No special idle routine */ + ppc_md.enable_pmcs = power4_enable_pmcs; } - if (firmware_has_feature(FW_FEATURE_LPAR)) - ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; - else - ppc_md.enable_pmcs = power4_enable_pmcs; + ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare; + + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { + long rc; + if ((rc = pSeries_enable_reloc_on_exc()) != H_SUCCESS) { + pr_warn("Unable to enable relocation on exceptions: " + "%ld\n", rc); + } + } } static int __init pSeries_init_panel(void) { /* Manually leave the kernel version on the panel. */ +#ifdef __BIG_ENDIAN__ ppc_md.progress("Linux ppc64\n", 0); +#else + ppc_md.progress("Linux ppc64le\n", 0); +#endif ppc_md.progress(init_utsname()->version, 0); return 0; } -arch_initcall(pSeries_init_panel); +machine_arch_initcall(pseries, pSeries_init_panel); -static int pseries_set_dabr(unsigned long dabr) +static int pseries_set_dabr(unsigned long dabr, unsigned long dabrx) { return plpar_hcall_norets(H_SET_DABR, dabr); } -static int pseries_set_xdabr(unsigned long dabr) +static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx) { - /* We want to catch accesses from kernel and userspace */ - return plpar_hcall_norets(H_SET_XDABR, dabr, - H_DABRX_KERNEL | H_DABRX_USER); + /* Have to set at least one bit in the DABRX according to PAPR */ + if (dabrx == 0 && dabr == 0) + dabrx = DABRX_USER; + /* PAPR says we can only set kernel and user bits */ + dabrx &= DABRX_KERNEL | DABRX_USER; + + return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx); +} + +static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx) +{ + /* PAPR says we can't set HYP */ + dawrx &= ~DAWRX_HYP; + + return plapr_set_watchpoint0(dawr, dawrx); } #define CMO_CHARACTERISTICS_TOKEN 44 #define CMO_MAXLENGTH 1026 +void pSeries_coalesce_init(void) +{ + struct hvcall_mpp_x_data mpp_x_data; + + if (firmware_has_feature(FW_FEATURE_CMO) && !h_get_mpp_x(&mpp_x_data)) + powerpc_firmware_features |= FW_FEATURE_XCMO; + else + powerpc_firmware_features &= ~FW_FEATURE_XCMO; +} + /** * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions, * handle that here. (Stolen from parse_system_parameter_string) @@ -349,7 +566,7 @@ void pSeries_cmo_feature_init(void) { char *ptr, *key, *value, *end; int call_status; - int page_order = IOMMU_PAGE_SHIFT; + int page_order = IOMMU_PAGE_SHIFT_4K; pr_debug(" -> fw_cmo_feature_init()\n"); spin_lock(&rtas_data_buf_lock); @@ -410,6 +627,7 @@ void pSeries_cmo_feature_init(void) pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP, CMO_SecPSP); powerpc_firmware_features |= FW_FEATURE_CMO; + pSeries_coalesce_init(); } else pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP, CMO_SecPSP); @@ -424,13 +642,17 @@ static void __init pSeries_init_early(void) { pr_debug(" -> pSeries_init_early()\n"); +#ifdef CONFIG_HVC_CONSOLE if (firmware_has_feature(FW_FEATURE_LPAR)) - find_udbg_vterm(); - - if (firmware_has_feature(FW_FEATURE_DABR)) - ppc_md.set_dabr = pseries_set_dabr; - else if (firmware_has_feature(FW_FEATURE_XDABR)) + hvc_vio_init_early(); +#endif + if (firmware_has_feature(FW_FEATURE_XDABR)) ppc_md.set_dabr = pseries_set_xdabr; + else if (firmware_has_feature(FW_FEATURE_DABR)) + ppc_md.set_dabr = pseries_set_dabr; + + if (firmware_has_feature(FW_FEATURE_SET_MODE)) + ppc_md.set_dawr = pseries_set_dawr; pSeries_cmo_feature_init(); iommu_init_early_pSeries(); @@ -442,31 +664,45 @@ static void __init pSeries_init_early(void) * Called very early, MMU is off, device-tree isn't unflattened */ -static int __init pSeries_probe_hypertas(unsigned long node, - const char *uname, int depth, - void *data) +static int __init pseries_probe_fw_features(unsigned long node, + const char *uname, int depth, + void *data) { - const char *hypertas; - unsigned long len; + const char *prop; + int len; + static int hypertas_found; + static int vec5_found; - if (depth != 1 || - (strcmp(uname, "rtas") != 0 && strcmp(uname, "rtas@0") != 0)) + if (depth != 1) return 0; - hypertas = of_get_flat_dt_prop(node, "ibm,hypertas-functions", &len); - if (!hypertas) - return 1; + if (!strcmp(uname, "rtas") || !strcmp(uname, "rtas@0")) { + prop = of_get_flat_dt_prop(node, "ibm,hypertas-functions", + &len); + if (prop) { + powerpc_firmware_features |= FW_FEATURE_LPAR; + fw_hypertas_feature_init(prop, len); + } - powerpc_firmware_features |= FW_FEATURE_LPAR; - fw_feature_init(hypertas, len); + hypertas_found = 1; + } - return 1; + if (!strcmp(uname, "chosen")) { + prop = of_get_flat_dt_prop(node, "ibm,architecture-vec-5", + &len); + if (prop) + fw_vec5_feature_init(prop, len); + + vec5_found = 1; + } + + return hypertas_found && vec5_found; } static int __init pSeries_probe(void) { unsigned long root = of_get_flat_dt_root(); - char *dtype = of_get_flat_dt_prop(root, "device_type", NULL); + const char *dtype = of_get_flat_dt_prop(root, "device_type", NULL); if (dtype == NULL) return 0; @@ -483,7 +719,23 @@ static int __init pSeries_probe(void) pr_debug("pSeries detected, looking for LPAR capability...\n"); /* Now try to figure out if we are running on LPAR */ - of_scan_flat_dt(pSeries_probe_hypertas, NULL); + of_scan_flat_dt(pseries_probe_fw_features, NULL); + +#ifdef __LITTLE_ENDIAN__ + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { + long rc; + /* + * Tell the hypervisor that we want our exceptions to + * be taken in little endian mode. If this fails we don't + * want to use BUG() because it will trigger an exception. + */ + rc = pseries_little_endian_exceptions(); + if (rc) { + ppc_md.progress("H_SET_MODE LE exception fail", 0); + panic("Could not enable little endian exceptions"); + } + } +#endif if (firmware_has_feature(FW_FEATURE_LPAR)) hpte_init_lpar(); @@ -496,80 +748,6 @@ static int __init pSeries_probe(void) return 1; } - -DECLARE_PER_CPU(unsigned long, smt_snooze_delay); - -static void pseries_dedicated_idle_sleep(void) -{ - unsigned int cpu = smp_processor_id(); - unsigned long start_snooze; - unsigned long in_purr, out_purr; - - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - get_lppaca()->idle = 1; - get_lppaca()->donate_dedicated_cpu = 1; - in_purr = mfspr(SPRN_PURR); - - /* - * We come in with interrupts disabled, and need_resched() - * has been checked recently. If we should poll for a little - * while, do so. - */ - if (__get_cpu_var(smt_snooze_delay)) { - start_snooze = get_tb() + - __get_cpu_var(smt_snooze_delay) * tb_ticks_per_usec; - local_irq_enable(); - set_thread_flag(TIF_POLLING_NRFLAG); - - while (get_tb() < start_snooze) { - if (need_resched() || cpu_is_offline(cpu)) - goto out; - ppc64_runlatch_off(); - HMT_low(); - HMT_very_low(); - } - - HMT_medium(); - clear_thread_flag(TIF_POLLING_NRFLAG); - smp_mb(); - local_irq_disable(); - if (need_resched() || cpu_is_offline(cpu)) - goto out; - } - - cede_processor(); - -out: - HMT_medium(); - out_purr = mfspr(SPRN_PURR); - get_lppaca()->wait_state_cycles += out_purr - in_purr; - get_lppaca()->donate_dedicated_cpu = 0; - get_lppaca()->idle = 0; -} - -static void pseries_shared_idle_sleep(void) -{ - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - get_lppaca()->idle = 1; - - /* - * Yield the processor to the hypervisor. We return if - * an external interrupt occurs (which are driven prior - * to returning here) or if a prod occurs from another - * processor. When returning here, external interrupts - * are enabled. - */ - cede_processor(); - - get_lppaca()->idle = 0; -} - static int pSeries_pci_probe_mode(struct pci_bus *bus) { if (firmware_has_feature(FW_FEATURE_LPAR)) @@ -629,4 +807,10 @@ define_machine(pseries) { .progress = rtas_progress, .system_reset_exception = pSeries_system_reset_exception, .machine_check_exception = pSeries_machine_check_exception, +#ifdef CONFIG_KEXEC + .machine_kexec = pSeries_machine_kexec, +#endif +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE + .memory_block_size = pseries_memory_block_size, +#endif }; |
