diff options
Diffstat (limited to 'arch/powerpc/platforms/pseries/setup.c')
| -rw-r--r-- | arch/powerpc/platforms/pseries/setup.c | 419 | 
1 files changed, 276 insertions, 143 deletions
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index d345bfd56bb..f2f40e64658 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -34,11 +34,13 @@  #include <linux/pci.h>  #include <linux/utsname.h>  #include <linux/adb.h> -#include <linux/module.h> +#include <linux/export.h>  #include <linux/delay.h>  #include <linux/irq.h>  #include <linux/seq_file.h>  #include <linux/root_dev.h> +#include <linux/of.h> +#include <linux/kexec.h>  #include <asm/mmu.h>  #include <asm/processor.h> @@ -53,30 +55,27 @@  #include <asm/irq.h>  #include <asm/time.h>  #include <asm/nvram.h> -#include "xics.h"  #include <asm/pmc.h>  #include <asm/mpic.h> +#include <asm/xics.h>  #include <asm/ppc-pci.h>  #include <asm/i8259.h>  #include <asm/udbg.h>  #include <asm/smp.h>  #include <asm/firmware.h>  #include <asm/eeh.h> -#include <asm/pSeries_reconfig.h> +#include <asm/reg.h> +#include <asm/plpar_wrappers.h> -#include "plpar_wrappers.h"  #include "pseries.h"  int CMO_PrPSP = -1;  int CMO_SecPSP = -1; -unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT); +unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);  EXPORT_SYMBOL(CMO_PageSize);  int fwnmi_active;  /* TRUE if an FWNMI handler is present */ -static void pseries_shared_idle_sleep(void); -static void pseries_dedicated_idle_sleep(void); -  static struct device_node *pSeries_mpic_node;  static void pSeries_show_cpuinfo(struct seq_file *m) @@ -114,10 +113,13 @@ static void __init fwnmi_init(void)  static void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc)  { +	struct irq_chip *chip = irq_desc_get_chip(desc);  	unsigned int cascade_irq = i8259_irq(); +  	if (cascade_irq != NO_IRQ)  		generic_handle_irq(cascade_irq); -	desc->chip->eoi(irq); + +	chip->irq_eoi(&desc->irq_data);  }  static void __init pseries_setup_i8259_cascade(void) @@ -166,7 +168,7 @@ static void __init pseries_setup_i8259_cascade(void)  		printk(KERN_DEBUG "pic: PCI 8259 intack at 0x%016lx\n", intack);  	i8259_init(found, intack);  	of_node_put(found); -	set_irq_chained_handler(cascade, pseries_8259_cascade); +	irq_set_chained_handler(cascade, pseries_8259_cascade);  }  static void __init pseries_mpic_init_IRQ(void) @@ -180,7 +182,7 @@ static void __init pseries_mpic_init_IRQ(void)  	np = of_find_node_by_path("/");  	naddr = of_n_addr_cells(np);  	opprop = of_get_property(np, "platform-open-pic", &opplen); -	if (opprop != 0) { +	if (opprop != NULL) {  		openpic_addr = of_read_number(opprop, naddr);  		printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr);  	} @@ -190,9 +192,7 @@ static void __init pseries_mpic_init_IRQ(void)  	/* Setup the openpic driver */  	mpic = mpic_alloc(pSeries_mpic_node, openpic_addr, -			  MPIC_PRIMARY, -			  16, 250, /* isu size, irq count */ -			  " MPIC     "); +			MPIC_NO_RESET, 16, 0, " MPIC     ");  	BUG_ON(mpic == NULL);  	/* Add ISUs */ @@ -202,6 +202,9 @@ static void __init pseries_mpic_init_IRQ(void)  		mpic_assign_isu(mpic, n, isuaddr);  	} +	/* Setup top-level get_irq */ +	ppc_md.get_irq = mpic_get_irq; +  	/* All ISUs are setup, complete initialization */  	mpic_init(mpic); @@ -211,7 +214,7 @@ static void __init pseries_mpic_init_IRQ(void)  static void __init pseries_xics_init_IRQ(void)  { -	xics_init_IRQ(); +	xics_init();  	pseries_setup_i8259_cascade();  } @@ -235,7 +238,6 @@ static void __init pseries_discover_pic(void)  		if (strstr(typep, "open-pic")) {  			pSeries_mpic_node = of_node_get(np);  			ppc_md.init_IRQ       = pseries_mpic_init_IRQ; -			ppc_md.get_irq        = mpic_get_irq;  			setup_kexec_cpu_down_mpic();  			smp_init_pseries_mpic();  			return; @@ -257,10 +259,14 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act  	int err = NOTIFY_OK;  	switch (action) { -	case PSERIES_RECONFIG_ADD: +	case OF_RECONFIG_ATTACH_NODE:  		pci = np->parent->data; -		if (pci) +		if (pci) {  			update_dn_pci_info(np, pci->phb); + +			/* Create EEH device for the OF node */ +			eeh_dev_init(np, pci->phb); +		}  		break;  	default:  		err = NOTIFY_DONE; @@ -273,7 +279,9 @@ static struct notifier_block pci_dn_reconfig_nb = {  	.notifier_call = pci_dn_reconfig_notifier,  }; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +struct kmem_cache *dtl_cache; + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE  /*   * Allocate space for the dispatch trace log for all possible cpus   * and register the buffers with the hypervisor.  This is used for @@ -288,10 +296,12 @@ static int alloc_dispatch_logs(void)  	if (!firmware_has_feature(FW_FEATURE_SPLPAR))  		return 0; +	if (!dtl_cache) +		return 0; +  	for_each_possible_cpu(cpu) {  		pp = &paca[cpu]; -		dtl = kmalloc_node(DISPATCH_LOG_BYTES, GFP_KERNEL, -				   cpu_to_node(cpu)); +		dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);  		if (!dtl) {  			pr_warn("Failed to allocate dispatch trace log for cpu %d\n",  				cpu); @@ -312,21 +322,149 @@ static int alloc_dispatch_logs(void)  	get_paca()->lppaca_ptr->dtl_idx = 0;  	/* hypervisor reads buffer length from this field */ -	dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES; +	dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);  	ret = register_dtl(hard_smp_processor_id(), __pa(dtl));  	if (ret) -		pr_warn("DTL registration failed for boot cpu %d (%d)\n", -			smp_processor_id(), ret); +		pr_err("WARNING: DTL registration of cpu %d (hw %d) failed " +		       "with %d\n", smp_processor_id(), +		       hard_smp_processor_id(), ret);  	get_paca()->lppaca_ptr->dtl_enable_mask = 2;  	return 0;  } +#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ +static inline int alloc_dispatch_logs(void) +{ +	return 0; +} +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +static int alloc_dispatch_log_kmem_cache(void) +{ +	dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES, +						DISPATCH_LOG_BYTES, 0, NULL); +	if (!dtl_cache) { +		pr_warn("Failed to create dispatch trace log buffer cache\n"); +		pr_warn("Stolen time statistics will be unreliable\n"); +		return 0; +	} + +	return alloc_dispatch_logs(); +} +early_initcall(alloc_dispatch_log_kmem_cache); -early_initcall(alloc_dispatch_logs); -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +static void pseries_lpar_idle(void) +{ +	/* +	 * Default handler to go into low thread priority and possibly +	 * low power mode by cedeing processor to hypervisor +	 */ + +	/* Indicate to hypervisor that we are idle. */ +	get_lppaca()->idle = 1; + +	/* +	 * Yield the processor to the hypervisor.  We return if +	 * an external interrupt occurs (which are driven prior +	 * to returning here) or if a prod occurs from another +	 * processor. When returning here, external interrupts +	 * are enabled. +	 */ +	cede_processor(); + +	get_lppaca()->idle = 0; +} + +/* + * Enable relocation on during exceptions. This has partition wide scope and + * may take a while to complete, if it takes longer than one second we will + * just give up rather than wasting any more time on this - if that turns out + * to ever be a problem in practice we can move this into a kernel thread to + * finish off the process later in boot. + */ +long pSeries_enable_reloc_on_exc(void) +{ +	long rc; +	unsigned int delay, total_delay = 0; + +	while (1) { +		rc = enable_reloc_on_exceptions(); +		if (!H_IS_LONG_BUSY(rc)) +			return rc; + +		delay = get_longbusy_msecs(rc); +		total_delay += delay; +		if (total_delay > 1000) { +			pr_warn("Warning: Giving up waiting to enable " +				"relocation on exceptions (%u msec)!\n", +				total_delay); +			return rc; +		} + +		mdelay(delay); +	} +} +EXPORT_SYMBOL(pSeries_enable_reloc_on_exc); + +long pSeries_disable_reloc_on_exc(void) +{ +	long rc; + +	while (1) { +		rc = disable_reloc_on_exceptions(); +		if (!H_IS_LONG_BUSY(rc)) +			return rc; +		mdelay(get_longbusy_msecs(rc)); +	} +} +EXPORT_SYMBOL(pSeries_disable_reloc_on_exc); + +#ifdef CONFIG_KEXEC +static void pSeries_machine_kexec(struct kimage *image) +{ +	long rc; + +	if (firmware_has_feature(FW_FEATURE_SET_MODE)) { +		rc = pSeries_disable_reloc_on_exc(); +		if (rc != H_SUCCESS) +			pr_warning("Warning: Failed to disable relocation on " +				   "exceptions: %ld\n", rc); +	} + +	default_machine_kexec(image); +} +#endif + +#ifdef __LITTLE_ENDIAN__ +long pseries_big_endian_exceptions(void) +{ +	long rc; + +	while (1) { +		rc = enable_big_endian_exceptions(); +		if (!H_IS_LONG_BUSY(rc)) +			return rc; +		mdelay(get_longbusy_msecs(rc)); +	} +} + +static long pseries_little_endian_exceptions(void) +{ +	long rc; + +	while (1) { +		rc = enable_little_endian_exceptions(); +		if (!H_IS_LONG_BUSY(rc)) +			return rc; +		mdelay(get_longbusy_msecs(rc)); +	} +} +#endif  static void __init pSeries_setup_arch(void)  { +	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); +  	/* Discover PIC type and setup ppc_md accordingly */  	pseries_discover_pic(); @@ -339,59 +477,87 @@ static void __init pSeries_setup_arch(void)  	fwnmi_init(); +	/* By default, only probe PCI (can be overriden by rtas_pci) */ +	pci_add_flags(PCI_PROBE_ONLY); +  	/* Find and initialize PCI host bridges */  	init_pci_config_tokens();  	find_and_init_phbs(); -	pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); -	eeh_init(); +	of_reconfig_notifier_register(&pci_dn_reconfig_nb);  	pSeries_nvram_init(); -	/* Choose an idle loop */ -	if (firmware_has_feature(FW_FEATURE_SPLPAR)) { +	if (firmware_has_feature(FW_FEATURE_LPAR)) {  		vpa_init(boot_cpuid); -		if (get_lppaca()->shared_proc) { -			printk(KERN_DEBUG "Using shared processor idle loop\n"); -			ppc_md.power_save = pseries_shared_idle_sleep; -		} else { -			printk(KERN_DEBUG "Using dedicated idle loop\n"); -			ppc_md.power_save = pseries_dedicated_idle_sleep; -		} +		ppc_md.power_save = pseries_lpar_idle; +		ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;  	} else { -		printk(KERN_DEBUG "Using default idle loop\n"); +		/* No special idle routine */ +		ppc_md.enable_pmcs = power4_enable_pmcs;  	} -	if (firmware_has_feature(FW_FEATURE_LPAR)) -		ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; -	else -		ppc_md.enable_pmcs = power4_enable_pmcs; +	ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare; + +	if (firmware_has_feature(FW_FEATURE_SET_MODE)) { +		long rc; +		if ((rc = pSeries_enable_reloc_on_exc()) != H_SUCCESS) { +			pr_warn("Unable to enable relocation on exceptions: " +				"%ld\n", rc); +		} +	}  }  static int __init pSeries_init_panel(void)  {  	/* Manually leave the kernel version on the panel. */ +#ifdef __BIG_ENDIAN__  	ppc_md.progress("Linux ppc64\n", 0); +#else +	ppc_md.progress("Linux ppc64le\n", 0); +#endif  	ppc_md.progress(init_utsname()->version, 0);  	return 0;  } -arch_initcall(pSeries_init_panel); +machine_arch_initcall(pseries, pSeries_init_panel); -static int pseries_set_dabr(unsigned long dabr) +static int pseries_set_dabr(unsigned long dabr, unsigned long dabrx)  {  	return plpar_hcall_norets(H_SET_DABR, dabr);  } -static int pseries_set_xdabr(unsigned long dabr) +static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx) +{ +	/* Have to set at least one bit in the DABRX according to PAPR */ +	if (dabrx == 0 && dabr == 0) +		dabrx = DABRX_USER; +	/* PAPR says we can only set kernel and user bits */ +	dabrx &= DABRX_KERNEL | DABRX_USER; + +	return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx); +} + +static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx)  { -	/* We want to catch accesses from kernel and userspace */ -	return plpar_hcall_norets(H_SET_XDABR, dabr, -			H_DABRX_KERNEL | H_DABRX_USER); +	/* PAPR says we can't set HYP */ +	dawrx &= ~DAWRX_HYP; + +	return  plapr_set_watchpoint0(dawr, dawrx);  }  #define CMO_CHARACTERISTICS_TOKEN 44  #define CMO_MAXLENGTH 1026 +void pSeries_coalesce_init(void) +{ +	struct hvcall_mpp_x_data mpp_x_data; + +	if (firmware_has_feature(FW_FEATURE_CMO) && !h_get_mpp_x(&mpp_x_data)) +		powerpc_firmware_features |= FW_FEATURE_XCMO; +	else +		powerpc_firmware_features &= ~FW_FEATURE_XCMO; +} +  /**   * fw_cmo_feature_init - FW_FEATURE_CMO is not stored in ibm,hypertas-functions,   * handle that here. (Stolen from parse_system_parameter_string) @@ -400,7 +566,7 @@ void pSeries_cmo_feature_init(void)  {  	char *ptr, *key, *value, *end;  	int call_status; -	int page_order = IOMMU_PAGE_SHIFT; +	int page_order = IOMMU_PAGE_SHIFT_4K;  	pr_debug(" -> fw_cmo_feature_init()\n");  	spin_lock(&rtas_data_buf_lock); @@ -461,6 +627,7 @@ void pSeries_cmo_feature_init(void)  		pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,  		         CMO_SecPSP);  		powerpc_firmware_features |= FW_FEATURE_CMO; +		pSeries_coalesce_init();  	} else  		pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,  		         CMO_SecPSP); @@ -475,13 +642,17 @@ static void __init pSeries_init_early(void)  {  	pr_debug(" -> pSeries_init_early()\n"); +#ifdef CONFIG_HVC_CONSOLE  	if (firmware_has_feature(FW_FEATURE_LPAR)) -		find_udbg_vterm(); - -	if (firmware_has_feature(FW_FEATURE_DABR)) -		ppc_md.set_dabr = pseries_set_dabr; -	else if (firmware_has_feature(FW_FEATURE_XDABR)) +		hvc_vio_init_early(); +#endif +	if (firmware_has_feature(FW_FEATURE_XDABR))  		ppc_md.set_dabr = pseries_set_xdabr; +	else if (firmware_has_feature(FW_FEATURE_DABR)) +		ppc_md.set_dabr = pseries_set_dabr; + +	if (firmware_has_feature(FW_FEATURE_SET_MODE)) +		ppc_md.set_dawr = pseries_set_dawr;  	pSeries_cmo_feature_init();  	iommu_init_early_pSeries(); @@ -493,31 +664,45 @@ static void __init pSeries_init_early(void)   * Called very early, MMU is off, device-tree isn't unflattened   */ -static int __init pSeries_probe_hypertas(unsigned long node, -					 const char *uname, int depth, -					 void *data) +static int __init pseries_probe_fw_features(unsigned long node, +					    const char *uname, int depth, +					    void *data)  { -	const char *hypertas; -	unsigned long len; +	const char *prop; +	int len; +	static int hypertas_found; +	static int vec5_found; -	if (depth != 1 || -	    (strcmp(uname, "rtas") != 0 && strcmp(uname, "rtas@0") != 0)) +	if (depth != 1)  		return 0; -	hypertas = of_get_flat_dt_prop(node, "ibm,hypertas-functions", &len); -	if (!hypertas) -		return 1; +	if (!strcmp(uname, "rtas") || !strcmp(uname, "rtas@0")) { +		prop = of_get_flat_dt_prop(node, "ibm,hypertas-functions", +					   &len); +		if (prop) { +			powerpc_firmware_features |= FW_FEATURE_LPAR; +			fw_hypertas_feature_init(prop, len); +		} + +		hypertas_found = 1; +	} -	powerpc_firmware_features |= FW_FEATURE_LPAR; -	fw_feature_init(hypertas, len); +	if (!strcmp(uname, "chosen")) { +		prop = of_get_flat_dt_prop(node, "ibm,architecture-vec-5", +					   &len); +		if (prop) +			fw_vec5_feature_init(prop, len); -	return 1; +		vec5_found = 1; +	} + +	return hypertas_found && vec5_found;  }  static int __init pSeries_probe(void)  {  	unsigned long root = of_get_flat_dt_root(); - 	char *dtype = of_get_flat_dt_prop(root, "device_type", NULL); +	const char *dtype = of_get_flat_dt_prop(root, "device_type", NULL);   	if (dtype == NULL)   		return 0; @@ -534,7 +719,23 @@ static int __init pSeries_probe(void)  	pr_debug("pSeries detected, looking for LPAR capability...\n");  	/* Now try to figure out if we are running on LPAR */ -	of_scan_flat_dt(pSeries_probe_hypertas, NULL); +	of_scan_flat_dt(pseries_probe_fw_features, NULL); + +#ifdef __LITTLE_ENDIAN__ +	if (firmware_has_feature(FW_FEATURE_SET_MODE)) { +		long rc; +		/* +		 * Tell the hypervisor that we want our exceptions to +		 * be taken in little endian mode. If this fails we don't +		 * want to use BUG() because it will trigger an exception. +		 */ +		rc = pseries_little_endian_exceptions(); +		if (rc) { +			ppc_md.progress("H_SET_MODE LE exception fail", 0); +			panic("Could not enable little endian exceptions"); +		} +	} +#endif  	if (firmware_has_feature(FW_FEATURE_LPAR))  		hpte_init_lpar(); @@ -547,80 +748,6 @@ static int __init pSeries_probe(void)  	return 1;  } - -DECLARE_PER_CPU(long, smt_snooze_delay); - -static void pseries_dedicated_idle_sleep(void) -{  -	unsigned int cpu = smp_processor_id(); -	unsigned long start_snooze; -	unsigned long in_purr, out_purr; -	long snooze = __get_cpu_var(smt_snooze_delay); - -	/* -	 * Indicate to the HV that we are idle. Now would be -	 * a good time to find other work to dispatch. -	 */ -	get_lppaca()->idle = 1; -	get_lppaca()->donate_dedicated_cpu = 1; -	in_purr = mfspr(SPRN_PURR); - -	/* -	 * We come in with interrupts disabled, and need_resched() -	 * has been checked recently.  If we should poll for a little -	 * while, do so. -	 */ -	if (snooze) { -		start_snooze = get_tb() + snooze * tb_ticks_per_usec; -		local_irq_enable(); -		set_thread_flag(TIF_POLLING_NRFLAG); - -		while ((snooze < 0) || (get_tb() < start_snooze)) { -			if (need_resched() || cpu_is_offline(cpu)) -				goto out; -			ppc64_runlatch_off(); -			HMT_low(); -			HMT_very_low(); -		} - -		HMT_medium(); -		clear_thread_flag(TIF_POLLING_NRFLAG); -		smp_mb(); -		local_irq_disable(); -		if (need_resched() || cpu_is_offline(cpu)) -			goto out; -	} - -	cede_processor(); - -out: -	HMT_medium(); -	out_purr = mfspr(SPRN_PURR); -	get_lppaca()->wait_state_cycles += out_purr - in_purr; -	get_lppaca()->donate_dedicated_cpu = 0; -	get_lppaca()->idle = 0; -} - -static void pseries_shared_idle_sleep(void) -{ -	/* -	 * Indicate to the HV that we are idle. Now would be -	 * a good time to find other work to dispatch. -	 */ -	get_lppaca()->idle = 1; - -	/* -	 * Yield the processor to the hypervisor.  We return if -	 * an external interrupt occurs (which are driven prior -	 * to returning here) or if a prod occurs from another -	 * processor. When returning here, external interrupts -	 * are enabled. -	 */ -	cede_processor(); - -	get_lppaca()->idle = 0; -} -  static int pSeries_pci_probe_mode(struct pci_bus *bus)  {  	if (firmware_has_feature(FW_FEATURE_LPAR)) @@ -680,4 +807,10 @@ define_machine(pseries) {  	.progress		= rtas_progress,  	.system_reset_exception = pSeries_system_reset_exception,  	.machine_check_exception = pSeries_machine_check_exception, +#ifdef CONFIG_KEXEC +	.machine_kexec          = pSeries_machine_kexec, +#endif +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +	.memory_block_size	= pseries_memory_block_size, +#endif  };  | 
