Diffstat (limited to 'arch/powerpc/kernel')
50 files changed, 1303 insertions, 2007 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 58d0572de6f..1dda7012914 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -34,13 +34,14 @@ obj-y				+= vdso32/  obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \  				   signal_64.o ptrace32.o \  				   paca.o nvram_64.o firmware.o +obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o  obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o  obj64-$(CONFIG_RELOCATABLE)	+= reloc_64.o -obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o +obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o  obj-$(CONFIG_PPC64)		+= vdso64/  obj-$(CONFIG_ALTIVEC)		+= vecemu.o  obj-$(CONFIG_PPC_970_NAP)	+= idle_power4.o -obj-$(CONFIG_PPC_OF)		+= of_device.o of_platform.o prom_parse.o +obj-$(CONFIG_PPC_OF)		+= of_platform.o prom_parse.o  obj-$(CONFIG_PPC_CLOCK)		+= clock.o  procfs-y			:= proc_powerpc.o  obj-$(CONFIG_PROC_FS)		+= $(procfs-y) @@ -67,6 +68,7 @@ obj64-$(CONFIG_HIBERNATION)	+= swsusp_asm64.o  obj-$(CONFIG_MODULES)		+= module.o module_$(CONFIG_WORD_SIZE).o  obj-$(CONFIG_44x)		+= cpu_setup_44x.o  obj-$(CONFIG_FSL_BOOKE)		+= cpu_setup_fsl_booke.o dbell.o +obj-$(CONFIG_PPC_BOOK3E_64)	+= dbell.o  extra-y				:= head_$(CONFIG_WORD_SIZE).o  extra-$(CONFIG_PPC_BOOK3E_32)	:= head_new_booke.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 496cc5b3984..1c0607ddccc 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -194,7 +194,6 @@ int main(void)  	DEFINE(PACA_STARTSPURR, offsetof(struct paca_struct, startspurr));  	DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));  	DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); -	DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset));  	DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));  #ifdef CONFIG_KVM_BOOK3S_64_HANDLER  	DEFINE(PACA_KVM_SVCPU, offsetof(struct paca_struct, shadow_vcpu)); @@ -342,6 +341,7 @@ int main(void)  	DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec));  	DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));  	DEFINE(STAMP_XTIME, offsetof(struct vdso_data, stamp_xtime)); +	DEFINE(STAMP_SEC_FRAC, offsetof(struct vdso_data, stamp_sec_fraction));  	DEFINE(CFG_ICACHE_BLOCKSZ, offsetof(struct vdso_data, icache_block_size));  	DEFINE(CFG_DCACHE_BLOCKSZ, offsetof(struct vdso_data, dcache_block_size));  	DEFINE(CFG_ICACHE_LOGBLOCKSZ, offsetof(struct vdso_data, icache_log_block_size)); diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 26e58630ed7..625942ae558 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -7,7 +7,7 @@  #include <linux/string.h>  #include <linux/init.h>  #include <linux/module.h> -#include <linux/lmb.h> +#include <linux/memblock.h>  #include <asm/sections.h>  #include <asm/prom.h> diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 87aa0f3c604..65e2b4e10f9 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1364,10 +1364,10 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.machine_check		= machine_check_4xx,  		.platform		= "ppc405",  	}, -	{	/* 405EX */ -		.pvr_mask		= 0xffff0004, -		.pvr_value		= 0x12910004, -		.cpu_name		= "405EX", +	{	/* 405EX Rev. A/B with Security */ +		.pvr_mask		= 0xffff000f, +		.pvr_value		= 0x12910007, +		.cpu_name		= "405EX Rev. 
A/B",  		.cpu_features		= CPU_FTRS_40X,  		.cpu_user_features	= PPC_FEATURE_32 |  			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, @@ -1377,10 +1377,114 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.machine_check		= machine_check_4xx,  		.platform		= "ppc405",  	}, -	{	/* 405EXr */ -		.pvr_mask		= 0xffff0004, +	{	/* 405EX Rev. C without Security */ +		.pvr_mask		= 0xffff000f, +		.pvr_value		= 0x1291000d, +		.cpu_name		= "405EX Rev. C", +		.cpu_features		= CPU_FTRS_40X, +		.cpu_user_features	= PPC_FEATURE_32 | +			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, +		.mmu_features		= MMU_FTR_TYPE_40x, +		.icache_bsize		= 32, +		.dcache_bsize		= 32, +		.machine_check		= machine_check_4xx, +		.platform		= "ppc405", +	}, +	{	/* 405EX Rev. C with Security */ +		.pvr_mask		= 0xffff000f, +		.pvr_value		= 0x1291000f, +		.cpu_name		= "405EX Rev. C", +		.cpu_features		= CPU_FTRS_40X, +		.cpu_user_features	= PPC_FEATURE_32 | +			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, +		.mmu_features		= MMU_FTR_TYPE_40x, +		.icache_bsize		= 32, +		.dcache_bsize		= 32, +		.machine_check		= machine_check_4xx, +		.platform		= "ppc405", +	}, +	{	/* 405EX Rev. D without Security */ +		.pvr_mask		= 0xffff000f, +		.pvr_value		= 0x12910003, +		.cpu_name		= "405EX Rev. D", +		.cpu_features		= CPU_FTRS_40X, +		.cpu_user_features	= PPC_FEATURE_32 | +			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, +		.mmu_features		= MMU_FTR_TYPE_40x, +		.icache_bsize		= 32, +		.dcache_bsize		= 32, +		.machine_check		= machine_check_4xx, +		.platform		= "ppc405", +	}, +	{	/* 405EX Rev. D with Security */ +		.pvr_mask		= 0xffff000f, +		.pvr_value		= 0x12910005, +		.cpu_name		= "405EX Rev. D", +		.cpu_features		= CPU_FTRS_40X, +		.cpu_user_features	= PPC_FEATURE_32 | +			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, +		.mmu_features		= MMU_FTR_TYPE_40x, +		.icache_bsize		= 32, +		.dcache_bsize		= 32, +		.machine_check		= machine_check_4xx, +		.platform		= "ppc405", +	}, +	{	/* 405EXr Rev. A/B without Security */ +		.pvr_mask		= 0xffff000f, +		.pvr_value		= 0x12910001, +		.cpu_name		= "405EXr Rev. A/B", +		.cpu_features		= CPU_FTRS_40X, +		.cpu_user_features	= PPC_FEATURE_32 | +			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, +		.mmu_features		= MMU_FTR_TYPE_40x, +		.icache_bsize		= 32, +		.dcache_bsize		= 32, +		.machine_check		= machine_check_4xx, +		.platform		= "ppc405", +	}, +	{	/* 405EXr Rev. C without Security */ +		.pvr_mask		= 0xffff000f, +		.pvr_value		= 0x12910009, +		.cpu_name		= "405EXr Rev. C", +		.cpu_features		= CPU_FTRS_40X, +		.cpu_user_features	= PPC_FEATURE_32 | +			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, +		.mmu_features		= MMU_FTR_TYPE_40x, +		.icache_bsize		= 32, +		.dcache_bsize		= 32, +		.machine_check		= machine_check_4xx, +		.platform		= "ppc405", +	}, +	{	/* 405EXr Rev. C with Security */ +		.pvr_mask		= 0xffff000f, +		.pvr_value		= 0x1291000b, +		.cpu_name		= "405EXr Rev. C", +		.cpu_features		= CPU_FTRS_40X, +		.cpu_user_features	= PPC_FEATURE_32 | +			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, +		.mmu_features		= MMU_FTR_TYPE_40x, +		.icache_bsize		= 32, +		.dcache_bsize		= 32, +		.machine_check		= machine_check_4xx, +		.platform		= "ppc405", +	}, +	{	/* 405EXr Rev. D without Security */ +		.pvr_mask		= 0xffff000f,  		.pvr_value		= 0x12910000, -		.cpu_name		= "405EXr", +		.cpu_name		= "405EXr Rev. 
D", +		.cpu_features		= CPU_FTRS_40X, +		.cpu_user_features	= PPC_FEATURE_32 | +			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, +		.mmu_features		= MMU_FTR_TYPE_40x, +		.icache_bsize		= 32, +		.dcache_bsize		= 32, +		.machine_check		= machine_check_4xx, +		.platform		= "ppc405", +	}, +	{	/* 405EXr Rev. D with Security */ +		.pvr_mask		= 0xffff000f, +		.pvr_value		= 0x12910002, +		.cpu_name		= "405EXr Rev. D",  		.cpu_features		= CPU_FTRS_40X,  		.cpu_user_features	= PPC_FEATURE_32 |  			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index b46f2e09bd8..417f7b05a9c 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -24,7 +24,7 @@  #include <linux/init.h>  #include <linux/irq.h>  #include <linux/types.h> -#include <linux/lmb.h> +#include <linux/memblock.h>  #include <asm/processor.h>  #include <asm/machdep.h> @@ -447,7 +447,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)  	crash_kexec_prepare_cpus(crashing_cpu);  	cpu_set(crashing_cpu, cpus_in_crash);  	crash_kexec_stop_spus(); -#ifdef CONFIG_PPC_STD_MMU_64 +#if defined(CONFIG_PPC_STD_MMU_64) && defined(CONFIG_SMP)  	crash_kexec_wait_realmode(crashing_cpu);  #endif  	if (ppc_md.kexec_cpu_down) diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 5fb667a6089..8e05c16344e 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -13,7 +13,7 @@  #include <linux/crash_dump.h>  #include <linux/bootmem.h> -#include <linux/lmb.h> +#include <linux/memblock.h>  #include <asm/code-patching.h>  #include <asm/kdump.h>  #include <asm/prom.h> @@ -33,7 +33,7 @@ unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;  #ifndef CONFIG_RELOCATABLE  void __init reserve_kdump_trampoline(void)  { -	lmb_reserve(0, KDUMP_RESERVE_LIMIT); +	memblock_reserve(0, KDUMP_RESERVE_LIMIT);  }  static void __init create_trampoline(unsigned long addr) @@ -128,9 +128,9 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,  	if (!csize)  		return 0; -	csize = min(csize, PAGE_SIZE); +	csize = min_t(size_t, csize, PAGE_SIZE); -	if (pfn < max_pfn) { +	if ((min_low_pfn < pfn) && (pfn < max_pfn)) {  		vaddr = __va(pfn << PAGE_SHIFT);  		csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);  	} else { diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 1493734cd87..3307a52d797 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -13,32 +13,88 @@  #include <linux/kernel.h>  #include <linux/smp.h>  #include <linux/threads.h> +#include <linux/percpu.h>  #include <asm/dbell.h> +#include <asm/irq_regs.h>  #ifdef CONFIG_SMP -unsigned long dbell_smp_message[NR_CPUS]; +struct doorbell_cpu_info { +	unsigned long	messages;	/* current messages bits */ +	unsigned int	tag;		/* tag value */ +}; -void smp_dbell_message_pass(int target, int msg) +static DEFINE_PER_CPU(struct doorbell_cpu_info, doorbell_cpu_info); + +void doorbell_setup_this_cpu(void) +{ +	struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); + +	info->messages = 0; +	info->tag = mfspr(SPRN_PIR) & 0x3fff; +} + +void doorbell_message_pass(int target, int msg)  { +	struct doorbell_cpu_info *info;  	int i; -	if(target < NR_CPUS) { -		set_bit(msg, &dbell_smp_message[target]); -		ppc_msgsnd(PPC_DBELL, 0, target); +	if (target < NR_CPUS) { +		info = &per_cpu(doorbell_cpu_info, target); +		set_bit(msg, &info->messages); +		ppc_msgsnd(PPC_DBELL, 0, info->tag);  	} -	else if(target == 
MSG_ALL_BUT_SELF) { +	else if (target == MSG_ALL_BUT_SELF) {  		for_each_online_cpu(i) {  			if (i == smp_processor_id())  				continue; -			set_bit(msg, &dbell_smp_message[i]); -			ppc_msgsnd(PPC_DBELL, 0, i); +			info = &per_cpu(doorbell_cpu_info, i); +			set_bit(msg, &info->messages); +			ppc_msgsnd(PPC_DBELL, 0, info->tag);  		}  	}  	else { /* target == MSG_ALL */ -		for_each_online_cpu(i) -			set_bit(msg, &dbell_smp_message[i]); +		for_each_online_cpu(i) { +			info = &per_cpu(doorbell_cpu_info, i); +			set_bit(msg, &info->messages); +		}  		ppc_msgsnd(PPC_DBELL, PPC_DBELL_MSG_BRDCAST, 0);  	}  } -#endif + +void doorbell_exception(struct pt_regs *regs) +{ +	struct pt_regs *old_regs = set_irq_regs(regs); +	struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); +	int msg; + +	/* Warning: regs can be NULL when called from irq enable */ + +	if (!info->messages || (num_online_cpus() < 2)) +		goto out; + +	for (msg = 0; msg < 4; msg++) +		if (test_and_clear_bit(msg, &info->messages)) +			smp_message_recv(msg); + +out: +	set_irq_regs(old_regs); +} + +void doorbell_check_self(void) +{ +	struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); + +	if (!info->messages) +		return; + +	ppc_msgsnd(PPC_DBELL, 0, info->tag); +} + +#else /* CONFIG_SMP */ +void doorbell_exception(struct pt_regs *regs) +{ +	printk(KERN_WARNING "Received doorbell on non-smp system\n"); +} +#endif /* CONFIG_SMP */ + diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index e7fe218b869..4295e0b94b2 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -71,7 +71,7 @@ static int ppc_swiotlb_bus_notify(struct notifier_block *nb,  	sd->max_direct_dma_addr = 0;  	/* May need to bounce if the device can't address all of DRAM */ -	if ((dma_get_mask(dev) + 1) < lmb_end_of_DRAM()) +	if ((dma_get_mask(dev) + 1) < memblock_end_of_DRAM())  		set_dma_ops(dev, &swiotlb_dma_ops);  	return NOTIFY_DONE; @@ -82,17 +82,9 @@ static struct notifier_block ppc_swiotlb_plat_bus_notifier = {  	.priority = 0,  }; -static struct notifier_block ppc_swiotlb_of_bus_notifier = { -	.notifier_call = ppc_swiotlb_bus_notify, -	.priority = 0, -}; -  int __init swiotlb_setup_bus_notifier(void)  {  	bus_register_notifier(&platform_bus_type,  			      &ppc_swiotlb_plat_bus_notifier); -	bus_register_notifier(&of_platform_bus_type, -			      &ppc_swiotlb_of_bus_notifier); -  	return 0;  } diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 8d1de6f31d5..84d6367ec00 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -9,7 +9,7 @@  #include <linux/dma-mapping.h>  #include <linux/dma-debug.h>  #include <linux/gfp.h> -#include <linux/lmb.h> +#include <linux/memblock.h>  #include <asm/bug.h>  #include <asm/abs_addr.h> @@ -89,7 +89,7 @@ static int dma_direct_dma_supported(struct device *dev, u64 mask)  	/* Could be improved so platforms can set the limit in case  	 * they have limited DMA windows  	 */ -	return mask >= (lmb_end_of_DRAM() - 1); +	return mask >= (memblock_end_of_DRAM() - 1);  #else  	return 1;  #endif diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 24dcc0ecf24..5c43063d250 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -191,6 +191,12 @@ exc_##n##_bad_stack:							    \  	sth	r1,PACA_TRAP_SAVE(r13);	/* store trap */		    \  	b	bad_stack_book3e;	/* bad stack error */ +/* WARNING: If you change the layout of this stub, make sure 
you check +	*   the debug exception handler which handles single stepping +	*   into exceptions from userspace, and the MM code in +	*   arch/powerpc/mm/tlb_nohash.c which patches the branch here +	*   and would need to be updated if that branch is moved +	*/  #define	EXCEPTION_STUB(loc, label)					\  	. = interrupt_base_book3e + loc;				\  	nop;	/* To make debug interrupts happy */			\ @@ -204,11 +210,30 @@ exc_##n##_bad_stack:							    \  	lis	r,TSR_FIS@h;						\  	mtspr	SPRN_TSR,r +/* Used by asynchronous interrupts that may happen in the idle loop. + * + * This checks if the thread was in the idle loop, and if so, returns + * to the caller rather than the PC. This is to avoid a race if + * interrupts happen before the wait instruction. + */ +#define CHECK_NAPPING()							\ +	clrrdi	r11,r1,THREAD_SHIFT;					\ +	ld	r10,TI_LOCAL_FLAGS(r11);				\ +	andi.	r9,r10,_TLF_NAPPING;					\ +	beq+	1f;							\ +	ld	r8,_LINK(r1);						\ +	rlwinm	r7,r10,0,~_TLF_NAPPING;					\ +	std	r8,_NIP(r1);						\ +	std	r7,TI_LOCAL_FLAGS(r11);					\ +1: + +  #define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack)			\  	START_EXCEPTION(label);						\  	NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE)	\  	EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE_ALL)		\  	ack(r8);							\ +	CHECK_NAPPING();						\  	addi	r3,r1,STACK_FRAME_OVERHEAD;				\  	bl	hdlr;							\  	b	.ret_from_except_lite; @@ -246,11 +271,9 @@ interrupt_base_book3e:					/* fake trap */  	EXCEPTION_STUB(0x1a0, watchdog)			/* 0x09f0 */  	EXCEPTION_STUB(0x1c0, data_tlb_miss)  	EXCEPTION_STUB(0x1e0, instruction_tlb_miss) +	EXCEPTION_STUB(0x280, doorbell) +	EXCEPTION_STUB(0x2a0, doorbell_crit) -#if 0 -	EXCEPTION_STUB(0x280, processor_doorbell) -	EXCEPTION_STUB(0x220, processor_doorbell_crit) -#endif  	.globl interrupt_end_book3e  interrupt_end_book3e: @@ -259,6 +282,7 @@ interrupt_end_book3e:  	CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE)  //	EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE_ALL)  //	bl	special_reg_save_crit +//	CHECK_NAPPING();  //	addi	r3,r1,STACK_FRAME_OVERHEAD  //	bl	.critical_exception  //	b	ret_from_crit_except @@ -270,6 +294,7 @@ interrupt_end_book3e:  //	EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE_ALL)  //	bl	special_reg_save_mc  //	addi	r3,r1,STACK_FRAME_OVERHEAD +//	CHECK_NAPPING();  //	bl	.machine_check_exception  //	b	ret_from_mc_except  	b	. @@ -340,6 +365,7 @@ interrupt_end_book3e:  	CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE)  //	EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE_ALL)  //	bl	special_reg_save_crit +//	CHECK_NAPPING();  //	addi	r3,r1,STACK_FRAME_OVERHEAD  //	bl	.unknown_exception  //	b	ret_from_crit_except @@ -428,6 +454,20 @@ interrupt_end_book3e:  kernel_dbg_exc:  	b	.	/* NYI */ +/* Doorbell interrupt */ +	MASKABLE_EXCEPTION(0x2070, doorbell, .doorbell_exception, ACK_NONE) + +/* Doorbell critical interrupt */ +	START_EXCEPTION(doorbell_crit); +	CRIT_EXCEPTION_PROLOG(0x2080, PROLOG_ADDITION_NONE) +//	EXCEPTION_COMMON(0x2080, PACA_EXCRIT, INTS_DISABLE_ALL) +//	bl	special_reg_save_crit +//	CHECK_NAPPING(); +//	addi	r3,r1,STACK_FRAME_OVERHEAD +//	bl	.doorbell_critical_exception +//	b	ret_from_crit_except +	b	.
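
For orientation, here is a minimal sketch of how a Book3E platform could hook the doorbell primitives above up as its IPI mechanism. Everything named example_* is hypothetical glue invented for illustration; doorbell_message_pass(), doorbell_setup_this_cpu() and smp_generic_kick_cpu() are the real entry points, and the doorbell itself arrives through the 0x280 stub wired to .doorbell_exception above.

	#include <asm/dbell.h>
	#include <asm/smp.h>

	/* Hypothetical platform glue, for illustration only. */
	static void __init example_smp_setup_cpu(int cpu_nr)
	{
		/* Record this CPU's PIR-derived doorbell tag. */
		doorbell_setup_this_cpu();
	}

	static struct smp_ops_t example_smp_ops = {
		.message_pass	= doorbell_message_pass,	/* IPIs become msgsnd */
		.setup_cpu	= example_smp_setup_cpu,
		.kick_cpu	= smp_generic_kick_cpu,
	};

	void __init example_smp_init(void)
	{
		smp_ops = &example_smp_ops;
	}
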
+  /*   * An interrupt came in while soft-disabled; clear EE in SRR1, @@ -563,6 +603,8 @@ BAD_STACK_TRAMPOLINE(0xd00)  BAD_STACK_TRAMPOLINE(0xe00)  BAD_STACK_TRAMPOLINE(0xf00)  BAD_STACK_TRAMPOLINE(0xf20) +BAD_STACK_TRAMPOLINE(0x2070) +BAD_STACK_TRAMPOLINE(0x2080)  	.globl	bad_stack_book3e  bad_stack_book3e: diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 3e423fbad6b..f53029a0155 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -828,6 +828,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES)  /* We have a data breakpoint exception - handle it */  handle_dabr_fault: +	bl	.save_nvgprs  	ld      r4,_DAR(r1)  	ld      r5,_DSISR(r1)  	addi    r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S index beb4d78a230..a92c79be272 100644 --- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S +++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S @@ -205,8 +205,7 @@ next_tlb_setup:  	bdnz+   next_tlb_setup  /* 7. Jump to our 1:1 mapping */ -	li	r6, 0 - +	mr	r6, r25  #else  	#error You need to specify the mapping or not use this at all.  #endif @@ -217,7 +216,6 @@ next_tlb_setup:  1:	mflr	r9  	rlwimi	r6,r9,0,20,31  	addi	r6,r6,(2f - 1b) -	add	r6, r6, r25  	mtspr	SPRN_SRR0,r6  	mtspr	SPRN_SRR1,r7  	rfi				/* start execution out of TLB1[0] entry */ diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c new file mode 100644 index 00000000000..5ecd0401cdb --- /dev/null +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -0,0 +1,364 @@ +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. Derived from + * "arch/x86/kernel/hw_breakpoint.c" + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright 2010 IBM Corporation + * Author: K.Prasad <prasad@linux.vnet.ibm.com> + * + */ + +#include <linux/hw_breakpoint.h> +#include <linux/notifier.h> +#include <linux/kprobes.h> +#include <linux/percpu.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/smp.h> + +#include <asm/hw_breakpoint.h> +#include <asm/processor.h> +#include <asm/sstep.h> +#include <asm/uaccess.h> + +/* + * Stores the breakpoints currently in use on each breakpoint address + * register for every cpu + */ +static DEFINE_PER_CPU(struct perf_event *, bp_per_reg); + +/* + * Returns total number of data or instruction breakpoints available. + */ +int hw_breakpoint_slots(int type) +{ +	if (type == TYPE_DATA) +		return HBP_NUM; +	return 0;		/* no instruction breakpoints available */ +} + +/* + * Install a perf counter breakpoint. + * + * We seek a free debug address register and use it for this + * breakpoint. 
+ * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. + */ +int arch_install_hw_breakpoint(struct perf_event *bp) +{ +	struct arch_hw_breakpoint *info = counter_arch_bp(bp); +	struct perf_event **slot = &__get_cpu_var(bp_per_reg); + +	*slot = bp; + +	/* +	 * Do not install DABR values if the instruction must be single-stepped. +	 * If so, DABR will be populated in single_step_dabr_instruction(). +	 */ +	if (current->thread.last_hit_ubp != bp) +		set_dabr(info->address | info->type | DABR_TRANSLATION); + +	return 0; +} + +/* + * Uninstall the breakpoint contained in the given counter. + * + * First we search the debug address register it uses and then we disable + * it. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. + */ +void arch_uninstall_hw_breakpoint(struct perf_event *bp) +{ +	struct perf_event **slot = &__get_cpu_var(bp_per_reg); + +	if (*slot != bp) { +		WARN_ONCE(1, "Can't find the breakpoint"); +		return; +	} + +	*slot = NULL; +	set_dabr(0); +} + +/* + * Perform cleanup of arch-specific counters during unregistration + * of the perf-event + */ +void arch_unregister_hw_breakpoint(struct perf_event *bp) +{ +	/* +	 * If the breakpoint is unregistered between a hw_breakpoint_handler() +	 * and the single_step_dabr_instruction(), then cleanup the breakpoint +	 * restoration variables to prevent dangling pointers. +	 */ +	if (bp->ctx->task) +		bp->ctx->task->thread.last_hit_ubp = NULL; +} + +/* + * Check for virtual address in kernel space. + */ +int arch_check_bp_in_kernelspace(struct perf_event *bp) +{ +	struct arch_hw_breakpoint *info = counter_arch_bp(bp); + +	return is_kernel_addr(info->address); +} + +int arch_bp_generic_fields(int type, int *gen_bp_type) +{ +	switch (type) { +	case DABR_DATA_READ: +		*gen_bp_type = HW_BREAKPOINT_R; +		break; +	case DABR_DATA_WRITE: +		*gen_bp_type = HW_BREAKPOINT_W; +		break; +	case (DABR_DATA_WRITE | DABR_DATA_READ): +		*gen_bp_type = (HW_BREAKPOINT_W | HW_BREAKPOINT_R); +		break; +	default: +		return -EINVAL; +	} +	return 0; +} + +/* + * Validate the arch-specific HW Breakpoint register settings + */ +int arch_validate_hwbkpt_settings(struct perf_event *bp) +{ +	int ret = -EINVAL; +	struct arch_hw_breakpoint *info = counter_arch_bp(bp); + +	if (!bp) +		return ret; + +	switch (bp->attr.bp_type) { +	case HW_BREAKPOINT_R: +		info->type = DABR_DATA_READ; +		break; +	case HW_BREAKPOINT_W: +		info->type = DABR_DATA_WRITE; +		break; +	case HW_BREAKPOINT_R | HW_BREAKPOINT_W: +		info->type = (DABR_DATA_READ | DABR_DATA_WRITE); +		break; +	default: +		return ret; +	} + +	info->address = bp->attr.bp_addr; +	info->len = bp->attr.bp_len; + +	/* +	 * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8) +	 * and breakpoint addresses are aligned to nearest double-word +	 * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the +	 * 'symbolsize' should satisfy the check below. +	 */ +	if (info->len > +	    (HW_BREAKPOINT_LEN - (info->address & HW_BREAKPOINT_ALIGN))) +		return -EINVAL; +	return 0; +} + +/* + * Restores the breakpoint on the debug registers. + * Invoke this function if it is known that the execution context is + * about to change to cause loss of MSR_SE settings. 
+ */ +void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) +{ +	struct arch_hw_breakpoint *info; + +	if (likely(!tsk->thread.last_hit_ubp)) +		return; + +	info = counter_arch_bp(tsk->thread.last_hit_ubp); +	regs->msr &= ~MSR_SE; +	set_dabr(info->address | info->type | DABR_TRANSLATION); +	tsk->thread.last_hit_ubp = NULL; +} + +/* + * Handle debug exception notifications. + */ +int __kprobes hw_breakpoint_handler(struct die_args *args) +{ +	int rc = NOTIFY_STOP; +	struct perf_event *bp; +	struct pt_regs *regs = args->regs; +	int stepped = 1; +	struct arch_hw_breakpoint *info; +	unsigned int instr; +	unsigned long dar = regs->dar; + +	/* Disable breakpoints during exception handling */ +	set_dabr(0); + +	/* +	 * The counter may be concurrently released but that can only +	 * occur from a call_rcu() path. We can then safely fetch +	 * the breakpoint, use its callback, touch its counter +	 * while we are in an rcu_read_lock() path. +	 */ +	rcu_read_lock(); + +	bp = __get_cpu_var(bp_per_reg); +	if (!bp) +		goto out; +	info = counter_arch_bp(bp); + +	/* +	 * Return early after invoking user-callback function without restoring +	 * DABR if the breakpoint is from ptrace which always operates in +	 * one-shot mode. The ptrace-ed process will receive the SIGTRAP signal +	 * generated in do_dabr(). +	 */ +	if (bp->overflow_handler == ptrace_triggered) { +		perf_bp_event(bp, regs); +		rc = NOTIFY_DONE; +		goto out; +	} + +	/* +	 * Verify if dar lies within the address range occupied by the symbol +	 * being watched to filter extraneous exceptions.  If it doesn't, +	 * we still need to single-step the instruction, but we don't +	 * generate an event. +	 */ +	info->extraneous_interrupt = !((bp->attr.bp_addr <= dar) && +			(dar - bp->attr.bp_addr < bp->attr.bp_len)); + +	/* Do not emulate user-space instructions, instead single-step them */ +	if (user_mode(regs)) { +		bp->ctx->task->thread.last_hit_ubp = bp; +		regs->msr |= MSR_SE; +		goto out; +	} + +	stepped = 0; +	instr = 0; +	if (!__get_user_inatomic(instr, (unsigned int *) regs->nip)) +		stepped = emulate_step(regs, instr); + +	/* +	 * emulate_step() could not execute it. We've failed in reliably +	 * handling the hw-breakpoint. Unregister it and throw a warning +	 * message to let the user know about it. +	 */ +	if (!stepped) { +		WARN(1, "Unable to handle hardware breakpoint. Breakpoint at " +			"0x%lx will be disabled.", info->address); +		perf_event_disable(bp); +		goto out; +	} +	/* +	 * As a policy, the callback is invoked in a 'trigger-after-execute' +	 * fashion +	 */ +	if (!info->extraneous_interrupt) +		perf_bp_event(bp, regs); + +	set_dabr(info->address | info->type | DABR_TRANSLATION); +out: +	rcu_read_unlock(); +	return rc; +} + +/* + * Handle single-step exceptions following a DABR hit. 
+ */ +int __kprobes single_step_dabr_instruction(struct die_args *args) +{ +	struct pt_regs *regs = args->regs; +	struct perf_event *bp = NULL; +	struct arch_hw_breakpoint *bp_info; + +	bp = current->thread.last_hit_ubp; +	/* +	 * Check if we are single-stepping as a result of a +	 * previous HW Breakpoint exception +	 */ +	if (!bp) +		return NOTIFY_DONE; + +	bp_info = counter_arch_bp(bp); + +	/* +	 * We shall invoke the user-defined callback function in the single +	 * stepping handler to confirm to 'trigger-after-execute' semantics +	 */ +	if (!bp_info->extraneous_interrupt) +		perf_bp_event(bp, regs); + +	set_dabr(bp_info->address | bp_info->type | DABR_TRANSLATION); +	current->thread.last_hit_ubp = NULL; + +	/* +	 * If the process was being single-stepped by ptrace, let the +	 * other single-step actions occur (e.g. generate SIGTRAP). +	 */ +	if (test_thread_flag(TIF_SINGLESTEP)) +		return NOTIFY_DONE; + +	return NOTIFY_STOP; +} + +/* + * Handle debug exception notifications. + */ +int __kprobes hw_breakpoint_exceptions_notify( +		struct notifier_block *unused, unsigned long val, void *data) +{ +	int ret = NOTIFY_DONE; + +	switch (val) { +	case DIE_DABR_MATCH: +		ret = hw_breakpoint_handler(data); +		break; +	case DIE_SSTEP: +		ret = single_step_dabr_instruction(data); +		break; +	} + +	return ret; +} + +/* + * Release the user breakpoints used by ptrace + */ +void flush_ptrace_hw_breakpoint(struct task_struct *tsk) +{ +	struct thread_struct *t = &tsk->thread; + +	unregister_hw_breakpoint(t->ptrace_bps[0]); +	t->ptrace_bps[0] = NULL; +} + +void hw_breakpoint_pmu_read(struct perf_event *bp) +{ +	/* TODO */ +} diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 21266abfbda..9b626cfffce 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -140,19 +140,19 @@ static struct dma_map_ops ibmebus_dma_ops = {  static int ibmebus_match_path(struct device *dev, void *data)  { -	struct device_node *dn = to_of_device(dev)->dev.of_node; +	struct device_node *dn = to_platform_device(dev)->dev.of_node;  	return (dn->full_name &&  		(strcasecmp((char *)data, dn->full_name) == 0));  }  static int ibmebus_match_node(struct device *dev, void *data)  { -	return to_of_device(dev)->dev.of_node == data; +	return to_platform_device(dev)->dev.of_node == data;  }  static int ibmebus_create_device(struct device_node *dn)  { -	struct of_device *dev; +	struct platform_device *dev;  	int ret;  	dev = of_device_alloc(dn, NULL, &ibmebus_bus_device); @@ -298,7 +298,7 @@ static ssize_t ibmebus_store_remove(struct bus_type *bus,  	if ((dev = bus_find_device(&ibmebus_bus_type, NULL, path,  				   ibmebus_match_path))) { -		of_device_unregister(to_of_device(dev)); +		of_device_unregister(to_platform_device(dev));  		kfree(path);  		return count; diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S new file mode 100644 index 00000000000..16c002d6bdf --- /dev/null +++ b/arch/powerpc/kernel/idle_book3e.S @@ -0,0 +1,86 @@ +/* + * Copyright 2010 IBM Corp, Benjamin Herrenschmidt <benh@kernel.crashing.org> + * + * Generic idle routine for Book3E processors + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/threads.h> +#include <asm/reg.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ppc-opcode.h> +#include <asm/processor.h> +#include <asm/thread_info.h> + +/* 64-bit version only for now */ +#ifdef CONFIG_PPC64 + +_GLOBAL(book3e_idle) +	/* Save LR for later */ +	mflr	r0 +	std	r0,16(r1) + +	/* Hard disable interrupts */ +	wrteei	0 + +	/* Now check if an interrupt came in while we were soft disabled +	 * since we may otherwise lose it (doorbells etc...). We know +	 * that since PACAHARDIRQEN will have been cleared in that case. +	 */ +	lbz	r3,PACAHARDIRQEN(r13) +	cmpwi	cr0,r3,0 +	beqlr + +	/* Now we are going to mark ourselves as soft and hard enables in +	 * order to be able to take interrupts while asleep. We inform lockdep +	 * of that. We don't actually turn interrupts on just yet tho. +	 */ +#ifdef CONFIG_TRACE_IRQFLAGS +	stdu    r1,-128(r1) +	bl	.trace_hardirqs_on +#endif +	li	r0,1 +	stb	r0,PACASOFTIRQEN(r13) +	stb	r0,PACAHARDIRQEN(r13) +	 +	/* Interrupts will make use return to LR, so get something we want +	 * in there +	 */ +	bl	1f + +	/* Hard disable interrupts again */ +	wrteei	0 + +	/* Mark them off again in the PACA as well */ +	li	r0,0 +	stb	r0,PACASOFTIRQEN(r13) +	stb	r0,PACAHARDIRQEN(r13) + +	/* Tell lockdep about it */ +#ifdef CONFIG_TRACE_IRQFLAGS +	bl	.trace_hardirqs_off +	addi    r1,r1,128 +#endif +	ld	r0,16(r1) +	mtlr	r0 +	blr + +1:	/* Let's set the _TLF_NAPPING flag so interrupts make us return +	 * to the right spot +	*/ +	clrrdi	r11,r1,THREAD_SHIFT +	ld	r10,TI_LOCAL_FLAGS(r11) +	ori	r10,r10,_TLF_NAPPING +	std	r10,TI_LOCAL_FLAGS(r11) + +	/* We can now re-enable hard interrupts and go to sleep */ +	wrteei	1 +1:	PPC_WAIT(0) +	b	1b + +#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 30817d9b20c..d3ce67cf03b 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -53,6 +53,8 @@  #include <linux/bootmem.h>  #include <linux/pci.h>  #include <linux/debugfs.h> +#include <linux/of.h> +#include <linux/of_irq.h>  #include <asm/uaccess.h>  #include <asm/system.h> @@ -64,6 +66,8 @@  #include <asm/ptrace.h>  #include <asm/machdep.h>  #include <asm/udbg.h> +#include <asm/dbell.h> +  #ifdef CONFIG_PPC64  #include <asm/paca.h>  #include <asm/firmware.h> @@ -153,14 +157,28 @@ notrace void raw_local_irq_restore(unsigned long en)  	if (get_hard_enabled())  		return; +#if defined(CONFIG_BOOKE) && defined(CONFIG_SMP) +	/* Check for pending doorbell interrupts and resend to ourself */ +	doorbell_check_self(); +#endif +  	/*  	 * Need to hard-enable interrupts here.  Since currently disabled,  	 * no need to take further asm precautions against preemption; but  	 * use local_paca instead of get_paca() to avoid preemption checking.  	 */  	local_paca->hard_enabled = en; + +#ifndef CONFIG_BOOKE +	/* On server, re-trigger the decrementer if it went negative since +	 * some processors only trigger on edge transitions of the sign bit. +	 * +	 * BookE has a level sensitive decrementer (latches in TSR) so we +	 * don't need that +	 */  	if ((int)mfspr(SPRN_DEC) < 0)  		mtspr(SPRN_DEC, 1); +#endif /* CONFIG_BOOKE */  	/*  	 * Force the delivery of pending soft-disabled interrupts on PS3. 
@@ -295,7 +313,10 @@ void fixup_irqs(const struct cpumask *map)  	for_each_irq(irq) {  		desc = irq_to_desc(irq); -		if (desc && desc->status & IRQ_PER_CPU) +		if (!desc) +			continue; + +		if (desc->status & IRQ_PER_CPU)  			continue;  		cpumask_and(mask, desc->affinity, map); @@ -317,7 +338,6 @@ void fixup_irqs(const struct cpumask *map)  }  #endif -#ifdef CONFIG_IRQSTACKS  static inline void handle_one_irq(unsigned int irq)  {  	struct thread_info *curtp, *irqtp; @@ -358,12 +378,6 @@ static inline void handle_one_irq(unsigned int irq)  	if (irqtp->flags)  		set_bits(irqtp->flags, &curtp->flags);  } -#else -static inline void handle_one_irq(unsigned int irq) -{ -	generic_handle_irq(irq); -} -#endif  static inline void check_stack_overflow(void)  { @@ -455,7 +469,6 @@ void exc_lvl_ctx_init(void)  }  #endif -#ifdef CONFIG_IRQSTACKS  struct thread_info *softirq_ctx[NR_CPUS] __read_mostly;  struct thread_info *hardirq_ctx[NR_CPUS] __read_mostly; @@ -492,10 +505,6 @@ static inline void do_softirq_onstack(void)  	irqtp->task = NULL;  } -#else -#define do_softirq_onstack()	__do_softirq() -#endif /* CONFIG_IRQSTACKS */ -  void do_softirq(void)  {  	unsigned long flags; @@ -813,18 +822,6 @@ unsigned int irq_create_of_mapping(struct device_node *controller,  }  EXPORT_SYMBOL_GPL(irq_create_of_mapping); -unsigned int irq_of_parse_and_map(struct device_node *dev, int index) -{ -	struct of_irq oirq; - -	if (of_irq_map_one(dev, index, &oirq)) -		return NO_IRQ; - -	return irq_create_of_mapping(oirq.controller, oirq.specifier, -				     oirq.size); -} -EXPORT_SYMBOL_GPL(irq_of_parse_and_map); -  void irq_dispose_mapping(unsigned int virq)  {  	struct irq_host *host; diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 82a7b228c81..7f61a3ac787 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -129,7 +129,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs)  		return 0;  	if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr)) -		regs->nip += 4; +		regs->nip += BREAK_INSTR_SIZE;  	return 1;  } diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 035ada5443e..c1fd0f9658f 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -4,6 +4,7 @@  #include <linux/serial_core.h>  #include <linux/console.h>  #include <linux/pci.h> +#include <linux/of_address.h>  #include <linux/of_device.h>  #include <asm/io.h>  #include <asm/mmu.h> diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index bb3d893a835..dd6c141f166 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -12,7 +12,7 @@  #include <linux/kexec.h>  #include <linux/reboot.h>  #include <linux/threads.h> -#include <linux/lmb.h> +#include <linux/memblock.h>  #include <linux/of.h>  #include <asm/machdep.h>  #include <asm/prom.h> @@ -45,6 +45,18 @@ void machine_kexec_cleanup(struct kimage *image)  		ppc_md.machine_kexec_cleanup(image);  } +void arch_crash_save_vmcoreinfo(void) +{ + +#ifdef CONFIG_NEED_MULTIPLE_NODES +	VMCOREINFO_SYMBOL(node_data); +	VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); +#endif +#ifndef CONFIG_NEED_MULTIPLE_NODES +	VMCOREINFO_SYMBOL(contig_page_data); +#endif +} +  /*   * Do not allocate memory (or fail in any way) in machine_kexec().   * We are past the point of no return, committed to rebooting now. 
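
Stepping back to hw_breakpoint.c above: the point of routing DABR through perf_event is that kernel code can place a data watchpoint through the generic API instead of poking DABR directly. A minimal sketch, modelled on samples/hw_breakpoint/data_breakpoint.c of this era; the watched symbol is an arbitrary example and the handler is hypothetical.

	#include <linux/hw_breakpoint.h>
	#include <linux/perf_event.h>
	#include <linux/kallsyms.h>
	#include <linux/kernel.h>
	#include <linux/err.h>

	static struct perf_event * __percpu *sample_hbp;

	static void sample_hbp_handler(struct perf_event *bp, int nmi,
				       struct perf_sample_data *data,
				       struct pt_regs *regs)
	{
		/* Runs after the access completes ('trigger-after-execute'). */
		dump_stack();
	}

	static int __init sample_hbp_init(void)
	{
		struct perf_event_attr attr;

		hw_breakpoint_init(&attr);
		attr.bp_addr = kallsyms_lookup_name("pid_max");	/* example symbol */
		attr.bp_len = HW_BREAKPOINT_LEN_4;	/* DABR watches within one doubleword */
		attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;

		sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler);
		if (IS_ERR((void __force *)sample_hbp))
			return PTR_ERR((void __force *)sample_hbp);
		return 0;
	}
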
@@ -66,11 +78,11 @@ void __init reserve_crashkernel(void)  	unsigned long long crash_size, crash_base;  	int ret; -	/* this is necessary because of lmb_phys_mem_size() */ -	lmb_analyze(); +	/* this is necessary because of memblock_phys_mem_size() */ +	memblock_analyze();  	/* use common parsing */ -	ret = parse_crashkernel(boot_command_line, lmb_phys_mem_size(), +	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),  			&crash_size, &crash_base);  	if (ret == 0 && crash_size > 0) {  		crashk_res.start = crash_base; @@ -133,9 +145,9 @@ void __init reserve_crashkernel(void)  			"for crashkernel (System RAM: %ldMB)\n",  			(unsigned long)(crash_size >> 20),  			(unsigned long)(crashk_res.start >> 20), -			(unsigned long)(lmb_phys_mem_size() >> 20)); +			(unsigned long)(memblock_phys_mem_size() >> 20)); -	lmb_reserve(crashk_res.start, crash_size); +	memblock_reserve(crashk_res.start, crash_size);  }  int overlaps_crashkernel(unsigned long start, unsigned long size) @@ -144,24 +156,24 @@ int overlaps_crashkernel(unsigned long start, unsigned long size)  }  /* Values we need to export to the second kernel via the device tree. */ -static unsigned long kernel_end; -static unsigned long crashk_size; +static phys_addr_t kernel_end; +static phys_addr_t crashk_size;  static struct property kernel_end_prop = {  	.name = "linux,kernel-end", -	.length = sizeof(unsigned long), +	.length = sizeof(phys_addr_t),  	.value = &kernel_end,  };  static struct property crashk_base_prop = {  	.name = "linux,crashkernel-base", -	.length = sizeof(unsigned long), +	.length = sizeof(phys_addr_t),  	.value = &crashk_res.start,  };  static struct property crashk_size_prop = {  	.name = "linux,crashkernel-size", -	.length = sizeof(unsigned long), +	.length = sizeof(phys_addr_t),  	.value = &crashk_size,  }; diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 26f9900f773..583af70c4b1 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -15,6 +15,8 @@  #include <linux/thread_info.h>  #include <linux/init_task.h>  #include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/cpu.h>  #include <asm/page.h>  #include <asm/current.h> @@ -25,6 +27,7 @@  #include <asm/sections.h>	/* _end */  #include <asm/prom.h>  #include <asm/smp.h> +#include <asm/hw_breakpoint.h>  int default_machine_kexec_prepare(struct kimage *image)  { @@ -165,6 +168,7 @@ static void kexec_smp_down(void *arg)  	while(kexec_all_irq_disabled == 0)  		cpu_relax();  	mb(); /* make sure all irqs are disabled before this */ +	hw_breakpoint_disable();  	/*  	 * Now every CPU has IRQs off, we can clear out any pending  	 * IPIs and be sure that no more will come in after this. @@ -180,34 +184,32 @@ static void kexec_prepare_cpus_wait(int wait_state)  {  	int my_cpu, i, notified=-1; +	hw_breakpoint_disable();  	my_cpu = get_cpu(); -	/* Make sure each CPU has atleast made it to the state we need */ -	for (i=0; i < NR_CPUS; i++) { +	/* Make sure each CPU has at least made it to the state we need. +	 * +	 * FIXME: There is a (slim) chance of a problem if not all of the CPUs +	 * are correctly onlined.  If somehow we start a CPU on boot with RTAS +	 * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in +	 * time, the boot CPU will timeout.  If it does eventually execute +	 * stuff, the secondary will start up (paca[].cpu_start was written) and +	 * get into a peculiar state.  
If the platform supports +	 * smp_ops->take_timebase(), the secondary CPU will probably be spinning +	 * in there.  If not (i.e. pseries), the secondary will continue on and +	 * try to online itself/idle/etc. If it survives that, we need to find +	 * these possible-but-not-online-but-should-be CPUs and chaperone them +	 * into kexec_smp_wait(). +	 */ +	for_each_online_cpu(i) {  		if (i == my_cpu)  			continue;  		while (paca[i].kexec_state < wait_state) {  			barrier(); -			if (!cpu_possible(i)) { -				printk("kexec: cpu %d hw_cpu_id %d is not" -						" possible, ignoring\n", -						i, paca[i].hw_cpu_id); -				break; -			} -			if (!cpu_online(i)) { -				/* Fixme: this can be spinning in -				 * pSeries_secondary_wait with a paca -				 * waiting for it to go online. -				 */ -				printk("kexec: cpu %d hw_cpu_id %d is not" -						" online, ignoring\n", -						i, paca[i].hw_cpu_id); -				break; -			}  			if (i != notified) { -				printk( "kexec: waiting for cpu %d (physical" -						" %d) to enter %i state\n", -					i, paca[i].hw_cpu_id, wait_state); +				printk(KERN_INFO "kexec: waiting for cpu %d " +				       "(physical %d) to enter %i state\n", +				       i, paca[i].hw_cpu_id, wait_state);  				notified = i;  			}  		} @@ -215,9 +217,32 @@ static void kexec_prepare_cpus_wait(int wait_state)  	mb();  } -static void kexec_prepare_cpus(void) +/* + * We need to make sure each present CPU is online.  The next kernel will scan + * the device tree and assume primary threads are online and query secondary + * threads via RTAS to online them if required.  If we don't online primary + * threads, they will be stuck.  However, we also online secondary threads as we + * may be using 'cede offline'.  In this case RTAS doesn't see the secondary + * threads as offline -- and again, these CPUs will be stuck. + * + * So, we online all CPUs that should be running, including secondary threads. + */ +static void wake_offline_cpus(void)  { +	int cpu = 0; + +	for_each_present_cpu(cpu) { +		if (!cpu_online(cpu)) { +			printk(KERN_INFO "kexec: Waking offline cpu %d.\n", +			       cpu); +			cpu_up(cpu); +		} +	} +} +static void kexec_prepare_cpus(void) +{ +	wake_offline_cpus();  	smp_call_function(kexec_smp_down, NULL, /* wait */0);  	local_irq_disable();  	mb(); /* make sure IRQs are disabled before we say they are */ @@ -231,7 +256,10 @@ static void kexec_prepare_cpus(void)  	if (ppc_md.kexec_cpu_down)  		ppc_md.kexec_cpu_down(0, 0); -	/* Before removing MMU mapings make sure all CPUs have entered real mode */ +	/* +	 * Before removing MMU mappings make sure all CPUs have entered real +	 * mode: +	 */  	kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);  	put_cpu(); @@ -273,6 +301,12 @@ static void kexec_prepare_cpus(void)  static union thread_union kexec_stack __init_task_data =  	{ }; +/* + * For similar reasons to the stack above, the kexecing CPU needs to be on a + * static PACA; we switch to kexec_paca. + */ +struct paca_struct kexec_paca; +  /* Our assembly helper, in kexec_stub.S */  extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,  					void *image, void *control, @@ -294,12 +328,28 @@ void default_machine_kexec(struct kimage *image)  	if (crashing_cpu == -1)  		kexec_prepare_cpus(); +	pr_debug("kexec: Starting switchover sequence.\n"); +  	/* switch to a staticly allocated stack.  Based on irq stack code.  	 * XXX: the task struct will likely be invalid once we do the copy!  	 
*/  	kexec_stack.thread_info.task = current_thread_info()->task;  	kexec_stack.thread_info.flags = 0; +	/* We need a static PACA, too; copy this CPU's PACA over and switch to +	 * it.  Also poison per_cpu_offset to catch anyone using non-static +	 * data. +	 */ +	memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct)); +	kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL; +	paca = (struct paca_struct *)RELOC_HIDE(&kexec_paca, 0) - +		kexec_paca.paca_index; +	setup_paca(&kexec_paca); + +	/* XXX: If anyone does 'dynamic lppacas' this will also need to be +	 * switched to a static version! +	 */ +  	/* Some things are best done in assembly.  Finding globals with  	 * a toc is easier in C, so pass in what we can.  	 */ diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 22e507c8a55..2d29752cbe1 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -127,29 +127,3 @@ _GLOBAL(__setup_cpu_power7)  _GLOBAL(__restore_cpu_power7)  	/* place holder */  	blr - -/* - * Get a minimal set of registers for our caller's nth caller. - * r3 = regs pointer, r5 = n. - * - * We only get R1 (stack pointer), NIP (next instruction pointer) - * and LR (link register).  These are all we can get in the - * general case without doing complicated stack unwinding, but - * fortunately they are enough to do a stack backtrace, which - * is all we need them for. - */ -_GLOBAL(perf_arch_fetch_caller_regs) -	mr	r6,r1 -	cmpwi	r5,0 -	mflr	r4 -	ble	2f -	mtctr	r5 -1:	PPC_LL	r6,0(r6) -	bdnz	1b -	PPC_LL	r4,PPC_LR_STKOFF(r6) -2:	PPC_LL	r7,0(r6) -	PPC_LL	r7,PPC_LR_STKOFF(r7) -	PPC_STL	r6,GPR1-STACK_FRAME_OVERHEAD(r3) -	PPC_STL	r4,_NIP-STACK_FRAME_OVERHEAD(r3) -	PPC_STL	r7,_LINK-STACK_FRAME_OVERHEAD(r3) -	blr diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index dc66d52dcff..6bbd7a604d2 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -33,7 +33,6 @@  	.text -#ifdef CONFIG_IRQSTACKS  _GLOBAL(call_do_softirq)  	mflr	r0  	stw	r0,4(r1) @@ -56,7 +55,6 @@ _GLOBAL(call_handle_irq)  	lwz	r0,4(r1)  	mtlr	r0  	blr -#endif /* CONFIG_IRQSTACKS */  /*   * This returns the high 64 bits of the product of two 64-bit numbers. diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index a2b18dffa03..e5144906a56 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -28,7 +28,6 @@  	.text -#ifdef CONFIG_IRQSTACKS  _GLOBAL(call_do_softirq)  	mflr	r0  	std	r0,16(r1) @@ -52,7 +51,6 @@ _GLOBAL(call_handle_irq)  	ld	r0,16(r1)  	mtlr	r0  	blr -#endif /* CONFIG_IRQSTACKS */  	.section	".toc","aw"  PPC64_CACHES: diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c deleted file mode 100644 index df78e0236a0..00000000000 --- a/arch/powerpc/kernel/of_device.c +++ /dev/null @@ -1,133 +0,0 @@ -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/of.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/mod_devicetable.h> -#include <linux/slab.h> -#include <linux/of_device.h> - -#include <asm/errno.h> -#include <asm/dcr.h> - -static void of_device_make_bus_id(struct of_device *dev) -{ -	static atomic_t bus_no_reg_magic; -	struct device_node *node = dev->dev.of_node; -	const u32 *reg; -	u64 addr; -	int magic; - -	/* -	 * If it's a DCR based device, use 'd' for native DCRs -	 * and 'D' for MMIO DCRs. 
-	 */ -#ifdef CONFIG_PPC_DCR -	reg = of_get_property(node, "dcr-reg", NULL); -	if (reg) { -#ifdef CONFIG_PPC_DCR_NATIVE -		dev_set_name(&dev->dev, "d%x.%s", *reg, node->name); -#else /* CONFIG_PPC_DCR_NATIVE */ -		addr = of_translate_dcr_address(node, *reg, NULL); -		if (addr != OF_BAD_ADDR) { -			dev_set_name(&dev->dev, "D%llx.%s", -				     (unsigned long long)addr, node->name); -			return; -		} -#endif /* !CONFIG_PPC_DCR_NATIVE */ -	} -#endif /* CONFIG_PPC_DCR */ - -	/* -	 * For MMIO, get the physical address -	 */ -	reg = of_get_property(node, "reg", NULL); -	if (reg) { -		addr = of_translate_address(node, reg); -		if (addr != OF_BAD_ADDR) { -			dev_set_name(&dev->dev, "%llx.%s", -				     (unsigned long long)addr, node->name); -			return; -		} -	} - -	/* -	 * No BusID, use the node name and add a globally incremented -	 * counter (and pray...) -	 */ -	magic = atomic_add_return(1, &bus_no_reg_magic); -	dev_set_name(&dev->dev, "%s.%d", node->name, magic - 1); -} - -struct of_device *of_device_alloc(struct device_node *np, -				  const char *bus_id, -				  struct device *parent) -{ -	struct of_device *dev; - -	dev = kzalloc(sizeof(*dev), GFP_KERNEL); -	if (!dev) -		return NULL; - -	dev->dev.of_node = of_node_get(np); -	dev->dev.dma_mask = &dev->archdata.dma_mask; -	dev->dev.parent = parent; -	dev->dev.release = of_release_dev; - -	if (bus_id) -		dev_set_name(&dev->dev, "%s", bus_id); -	else -		of_device_make_bus_id(dev); - -	return dev; -} -EXPORT_SYMBOL(of_device_alloc); - -int of_device_uevent(struct device *dev, struct kobj_uevent_env *env) -{ -	struct of_device *ofdev; -	const char *compat; -	int seen = 0, cplen, sl; - -	if (!dev) -		return -ENODEV; - -	ofdev = to_of_device(dev); - -	if (add_uevent_var(env, "OF_NAME=%s", ofdev->dev.of_node->name)) -		return -ENOMEM; - -	if (add_uevent_var(env, "OF_TYPE=%s", ofdev->dev.of_node->type)) -		return -ENOMEM; - -        /* Since the compatible field can contain pretty much anything -         * it's not really legal to split it out with commas. We split it -         * up using a number of environment variables instead. */ - -	compat = of_get_property(ofdev->dev.of_node, "compatible", &cplen); -	while (compat && *compat && cplen > 0) { -		if (add_uevent_var(env, "OF_COMPATIBLE_%d=%s", seen, compat)) -			return -ENOMEM; - -		sl = strlen (compat) + 1; -		compat += sl; -		cplen -= sl; -		seen++; -	} - -	if (add_uevent_var(env, "OF_COMPATIBLE_N=%d", seen)) -		return -ENOMEM; - -	/* modalias is trickier, we add it in 2 steps */ -	if (add_uevent_var(env, "MODALIAS=")) -		return -ENOMEM; -	sl = of_device_get_modalias(ofdev, &env->buf[env->buflen-1], -				    sizeof(env->buf) - env->buflen); -	if (sl >= (sizeof(env->buf) - env->buflen)) -		return -ENOMEM; -	env->buflen += sl; - -	return 0; -} -EXPORT_SYMBOL(of_device_uevent); -EXPORT_SYMBOL(of_device_get_modalias); diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 487a98851ba..b2c363ef38a 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -28,207 +28,6 @@  #include <asm/ppc-pci.h>  #include <asm/atomic.h> -/* - * The list of OF IDs below is used for matching bus types in the - * system whose devices are to be exposed as of_platform_devices. - * - * This is the default list valid for most platforms. 
This file provides - * functions who can take an explicit list if necessary though - * - * The search is always performed recursively looking for children of - * the provided device_node and recursively if such a children matches - * a bus type in the list - */ - -static const struct of_device_id of_default_bus_ids[] = { -	{ .type = "soc", }, -	{ .compatible = "soc", }, -	{ .type = "spider", }, -	{ .type = "axon", }, -	{ .type = "plb5", }, -	{ .type = "plb4", }, -	{ .type = "opb", }, -	{ .type = "ebc", }, -	{}, -}; - -struct bus_type of_platform_bus_type = { -       .uevent	= of_device_uevent, -}; -EXPORT_SYMBOL(of_platform_bus_type); - -static int __init of_bus_driver_init(void) -{ -	return of_bus_type_init(&of_platform_bus_type, "of_platform"); -} - -postcore_initcall(of_bus_driver_init); - -struct of_device* of_platform_device_create(struct device_node *np, -					    const char *bus_id, -					    struct device *parent) -{ -	struct of_device *dev; - -	dev = of_device_alloc(np, bus_id, parent); -	if (!dev) -		return NULL; - -	dev->archdata.dma_mask = 0xffffffffUL; -	dev->dev.coherent_dma_mask = DMA_BIT_MASK(32); - -	dev->dev.bus = &of_platform_bus_type; - -	/* We do not fill the DMA ops for platform devices by default. -	 * This is currently the responsibility of the platform code -	 * to do such, possibly using a device notifier -	 */ - -	if (of_device_register(dev) != 0) { -		of_device_free(dev); -		return NULL; -	} - -	return dev; -} -EXPORT_SYMBOL(of_platform_device_create); - - - -/** - * of_platform_bus_create - Create an OF device for a bus node and all its - * children. Optionally recursively instanciate matching busses. - * @bus: device node of the bus to instanciate - * @matches: match table, NULL to use the default, OF_NO_DEEP_PROBE to - * disallow recursive creation of child busses - */ -static int of_platform_bus_create(const struct device_node *bus, -				  const struct of_device_id *matches, -				  struct device *parent) -{ -	struct device_node *child; -	struct of_device *dev; -	int rc = 0; - -	for_each_child_of_node(bus, child) { -		pr_debug("   create child: %s\n", child->full_name); -		dev = of_platform_device_create(child, NULL, parent); -		if (dev == NULL) -			rc = -ENOMEM; -		else if (!of_match_node(matches, child)) -			continue; -		if (rc == 0) { -			pr_debug("   and sub busses\n"); -			rc = of_platform_bus_create(child, matches, &dev->dev); -		} if (rc) { -			of_node_put(child); -			break; -		} -	} -	return rc; -} - -/** - * of_platform_bus_probe - Probe the device-tree for platform busses - * @root: parent of the first level to probe or NULL for the root of the tree - * @matches: match table, NULL to use the default - * @parent: parent to hook devices from, NULL for toplevel - * - * Note that children of the provided root are not instanciated as devices - * unless the specified root itself matches the bus list and is not NULL. 
- */ - -int of_platform_bus_probe(struct device_node *root, -			  const struct of_device_id *matches, -			  struct device *parent) -{ -	struct device_node *child; -	struct of_device *dev; -	int rc = 0; - -	if (matches == NULL) -		matches = of_default_bus_ids; -	if (matches == OF_NO_DEEP_PROBE) -		return -EINVAL; -	if (root == NULL) -		root = of_find_node_by_path("/"); -	else -		of_node_get(root); - -	pr_debug("of_platform_bus_probe()\n"); -	pr_debug(" starting at: %s\n", root->full_name); - -	/* Do a self check of bus type, if there's a match, create -	 * children -	 */ -	if (of_match_node(matches, root)) { -		pr_debug(" root match, create all sub devices\n"); -		dev = of_platform_device_create(root, NULL, parent); -		if (dev == NULL) { -			rc = -ENOMEM; -			goto bail; -		} -		pr_debug(" create all sub busses\n"); -		rc = of_platform_bus_create(root, matches, &dev->dev); -		goto bail; -	} -	for_each_child_of_node(root, child) { -		if (!of_match_node(matches, child)) -			continue; - -		pr_debug("  match: %s\n", child->full_name); -		dev = of_platform_device_create(child, NULL, parent); -		if (dev == NULL) -			rc = -ENOMEM; -		else -			rc = of_platform_bus_create(child, matches, &dev->dev); -		if (rc) { -			of_node_put(child); -			break; -		} -	} - bail: -	of_node_put(root); -	return rc; -} -EXPORT_SYMBOL(of_platform_bus_probe); - -static int of_dev_node_match(struct device *dev, void *data) -{ -	return to_of_device(dev)->dev.of_node == data; -} - -struct of_device *of_find_device_by_node(struct device_node *np) -{ -	struct device *dev; - -	dev = bus_find_device(&of_platform_bus_type, -			      NULL, np, of_dev_node_match); -	if (dev) -		return to_of_device(dev); -	return NULL; -} -EXPORT_SYMBOL(of_find_device_by_node); - -static int of_dev_phandle_match(struct device *dev, void *data) -{ -	phandle *ph = data; -	return to_of_device(dev)->dev.of_node->phandle == *ph; -} - -struct of_device *of_find_device_by_phandle(phandle ph) -{ -	struct device *dev; - -	dev = bus_find_device(&of_platform_bus_type, -			      NULL, &ph, of_dev_phandle_match); -	if (dev) -		return to_of_device(dev); -	return NULL; -} -EXPORT_SYMBOL(of_find_device_by_phandle); - -  #ifdef CONFIG_PPC_OF_PLATFORM_PCI  /* The probing of PCI controllers from of_platform is currently @@ -237,7 +36,7 @@ EXPORT_SYMBOL(of_find_device_by_phandle);   * lacking some bits needed here.   */ -static int __devinit of_pci_phb_probe(struct of_device *dev, +static int __devinit of_pci_phb_probe(struct platform_device *dev,  				      const struct of_device_id *match)  {  	struct pci_controller *phb; diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index f88acf0218d..d0a26f1770f 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -9,7 +9,7 @@  #include <linux/threads.h>  #include <linux/module.h> -#include <linux/lmb.h> +#include <linux/memblock.h>  #include <asm/firmware.h>  #include <asm/lppaca.h> @@ -105,6 +105,16 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)  #endif /* CONFIG_PPC_STD_MMU_64 */  } +/* Put the paca pointer into r13 and SPRG_PACA */ +void setup_paca(struct paca_struct *new_paca) +{ +	local_paca = new_paca; +	mtspr(SPRN_SPRG_PACA, local_paca); +#ifdef CONFIG_PPC_BOOK3E +	mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); +#endif +} +  static int __initdata paca_size;  void __init allocate_pacas(void) @@ -117,7 +127,7 @@ void __init allocate_pacas(void)  	 * the first segment. 
On iSeries they must be within the area mapped  	 * by the HV, which is HvPagesToMap * HVPAGESIZE bytes.  	 */ -	limit = min(0x10000000ULL, lmb.rmo_size); +	limit = min(0x10000000ULL, memblock.rmo_size);  	if (firmware_has_feature(FW_FEATURE_ISERIES))  		limit = min(limit, HvPagesToMap * HVPAGESIZE); @@ -128,7 +138,7 @@ void __init allocate_pacas(void)  	paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpus); -	paca = __va(lmb_alloc_base(paca_size, PAGE_SIZE, limit)); +	paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit));  	memset(paca, 0, paca_size);  	printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n", @@ -148,7 +158,7 @@ void __init free_unused_pacas(void)  	if (new_size >= paca_size)  		return; -	lmb_free(__pa(paca) + new_size, paca_size - new_size); +	memblock_free(__pa(paca) + new_size, paca_size - new_size);  	printk(KERN_DEBUG "Freed %u bytes for unused pacas\n",  		paca_size - new_size); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 6646005dffb..9021c4ad4bb 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -21,6 +21,7 @@  #include <linux/string.h>  #include <linux/init.h>  #include <linux/bootmem.h> +#include <linux/of_address.h>  #include <linux/mm.h>  #include <linux/list.h>  #include <linux/syscalls.h> @@ -1309,6 +1310,7 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus)  		printk(KERN_WARNING "PCI: Cannot allocate resource region "  		       "%d of PCI bridge %d, will remap\n", i, bus->number);  clear_resource: +		res->start = res->end = 0;  		res->flags = 0;  	} diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 43b83c35cf5..d301a30445e 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -410,15 +410,15 @@ static void power_pmu_read(struct perf_event *event)  	 * Therefore we treat them like NMIs.  	 */  	do { -		prev = atomic64_read(&event->hw.prev_count); +		prev = local64_read(&event->hw.prev_count);  		barrier();  		val = read_pmc(event->hw.idx); -	} while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); +	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);  	/* The counters are only 32 bits wide */  	delta = (val - prev) & 0xfffffffful; -	atomic64_add(delta, &event->count); -	atomic64_sub(delta, &event->hw.period_left); +	local64_add(delta, &event->count); +	local64_sub(delta, &event->hw.period_left);  }  /* @@ -444,10 +444,10 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw,  		if (!event->hw.idx)  			continue;  		val = (event->hw.idx == 5) ? pmc5 : pmc6; -		prev = atomic64_read(&event->hw.prev_count); +		prev = local64_read(&event->hw.prev_count);  		event->hw.idx = 0;  		delta = (val - prev) & 0xfffffffful; -		atomic64_add(delta, &event->count); +		local64_add(delta, &event->count);  	}  } @@ -462,7 +462,7 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw,  		event = cpuhw->limited_counter[i];  		event->hw.idx = cpuhw->limited_hwidx[i];  		val = (event->hw.idx == 5) ? 
pmc5 : pmc6;
-		atomic64_set(&event->hw.prev_count, val);
+		local64_set(&event->hw.prev_count, val);
 		perf_event_update_userpage(event);
 	}
 }
@@ -666,11 +666,11 @@ void hw_perf_enable(void)
 		}
 		val = 0;
 		if (event->hw.sample_period) {
-			left = atomic64_read(&event->hw.period_left);
+			left = local64_read(&event->hw.period_left);
 			if (left < 0x80000000L)
 				val = 0x80000000L - left;
 		}
-		atomic64_set(&event->hw.prev_count, val);
+		local64_set(&event->hw.prev_count, val);
 		event->hw.idx = idx;
 		write_pmc(idx, val);
 		perf_event_update_userpage(event);
@@ -754,7 +754,7 @@ static int power_pmu_enable(struct perf_event *event)
 	 * skip the schedulability test here, it will be performed
 	 * at commit time (->commit_txn) as a whole
 	 */
-	if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED)
+	if (cpuhw->group_flag & PERF_EVENT_TXN)
 		goto nocheck;
 
 	if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
@@ -791,8 +791,11 @@ static void power_pmu_disable(struct perf_event *event)
 	cpuhw = &__get_cpu_var(cpu_hw_events);
 	for (i = 0; i < cpuhw->n_events; ++i) {
 		if (event == cpuhw->event[i]) {
-			while (++i < cpuhw->n_events)
+			while (++i < cpuhw->n_events) {
 				cpuhw->event[i-1] = cpuhw->event[i];
+				cpuhw->events[i-1] = cpuhw->events[i];
+				cpuhw->flags[i-1] = cpuhw->flags[i];
+			}
 			--cpuhw->n_events;
 			ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr);
 			if (event->hw.idx) {
@@ -842,8 +845,8 @@ static void power_pmu_unthrottle(struct perf_event *event)
 	if (left < 0x80000000L)
 		val = 0x80000000L - left;
 	write_pmc(event->hw.idx, val);
-	atomic64_set(&event->hw.prev_count, val);
-	atomic64_set(&event->hw.period_left, left);
+	local64_set(&event->hw.prev_count, val);
+	local64_set(&event->hw.period_left, left);
 	perf_event_update_userpage(event);
 	perf_enable();
 	local_irq_restore(flags);
@@ -858,7 +861,7 @@ void power_pmu_start_txn(const struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-	cpuhw->group_flag |= PERF_EVENT_TXN_STARTED;
+	cpuhw->group_flag |= PERF_EVENT_TXN;
 	cpuhw->n_txn_start = cpuhw->n_events;
 }
@@ -871,7 +874,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
 
-	cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED;
+	cpuhw->group_flag &= ~PERF_EVENT_TXN;
 }
 
 /*
@@ -897,6 +900,7 @@ int power_pmu_commit_txn(const struct pmu *pmu)
 	for (i = cpuhw->n_txn_start; i < n; ++i)
 		cpuhw->event[i]->hw.config = cpuhw->events[i];
 
+	cpuhw->group_flag &= ~PERF_EVENT_TXN;
 	return 0;
 }
@@ -1108,7 +1112,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 	event->hw.config = events[n];
 	event->hw.event_base = cflags[n];
 	event->hw.last_period = event->hw.sample_period;
-	atomic64_set(&event->hw.period_left, event->hw.last_period);
+	local64_set(&event->hw.period_left, event->hw.last_period);
 
 	/*
 	 * See if we need to reserve the PMU.
@@ -1146,16 +1150,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 	int record = 0;
 
 	/* we don't have to worry about interrupts here */
-	prev = atomic64_read(&event->hw.prev_count);
+	prev = local64_read(&event->hw.prev_count);
 	delta = (val - prev) & 0xfffffffful;
-	atomic64_add(delta, &event->count);
+	local64_add(delta, &event->count);
 
 	/*
 	 * See if the total period for this event has expired,
 	 * and update for the next period.
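
The atomic64_t to local64_t conversion running through these hunks keeps the same lock-free update shape; the fields are only ever touched from their owning CPU (possibly from PMU interrupt context), so the cheaper local64 operations suffice. Condensed, the pattern is roughly the sketch below, with the field names from the patch and read_pmc() standing in for the hardware counter read:

static void example_pmu_update(struct perf_event *event)
{
	u64 prev, val, delta;

	/* Retry if a PMU interrupt on this CPU moved prev_count under us */
	do {
		prev = local64_read(&event->hw.prev_count);
		barrier();
		val = read_pmc(event->hw.idx);
	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);

	delta = (val - prev) & 0xfffffffful;	/* the PMCs are 32 bits wide */
	local64_add(delta, &event->count);
	local64_sub(delta, &event->hw.period_left);
}
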
*/  	val = 0; -	left = atomic64_read(&event->hw.period_left) - delta; +	left = local64_read(&event->hw.period_left) - delta;  	if (period) {  		if (left <= 0) {  			left += period; @@ -1193,8 +1197,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,  	}  	write_pmc(event->hw.idx, val); -	atomic64_set(&event->hw.prev_count, val); -	atomic64_set(&event->hw.period_left, left); +	local64_set(&event->hw.prev_count, val); +	local64_set(&event->hw.period_left, left);  	perf_event_update_userpage(event);  } diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 369872f6cf7..1ba45471ae4 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -162,15 +162,15 @@ static void fsl_emb_pmu_read(struct perf_event *event)  	 * Therefore we treat them like NMIs.  	 */  	do { -		prev = atomic64_read(&event->hw.prev_count); +		prev = local64_read(&event->hw.prev_count);  		barrier();  		val = read_pmc(event->hw.idx); -	} while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); +	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);  	/* The counters are only 32 bits wide */  	delta = (val - prev) & 0xfffffffful; -	atomic64_add(delta, &event->count); -	atomic64_sub(delta, &event->hw.period_left); +	local64_add(delta, &event->count); +	local64_sub(delta, &event->hw.period_left);  }  /* @@ -296,11 +296,11 @@ static int fsl_emb_pmu_enable(struct perf_event *event)  	val = 0;  	if (event->hw.sample_period) { -		s64 left = atomic64_read(&event->hw.period_left); +		s64 left = local64_read(&event->hw.period_left);  		if (left < 0x80000000L)  			val = 0x80000000L - left;  	} -	atomic64_set(&event->hw.prev_count, val); +	local64_set(&event->hw.prev_count, val);  	write_pmc(i, val);  	perf_event_update_userpage(event); @@ -371,8 +371,8 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event)  	if (left < 0x80000000L)  		val = 0x80000000L - left;  	write_pmc(event->hw.idx, val); -	atomic64_set(&event->hw.prev_count, val); -	atomic64_set(&event->hw.period_left, left); +	local64_set(&event->hw.prev_count, val); +	local64_set(&event->hw.period_left, left);  	perf_event_update_userpage(event);  	perf_enable();  	local_irq_restore(flags); @@ -500,7 +500,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)  		return ERR_PTR(-ENOTSUPP);  	event->hw.last_period = event->hw.sample_period; -	atomic64_set(&event->hw.period_left, event->hw.last_period); +	local64_set(&event->hw.period_left, event->hw.last_period);  	/*  	 * See if we need to reserve the PMU. @@ -541,16 +541,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,  	int record = 0;  	/* we don't have to worry about interrupts here */ -	prev = atomic64_read(&event->hw.prev_count); +	prev = local64_read(&event->hw.prev_count);  	delta = (val - prev) & 0xfffffffful; -	atomic64_add(delta, &event->count); +	local64_add(delta, &event->count);  	/*  	 * See if the total period for this event has expired,  	 * and update for the next period.  	 */  	val = 0; -	left = atomic64_read(&event->hw.period_left) - delta; +	left = local64_read(&event->hw.period_left) - delta;  	if (period) {  		if (left <= 0) {  			left += period; @@ -566,9 +566,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,  	 * Finally record data if requested.  	 
*/
 	if (record) {
-		struct perf_sample_data data = {
-			.period	= event->hw.last_period,
-		};
+		struct perf_sample_data data;
+
+		perf_sample_data_init(&data, 0);
+		data.period = event->hw.last_period;
 
 		if (perf_event_overflow(event, nmi, &data, regs)) {
 			/*
@@ -584,8 +585,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 	}
 	write_pmc(event->hw.idx, val);
-	atomic64_set(&event->hw.prev_count, val);
-	atomic64_set(&event->hw.period_left, left);
+	local64_set(&event->hw.prev_count, val);
+	local64_set(&event->hw.period_left, left);
 	perf_event_update_userpage(event);
 }
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 3b4dcc82a4c..ab3e392ac63 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -101,10 +101,6 @@ EXPORT_SYMBOL(pci_dram_offset);
 EXPORT_SYMBOL(start_thread);
 EXPORT_SYMBOL(kernel_thread);
 
-#ifdef CONFIG_PPC_FPU
-EXPORT_SYMBOL_GPL(cvt_df);
-EXPORT_SYMBOL_GPL(cvt_fd);
-#endif
 EXPORT_SYMBOL(giveup_fpu);
 #ifdef CONFIG_ALTIVEC
 EXPORT_SYMBOL(giveup_altivec);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9d255b4f0a0..feacfb78968 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -37,6 +37,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/personality.h>
 #include <linux/random.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -462,14 +463,42 @@ struct task_struct *__switch_to(struct task_struct *prev,
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 	switch_booke_debug_regs(&new->thread);
 #else
+/*
+ * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would
+ * schedule DABR
+ */
+#ifndef CONFIG_HAVE_HW_BREAKPOINT
 	if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr))
 		set_dabr(new->thread.dabr);
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
 #endif
 
 	new_thread = &new->thread;
 	old_thread = &current->thread;
 
+#if defined(CONFIG_PPC_BOOK3E_64)
+	/* XXX Current Book3E code doesn't deal with kernel side DBCR0,
+	 * we always hold the user values, so we set it now.
+	 *
+	 * However, we ensure the kernel MSR:DE is appropriately cleared too
+	 * to avoid spurious single step exceptions in the kernel.
+	 *
+	 * This will have to change to merge with the ppc32 code at some point,
+	 * but I don't like much what ppc32 is doing today so there's some
+	 * thinking needed there
+	 */
+	if ((new_thread->dbcr0 | old_thread->dbcr0) & DBCR0_IDM) {
+		u32 dbcr0;
+
+		mtmsr(mfmsr() & ~MSR_DE);
+		isync();
+		dbcr0 = mfspr(SPRN_DBCR0);
+		dbcr0 = (dbcr0 & DBCR0_EDM) | new_thread->dbcr0;
+		mtspr(SPRN_DBCR0, dbcr0);
+	}
+#endif /* CONFIG_PPC_BOOK3E_64 */
+
 #ifdef CONFIG_PPC64
 	/*
 	 * Collect processor utilization data per process
@@ -642,7 +671,11 @@ void flush_thread(void)
 {
 	discard_lazy_cpu_state();
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	flush_ptrace_hw_breakpoint(current);
+#else /* CONFIG_HAVE_HW_BREAKPOINT */
 	set_debug_reg_defaults(&current->thread);
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
 }
 
 void
@@ -660,6 +693,9 @@ void prepare_to_copy(struct task_struct *tsk)
 	flush_altivec_to_thread(current);
 	flush_vsx_to_thread(current);
 	flush_spe_to_thread(current);
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	flush_ptrace_hw_breakpoint(tsk);
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
 }
 
 /*
@@ -991,7 +1027,7 @@ int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2,
 	int error;
 	char *filename;
 
-	filename = getname((char __user *) a0);
+	filename = getname((const char __user *) a0);
 	error = PTR_ERR(filename);
 	if (IS_ERR(filename))
 		goto out;
@@ -1005,7 +1041,6 @@ out:
 	return error;
 }
 
-#ifdef CONFIG_IRQSTACKS
 static inline int valid_irq_stack(unsigned long sp, struct task_struct *p,
 				  unsigned long nbytes)
 {
@@ -1030,10 +1065,6 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p,
 	return 0;
 }
 
-#else
-#define valid_irq_stack(sp, p, nb)	0
-#endif /* CONFIG_IRQSTACKS */
-
 int validate_sp(unsigned long sp, struct task_struct *p,
 		       unsigned long nbytes)
 {
@@ -1268,3 +1299,14 @@ unsigned long randomize_et_dyn(unsigned long base)
 
 	return ret;
 }
+
+#ifdef CONFIG_SMP
+int arch_sd_sibling_asym_packing(void)
+{
+	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
+		return SD_ASYM_PACKING;
+	}
+	return 0;
+}
+#endif
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 05131d634e7..fed9bf6187d 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -31,7 +31,7 @@
 #include <linux/kexec.h>
 #include <linux/debugfs.h>
 #include <linux/irq.h>
-#include <linux/lmb.h>
+#include <linux/memblock.h>
 
 #include <asm/prom.h>
 #include <asm/rtas.h>
@@ -98,7 +98,7 @@ static void __init move_device_tree(void)
 	if ((memory_limit && (start + size) > memory_limit) ||
 			overlaps_crashkernel(start, size)) {
-		p = __va(lmb_alloc_base(size, PAGE_SIZE, lmb.rmo_size));
+		p = __va(memblock_alloc_base(size, PAGE_SIZE, memblock.rmo_size));
 		memcpy(p, initial_boot_params, size);
 		initial_boot_params = (struct boot_param_header *)p;
 		DBG("Moved device tree to 0x%p\n", p);
@@ -411,13 +411,13 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
 {
 	__be32 *dm, *ls, *usm;
 	unsigned long l, n, flags;
-	u64 base, size, lmb_size;
+	u64 base, size, memblock_size;
 	unsigned int is_kexec_kdump = 0, rngs;
 
 	ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
 	if (ls == NULL || l < dt_root_size_cells * sizeof(__be32))
 		return 0;
-	lmb_size = dt_mem_next_cell(dt_root_size_cells, &ls);
+	memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls);
 
 	dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l);
 	if (dm == NULL || l < sizeof(__be32))
@@ -442,11
+442,11 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)  		   or if the block is not assigned to this partition (0x8) */  		if ((flags & 0x80) || !(flags & 0x8))  			continue; -		size = lmb_size; +		size = memblock_size;  		rngs = 1;  		if (is_kexec_kdump) {  			/* -			 * For each lmb in ibm,dynamic-memory, a corresponding +			 * For each memblock in ibm,dynamic-memory, a corresponding  			 * entry in linux,drconf-usable-memory property contains  			 * a counter 'p' followed by 'p' (base, size) duple.  			 * Now read the counter from @@ -469,10 +469,10 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)  				if ((base + size) > 0x80000000ul)  					size = 0x80000000ul - base;  			} -			lmb_add(base, size); +			memblock_add(base, size);  		} while (--rngs);  	} -	lmb_dump_all(); +	memblock_dump_all();  	return 0;  }  #else @@ -501,14 +501,14 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)  	}  #endif -	lmb_add(base, size); +	memblock_add(base, size);  	memstart_addr = min((u64)memstart_addr, base);  }  u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align)  { -	return lmb_alloc(size, align); +	return memblock_alloc(size, align);  }  #ifdef CONFIG_BLK_DEV_INITRD @@ -534,12 +534,12 @@ static void __init early_reserve_mem(void)  	/* before we do anything, lets reserve the dt blob */  	self_base = __pa((unsigned long)initial_boot_params);  	self_size = initial_boot_params->totalsize; -	lmb_reserve(self_base, self_size); +	memblock_reserve(self_base, self_size);  #ifdef CONFIG_BLK_DEV_INITRD  	/* then reserve the initrd, if any */  	if (initrd_start && (initrd_end > initrd_start)) -		lmb_reserve(__pa(initrd_start), initrd_end - initrd_start); +		memblock_reserve(__pa(initrd_start), initrd_end - initrd_start);  #endif /* CONFIG_BLK_DEV_INITRD */  #ifdef CONFIG_PPC32 @@ -560,7 +560,7 @@ static void __init early_reserve_mem(void)  			if (base_32 == self_base && size_32 == self_size)  				continue;  			DBG("reserving: %x -> %x\n", base_32, size_32); -			lmb_reserve(base_32, size_32); +			memblock_reserve(base_32, size_32);  		}  		return;  	} @@ -571,7 +571,7 @@ static void __init early_reserve_mem(void)  		if (size == 0)  			break;  		DBG("reserving: %llx -> %llx\n", base, size); -		lmb_reserve(base, size); +		memblock_reserve(base, size);  	}  } @@ -594,7 +594,7 @@ static inline unsigned long phyp_dump_calculate_reserve_size(void)  		return phyp_dump_info->reserve_bootvar;  	/* divide by 20 to get 5% of value */ -	tmp = lmb_end_of_DRAM(); +	tmp = memblock_end_of_DRAM();  	do_div(tmp, 20);  	/* round it down in multiples of 256 */ @@ -633,11 +633,11 @@ static void __init phyp_dump_reserve_mem(void)  	if (phyp_dump_info->phyp_dump_is_active) {  		/* Reserve *everything* above RMR.Area freed by userland tools*/  		base = variable_reserve_size; -		size = lmb_end_of_DRAM() - base; +		size = memblock_end_of_DRAM() - base;  		/* XXX crashed_ram_end is wrong, since it may be beyond  		 * the memory_limit, it will need to be adjusted. 
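
The lmb-to-memblock conversion in this file is mechanical: each lmb_* call becomes the matching memblock_* call with unchanged semantics. Condensed into a sketch, the early-boot sequence reads as follows (function and parameter names hypothetical; memblock_init() and memblock_analyze() belong to this kernel generation's API):

static void __init example_early_memory_setup(u64 ram_base, u64 ram_size,
					      u64 kern_base, u64 kern_size)
{
	memblock_init();			/* reset the region arrays */
	memblock_add(ram_base, ram_size);	/* RAM found in the device tree */
	memblock_reserve(kern_base, kern_size);	/* keep allocations off the kernel */
	memblock_analyze();			/* recompute totals */
	memblock_dump_all();			/* debug dump, as called above */
}
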
*/ -		lmb_reserve(base, size); +		memblock_reserve(base, size);  		phyp_dump_info->init_reserve_start = base;  		phyp_dump_info->init_reserve_size = size; @@ -645,8 +645,8 @@ static void __init phyp_dump_reserve_mem(void)  		size = phyp_dump_info->cpu_state_size +  			phyp_dump_info->hpte_region_size +  			variable_reserve_size; -		base = lmb_end_of_DRAM() - size; -		lmb_reserve(base, size); +		base = memblock_end_of_DRAM() - size; +		memblock_reserve(base, size);  		phyp_dump_info->init_reserve_start = base;  		phyp_dump_info->init_reserve_size = size;  	} @@ -681,8 +681,8 @@ void __init early_init_devtree(void *params)  	 */  	of_scan_flat_dt(early_init_dt_scan_chosen, NULL); -	/* Scan memory nodes and rebuild LMBs */ -	lmb_init(); +	/* Scan memory nodes and rebuild MEMBLOCKs */ +	memblock_init();  	of_scan_flat_dt(early_init_dt_scan_root, NULL);  	of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); @@ -690,11 +690,11 @@ void __init early_init_devtree(void *params)  	strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);  	parse_early_param(); -	/* Reserve LMB regions used by kernel, initrd, dt, etc... */ -	lmb_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START); +	/* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */ +	memblock_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START);  	/* If relocatable, reserve first 32k for interrupt vectors etc. */  	if (PHYSICAL_START > MEMORY_START) -		lmb_reserve(MEMORY_START, 0x8000); +		memblock_reserve(MEMORY_START, 0x8000);  	reserve_kdump_trampoline();  	reserve_crashkernel();  	early_reserve_mem(); @@ -706,17 +706,17 @@ void __init early_init_devtree(void *params)  		/* Ensure that total memory size is page-aligned, because  		 * otherwise mark_bootmem() gets upset. */ -		lmb_analyze(); -		memsize = lmb_phys_mem_size(); +		memblock_analyze(); +		memsize = memblock_phys_mem_size();  		if ((memsize & PAGE_MASK) != memsize)  			limit = memsize & PAGE_MASK;  	} -	lmb_enforce_memory_limit(limit); +	memblock_enforce_memory_limit(limit); -	lmb_analyze(); -	lmb_dump_all(); +	memblock_analyze(); +	memblock_dump_all(); -	DBG("Phys. mem: %llx\n", lmb_phys_mem_size()); +	DBG("Phys. mem: %llx\n", memblock_phys_mem_size());  	/* We may need to relocate the flat tree, do it now.  	 * FIXME .. and the initrd too? */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 97d4bd9442d..941ff4dbc56 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -311,6 +311,24 @@ static void __init prom_print_hex(unsigned long val)  	call_prom("write", 3, 1, _prom->stdout, buf, nibbles);  } +/* max number of decimal digits in an unsigned long */ +#define UL_DIGITS 21 +static void __init prom_print_dec(unsigned long val) +{ +	int i, size; +	char buf[UL_DIGITS+1]; +	struct prom_t *_prom = &RELOC(prom); + +	for (i = UL_DIGITS-1; i >= 0;  i--) { +		buf[i] = (val % 10) + '0'; +		val = val/10; +		if (val == 0) +			break; +	} +	/* shift stuff down */ +	size = UL_DIGITS - i; +	call_prom("write", 3, 1, _prom->stdout, buf+i, size); +}  static void __init prom_printf(const char *format, ...)  { @@ -350,6 +368,14 @@ static void __init prom_printf(const char *format, ...)  			
v = va_arg(args, unsigned long);
 			prom_print_hex(v);
 			break;
+		case 'l':
+			++q;
+			if (*q == 'u') { /* '%lu' */
+				++q;
+				v = va_arg(args, unsigned long);
+				prom_print_dec(v);
+			}
+			break;
 		}
 	}
 }
@@ -835,11 +861,11 @@ static int __init prom_count_smt_threads(void)
 		if (plen == PROM_ERROR)
 			break;
 		plen >>= 2;
-		prom_debug("Found 0x%x smt threads per core\n", (unsigned long)plen);
+		prom_debug("Found %lu smt threads per core\n", (unsigned long)plen);
 		/* Sanity check */
 		if (plen < 1 || plen > 64) {
-			prom_printf("Threads per core 0x%x out of bounds, assuming 1\n",
+			prom_printf("Threads per core %lu out of bounds, assuming 1\n",
 				    (unsigned long)plen);
 			return 1;
 		}
@@ -869,12 +895,12 @@ static void __init prom_send_capabilities(void)
 		cores = (u32 *)PTRRELOC(&ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET]);
 		if (*cores != NR_CPUS) {
 			prom_printf("WARNING ! "
-				    "ibm_architecture_vec structure inconsistent: 0x%x !\n",
+				    "ibm_architecture_vec structure inconsistent: %lu!\n",
 				    *cores);
 		} else {
-			*cores = NR_CPUS / prom_count_smt_threads();
-			prom_printf("Max number of cores passed to firmware: 0x%x\n",
-				    (unsigned long)*cores);
+			*cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads());
+			prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n",
+				    *cores, NR_CPUS);
 		}
 
 		/* try calling the ibm,client-architecture-support method */
@@ -1482,7 +1508,7 @@ static void __init prom_hold_cpus(void)
 		reg = -1;
 		prom_getprop(node, "reg", &reg, sizeof(reg));
-		prom_debug("cpu hw idx   = 0x%x\n", reg);
+		prom_debug("cpu hw idx   = %lu\n", reg);
 
 		/* Init the acknowledge var which will be reset by
 		 * the secondary cpu when it awakens from its OF
@@ -1492,7 +1518,7 @@ static void __init prom_hold_cpus(void)
 
 		if (reg != _prom->cpu) {
 			/* Primary Thread of non-boot cpu */
-			prom_printf("starting cpu hw idx %x... ", reg);
+			prom_printf("starting cpu hw idx %lu... 
", reg);  			call_prom("start-cpu", 3, 0, node,  				  secondary_hold, reg); @@ -1507,7 +1533,7 @@ static void __init prom_hold_cpus(void)  		}  #ifdef CONFIG_SMP  		else -			prom_printf("boot cpu hw idx %x\n", reg); +			prom_printf("boot cpu hw idx %lu\n", reg);  #endif /* CONFIG_SMP */  	} @@ -2420,7 +2446,7 @@ static void __init prom_find_boot_cpu(void)  	prom_getprop(cpu_pkg, "reg", &getprop_rval, sizeof(getprop_rval));  	_prom->cpu = getprop_rval; -	prom_debug("Booting CPU hw index = 0x%x\n", _prom->cpu); +	prom_debug("Booting CPU hw index = %lu\n", _prom->cpu);  }  static void __init prom_check_initrd(unsigned long r3, unsigned long r4) diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 1ac136b128f..9f82f493789 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -52,12 +52,18 @@ do  	if [ "${UNDEF:0:9}" = "_restgpr_" ]; then  		OK=1  	fi +	if [ "${UNDEF:0:10}" = "_restgpr0_" ]; then +		OK=1 +	fi  	if [ "${UNDEF:0:11}" = "_rest32gpr_" ]; then  		OK=1  	fi  	if [ "${UNDEF:0:9}" = "_savegpr_" ]; then  		OK=1  	fi +	if [ "${UNDEF:0:10}" = "_savegpr0_" ]; then +		OK=1 +	fi  	if [ "${UNDEF:0:11}" = "_save32gpr_" ]; then  		OK=1  	fi diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c index 8362620c9e6..88334af038e 100644 --- a/arch/powerpc/kernel/prom_parse.c +++ b/arch/powerpc/kernel/prom_parse.c @@ -6,232 +6,11 @@  #include <linux/module.h>  #include <linux/ioport.h>  #include <linux/etherdevice.h> +#include <linux/of_address.h>  #include <asm/prom.h>  #include <asm/pci-bridge.h> -#ifdef DEBUG -#define DBG(fmt...) do { printk(fmt); } while(0) -#else -#define DBG(fmt...) do { } while(0) -#endif - -#ifdef CONFIG_PPC64 -#define PRu64	"%lx" -#else -#define PRu64	"%llx" -#endif - -/* Max address size we deal with */ -#define OF_MAX_ADDR_CELLS	4 -#define OF_CHECK_COUNTS(na, ns)	((na) > 0 && (na) <= OF_MAX_ADDR_CELLS && \ -			(ns) > 0) - -static struct of_bus *of_match_bus(struct device_node *np); -static int __of_address_to_resource(struct device_node *dev, -		const u32 *addrp, u64 size, unsigned int flags, -		struct resource *r); - - -/* Debug utility */ -#ifdef DEBUG -static void of_dump_addr(const char *s, const u32 *addr, int na) -{ -	printk("%s", s); -	while(na--) -		printk(" %08x", *(addr++)); -	printk("\n"); -} -#else -static void of_dump_addr(const char *s, const u32 *addr, int na) { } -#endif - - -/* Callbacks for bus specific translators */ -struct of_bus { -	const char	*name; -	const char	*addresses; -	int		(*match)(struct device_node *parent); -	void		(*count_cells)(struct device_node *child, -				       int *addrc, int *sizec); -	u64		(*map)(u32 *addr, const u32 *range, -				int na, int ns, int pna); -	int		(*translate)(u32 *addr, u64 offset, int na); -	unsigned int	(*get_flags)(const u32 *addr); -}; - - -/* - * Default translator (generic bus) - */ - -static void of_bus_default_count_cells(struct device_node *dev, -				       int *addrc, int *sizec) -{ -	if (addrc) -		*addrc = of_n_addr_cells(dev); -	if (sizec) -		*sizec = of_n_size_cells(dev); -} - -static u64 of_bus_default_map(u32 *addr, const u32 *range, -		int na, int ns, int pna) -{ -	u64 cp, s, da; - -	cp = of_read_number(range, na); -	s  = of_read_number(range + na + pna, ns); -	da = of_read_number(addr, na); - -	DBG("OF: default map, cp="PRu64", s="PRu64", da="PRu64"\n", -	    cp, s, da); - -	if (da < cp || da >= (cp + s)) -		return OF_BAD_ADDR; -	return da - cp; -} - -static int 
of_bus_default_translate(u32 *addr, u64 offset, int na) -{ -	u64 a = of_read_number(addr, na); -	memset(addr, 0, na * 4); -	a += offset; -	if (na > 1) -		addr[na - 2] = a >> 32; -	addr[na - 1] = a & 0xffffffffu; - -	return 0; -} - -static unsigned int of_bus_default_get_flags(const u32 *addr) -{ -	return IORESOURCE_MEM; -} - -  #ifdef CONFIG_PCI -/* - * PCI bus specific translator - */ - -static int of_bus_pci_match(struct device_node *np) -{ -	/* "vci" is for the /chaos bridge on 1st-gen PCI powermacs */ -	return !strcmp(np->type, "pci") || !strcmp(np->type, "vci"); -} - -static void of_bus_pci_count_cells(struct device_node *np, -				   int *addrc, int *sizec) -{ -	if (addrc) -		*addrc = 3; -	if (sizec) -		*sizec = 2; -} - -static unsigned int of_bus_pci_get_flags(const u32 *addr) -{ -	unsigned int flags = 0; -	u32 w = addr[0]; - -	switch((w >> 24) & 0x03) { -	case 0x01: -		flags |= IORESOURCE_IO; -		break; -	case 0x02: /* 32 bits */ -	case 0x03: /* 64 bits */ -		flags |= IORESOURCE_MEM; -		break; -	} -	if (w & 0x40000000) -		flags |= IORESOURCE_PREFETCH; -	return flags; -} - -static u64 of_bus_pci_map(u32 *addr, const u32 *range, int na, int ns, int pna) -{ -	u64 cp, s, da; -	unsigned int af, rf; - -	af = of_bus_pci_get_flags(addr); -	rf = of_bus_pci_get_flags(range); - -	/* Check address type match */ -	if ((af ^ rf) & (IORESOURCE_MEM | IORESOURCE_IO)) -		return OF_BAD_ADDR; - -	/* Read address values, skipping high cell */ -	cp = of_read_number(range + 1, na - 1); -	s  = of_read_number(range + na + pna, ns); -	da = of_read_number(addr + 1, na - 1); - -	DBG("OF: PCI map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da); - -	if (da < cp || da >= (cp + s)) -		return OF_BAD_ADDR; -	return da - cp; -} - -static int of_bus_pci_translate(u32 *addr, u64 offset, int na) -{ -	return of_bus_default_translate(addr + 1, offset, na - 1); -} - -const u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size, -			unsigned int *flags) -{ -	const u32 *prop; -	unsigned int psize; -	struct device_node *parent; -	struct of_bus *bus; -	int onesize, i, na, ns; - -	/* Get parent & match bus type */ -	parent = of_get_parent(dev); -	if (parent == NULL) -		return NULL; -	bus = of_match_bus(parent); -	if (strcmp(bus->name, "pci")) { -		of_node_put(parent); -		return NULL; -	} -	bus->count_cells(dev, &na, &ns); -	of_node_put(parent); -	if (!OF_CHECK_COUNTS(na, ns)) -		return NULL; - -	/* Get "reg" or "assigned-addresses" property */ -	prop = of_get_property(dev, bus->addresses, &psize); -	if (prop == NULL) -		return NULL; -	psize /= 4; - -	onesize = na + ns; -	for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++) -		if ((prop[0] & 0xff) == ((bar_no * 4) + PCI_BASE_ADDRESS_0)) { -			if (size) -				*size = of_read_number(prop + na, ns); -			if (flags) -				*flags = bus->get_flags(prop); -			return prop; -		} -	return NULL; -} -EXPORT_SYMBOL(of_get_pci_address); - -int of_pci_address_to_resource(struct device_node *dev, int bar, -			       struct resource *r) -{ -	const u32	*addrp; -	u64		size; -	unsigned int	flags; - -	addrp = of_get_pci_address(dev, bar, &size, &flags); -	if (addrp == NULL) -		return -EINVAL; -	return __of_address_to_resource(dev, addrp, size, flags, r); -} -EXPORT_SYMBOL_GPL(of_pci_address_to_resource); -  int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)  {  	struct device_node *dn, *ppnode; @@ -313,345 +92,6 @@ int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)  EXPORT_SYMBOL_GPL(of_irq_map_pci);  #endif /* CONFIG_PCI */ -/* - * ISA bus 
specific translator - */ - -static int of_bus_isa_match(struct device_node *np) -{ -	return !strcmp(np->name, "isa"); -} - -static void of_bus_isa_count_cells(struct device_node *child, -				   int *addrc, int *sizec) -{ -	if (addrc) -		*addrc = 2; -	if (sizec) -		*sizec = 1; -} - -static u64 of_bus_isa_map(u32 *addr, const u32 *range, int na, int ns, int pna) -{ -	u64 cp, s, da; - -	/* Check address type match */ -	if ((addr[0] ^ range[0]) & 0x00000001) -		return OF_BAD_ADDR; - -	/* Read address values, skipping high cell */ -	cp = of_read_number(range + 1, na - 1); -	s  = of_read_number(range + na + pna, ns); -	da = of_read_number(addr + 1, na - 1); - -	DBG("OF: ISA map, cp="PRu64", s="PRu64", da="PRu64"\n", cp, s, da); - -	if (da < cp || da >= (cp + s)) -		return OF_BAD_ADDR; -	return da - cp; -} - -static int of_bus_isa_translate(u32 *addr, u64 offset, int na) -{ -	return of_bus_default_translate(addr + 1, offset, na - 1); -} - -static unsigned int of_bus_isa_get_flags(const u32 *addr) -{ -	unsigned int flags = 0; -	u32 w = addr[0]; - -	if (w & 1) -		flags |= IORESOURCE_IO; -	else -		flags |= IORESOURCE_MEM; -	return flags; -} - - -/* - * Array of bus specific translators - */ - -static struct of_bus of_busses[] = { -#ifdef CONFIG_PCI -	/* PCI */ -	{ -		.name = "pci", -		.addresses = "assigned-addresses", -		.match = of_bus_pci_match, -		.count_cells = of_bus_pci_count_cells, -		.map = of_bus_pci_map, -		.translate = of_bus_pci_translate, -		.get_flags = of_bus_pci_get_flags, -	}, -#endif /* CONFIG_PCI */ -	/* ISA */ -	{ -		.name = "isa", -		.addresses = "reg", -		.match = of_bus_isa_match, -		.count_cells = of_bus_isa_count_cells, -		.map = of_bus_isa_map, -		.translate = of_bus_isa_translate, -		.get_flags = of_bus_isa_get_flags, -	}, -	/* Default */ -	{ -		.name = "default", -		.addresses = "reg", -		.match = NULL, -		.count_cells = of_bus_default_count_cells, -		.map = of_bus_default_map, -		.translate = of_bus_default_translate, -		.get_flags = of_bus_default_get_flags, -	}, -}; - -static struct of_bus *of_match_bus(struct device_node *np) -{ -	int i; - -	for (i = 0; i < ARRAY_SIZE(of_busses); i ++) -		if (!of_busses[i].match || of_busses[i].match(np)) -			return &of_busses[i]; -	BUG(); -	return NULL; -} - -static int of_translate_one(struct device_node *parent, struct of_bus *bus, -			    struct of_bus *pbus, u32 *addr, -			    int na, int ns, int pna, const char *rprop) -{ -	const u32 *ranges; -	unsigned int rlen; -	int rone; -	u64 offset = OF_BAD_ADDR; - -	/* Normally, an absence of a "ranges" property means we are -	 * crossing a non-translatable boundary, and thus the addresses -	 * below the current not cannot be converted to CPU physical ones. -	 * Unfortunately, while this is very clear in the spec, it's not -	 * what Apple understood, and they do have things like /uni-n or -	 * /ht nodes with no "ranges" property and a lot of perfectly -	 * useable mapped devices below them. Thus we treat the absence of -	 * "ranges" as equivalent to an empty "ranges" property which means -	 * a 1:1 translation at that level. It's up to the caller not to try -	 * to translate addresses that aren't supposed to be translated in -	 * the first place. --BenH. 
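
For orientation, the translator being deleted here (it moves to the common OF code, consistent with the new <linux/of_address.h> include earlier in this diff) walks "ranges" properties from a device node up to the root. The typical consumer pattern, sketched:

/* Sketch: translate a node's first "reg" entry to a CPU physical address.
 * Real callers usually go through of_address_to_resource() instead. */
static u64 example_reg_to_phys(struct device_node *np)
{
	const u32 *reg = of_get_property(np, "reg", NULL);

	if (reg == NULL)
		return OF_BAD_ADDR;
	return of_translate_address(np, reg);	/* applies each bus's map/translate */
}
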
-	 */ -	ranges = of_get_property(parent, rprop, &rlen); -	if (ranges == NULL || rlen == 0) { -		offset = of_read_number(addr, na); -		memset(addr, 0, pna * 4); -		DBG("OF: no ranges, 1:1 translation\n"); -		goto finish; -	} - -	DBG("OF: walking ranges...\n"); - -	/* Now walk through the ranges */ -	rlen /= 4; -	rone = na + pna + ns; -	for (; rlen >= rone; rlen -= rone, ranges += rone) { -		offset = bus->map(addr, ranges, na, ns, pna); -		if (offset != OF_BAD_ADDR) -			break; -	} -	if (offset == OF_BAD_ADDR) { -		DBG("OF: not found !\n"); -		return 1; -	} -	memcpy(addr, ranges + na, 4 * pna); - - finish: -	of_dump_addr("OF: parent translation for:", addr, pna); -	DBG("OF: with offset: "PRu64"\n", offset); - -	/* Translate it into parent bus space */ -	return pbus->translate(addr, offset, pna); -} - - -/* - * Translate an address from the device-tree into a CPU physical address, - * this walks up the tree and applies the various bus mappings on the - * way. - * - * Note: We consider that crossing any level with #size-cells == 0 to mean - * that translation is impossible (that is we are not dealing with a value - * that can be mapped to a cpu physical address). This is not really specified - * that way, but this is traditionally the way IBM at least do things - */ -u64 __of_translate_address(struct device_node *dev, const u32 *in_addr, -			   const char *rprop) -{ -	struct device_node *parent = NULL; -	struct of_bus *bus, *pbus; -	u32 addr[OF_MAX_ADDR_CELLS]; -	int na, ns, pna, pns; -	u64 result = OF_BAD_ADDR; - -	DBG("OF: ** translation for device %s **\n", dev->full_name); - -	/* Increase refcount at current level */ -	of_node_get(dev); - -	/* Get parent & match bus type */ -	parent = of_get_parent(dev); -	if (parent == NULL) -		goto bail; -	bus = of_match_bus(parent); - -	/* Cound address cells & copy address locally */ -	bus->count_cells(dev, &na, &ns); -	if (!OF_CHECK_COUNTS(na, ns)) { -		printk(KERN_ERR "prom_parse: Bad cell count for %s\n", -		       dev->full_name); -		goto bail; -	} -	memcpy(addr, in_addr, na * 4); - -	DBG("OF: bus is %s (na=%d, ns=%d) on %s\n", -	    bus->name, na, ns, parent->full_name); -	of_dump_addr("OF: translating address:", addr, na); - -	/* Translate */ -	for (;;) { -		/* Switch to parent bus */ -		of_node_put(dev); -		dev = parent; -		parent = of_get_parent(dev); - -		/* If root, we have finished */ -		if (parent == NULL) { -			DBG("OF: reached root node\n"); -			result = of_read_number(addr, na); -			break; -		} - -		/* Get new parent bus and counts */ -		pbus = of_match_bus(parent); -		pbus->count_cells(dev, &pna, &pns); -		if (!OF_CHECK_COUNTS(pna, pns)) { -			printk(KERN_ERR "prom_parse: Bad cell count for %s\n", -			       dev->full_name); -			break; -		} - -		DBG("OF: parent bus is %s (na=%d, ns=%d) on %s\n", -		    pbus->name, pna, pns, parent->full_name); - -		/* Apply bus translation */ -		if (of_translate_one(dev, bus, pbus, addr, na, ns, pna, rprop)) -			break; - -		/* Complete the move up one level */ -		na = pna; -		ns = pns; -		bus = pbus; - -		of_dump_addr("OF: one level translation:", addr, na); -	} - bail: -	of_node_put(parent); -	of_node_put(dev); - -	return result; -} - -u64 of_translate_address(struct device_node *dev, const u32 *in_addr) -{ -	return __of_translate_address(dev, in_addr, "ranges"); -} -EXPORT_SYMBOL(of_translate_address); - -u64 of_translate_dma_address(struct device_node *dev, const u32 *in_addr) -{ -	return __of_translate_address(dev, in_addr, "dma-ranges"); -} -EXPORT_SYMBOL(of_translate_dma_address); - -const u32 
*of_get_address(struct device_node *dev, int index, u64 *size, -		    unsigned int *flags) -{ -	const u32 *prop; -	unsigned int psize; -	struct device_node *parent; -	struct of_bus *bus; -	int onesize, i, na, ns; - -	/* Get parent & match bus type */ -	parent = of_get_parent(dev); -	if (parent == NULL) -		return NULL; -	bus = of_match_bus(parent); -	bus->count_cells(dev, &na, &ns); -	of_node_put(parent); -	if (!OF_CHECK_COUNTS(na, ns)) -		return NULL; - -	/* Get "reg" or "assigned-addresses" property */ -	prop = of_get_property(dev, bus->addresses, &psize); -	if (prop == NULL) -		return NULL; -	psize /= 4; - -	onesize = na + ns; -	for (i = 0; psize >= onesize; psize -= onesize, prop += onesize, i++) -		if (i == index) { -			if (size) -				*size = of_read_number(prop + na, ns); -			if (flags) -				*flags = bus->get_flags(prop); -			return prop; -		} -	return NULL; -} -EXPORT_SYMBOL(of_get_address); - -static int __of_address_to_resource(struct device_node *dev, const u32 *addrp, -				    u64 size, unsigned int flags, -				    struct resource *r) -{ -	u64 taddr; - -	if ((flags & (IORESOURCE_IO | IORESOURCE_MEM)) == 0) -		return -EINVAL; -	taddr = of_translate_address(dev, addrp); -	if (taddr == OF_BAD_ADDR) -		return -EINVAL; -	memset(r, 0, sizeof(struct resource)); -	if (flags & IORESOURCE_IO) { -		unsigned long port; -		port = pci_address_to_pio(taddr); -		if (port == (unsigned long)-1) -			return -EINVAL; -		r->start = port; -		r->end = port + size - 1; -	} else { -		r->start = taddr; -		r->end = taddr + size - 1; -	} -	r->flags = flags; -	r->name = dev->name; -	return 0; -} - -int of_address_to_resource(struct device_node *dev, int index, -			   struct resource *r) -{ -	const u32	*addrp; -	u64		size; -	unsigned int	flags; - -	addrp = of_get_address(dev, index, &size, &flags); -	if (addrp == NULL) -		return -EINVAL; -	return __of_address_to_resource(dev, addrp, size, flags, r); -} -EXPORT_SYMBOL_GPL(of_address_to_resource); -  void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,  		unsigned long *busno, unsigned long *phys, unsigned long *size)  { @@ -678,342 +118,6 @@ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,  	*size = of_read_number(dma_window, cells);  } -/* - * Interrupt remapper - */ - -static unsigned int of_irq_workarounds; -static struct device_node *of_irq_dflt_pic; - -static struct device_node *of_irq_find_parent(struct device_node *child) -{ -	struct device_node *p; -	const phandle *parp; - -	if (!of_node_get(child)) -		return NULL; - -	do { -		parp = of_get_property(child, "interrupt-parent", NULL); -		if (parp == NULL) -			p = of_get_parent(child); -		else { -			if (of_irq_workarounds & OF_IMAP_NO_PHANDLE) -				p = of_node_get(of_irq_dflt_pic); -			else -				p = of_find_node_by_phandle(*parp); -		} -		of_node_put(child); -		child = p; -	} while (p && of_get_property(p, "#interrupt-cells", NULL) == NULL); - -	return p; -} - -/* This doesn't need to be called if you don't have any special workaround - * flags to pass - */ -void of_irq_map_init(unsigned int flags) -{ -	of_irq_workarounds = flags; - -	/* OldWorld, don't bother looking at other things */ -	if (flags & OF_IMAP_OLDWORLD_MAC) -		return; - -	/* If we don't have phandles, let's try to locate a default interrupt -	 * controller (happens when booting with BootX). We do a first match -	 * here, hopefully, that only ever happens on machines with one -	 * controller. 
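
The interrupt remapper removed in this region is normally reached through irq_of_parse_and_map(), which wraps of_irq_map_one() plus the virq creation step. Roughly, as a sketch:

/* Sketch of the usual consumer path for the remapper below */
static unsigned int example_parse_and_map(struct device_node *dev, int index)
{
	struct of_irq oirq;

	/* Walk interrupt-parent / interrupt-map up the tree */
	if (of_irq_map_one(dev, index, &oirq))
		return NO_IRQ;
	/* Turn (controller, specifier) into a Linux virtual irq number */
	return irq_create_of_mapping(oirq.controller, oirq.specifier,
				     oirq.size);
}
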
-	 */ -	if (flags & OF_IMAP_NO_PHANDLE) { -		struct device_node *np; - -		for_each_node_with_property(np, "interrupt-controller") { -			/* Skip /chosen/interrupt-controller */ -			if (strcmp(np->name, "chosen") == 0) -				continue; -			/* It seems like at least one person on this planet wants -			 * to use BootX on a machine with an AppleKiwi controller -			 * which happens to pretend to be an interrupt -			 * controller too. -			 */ -			if (strcmp(np->name, "AppleKiwi") == 0) -				continue; -			/* I think we found one ! */ -			of_irq_dflt_pic = np; -			break; -		} -	} - -} - -int of_irq_map_raw(struct device_node *parent, const u32 *intspec, u32 ointsize, -		const u32 *addr, struct of_irq *out_irq) -{ -	struct device_node *ipar, *tnode, *old = NULL, *newpar = NULL; -	const u32 *tmp, *imap, *imask; -	u32 intsize = 1, addrsize, newintsize = 0, newaddrsize = 0; -	int imaplen, match, i; - -	DBG("of_irq_map_raw: par=%s,intspec=[0x%08x 0x%08x...],ointsize=%d\n", -	    parent->full_name, intspec[0], intspec[1], ointsize); - -	ipar = of_node_get(parent); - -	/* First get the #interrupt-cells property of the current cursor -	 * that tells us how to interpret the passed-in intspec. If there -	 * is none, we are nice and just walk up the tree -	 */ -	do { -		tmp = of_get_property(ipar, "#interrupt-cells", NULL); -		if (tmp != NULL) { -			intsize = *tmp; -			break; -		} -		tnode = ipar; -		ipar = of_irq_find_parent(ipar); -		of_node_put(tnode); -	} while (ipar); -	if (ipar == NULL) { -		DBG(" -> no parent found !\n"); -		goto fail; -	} - -	DBG("of_irq_map_raw: ipar=%s, size=%d\n", ipar->full_name, intsize); - -	if (ointsize != intsize) -		return -EINVAL; - -	/* Look for this #address-cells. We have to implement the old linux -	 * trick of looking for the parent here as some device-trees rely on it -	 */ -	old = of_node_get(ipar); -	do { -		tmp = of_get_property(old, "#address-cells", NULL); -		tnode = of_get_parent(old); -		of_node_put(old); -		old = tnode; -	} while(old && tmp == NULL); -	of_node_put(old); -	old = NULL; -	addrsize = (tmp == NULL) ? 2 : *tmp; - -	DBG(" -> addrsize=%d\n", addrsize); - -	/* Now start the actual "proper" walk of the interrupt tree */ -	while (ipar != NULL) { -		/* Now check if cursor is an interrupt-controller and if it is -		 * then we are done -		 */ -		if (of_get_property(ipar, "interrupt-controller", NULL) != -				NULL) { -			DBG(" -> got it !\n"); -			memcpy(out_irq->specifier, intspec, -			       intsize * sizeof(u32)); -			out_irq->size = intsize; -			out_irq->controller = ipar; -			of_node_put(old); -			return 0; -		} - -		/* Now look for an interrupt-map */ -		imap = of_get_property(ipar, "interrupt-map", &imaplen); -		/* No interrupt map, check for an interrupt parent */ -		if (imap == NULL) { -			DBG(" -> no map, getting parent\n"); -			newpar = of_irq_find_parent(ipar); -			goto skiplevel; -		} -		imaplen /= sizeof(u32); - -		/* Look for a mask */ -		imask = of_get_property(ipar, "interrupt-map-mask", NULL); - -		/* If we were passed no "reg" property and we attempt to parse -		 * an interrupt-map, then #address-cells must be 0. -		 * Fail if it's not. -		 */ -		if (addr == NULL && addrsize != 0) { -			DBG(" -> no reg passed in when needed !\n"); -			goto fail; -		} - -		/* Parse interrupt-map */ -		match = 0; -		while (imaplen > (addrsize + intsize + 1) && !match) { -			/* Compare specifiers */ -			match = 1; -			for (i = 0; i < addrsize && match; ++i) { -				u32 mask = imask ? 
imask[i] : 0xffffffffu; -				match = ((addr[i] ^ imap[i]) & mask) == 0; -			} -			for (; i < (addrsize + intsize) && match; ++i) { -				u32 mask = imask ? imask[i] : 0xffffffffu; -				match = -				   ((intspec[i-addrsize] ^ imap[i]) & mask) == 0; -			} -			imap += addrsize + intsize; -			imaplen -= addrsize + intsize; - -			DBG(" -> match=%d (imaplen=%d)\n", match, imaplen); - -			/* Get the interrupt parent */ -			if (of_irq_workarounds & OF_IMAP_NO_PHANDLE) -				newpar = of_node_get(of_irq_dflt_pic); -			else -				newpar = of_find_node_by_phandle((phandle)*imap); -			imap++; -			--imaplen; - -			/* Check if not found */ -			if (newpar == NULL) { -				DBG(" -> imap parent not found !\n"); -				goto fail; -			} - -			/* Get #interrupt-cells and #address-cells of new -			 * parent -			 */ -			tmp = of_get_property(newpar, "#interrupt-cells", NULL); -			if (tmp == NULL) { -				DBG(" -> parent lacks #interrupt-cells !\n"); -				goto fail; -			} -			newintsize = *tmp; -			tmp = of_get_property(newpar, "#address-cells", NULL); -			newaddrsize = (tmp == NULL) ? 0 : *tmp; - -			DBG(" -> newintsize=%d, newaddrsize=%d\n", -			    newintsize, newaddrsize); - -			/* Check for malformed properties */ -			if (imaplen < (newaddrsize + newintsize)) -				goto fail; - -			imap += newaddrsize + newintsize; -			imaplen -= newaddrsize + newintsize; - -			DBG(" -> imaplen=%d\n", imaplen); -		} -		if (!match) -			goto fail; - -		of_node_put(old); -		old = of_node_get(newpar); -		addrsize = newaddrsize; -		intsize = newintsize; -		intspec = imap - intsize; -		addr = intspec - addrsize; - -	skiplevel: -		/* Iterate again with new parent */ -		DBG(" -> new parent: %s\n", newpar ? newpar->full_name : "<>"); -		of_node_put(ipar); -		ipar = newpar; -		newpar = NULL; -	} - fail: -	of_node_put(ipar); -	of_node_put(old); -	of_node_put(newpar); - -	return -EINVAL; -} -EXPORT_SYMBOL_GPL(of_irq_map_raw); - -#if defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32) -static int of_irq_map_oldworld(struct device_node *device, int index, -			       struct of_irq *out_irq) -{ -	const u32 *ints = NULL; -	int intlen; - -	/* -	 * Old machines just have a list of interrupt numbers -	 * and no interrupt-controller nodes. 
We also have dodgy -	 * cases where the APPL,interrupts property is completely -	 * missing behind pci-pci bridges and we have to get it -	 * from the parent (the bridge itself, as apple just wired -	 * everything together on these) -	 */ -	while (device) { -		ints = of_get_property(device, "AAPL,interrupts", &intlen); -		if (ints != NULL) -			break; -		device = device->parent; -		if (device && strcmp(device->type, "pci") != 0) -			break; -	} -	if (ints == NULL) -		return -EINVAL; -	intlen /= sizeof(u32); - -	if (index >= intlen) -		return -EINVAL; - -	out_irq->controller = NULL; -	out_irq->specifier[0] = ints[index]; -	out_irq->size = 1; - -	return 0; -} -#else /* defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32) */ -static int of_irq_map_oldworld(struct device_node *device, int index, -			       struct of_irq *out_irq) -{ -	return -EINVAL; -} -#endif /* !(defined(CONFIG_PPC_PMAC) && defined(CONFIG_PPC32)) */ - -int of_irq_map_one(struct device_node *device, int index, struct of_irq *out_irq) -{ -	struct device_node *p; -	const u32 *intspec, *tmp, *addr; -	u32 intsize, intlen; -	int res = -EINVAL; - -	DBG("of_irq_map_one: dev=%s, index=%d\n", device->full_name, index); - -	/* OldWorld mac stuff is "special", handle out of line */ -	if (of_irq_workarounds & OF_IMAP_OLDWORLD_MAC) -		return of_irq_map_oldworld(device, index, out_irq); - -	/* Get the interrupts property */ -	intspec = of_get_property(device, "interrupts", &intlen); -	if (intspec == NULL) -		return -EINVAL; -	intlen /= sizeof(u32); - -	/* Get the reg property (if any) */ -	addr = of_get_property(device, "reg", NULL); - -	/* Look for the interrupt parent. */ -	p = of_irq_find_parent(device); -	if (p == NULL) -		return -EINVAL; - -	/* Get size of interrupt specifier */ -	tmp = of_get_property(p, "#interrupt-cells", NULL); -	if (tmp == NULL) -		goto out; -	intsize = *tmp; - -	DBG(" intsize=%d intlen=%d\n", intsize, intlen); - -	/* Check index */ -	if ((index + 1) * intsize > intlen) -		goto out; - -	/* Get new specifier and map it */ -	res = of_irq_map_raw(p, intspec + index * intsize, intsize, -			     addr, out_irq); -out: -	of_node_put(p); -	return res; -} -EXPORT_SYMBOL_GPL(of_irq_map_one); -  /**   * Search the device tree for the best MAC address to use.  'mac-address' is   * checked first, because that is supposed to contain to "most recent" MAC @@ -1051,29 +155,3 @@ const void *of_get_mac_address(struct device_node *np)  	return NULL;  }  EXPORT_SYMBOL(of_get_mac_address); - -int of_irq_to_resource(struct device_node *dev, int index, struct resource *r) -{ -	int irq = irq_of_parse_and_map(dev, index); - -	/* Only dereference the resource if both the -	 * resource and the irq are valid. 
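
Similarly, of_irq_to_resource() and of_iomap(), deleted just below, move out of this file rather than disappearing; the common OF code now provides them. The usual driver-side pattern, for reference:

static int example_map_device(struct device_node *np)
{
	void __iomem *regs;
	int irq;

	regs = of_iomap(np, 0);			/* map the first "reg" entry */
	if (regs == NULL)
		return -ENOMEM;

	irq = irq_of_parse_and_map(np, 0);	/* first "interrupts" entry */
	if (irq == NO_IRQ) {
		iounmap(regs);
		return -ENXIO;
	}

	/* ... hand regs and irq to the driver proper ... */
	return 0;
}
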
*/ -	if (r && irq != NO_IRQ) { -		r->start = r->end = irq; -		r->flags = IORESOURCE_IRQ; -	} - -	return irq; -} -EXPORT_SYMBOL_GPL(of_irq_to_resource); - -void __iomem *of_iomap(struct device_node *np, int index) -{ -	struct resource res; - -	if (of_address_to_resource(np, index, &res)) -		return NULL; - -	return ioremap(res.start, 1 + res.end - res.start); -} -EXPORT_SYMBOL(of_iomap); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 7a0c0199ea2..11f3cd9c832 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -32,6 +32,8 @@  #ifdef CONFIG_PPC32  #include <linux/module.h>  #endif +#include <linux/hw_breakpoint.h> +#include <linux/perf_event.h>  #include <asm/uaccess.h>  #include <asm/page.h> @@ -866,9 +868,34 @@ void user_disable_single_step(struct task_struct *task)  	clear_tsk_thread_flag(task, TIF_SINGLESTEP);  } +#ifdef CONFIG_HAVE_HW_BREAKPOINT +void ptrace_triggered(struct perf_event *bp, int nmi, +		      struct perf_sample_data *data, struct pt_regs *regs) +{ +	struct perf_event_attr attr; + +	/* +	 * Disable the breakpoint request here since ptrace has defined a +	 * one-shot behaviour for breakpoint exceptions in PPC64. +	 * The SIGTRAP signal is generated automatically for us in do_dabr(). +	 * We don't have to do anything about that here +	 */ +	attr = bp->attr; +	attr.disabled = true; +	modify_user_hw_breakpoint(bp, &attr); +} +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ +  int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,  			       unsigned long data)  { +#ifdef CONFIG_HAVE_HW_BREAKPOINT +	int ret; +	struct thread_struct *thread = &(task->thread); +	struct perf_event *bp; +	struct perf_event_attr attr; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ +  	/* For ppc64 we support one DABR and no IABR's at the moment (ppc64).  	 *  For embedded processors we support one DAC and no IAC's at the  	 *  moment. 
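
The ptrace changes around this point rebuild DABR handling on top of the perf hw-breakpoint layer. Condensed into a sketch, the registration path added below looks like this (helpers and attr fields as used in the hunk, error handling trimmed):

static struct perf_event *example_request_watchpoint(struct task_struct *task,
						     unsigned long dabr)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);	/* PERF_TYPE_BREAKPOINT defaults */
	attr.bp_addr = dabr & ~HW_BREAKPOINT_ALIGN;
	arch_bp_generic_fields(dabr & (DABR_DATA_WRITE | DABR_DATA_READ),
			       &attr.bp_type);

	/* ptrace_triggered() re-disables the event: one-shot semantics */
	return register_user_hw_breakpoint(&attr, ptrace_triggered, task);
}
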
@@ -896,6 +923,43 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,  	/* Ensure breakpoint translation bit is set */  	if (data && !(data & DABR_TRANSLATION))  		return -EIO; +#ifdef CONFIG_HAVE_HW_BREAKPOINT +	bp = thread->ptrace_bps[0]; +	if ((!data) || !(data & (DABR_DATA_WRITE | DABR_DATA_READ))) { +		if (bp) { +			unregister_hw_breakpoint(bp); +			thread->ptrace_bps[0] = NULL; +		} +		return 0; +	} +	if (bp) { +		attr = bp->attr; +		attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN; +		arch_bp_generic_fields(data & +					(DABR_DATA_WRITE | DABR_DATA_READ), +							&attr.bp_type); +		ret =  modify_user_hw_breakpoint(bp, &attr); +		if (ret) +			return ret; +		thread->ptrace_bps[0] = bp; +		thread->dabr = data; +		return 0; +	} + +	/* Create a new breakpoint request if one doesn't exist already */ +	hw_breakpoint_init(&attr); +	attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN; +	arch_bp_generic_fields(data & (DABR_DATA_WRITE | DABR_DATA_READ), +								&attr.bp_type); + +	thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr, +							ptrace_triggered, task); +	if (IS_ERR(bp)) { +		thread->ptrace_bps[0] = NULL; +		return PTR_ERR(bp); +	} + +#endif /* CONFIG_HAVE_HW_BREAKPOINT */  	/* Move contents to the DABR register */  	task->thread.dabr = data; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 0e1ec6f746f..41048de3c6c 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -22,7 +22,7 @@  #include <linux/smp.h>  #include <linux/completion.h>  #include <linux/cpumask.h> -#include <linux/lmb.h> +#include <linux/memblock.h>  #include <linux/slab.h>  #include <asm/prom.h> @@ -47,14 +47,6 @@ struct rtas_t rtas = {  };  EXPORT_SYMBOL(rtas); -struct rtas_suspend_me_data { -	atomic_t working; /* number of cpus accessing this struct */ -	atomic_t done; -	int token; /* ibm,suspend-me */ -	int error; -	struct completion *complete; /* wait on this until working == 0 */ -}; -  DEFINE_SPINLOCK(rtas_data_buf_lock);  EXPORT_SYMBOL(rtas_data_buf_lock); @@ -714,14 +706,53 @@ void rtas_os_term(char *str)  static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE;  #ifdef CONFIG_PPC_PSERIES -static void rtas_percpu_suspend_me(void *info) +static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_when_done) +{ +	u16 slb_size = mmu_slb_size; +	int rc = H_MULTI_THREADS_ACTIVE; +	int cpu; + +	slb_set_size(SLB_MIN_SIZE); +	printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id()); + +	while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) && +	       !atomic_read(&data->error)) +		rc = rtas_call(data->token, 0, 1, NULL); + +	if (rc || atomic_read(&data->error)) { +		printk(KERN_DEBUG "ibm,suspend-me returned %d\n", rc); +		slb_set_size(slb_size); +	} + +	if (atomic_read(&data->error)) +		rc = atomic_read(&data->error); + +	atomic_set(&data->error, rc); + +	if (wake_when_done) { +		atomic_set(&data->done, 1); + +		for_each_online_cpu(cpu) +			plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); +	} + +	if (atomic_dec_return(&data->working) == 0) +		complete(data->complete); + +	return rc; +} + +int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data) +{ +	atomic_inc(&data->working); +	return __rtas_suspend_last_cpu(data, 0); +} + +static int __rtas_suspend_cpu(struct rtas_suspend_me_data *data, int wake_when_done)  {  	long rc = H_SUCCESS;  	unsigned long msr_save; -	u16 slb_size = mmu_slb_size;  	int cpu; -	struct rtas_suspend_me_data *data = -		(struct rtas_suspend_me_data *)info;  	
atomic_inc(&data->working); @@ -729,7 +760,7 @@ static void rtas_percpu_suspend_me(void *info)  	msr_save = mfmsr();  	mtmsr(msr_save & ~(MSR_EE)); -	while (rc == H_SUCCESS && !atomic_read(&data->done)) +	while (rc == H_SUCCESS && !atomic_read(&data->done) && !atomic_read(&data->error))  		rc = plpar_hcall_norets(H_JOIN);  	mtmsr(msr_save); @@ -741,33 +772,37 @@ static void rtas_percpu_suspend_me(void *info)  		/* All other cpus are in H_JOIN, this cpu does  		 * the suspend.  		 */ -		slb_set_size(SLB_MIN_SIZE); -		printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", -		       smp_processor_id()); -		data->error = rtas_call(data->token, 0, 1, NULL); - -		if (data->error) { -			printk(KERN_DEBUG "ibm,suspend-me returned %d\n", -			       data->error); -			slb_set_size(slb_size); -		} +		return __rtas_suspend_last_cpu(data, wake_when_done);  	} else {  		printk(KERN_ERR "H_JOIN on cpu %i failed with rc = %ld\n",  		       smp_processor_id(), rc); -		data->error = rc; +		atomic_set(&data->error, rc);  	} -	atomic_set(&data->done, 1); +	if (wake_when_done) { +		atomic_set(&data->done, 1); -	/* This cpu did the suspend or got an error; in either case, -	 * we need to prod all other other cpus out of join state. -	 * Extra prods are harmless. -	 */ -	for_each_online_cpu(cpu) -		plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); +		/* This cpu did the suspend or got an error; in either case, +		 * we need to prod all other other cpus out of join state. +		 * Extra prods are harmless. +		 */ +		for_each_online_cpu(cpu) +			plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); +	}  out:  	if (atomic_dec_return(&data->working) == 0)  		complete(data->complete); +	return rc; +} + +int rtas_suspend_cpu(struct rtas_suspend_me_data *data) +{ +	return __rtas_suspend_cpu(data, 0); +} + +static void rtas_percpu_suspend_me(void *info) +{ +	__rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1);  }  static int rtas_ibm_suspend_me(struct rtas_args *args) @@ -802,22 +837,22 @@ static int rtas_ibm_suspend_me(struct rtas_args *args)  	atomic_set(&data.working, 0);  	atomic_set(&data.done, 0); +	atomic_set(&data.error, 0);  	data.token = rtas_token("ibm,suspend-me"); -	data.error = 0;  	data.complete = &done;  	/* Call function on all CPUs.  
One of us will make the  	 * rtas call  	 */  	if (on_each_cpu(rtas_percpu_suspend_me, &data, 0)) -		data.error = -EINVAL; +		atomic_set(&data.error, -EINVAL);  	wait_for_completion(&done); -	if (data.error != 0) +	if (atomic_read(&data.error) != 0)  		printk(KERN_ERR "Error doing global join\n"); -	return data.error; +	return atomic_read(&data.error);  }  #else /* CONFIG_PPC_PSERIES */  static int rtas_ibm_suspend_me(struct rtas_args *args) @@ -934,11 +969,11 @@ void __init rtas_initialize(void)  	 */  #ifdef CONFIG_PPC64  	if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) { -		rtas_region = min(lmb.rmo_size, RTAS_INSTANTIATE_MAX); +		rtas_region = min(memblock.rmo_size, RTAS_INSTANTIATE_MAX);  		ibm_suspend_me_token = rtas_token("ibm,suspend-me");  	}  #endif -	rtas_rmo_buf = lmb_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region); +	rtas_rmo_buf = memblock_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region);  #ifdef CONFIG_RTAS_ERROR_LOGGING  	rtas_last_error_token = rtas_token("rtas-last-error"); diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index bfc2abafac4..67a84d8f118 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -94,12 +94,8 @@ struct flash_block_list {  	struct flash_block_list *next;  	struct flash_block blocks[FLASH_BLOCKS_PER_NODE];  }; -struct flash_block_list_header { /* just the header of flash_block_list */ -	unsigned long num_blocks; -	struct flash_block_list *next; -}; -static struct flash_block_list_header rtas_firmware_flash_list = {0, NULL}; +static struct flash_block_list *rtas_firmware_flash_list;  /* Use slab cache to guarantee 4k alignment */  static struct kmem_cache *flash_block_cache = NULL; @@ -108,13 +104,14 @@ static struct kmem_cache *flash_block_cache = NULL;  /* Local copy of the flash block list.   * We only allow one open of the flash proc file and create this - * list as we go.  This list will be put in the - * rtas_firmware_flash_list var once it is fully read. + * list as we go.  The rtas_firmware_flash_list variable will be + * set once the data is fully read.   *   * For convenience as we build the list we use virtual addrs,   * we do not fill in the version number, and the length field   * is treated as the number of entries currently in the block - * (i.e. not a byte count).  This is all fixed on release. + * (i.e. not a byte count).  This is all fixed when calling  + * the flash routine.   
*/  /* Status int must be first member of struct */ @@ -201,16 +198,16 @@ static int rtas_flash_release(struct inode *inode, struct file *file)  	if (uf->flist) {      		/* File was opened in write mode for a new flash attempt */  		/* Clear saved list */ -		if (rtas_firmware_flash_list.next) { -			free_flash_list(rtas_firmware_flash_list.next); -			rtas_firmware_flash_list.next = NULL; +		if (rtas_firmware_flash_list) { +			free_flash_list(rtas_firmware_flash_list); +			rtas_firmware_flash_list = NULL;  		}  		if (uf->status != FLASH_AUTH)    			uf->status = flash_list_valid(uf->flist);  		if (uf->status == FLASH_IMG_READY)  -			rtas_firmware_flash_list.next = uf->flist; +			rtas_firmware_flash_list = uf->flist;  		else  			free_flash_list(uf->flist); @@ -593,7 +590,7 @@ static void rtas_flash_firmware(int reboot_type)  	unsigned long rtas_block_list;  	int i, status, update_token; -	if (rtas_firmware_flash_list.next == NULL) +	if (rtas_firmware_flash_list == NULL)  		return;		/* nothing to do */  	if (reboot_type != SYS_RESTART) { @@ -610,20 +607,25 @@ static void rtas_flash_firmware(int reboot_type)  		return;  	} -	/* NOTE: the "first" block list is a global var with no data -	 * blocks in the kernel data segment.  We do this because -	 * we want to ensure this block_list addr is under 4GB. +	/* +	 * NOTE: the "first" block must be under 4GB, so we create +	 * an entry with no data blocks in the reserved buffer in +	 * the kernel data segment.  	 */ -	rtas_firmware_flash_list.num_blocks = 0; -	flist = (struct flash_block_list *)&rtas_firmware_flash_list; +	spin_lock(&rtas_data_buf_lock); +	flist = (struct flash_block_list *)&rtas_data_buf[0]; +	flist->num_blocks = 0; +	flist->next = rtas_firmware_flash_list;  	rtas_block_list = virt_to_abs(flist);  	if (rtas_block_list >= 4UL*1024*1024*1024) {  		printk(KERN_ALERT "FLASH: kernel bug...flash list header addr above 4GB\n"); +		spin_unlock(&rtas_data_buf_lock);  		return;  	}  	printk(KERN_ALERT "FLASH: preparing saved firmware image for flash\n");  	/* Update the block_list in place. 
*/ +	rtas_firmware_flash_list = NULL; /* too hard to backout on error */  	image_size = 0;  	for (f = flist; f; f = next) {  		/* Translate data addrs to absolute */ @@ -664,6 +666,7 @@ static void rtas_flash_firmware(int reboot_type)  		printk(KERN_ALERT "FLASH: unknown flash return code %d\n", status);  		break;  	} +	spin_unlock(&rtas_data_buf_lock);  }  static void remove_flash_pde(struct proc_dir_entry *dp) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 5e4d852f640..9d4882a4664 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -33,7 +33,7 @@  #include <linux/serial_8250.h>  #include <linux/debugfs.h>  #include <linux/percpu.h> -#include <linux/lmb.h> +#include <linux/memblock.h>  #include <linux/of_platform.h>  #include <asm/io.h>  #include <asm/paca.h> @@ -94,6 +94,12 @@ struct screen_info screen_info = {  	.orig_video_points = 16  }; +/* Variables required to store legacy IO irq routing */ +int of_i8042_kbd_irq; +EXPORT_SYMBOL_GPL(of_i8042_kbd_irq); +int of_i8042_aux_irq; +EXPORT_SYMBOL_GPL(of_i8042_aux_irq); +  #ifdef __DO_IRQ_CANON  /* XXX should go elsewhere eventually */  int ppc_do_canonicalize_irqs; @@ -575,6 +581,15 @@ int check_legacy_ioport(unsigned long base_port)  			np = of_find_compatible_node(NULL, NULL, "pnpPNP,f03");  		if (np) {  			parent = of_get_parent(np); + +			of_i8042_kbd_irq = irq_of_parse_and_map(parent, 0); +			if (!of_i8042_kbd_irq) +				of_i8042_kbd_irq = 1; + +			of_i8042_aux_irq = irq_of_parse_and_map(parent, 1); +			if (!of_i8042_aux_irq) +				of_i8042_aux_irq = 12; +  			of_node_put(np);  			np = parent;  			break; @@ -701,16 +716,9 @@ static struct notifier_block ppc_dflt_plat_bus_notifier = {  	.priority = INT_MAX,  }; -static struct notifier_block ppc_dflt_of_bus_notifier = { -	.notifier_call = ppc_dflt_bus_notify, -	.priority = INT_MAX, -}; -  static int __init setup_bus_notifier(void)  {  	bus_register_notifier(&platform_bus_type, &ppc_dflt_plat_bus_notifier); -	bus_register_notifier(&of_platform_bus_type, &ppc_dflt_of_bus_notifier); -  	return 0;  } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 8f58986c2ad..a10ffc85ada 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -16,7 +16,7 @@  #include <linux/root_dev.h>  #include <linux/cpu.h>  #include <linux/console.h> -#include <linux/lmb.h> +#include <linux/memblock.h>  #include <asm/io.h>  #include <asm/prom.h> @@ -241,23 +241,19 @@ int __init ppc_init(void)  arch_initcall(ppc_init); -#ifdef CONFIG_IRQSTACKS  static void __init irqstack_early_init(void)  {  	unsigned int i;  	/* interrupt stacks must be in lowmem, we get that for free on ppc32 -	 * as the lmb is limited to lowmem by LMB_REAL_LIMIT */ +	 * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */  	for_each_possible_cpu(i) {  		softirq_ctx[i] = (struct thread_info *) -			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); +			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));  		hardirq_ctx[i] = (struct thread_info *) -			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); +			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));  	}  } -#else -#define irqstack_early_init() -#endif  #if defined(CONFIG_BOOKE) || defined(CONFIG_40x)  static void __init exc_lvl_early_init(void) @@ -265,15 +261,15 @@ static void __init exc_lvl_early_init(void)  	unsigned int i;  	/* interrupt stacks must be in lowmem, we get that for free on ppc32 -	 * as the lmb is limited to lowmem by LMB_REAL_LIMIT */ +	 * as the 
memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */  	for_each_possible_cpu(i) {  		critirq_ctx[i] = (struct thread_info *) -			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); +			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));  #ifdef CONFIG_BOOKE  		dbgirq_ctx[i] = (struct thread_info *) -			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); +			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));  		mcheckirq_ctx[i] = (struct thread_info *) -			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); +			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));  #endif  	}  } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index f3fb5a79de5..1bee4b68fa4 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -34,7 +34,7 @@  #include <linux/bootmem.h>  #include <linux/pci.h>  #include <linux/lockdep.h> -#include <linux/lmb.h> +#include <linux/memblock.h>  #include <asm/io.h>  #include <asm/kdump.h>  #include <asm/prom.h> @@ -142,23 +142,13 @@ early_param("smt-enabled", early_smt_enabled);  #define check_smt_enabled()  #endif /* CONFIG_SMP */ -/* Put the paca pointer into r13 and SPRG_PACA */ -static void __init setup_paca(struct paca_struct *new_paca) -{ -	local_paca = new_paca; -	mtspr(SPRN_SPRG_PACA, local_paca); -#ifdef CONFIG_PPC_BOOK3E -	mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); -#endif -} -  /*   * Early initialization entry point. This is called by head.S   * with MMU translation disabled. We rely on the "feature" of   * the CPU that ignores the top 2 bits of the address in real   * mode so we can access kernel globals normally provided we   * only toy with things in the RMO region. From here, we do - * some early parsing of the device-tree to setup out LMB + * some early parsing of the device-tree to set up our MEMBLOCK   * data structures, and allocate & initialize the hash table   * and segment tables so we can start running with translation   * enabled. 
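The lmb-to-memblock switch in the hunks above is a mechanical rename, but the allocator contract it relies on is easy to miss in diff form: these early allocators return a physical address, which every caller immediately converts with __va(). A minimal C sketch of the pattern, assuming the 2010-era memblock API; alloc_early_stack() and alloc_early_stack_below() are illustrative helpers, not kernel symbols:

#include <linux/memblock.h>
#include <asm/thread_info.h>	/* THREAD_SIZE */

/* Grab a THREAD_SIZE-aligned stack from early boot memory.
 * memblock_alloc() returns a physical address, so __va() is needed
 * before the stack can be used. On ppc32 the result is guaranteed
 * to be in lowmem because memblock is capped at MEMBLOCK_REAL_LIMIT,
 * which is exactly what keeps the __va() translation valid.
 */
static void * __init alloc_early_stack(void)
{
	return __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
}

/* When the memory must also sit below a boundary (for example under
 * the RMO so it stays reachable in real mode), the _base variant
 * takes an explicit upper limit, as emergency_stack_init() does
 * later in setup_64.c.
 */
static void * __init alloc_early_stack_below(phys_addr_t limit)
{
	return __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
}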
@@ -404,7 +394,7 @@ void __init setup_system(void)  	printk("-----------------------------------------------------\n");  	printk("ppc64_pft_size                = 0x%llx\n", ppc64_pft_size); -	printk("physicalMemorySize            = 0x%llx\n", lmb_phys_mem_size()); +	printk("physicalMemorySize            = 0x%llx\n", memblock_phys_mem_size());  	if (ppc64_caches.dline_size != 0x80)  		printk("ppc64_caches.dcache_line_size = 0x%x\n",  		       ppc64_caches.dline_size); @@ -432,7 +422,6 @@ static u64 slb0_limit(void)  	return 1UL << SID_SHIFT;  } -#ifdef CONFIG_IRQSTACKS  static void __init irqstack_early_init(void)  {  	u64 limit = slb0_limit(); @@ -444,16 +433,13 @@ static void __init irqstack_early_init(void)  	 */  	for_each_possible_cpu(i) {  		softirq_ctx[i] = (struct thread_info *) -			__va(lmb_alloc_base(THREAD_SIZE, +			__va(memblock_alloc_base(THREAD_SIZE,  					    THREAD_SIZE, limit));  		hardirq_ctx[i] = (struct thread_info *) -			__va(lmb_alloc_base(THREAD_SIZE, +			__va(memblock_alloc_base(THREAD_SIZE,  					    THREAD_SIZE, limit));  	}  } -#else -#define irqstack_early_init() -#endif  #ifdef CONFIG_PPC_BOOK3E  static void __init exc_lvl_early_init(void) @@ -462,11 +448,11 @@ static void __init exc_lvl_early_init(void)  	for_each_possible_cpu(i) {  		critirq_ctx[i] = (struct thread_info *) -			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); +			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));  		dbgirq_ctx[i] = (struct thread_info *) -			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); +			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));  		mcheckirq_ctx[i] = (struct thread_info *) -			__va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); +			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));  	}  }  #else @@ -491,11 +477,11 @@ static void __init emergency_stack_init(void)  	 * bringup, we need to get at them in real mode. This means they  	 * must also be within the RMO region.  	 
*/ -	limit = min(slb0_limit(), lmb.rmo_size); +	limit = min(slb0_limit(), memblock.rmo_size);  	for_each_possible_cpu(i) {  		unsigned long sp; -		sp  = lmb_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); +		sp  = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);  		sp += THREAD_SIZE;  		paca[i].emergency_sp = __va(sp);  	} @@ -604,6 +590,9 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)  		return REMOTE_DISTANCE;  } +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); +  void __init setup_per_cpu_areas(void)  {  	const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; @@ -628,8 +617,10 @@ void __init setup_per_cpu_areas(void)  		panic("cannot initialize percpu area (err=%d)", rc);  	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; -	for_each_possible_cpu(cpu) -		paca[cpu].data_offset = delta + pcpu_unit_offsets[cpu]; +	for_each_possible_cpu(cpu) { +                __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; +		paca[cpu].data_offset = __per_cpu_offset[cpu]; +	}  }  #endif diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index a0afb555a7c..7109f5b1baa 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -11,6 +11,7 @@  #include <linux/tracehook.h>  #include <linux/signal.h> +#include <asm/hw_breakpoint.h>  #include <asm/uaccess.h>  #include <asm/unistd.h> @@ -149,6 +150,8 @@ static int do_signal_pending(sigset_t *oldset, struct pt_regs *regs)  	if (current->thread.dabr)  		set_dabr(current->thread.dabr);  #endif +	/* Re-enable the breakpoints for the signal stack */ +	thread_change_pc(current, regs);  	if (is32) {          	if (ka.sa.sa_flags & SA_SIGINFO) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 5c196d1086d..a61b3ddd7bb 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -288,8 +288,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)  			max_cpus = NR_CPUS;  	else  		max_cpus = 1; -  -	smp_space_timers(max_cpus);  	for_each_possible_cpu(cpu)  		if (cpu != boot_cpuid) @@ -501,14 +499,6 @@ int __devinit start_secondary(void *unused)  	current->active_mm = &init_mm;  	smp_store_cpu_info(cpu); - -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) -	/* Clear any pending timer interrupts */ -	mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS); - -	/* Enable decrementer interrupt */ -	mtspr(SPRN_TCR, TCR_DIE); -#endif  	set_dec(tb_ticks_per_jiffy);  	preempt_disable();  	cpu_callin_map[cpu] = 1; diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c index 6fc6328dc62..0167d53da30 100644 --- a/arch/powerpc/kernel/suspend.c +++ b/arch/powerpc/kernel/suspend.c @@ -3,7 +3,7 @@   *   * Distribute under GPLv2   * - * Copyright (c) 2002 Pavel Machek <pavel@suse.cz> + * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz>   * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>   */ diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 19471a1cef1..20fd701a686 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -546,7 +546,7 @@ compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_siz  	return sys_pread64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo);  } -compat_ssize_t compat_sys_pwrite64(unsigned int fd, char __user *ubuf, compat_size_t count, +compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count,  			      u32 reg6, u32 poshi, u32 poslo)  { 
 	return sys_pwrite64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 0441bbdadbd..ce53dfa7130 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -149,16 +149,6 @@ unsigned long tb_ticks_per_usec = 100; /* sane default */  EXPORT_SYMBOL(tb_ticks_per_usec);  unsigned long tb_ticks_per_sec;  EXPORT_SYMBOL(tb_ticks_per_sec);	/* for cputime_t conversions */ -u64 tb_to_xs; -unsigned tb_to_us; - -#define TICKLEN_SCALE	NTP_SCALE_SHIFT -static u64 last_tick_len;	/* units are ns / 2^TICKLEN_SCALE */ -static u64 ticklen_to_xs;	/* 0.64 fraction */ - -/* If last_tick_len corresponds to about 1/HZ seconds, then -   last_tick_len << TICKLEN_SHIFT will be about 2^63. */ -#define TICKLEN_SHIFT	(63 - 30 - TICKLEN_SCALE + SHIFT_HZ)  DEFINE_SPINLOCK(rtc_lock);  EXPORT_SYMBOL_GPL(rtc_lock); @@ -174,7 +164,6 @@ unsigned long ppc_proc_freq;  EXPORT_SYMBOL(ppc_proc_freq);  unsigned long ppc_tb_freq; -static u64 tb_last_jiffy __cacheline_aligned_in_smp;  static DEFINE_PER_CPU(u64, last_jiffy);  #ifdef CONFIG_VIRT_CPU_ACCOUNTING @@ -423,30 +412,6 @@ void udelay(unsigned long usecs)  }  EXPORT_SYMBOL(udelay); -static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec, -			       u64 new_tb_to_xs) -{ -	/* -	 * tb_update_count is used to allow the userspace gettimeofday code -	 * to assure itself that it sees a consistent view of the tb_to_xs and -	 * stamp_xsec variables.  It reads the tb_update_count, then reads -	 * tb_to_xs and stamp_xsec and then reads tb_update_count again.  If -	 * the two values of tb_update_count match and are even then the -	 * tb_to_xs and stamp_xsec values are consistent.  If not, then it -	 * loops back and reads them again until this criteria is met. -	 * We expect the caller to have done the first increment of -	 * vdso_data->tb_update_count already. -	 */ -	vdso_data->tb_orig_stamp = new_tb_stamp; -	vdso_data->stamp_xsec = new_stamp_xsec; -	vdso_data->tb_to_xs = new_tb_to_xs; -	vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec; -	vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec; -	vdso_data->stamp_xtime = xtime; -	smp_wmb(); -	++(vdso_data->tb_update_count); -} -  #ifdef CONFIG_SMP  unsigned long profile_pc(struct pt_regs *regs)  { @@ -470,7 +435,6 @@ EXPORT_SYMBOL(profile_pc);  static int __init iSeries_tb_recal(void)  { -	struct div_result divres;  	unsigned long titan, tb;  	/* Make sure we only run on iSeries */ @@ -501,10 +465,7 @@ static int __init iSeries_tb_recal(void)  				tb_ticks_per_jiffy = new_tb_ticks_per_jiffy;  				tb_ticks_per_sec   = new_tb_ticks_per_sec;  				calc_cputime_factors(); -				div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres ); -				tb_to_xs = divres.result_low;  				vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; -				vdso_data->tb_to_xs = tb_to_xs;  				setup_cputime_one_jiffy();  			}  			else { @@ -667,27 +628,9 @@ void timer_interrupt(struct pt_regs * regs)  	trace_timer_interrupt_exit(regs);  } -void wakeup_decrementer(void) -{ -	unsigned long ticks; - -	/* -	 * The timebase gets saved on sleep and restored on wakeup, -	 * so all we need to do is to reset the decrementer. 
-	 */ -	ticks = tb_ticks_since(__get_cpu_var(last_jiffy)); -	if (ticks < tb_ticks_per_jiffy) -		ticks = tb_ticks_per_jiffy - ticks; -	else -		ticks = 1; -	set_dec(ticks); -} -  #ifdef CONFIG_SUSPEND -void generic_suspend_disable_irqs(void) +static void generic_suspend_disable_irqs(void)  { -	preempt_disable(); -  	/* Disable the decrementer, so that it doesn't interfere  	 * with suspending.  	 */ @@ -697,12 +640,9 @@ void generic_suspend_disable_irqs(void)  	set_dec(0x7fffffff);  } -void generic_suspend_enable_irqs(void) +static void generic_suspend_enable_irqs(void)  { -	wakeup_decrementer(); -  	local_irq_enable(); -	preempt_enable();  }  /* Overrides the weak version in kernel/power/main.c */ @@ -722,23 +662,6 @@ void arch_suspend_enable_irqs(void)  }  #endif -#ifdef CONFIG_SMP -void __init smp_space_timers(unsigned int max_cpus) -{ -	int i; -	u64 previous_tb = per_cpu(last_jiffy, boot_cpuid); - -	/* make sure tb > per_cpu(last_jiffy, cpu) for all cpus always */ -	previous_tb -= tb_ticks_per_jiffy; - -	for_each_possible_cpu(i) { -		if (i == boot_cpuid) -			continue; -		per_cpu(last_jiffy, i) = previous_tb; -	} -} -#endif -  /*   * Scheduler clock - returns current time in nanosec units.   * @@ -873,10 +796,11 @@ static cycle_t timebase_read(struct clocksource *cs)  	return (cycle_t)get_tb();  } -void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, -		     u32 mult) +void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, +			struct clocksource *clock, u32 mult)  { -	u64 t2x, stamp_xsec; +	u64 new_tb_to_xs, new_stamp_xsec; +	u32 frac_sec;  	if (clock != &clocksource_timebase)  		return; @@ -887,11 +811,35 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,  	/* XXX this assumes clock->shift == 22 */  	/* 4611686018 ~= 2^(20+64-22) / 1e9 */ -	t2x = (u64) mult * 4611686018ULL; -	stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC; -	do_div(stamp_xsec, 1000000000); -	stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC; -	update_gtod(clock->cycle_last, stamp_xsec, t2x); +	new_tb_to_xs = (u64) mult * 4611686018ULL; +	new_stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC; +	do_div(new_stamp_xsec, 1000000000); +	new_stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC; + +	BUG_ON(wall_time->tv_nsec >= NSEC_PER_SEC); +	/* this is tv_nsec / 1e9 as a 0.32 fraction */ +	frac_sec = ((u64) wall_time->tv_nsec * 18446744073ULL) >> 32; + +	/* +	 * tb_update_count is used to allow the userspace gettimeofday code +	 * to assure itself that it sees a consistent view of the tb_to_xs and +	 * stamp_xsec variables.  It reads the tb_update_count, then reads +	 * tb_to_xs and stamp_xsec and then reads tb_update_count again.  If +	 * the two values of tb_update_count match and are even then the +	 * tb_to_xs and stamp_xsec values are consistent.  If not, then it +	 * loops back and reads them again until this criteria is met. +	 * We expect the caller to have done the first increment of +	 * vdso_data->tb_update_count already. 
+	 */ +	vdso_data->tb_orig_stamp = clock->cycle_last; +	vdso_data->stamp_xsec = new_stamp_xsec; +	vdso_data->tb_to_xs = new_tb_to_xs; +	vdso_data->wtom_clock_sec = wtm->tv_sec; +	vdso_data->wtom_clock_nsec = wtm->tv_nsec; +	vdso_data->stamp_xtime = *wall_time; +	vdso_data->stamp_sec_fraction = frac_sec; +	smp_wmb(); +	++(vdso_data->tb_update_count);  }  void update_vsyscall_tz(void) @@ -1007,15 +955,13 @@ void secondary_cpu_time_init(void)  /* This function is only called on the boot processor */  void __init time_init(void)  { -	unsigned long flags;  	struct div_result res; -	u64 scale, x; +	u64 scale;  	unsigned shift;  	if (__USE_RTC()) {  		/* 601 processor: dec counts down by 128 every 128ns */  		ppc_tb_freq = 1000000000; -		tb_last_jiffy = get_rtcl();  	} else {  		/* Normal PowerPC with timebase register */  		ppc_md.calibrate_decr(); @@ -1023,50 +969,15 @@ void __init time_init(void)  		       ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);  		printk(KERN_DEBUG "time_init: processor frequency   = %lu.%.6lu MHz\n",  		       ppc_proc_freq / 1000000, ppc_proc_freq % 1000000); -		tb_last_jiffy = get_tb();  	}  	tb_ticks_per_jiffy = ppc_tb_freq / HZ;  	tb_ticks_per_sec = ppc_tb_freq;  	tb_ticks_per_usec = ppc_tb_freq / 1000000; -	tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000);  	calc_cputime_factors();  	setup_cputime_one_jiffy();  	/* -	 * Calculate the length of each tick in ns.  It will not be -	 * exactly 1e9/HZ unless ppc_tb_freq is divisible by HZ. -	 * We compute 1e9 * tb_ticks_per_jiffy / ppc_tb_freq, -	 * rounded up. -	 */ -	x = (u64) NSEC_PER_SEC * tb_ticks_per_jiffy + ppc_tb_freq - 1; -	do_div(x, ppc_tb_freq); -	tick_nsec = x; -	last_tick_len = x << TICKLEN_SCALE; - -	/* -	 * Compute ticklen_to_xs, which is a factor which gets multiplied -	 * by (last_tick_len << TICKLEN_SHIFT) to get a tb_to_xs value. -	 * It is computed as: -	 * ticklen_to_xs = 2^N / (tb_ticks_per_jiffy * 1e9) -	 * where N = 64 + 20 - TICKLEN_SCALE - TICKLEN_SHIFT -	 * which turns out to be N = 51 - SHIFT_HZ. -	 * This gives the result as a 0.64 fixed-point fraction. -	 * That value is reduced by an offset amounting to 1 xsec per -	 * 2^31 timebase ticks to avoid problems with time going backwards -	 * by 1 xsec when we do timer_recalc_offset due to losing the -	 * fractional xsec.  That offset is equal to ppc_tb_freq/2^51 -	 * since there are 2^20 xsec in a second. -	 */ -	div128_by_32((1ULL << 51) - ppc_tb_freq, 0, -		     tb_ticks_per_jiffy << SHIFT_HZ, &res); -	div128_by_32(res.result_high, res.result_low, NSEC_PER_SEC, &res); -	ticklen_to_xs = res.result_low; - -	/* Compute tb_to_xs from tick_nsec */ -	tb_to_xs = mulhdu(last_tick_len << TICKLEN_SHIFT, ticklen_to_xs); - -	/*  	 * Compute scale factor for sched_clock.  	 * The calibrate_decr() function has set tb_ticks_per_sec,  	 * which is the timebase frequency. 
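The new stamp_sec_fraction field stores tv_nsec/NSEC_PER_SEC as a 0.32 binary fraction so the VDSO can later convert it with a single mulhwu. The magic constant works because 18446744073 is 2^64/10^9 rounded down, and the 64-bit product cannot overflow precisely because of the BUG_ON(tv_nsec >= NSEC_PER_SEC) guard above. A standalone userspace demo of the conversion (illustrative only, not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t tv_nsec = 500000000;	/* half a second; must be < 1e9 */

	/* tv_nsec * (2^64 / 1e9) is tv_nsec/1e9 scaled by 2^64;
	 * keeping bits 63..32 of the product leaves the fraction of
	 * a second in 0.32 fixed point.
	 */
	uint32_t frac_sec = ((uint64_t)tv_nsec * 18446744073ULL) >> 32;

	/* expect frac_sec just below 2^31, i.e. ~0.5 s */
	printf("frac_sec = %u = %.9f s\n", frac_sec,
	       (double)frac_sec / 4294967296.0);
	return 0;
}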
@@ -1087,21 +998,14 @@ void __init time_init(void)  	/* Save the current timebase to pretty up CONFIG_PRINTK_TIME */  	boot_tb = get_tb_or_rtc(); -	write_seqlock_irqsave(&xtime_lock, flags); -  	/* If platform provided a timezone (pmac), we correct the time */          if (timezone_offset) {  		sys_tz.tz_minuteswest = -timezone_offset / 60;  		sys_tz.tz_dsttime = 0;          } -	vdso_data->tb_orig_stamp = tb_last_jiffy;  	vdso_data->tb_update_count = 0;  	vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; -	vdso_data->stamp_xsec = (u64) xtime.tv_sec * XSEC_PER_SEC; -	vdso_data->tb_to_xs = tb_to_xs; - -	write_sequnlock_irqrestore(&xtime_lock, flags);  	/* Start the decrementer on CPUs that have manual control  	 * such as BookE @@ -1195,39 +1099,6 @@ void to_tm(int tim, struct rtc_time * tm)  	GregorianDay(tm);  } -/* Auxiliary function to compute scaling factors */ -/* Actually the choice of a timebase running at 1/4 the of the bus - * frequency giving resolution of a few tens of nanoseconds is quite nice. - * It makes this computation very precise (27-28 bits typically) which - * is optimistic considering the stability of most processor clock - * oscillators and the precision with which the timebase frequency - * is measured but does not harm. - */ -unsigned mulhwu_scale_factor(unsigned inscale, unsigned outscale) -{ -        unsigned mlt=0, tmp, err; -        /* No concern for performance, it's done once: use a stupid -         * but safe and compact method to find the multiplier. -         */ -   -        for (tmp = 1U<<31; tmp != 0; tmp >>= 1) { -                if (mulhwu(inscale, mlt|tmp) < outscale) -			mlt |= tmp; -        } -   -        /* We might still be off by 1 for the best approximation. -         * A side effect of this is that if outscale is too large -         * the returned value will be zero. -         * Many corner cases have been checked and seem to work, -         * some might have been forgotten in the test however. -         */ -   -        err = inscale * (mlt+1); -        if (err <= inscale/2) -		mlt++; -        return mlt; -} -  /*   * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit   * result. 
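The tb_update_count protocol that update_vsyscall() now drives inline is a seqcount in all but name: the timekeeping caller makes the count odd before the update, the function publishes the new values, and the final increment makes it even again. The matching reader lives in the VDSO assembly further down; below is a C rendition of that loop, with hypothetical names standing in for the vdso_data fields. Note the real reader forges a data dependency on the loaded count (the xor/add sequences in the assembly) rather than relying on volatile reads or barriers.

#include <stdint.h>

struct vdso_sample {
	uint64_t tb_orig_stamp;
	uint64_t tb_to_xs;
	uint64_t stamp_xsec;
	uint32_t tb_update_count;	/* VDSO reads only the low word */
};

/* Retry until two reads of the count match and are even: an odd
 * count means an update is in flight, a changed count means the
 * snapshot straddled one.
 */
static void read_time_snapshot(const volatile struct vdso_sample *v,
			       struct vdso_sample *out)
{
	uint32_t seq;

	do {
		seq = v->tb_update_count;
		out->tb_orig_stamp = v->tb_orig_stamp;
		out->tb_to_xs = v->tb_to_xs;
		out->stamp_xsec = v->stamp_xsec;
	} while ((seq & 1) || v->tb_update_count != seq);
}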
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 25fc33984c2..a45a63c3a0c 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -55,9 +55,6 @@  #endif  #include <asm/kexec.h>  #include <asm/ppc-opcode.h> -#ifdef CONFIG_FSL_BOOKE -#include <asm/dbell.h> -#endif  #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)  int (*__debugger)(struct pt_regs *regs) __read_mostly; @@ -688,7 +685,7 @@ void RunModeException(struct pt_regs *regs)  void __kprobes single_step_exception(struct pt_regs *regs)  { -	regs->msr &= ~(MSR_SE | MSR_BE);  /* Turn off 'trace' bits */ +	clear_single_step(regs);  	if (notify_die(DIE_SSTEP, "single_step", regs, 5,  					5, SIGTRAP) == NOTIFY_STOP) @@ -707,10 +704,8 @@ void __kprobes single_step_exception(struct pt_regs *regs)   */  static void emulate_single_step(struct pt_regs *regs)  { -	if (single_stepping(regs)) { -		clear_single_step(regs); -		_exception(SIGTRAP, regs, TRAP_TRACE, 0); -	} +	if (single_stepping(regs)) +		single_step_exception(regs);  }  static inline int __parse_fpscr(unsigned long fpscr) @@ -1344,24 +1339,6 @@ void vsx_assist_exception(struct pt_regs *regs)  #endif /* CONFIG_VSX */  #ifdef CONFIG_FSL_BOOKE - -void doorbell_exception(struct pt_regs *regs) -{ -#ifdef CONFIG_SMP -	int cpu = smp_processor_id(); -	int msg; - -	if (num_online_cpus() < 2) -		return; - -	for (msg = 0; msg < 4; msg++) -		if (test_and_clear_bit(msg, &dbell_smp_message[cpu])) -			smp_message_recv(msg); -#else -	printk(KERN_WARNING "Received doorbell on non-smp system\n"); -#endif -} -  void CacheLockingException(struct pt_regs *regs, unsigned long address,  			   unsigned long error_code)  { diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index d84d19224a9..13002fe206e 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -22,7 +22,7 @@  #include <linux/elf.h>  #include <linux/security.h>  #include <linux/bootmem.h> -#include <linux/lmb.h> +#include <linux/memblock.h>  #include <asm/pgtable.h>  #include <asm/system.h> @@ -734,7 +734,7 @@ static int __init vdso_init(void)  	vdso_data->platform = machine_is(iseries) ? 0x200 : 0x100;  	if (firmware_has_feature(FW_FEATURE_LPAR))  		vdso_data->platform |= 1; -	vdso_data->physicalMemorySize = lmb_phys_mem_size(); +	vdso_data->physicalMemorySize = memblock_phys_mem_size();  	vdso_data->dcache_size = ppc64_caches.dsize;  	vdso_data->dcache_line_size = ppc64_caches.dline_size;  	vdso_data->icache_size = ppc64_caches.isize; diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index ee038d4bf25..4ee09ee2e83 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -19,8 +19,10 @@  /* Offset for the low 32-bit part of a field of long type */  #ifdef CONFIG_PPC64  #define LOPART	4 +#define TSPEC_TV_SEC	TSPC64_TV_SEC+LOPART  #else  #define LOPART	0 +#define TSPEC_TV_SEC	TSPC32_TV_SEC  #endif  	.text @@ -41,23 +43,11 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)  	mr	r9, r3			/* datapage ptr in r9 */  	cmplwi	r10,0			/* check if tv is NULL */  	beq	3f -	bl	__do_get_xsec@local	/* get xsec from tb & kernel */ -	bne-	2f			/* out of line -> do syscall */ - -	/* seconds are xsec >> 20 */ -	rlwinm	r5,r4,12,20,31 -	rlwimi	r5,r3,12,0,19 -	stw	r5,TVAL32_TV_SEC(r10) - -	/* get remaining xsec and convert to usec. 
we scale -	 * up remaining xsec by 12 bits and get the top 32 bits -	 * of the multiplication -	 */ -	rlwinm	r5,r4,12,0,19 -	lis	r6,1000000@h -	ori	r6,r6,1000000@l -	mulhwu	r5,r5,r6 -	stw	r5,TVAL32_TV_USEC(r10) +	lis	r7,1000000@ha		/* load up USEC_PER_SEC */ +	addi	r7,r7,1000000@l		/* so we get microseconds in r4 */ +	bl	__do_get_tspec@local	/* get sec/usec from tb & kernel */ +	stw	r3,TVAL32_TV_SEC(r10) +	stw	r4,TVAL32_TV_USEC(r10)  3:	cmplwi	r11,0			/* check if tz is NULL */  	beq	1f @@ -70,14 +60,6 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)  	crclr	cr0*4+so  	li	r3,0  	blr - -2: -	mtlr	r12 -	mr	r3,r10 -	mr	r4,r11 -	li	r0,__NR_gettimeofday -	sc -	blr    .cfi_endproc  V_FUNCTION_END(__kernel_gettimeofday) @@ -100,7 +82,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)  	mr	r11,r4			/* r11 saves tp */  	bl	__get_datapage@local	/* get data page */  	mr	r9,r3			/* datapage ptr in r9 */ - +	lis	r7,NSEC_PER_SEC@h	/* want nanoseconds */ +	ori	r7,r7,NSEC_PER_SEC@l  50:	bl	__do_get_tspec@local	/* get sec/nsec from tb & kernel */  	bne	cr1,80f			/* not monotonic -> all done */ @@ -198,83 +181,12 @@ V_FUNCTION_END(__kernel_clock_getres)  /* - * This is the core of gettimeofday() & friends, it returns the xsec - * value in r3 & r4 and expects the datapage ptr (non clobbered) - * in r9. clobbers r0,r4,r5,r6,r7,r8. - * When returning, r8 contains the counter value that can be reused - * by the monotonic clock implementation - */ -__do_get_xsec: -  .cfi_startproc -	/* Check for update count & load values. We use the low -	 * order 32 bits of the update count -	 */ -1:	lwz	r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9) -	andi.	r0,r8,1			/* pending update ? loop */ -	bne-	1b -	xor	r0,r8,r8		/* create dependency */ -	add	r9,r9,r0 - -	/* Load orig stamp (offset to TB) */ -	lwz	r5,CFG_TB_ORIG_STAMP(r9) -	lwz	r6,(CFG_TB_ORIG_STAMP+4)(r9) - -	/* Get a stable TB value */ -2:	mftbu	r3 -	mftbl	r4 -	mftbu	r0 -	cmpl	cr0,r3,r0 -	bne-	2b - -	/* Substract tb orig stamp. If the high part is non-zero, we jump to -	 * the slow path which call the syscall. -	 * If it's ok, then we have our 32 bits tb_ticks value in r7 -	 */ -	subfc	r7,r6,r4 -	subfe.	r0,r5,r3 -	bne-	3f - -	/* Load scale factor & do multiplication */ -	lwz	r5,CFG_TB_TO_XS(r9)	/* load values */ -	lwz	r6,(CFG_TB_TO_XS+4)(r9) -	mulhwu	r4,r7,r5 -	mulhwu	r6,r7,r6 -	mullw	r0,r7,r5 -	addc	r6,r6,r0 - -	/* At this point, we have the scaled xsec value in r4 + XER:CA -	 * we load & add the stamp since epoch -	 */ -	lwz	r5,CFG_STAMP_XSEC(r9) -	lwz	r6,(CFG_STAMP_XSEC+4)(r9) -	adde	r4,r4,r6 -	addze	r3,r5 - -	/* We now have our result in r3,r4. We create a fake dependency -	 * on that result and re-check the counter -	 */ -	or	r6,r4,r3 -	xor	r0,r6,r6 -	add	r9,r9,r0 -	lwz	r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) -        cmpl    cr0,r8,r0		/* check if updated */ -	bne-	1b - -	/* Warning ! The caller expects CR:EQ to be set to indicate a -	 * successful calculation (so it won't fallback to the syscall -	 * method). We have overriden that CR bit in the counter check, -	 * but fortunately, the loop exit condition _is_ CR:EQ set, so -	 * we can exit safely here. If you change this code, be careful -	 * of that side effect. -	 */ -3:	blr -  .cfi_endproc - -/* - * This is the core of clock_gettime(), it returns the current - * time in seconds and nanoseconds in r3 and r4. + * This is the core of clock_gettime() and gettimeofday(), + * it returns the current time in r3 (seconds) and r4. 
+ * On entry, r7 gives the resolution of r4, either USEC_PER_SEC + * or NSEC_PER_SEC, giving r4 in microseconds or nanoseconds.   * It expects the datapage ptr in r9 and doesn't clobber it. - * It clobbers r0, r5, r6, r10 and returns NSEC_PER_SEC in r7. + * It clobbers r0, r5 and r6.   * On return, r8 contains the counter value that can be reused.   * This clobbers cr0 but not any other cr field.   */ @@ -297,70 +209,58 @@ __do_get_tspec:  2:	mftbu	r3  	mftbl	r4  	mftbu	r0 -	cmpl	cr0,r3,r0 +	cmplw	cr0,r3,r0  	bne-	2b  	/* Subtract tb orig stamp and shift left 12 bits.  	 */ -	subfc	r7,r6,r4 +	subfc	r4,r6,r4  	subfe	r0,r5,r3  	slwi	r0,r0,12 -	rlwimi.	r0,r7,12,20,31 -	slwi	r7,r7,12 +	rlwimi.	r0,r4,12,20,31 +	slwi	r4,r4,12 -	/* Load scale factor & do multiplication */ +	/* +	 * Load scale factor & do multiplication. +	 * We only use the high 32 bits of the tb_to_xs value. +	 * Even with a 1GHz timebase clock, the high 32 bits of +	 * tb_to_xs will be at least 4 million, so the error from +	 * ignoring the low 32 bits will be no more than 0.25ppm. +	 * The error will just make the clock run very very slightly +	 * slow until the next time the kernel updates the VDSO data, +	 * at which point the clock will catch up to the kernel's value, +	 * so there is no long-term error accumulation. +	 */  	lwz	r5,CFG_TB_TO_XS(r9)	/* load values */ -	lwz	r6,(CFG_TB_TO_XS+4)(r9) -	mulhwu	r3,r7,r6 -	mullw	r10,r7,r5 -	mulhwu	r4,r7,r5 -	addc	r10,r3,r10 +	mulhwu	r4,r4,r5  	li	r3,0  	beq+	4f			/* skip high part computation if 0 */  	mulhwu	r3,r0,r5 -	mullw	r7,r0,r5 -	mulhwu	r5,r0,r6 -	mullw	r6,r0,r6 -	adde	r4,r4,r7 -	addze	r3,r3 +	mullw	r5,r0,r5  	addc	r4,r4,r5  	addze	r3,r3 -	addc	r10,r10,r6 - -4:	addze	r4,r4			/* add in carry */ -	lis	r7,NSEC_PER_SEC@h -	ori	r7,r7,NSEC_PER_SEC@l -	mulhwu	r4,r4,r7		/* convert to nanoseconds */ - -	/* At this point, we have seconds & nanoseconds since the xtime -	 * stamp in r3+CA and r4.  Load & add the xtime stamp. +4: +	/* At this point, we have seconds since the xtime stamp +	 * as a 32.32 fixed-point number in r3 and r4. +	 * Load & add the xtime stamp.  	 */ -#ifdef CONFIG_PPC64 -	lwz	r5,STAMP_XTIME+TSPC64_TV_SEC+LOPART(r9) -	lwz	r6,STAMP_XTIME+TSPC64_TV_NSEC+LOPART(r9) -#else -	lwz	r5,STAMP_XTIME+TSPC32_TV_SEC(r9) -	lwz	r6,STAMP_XTIME+TSPC32_TV_NSEC(r9) -#endif -	add	r4,r4,r6 +	lwz	r5,STAMP_XTIME+TSPEC_TV_SEC(r9) +	lwz	r6,STAMP_SEC_FRAC(r9) +	addc	r4,r4,r6  	adde	r3,r3,r5 -	/* We now have our result in r3,r4. 
We create a fake dependency -	 * on that result and re-check the counter +	/* We create a fake dependency on the result in r3/r4 +	 * and re-check the counter  	 */  	or	r6,r4,r3  	xor	r0,r6,r6  	add	r9,r9,r0  	lwz	r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9) -        cmpl    cr0,r8,r0		/* check if updated */ +        cmplw	cr0,r8,r0		/* check if updated */  	bne-	1b -	/* check for nanosecond overflow and adjust if necessary */ -	cmpw	r4,r7 -	bltlr				/* all done if no overflow */ -	subf	r4,r7,r4		/* adjust if overflow */ -	addi	r3,r3,1 +	mulhwu	r4,r4,r7		/* convert to micro or nanoseconds */  	blr    .cfi_endproc diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 262cd5857a5..e97a9a0dc4a 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -33,18 +33,11 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)  	bl	V_LOCAL_FUNC(__get_datapage)	/* get data page */  	cmpldi	r11,0			/* check if tv is NULL */  	beq	2f -	bl	V_LOCAL_FUNC(__do_get_xsec)	/* get xsec from tb & kernel */ -	lis     r7,15			/* r7 = 1000000 = USEC_PER_SEC */ -	ori     r7,r7,16960 -	rldicl  r5,r4,44,20		/* r5 = sec = xsec / XSEC_PER_SEC */ -	rldicr  r6,r5,20,43		/* r6 = sec * XSEC_PER_SEC */ -	std	r5,TVAL64_TV_SEC(r11)	/* store sec in tv */ -	subf	r0,r6,r4		/* r0 = xsec = (xsec - r6) */ -	mulld   r0,r0,r7		/* usec = (xsec * USEC_PER_SEC) / -					 * XSEC_PER_SEC -					 */ -	rldicl  r0,r0,44,20 -	std	r0,TVAL64_TV_USEC(r11)	/* store usec in tv */ +	lis	r7,1000000@ha		/* load up USEC_PER_SEC */ +	addi	r7,r7,1000000@l +	bl	V_LOCAL_FUNC(__do_get_tspec) /* get sec/us from tb & kernel */ +	std	r4,TVAL64_TV_SEC(r11)	/* store sec in tv */ +	std	r5,TVAL64_TV_USEC(r11)	/* store usec in tv */  2:	cmpldi	r10,0			/* check if tz is NULL */  	beq	1f  	lwz	r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */ @@ -77,6 +70,8 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)    .cfi_register lr,r12  	mr	r11,r4			/* r11 saves tp */  	bl	V_LOCAL_FUNC(__get_datapage)	/* get data page */ +	lis	r7,NSEC_PER_SEC@h	/* want nanoseconds */ +	ori	r7,r7,NSEC_PER_SEC@l  50:	bl	V_LOCAL_FUNC(__do_get_tspec)	/* get time from tb & kernel */  	bne	cr1,80f			/* if not monotonic, all done */ @@ -171,49 +166,12 @@ V_FUNCTION_END(__kernel_clock_getres)  /* - * This is the core of gettimeofday(), it returns the xsec - * value in r4 and expects the datapage ptr (non clobbered) - * in r3. clobbers r0,r4,r5,r6,r7,r8 - * When returning, r8 contains the counter value that can be reused - */ -V_FUNCTION_BEGIN(__do_get_xsec) -  .cfi_startproc -	/* check for update count & load values */ -1:	ld	r8,CFG_TB_UPDATE_COUNT(r3) -	andi.	r0,r8,1			/* pending update ? loop */ -	bne-	1b -	xor	r0,r8,r8		/* create dependency */ -	add	r3,r3,r0 - -	/* Get TB & offset it. We use the MFTB macro which will generate -	 * workaround code for Cell. -	 */ -	MFTB(r7) -	ld	r9,CFG_TB_ORIG_STAMP(r3) -	subf	r7,r9,r7 - -	/* Scale result */ -	ld	r5,CFG_TB_TO_XS(r3) -	mulhdu	r7,r7,r5 - -	/* Add stamp since epoch */ -	ld	r6,CFG_STAMP_XSEC(r3) -	add	r4,r6,r7 - -	xor	r0,r4,r4 -	add	r3,r3,r0 -	ld	r0,CFG_TB_UPDATE_COUNT(r3) -        cmpld   cr0,r0,r8		/* check if updated */ -	bne-	1b -	blr -  .cfi_endproc -V_FUNCTION_END(__do_get_xsec) - -/* - * This is the core of clock_gettime(), it returns the current - * time in seconds and nanoseconds in r4 and r5. + * This is the core of clock_gettime() and gettimeofday(), + * it returns the current time in r4 (seconds) and r5. 
+ * On entry, r7 gives the resolution of r5, either USEC_PER_SEC + * or NSEC_PER_SEC, giving r5 in microseconds or nanoseconds.   * It expects the datapage ptr in r3 and doesn't clobber it. - * It clobbers r0 and r6 and returns NSEC_PER_SEC in r7. + * It clobbers r0, r6 and r9.   * On return, r8 contains the counter value that can be reused.   * This clobbers cr0 but not any other cr field.   */ @@ -229,18 +187,18 @@ V_FUNCTION_BEGIN(__do_get_tspec)  	/* Get TB & offset it. We use the MFTB macro which will generate  	 * workaround code for Cell.  	 */ -	MFTB(r7) +	MFTB(r6)  	ld	r9,CFG_TB_ORIG_STAMP(r3) -	subf	r7,r9,r7 +	subf	r6,r9,r6  	/* Scale result */  	ld	r5,CFG_TB_TO_XS(r3) -	sldi	r7,r7,12		/* compute time since stamp_xtime */ -	mulhdu	r6,r7,r5		/* in units of 2^-32 seconds */ +	sldi	r6,r6,12		/* compute time since stamp_xtime */ +	mulhdu	r6,r6,r5		/* in units of 2^-32 seconds */  	/* Add stamp since epoch */  	ld	r4,STAMP_XTIME+TSPC64_TV_SEC(r3) -	ld	r5,STAMP_XTIME+TSPC64_TV_NSEC(r3) +	lwz	r5,STAMP_SEC_FRAC(r3)  	or	r0,r4,r5  	or	r0,r0,r6  	xor	r0,r0,r0 @@ -250,17 +208,11 @@ V_FUNCTION_BEGIN(__do_get_tspec)  	bne-	1b			/* reload if so */  	/* convert to seconds & nanoseconds and add to stamp */ -	lis	r7,NSEC_PER_SEC@h -	ori	r7,r7,NSEC_PER_SEC@l -	mulhwu	r0,r6,r7		/* compute nanoseconds and */ +	add	r6,r6,r5		/* add on fractional seconds of xtime */ +	mulhwu	r5,r6,r7		/* compute micro or nanoseconds and */  	srdi	r6,r6,32		/* seconds since stamp_xtime */ -	clrldi	r0,r0,32 -	add	r5,r5,r0		/* add nanoseconds together */ -	cmpd	r5,r7			/* overflow? */ +	clrldi	r5,r5,32  	add	r4,r4,r6 -	bltlr				/* all done if no overflow */ -	subf	r5,r7,r5		/* if overflow, adjust */ -	addi	r4,r4,1  	blr    .cfi_endproc  V_FUNCTION_END(__do_get_tspec)  | 
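Both rewritten __do_get_tspec routines share one idea: keep the elapsed time as a 32.32 fixed-point count of seconds, add the precomputed stamp_sec_fraction so that any carry flows straight into the integer half, and only then scale the fractional word by whatever the caller put in r7 (USEC_PER_SEC or NSEC_PER_SEC). Because the fraction is strictly less than one second before scaling, the old compare-and-adjust overflow path disappears entirely. A C model of the 64-bit flavour under assumed names, with unsigned __int128 standing in for mulhdu:

#include <stdint.h>

/* High 64 bits of a 64x64 to 128-bit product (the PPC mulhdu insn). */
static inline uint64_t mulhdu(uint64_t a, uint64_t b)
{
	return (uint64_t)(((unsigned __int128)a * b) >> 64);
}

struct tspec {
	uint64_t sec;
	uint32_t sub;	/* microseconds or nanoseconds */
};

static struct tspec do_get_tspec(uint64_t tb, uint64_t tb_orig_stamp,
				 uint64_t tb_to_xs, uint64_t stamp_sec,
				 uint32_t stamp_sec_frac,
				 uint32_t units_per_sec)
{
	struct tspec t;

	/* Seconds since stamp_xtime as a 32.32 fixed-point value:
	 * shift the timebase delta left 12, then take the high half
	 * of the product with tb_to_xs to get 2^-32 second units.
	 */
	uint64_t fx = mulhdu((tb - tb_orig_stamp) << 12, tb_to_xs);

	/* Adding the 0.32 stamp fraction may carry into bit 32, which
	 * is already the whole-second part: no special casing needed.
	 */
	fx += stamp_sec_frac;

	t.sec = stamp_sec + (fx >> 32);

	/* mulhwu: fraction times units_per_sec, top 32 bits of the
	 * 64-bit product. The result is always below units_per_sec,
	 * so no end-of-second overflow fixup remains.
	 */
	t.sub = (uint32_t)(((fx & 0xffffffffULL) * units_per_sec) >> 32);
	return t;
}

The 32-bit version follows the same outline but, as its new comment explains, multiplies by only the high word of tb_to_xs, trading at most 0.25 ppm of short-term rate error for a much shorter mulhwu/mullw sequence.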
