diff options
Diffstat (limited to 'arch/powerpc/kernel')
26 files changed, 772 insertions, 334 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index ce4f7f17911..ee728e433aa 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -85,6 +85,8 @@ extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o extra-y += vmlinux.lds +obj-$(CONFIG_RELOCATABLE_PPC32) += reloc_32.o + obj-$(CONFIG_PPC32) += entry_32.o setup_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o obj-$(CONFIG_KGDB) += kgdb.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 7c5324f1ec9..04caee7d9bc 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -208,6 +208,7 @@ int main(void) DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); + DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost)); #endif /* CONFIG_PPC64 */ /* RTAS */ diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S index 7f818feaa7a..ebc62f42a23 100644 --- a/arch/powerpc/kernel/cpu_setup_a2.S +++ b/arch/powerpc/kernel/cpu_setup_a2.S @@ -41,11 +41,16 @@ _GLOBAL(__setup_cpu_a2) * core local but doing it always won't hurt */ -#ifdef CONFIG_PPC_WSP_COPRO +#ifdef CONFIG_PPC_ICSWX /* Make sure ACOP starts out as zero */ li r3,0 mtspr SPRN_ACOP,r3 + /* Skip the following if we are in Guest mode */ + mfmsr r3 + andis. r0,r3,MSR_GS@h + bne _icswx_skip_guest + /* Enable icswx instruction */ mfspr r3,SPRN_A2_CCR2 ori r3,r3,A2_CCR2_ENABLE_ICSWX @@ -54,7 +59,8 @@ _GLOBAL(__setup_cpu_a2) /* Unmask all CTs in HACOP */ li r3,-1 mtspr SPRN_HACOP,r3 -#endif /* CONFIG_PPC_WSP_COPRO */ +_icswx_skip_guest: +#endif /* CONFIG_PPC_ICSWX */ /* Enable doorbell */ mfspr r3,SPRN_A2_CCR2 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index edae5bb06f1..81db9e2a8a2 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1505,6 +1505,19 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_4xx, .platform = "ppc405", }, + { /* APM8018X */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x7ff11432, + .cpu_name = "APM8018X", + .cpu_features = CPU_FTRS_40X, + .cpu_user_features = PPC_FEATURE_32 | + PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, + .mmu_features = MMU_FTR_TYPE_40x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc405", + }, { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, @@ -1830,6 +1843,20 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_47x, .platform = "ppc470", }, + { /* 476fpe */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x7ff50000, + .cpu_name = "476fpe", + .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | + MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, { /* 476 iss */ .pvr_mask = 0xffff0000, .pvr_value = 0x00050000, diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index d879809d5c4..28be3452e67 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -10,85 +10,85 @@ * */ -#undef DEBUG - #include <linux/kernel.h> #include <linux/smp.h> #include <linux/reboot.h> #include <linux/kexec.h> -#include <linux/bootmem.h> #include <linux/export.h> #include <linux/crash_dump.h> #include <linux/delay.h> -#include <linux/elf.h> -#include <linux/elfcore.h> #include <linux/init.h> #include <linux/irq.h> #include <linux/types.h> -#include <linux/memblock.h> #include <asm/processor.h> #include <asm/machdep.h> #include <asm/kexec.h> #include <asm/kdump.h> #include <asm/prom.h> -#include <asm/firmware.h> #include <asm/smp.h> #include <asm/system.h> #include <asm/setjmp.h> -#ifdef DEBUG -#include <asm/udbg.h> -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif +/* + * The primary CPU waits a while for all secondary CPUs to enter. This is to + * avoid sending an IPI if the secondary CPUs are entering + * crash_kexec_secondary on their own (eg via a system reset). + * + * The secondary timeout has to be longer than the primary. Both timeouts are + * in milliseconds. + */ +#define PRIMARY_TIMEOUT 500 +#define SECONDARY_TIMEOUT 1000 -/* This keeps a track of which one is crashing cpu. */ +#define IPI_TIMEOUT 10000 +#define REAL_MODE_TIMEOUT 10000 + +/* This keeps a track of which one is the crashing cpu. */ int crashing_cpu = -1; -static cpumask_t cpus_in_crash = CPU_MASK_NONE; -cpumask_t cpus_in_sr = CPU_MASK_NONE; +static atomic_t cpus_in_crash; +static int time_to_dump; #define CRASH_HANDLER_MAX 3 /* NULL terminated list of shutdown handles */ static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; static DEFINE_SPINLOCK(crash_handlers_lock); +static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; +static int crash_shutdown_cpu = -1; + +static int handle_fault(struct pt_regs *regs) +{ + if (crash_shutdown_cpu == smp_processor_id()) + longjmp(crash_shutdown_buf, 1); + return 0; +} + #ifdef CONFIG_SMP -static atomic_t enter_on_soft_reset = ATOMIC_INIT(0); void crash_ipi_callback(struct pt_regs *regs) { + static cpumask_t cpus_state_saved = CPU_MASK_NONE; + int cpu = smp_processor_id(); if (!cpu_online(cpu)) return; hard_irq_disable(); - if (!cpumask_test_cpu(cpu, &cpus_in_crash)) + if (!cpumask_test_cpu(cpu, &cpus_state_saved)) { crash_save_cpu(regs, cpu); - cpumask_set_cpu(cpu, &cpus_in_crash); - - /* - * Entered via soft-reset - could be the kdump - * process is invoked using soft-reset or user activated - * it if some CPU did not respond to an IPI. - * For soft-reset, the secondary CPU can enter this func - * twice. 1 - using IPI, and 2. soft-reset. - * Tell the kexec CPU that entered via soft-reset and ready - * to go down. - */ - if (cpumask_test_cpu(cpu, &cpus_in_sr)) { - cpumask_clear_cpu(cpu, &cpus_in_sr); - atomic_inc(&enter_on_soft_reset); + cpumask_set_cpu(cpu, &cpus_state_saved); } + atomic_inc(&cpus_in_crash); + smp_mb__after_atomic_inc(); + /* * Starting the kdump boot. * This barrier is needed to make sure that all CPUs are stopped. - * If not, soft-reset will be invoked to bring other CPUs. */ - while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash)) + while (!time_to_dump) cpu_relax(); if (ppc_md.kexec_cpu_down) @@ -103,106 +103,99 @@ void crash_ipi_callback(struct pt_regs *regs) /* NOTREACHED */ } -/* - * Wait until all CPUs are entered via soft-reset. - */ -static void crash_soft_reset_check(int cpu) -{ - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ - - cpumask_clear_cpu(cpu, &cpus_in_sr); - while (atomic_read(&enter_on_soft_reset) != ncpus) - cpu_relax(); -} - - static void crash_kexec_prepare_cpus(int cpu) { unsigned int msecs; - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ + int tries = 0; + int (*old_handler)(struct pt_regs *regs); + + printk(KERN_EMERG "Sending IPI to other CPUs\n"); crash_send_ipi(crash_ipi_callback); smp_wmb(); +again: /* * FIXME: Until we will have the way to stop other CPUs reliably, * the crash CPU will send an IPI and wait for other CPUs to * respond. - * Delay of at least 10 seconds. */ - printk(KERN_EMERG "Sending IPI to other cpus...\n"); - msecs = 10000; - while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) { - cpu_relax(); + msecs = IPI_TIMEOUT; + while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0)) mdelay(1); - } /* Would it be better to replace the trap vector here? */ + if (atomic_read(&cpus_in_crash) >= ncpus) { + printk(KERN_EMERG "IPI complete\n"); + return; + } + + printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", + ncpus - atomic_read(&cpus_in_crash)); + /* - * FIXME: In case if we do not get all CPUs, one possibility: ask the - * user to do soft reset such that we get all. - * Soft-reset will be used until better mechanism is implemented. + * If we have a panic timeout set then we can't wait indefinitely + * for someone to activate system reset. We also give up on the + * second time through if system reset fail to work. */ - if (cpumask_weight(&cpus_in_crash) < ncpus) { - printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n", - ncpus - cpumask_weight(&cpus_in_crash)); - printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n"); - cpumask_clear(&cpus_in_sr); - atomic_set(&enter_on_soft_reset, 0); - while (cpumask_weight(&cpus_in_crash) < ncpus) - cpu_relax(); - } + if ((panic_timeout > 0) || (tries > 0)) + return; + /* - * Make sure all CPUs are entered via soft-reset if the kdump is - * invoked using soft-reset. + * A system reset will cause all CPUs to take an 0x100 exception. + * The primary CPU returns here via setjmp, and the secondary + * CPUs reexecute the crash_kexec_secondary path. */ - if (cpumask_test_cpu(cpu, &cpus_in_sr)) - crash_soft_reset_check(cpu); - /* Leave the IPI callback set */ + old_handler = __debugger; + __debugger = handle_fault; + crash_shutdown_cpu = smp_processor_id(); + + if (setjmp(crash_shutdown_buf) == 0) { + printk(KERN_EMERG "Activate system reset (dumprestart) " + "to stop other cpu(s)\n"); + + /* + * A system reset will force all CPUs to execute the + * crash code again. We need to reset cpus_in_crash so we + * wait for everyone to do this. + */ + atomic_set(&cpus_in_crash, 0); + smp_mb(); + + while (atomic_read(&cpus_in_crash) < ncpus) + cpu_relax(); + } + + crash_shutdown_cpu = -1; + __debugger = old_handler; + + tries++; + goto again; } /* - * This function will be called by secondary cpus or by kexec cpu - * if soft-reset is activated to stop some CPUs. + * This function will be called by secondary cpus. */ void crash_kexec_secondary(struct pt_regs *regs) { - int cpu = smp_processor_id(); unsigned long flags; - int msecs = 5; + int msecs = SECONDARY_TIMEOUT; local_irq_save(flags); - /* Wait 5ms if the kexec CPU is not entered yet. */ + + /* Wait for the primary crash CPU to signal its progress */ while (crashing_cpu < 0) { if (--msecs < 0) { - /* - * Either kdump image is not loaded or - * kdump process is not started - Probably xmon - * exited using 'x'(exit and recover) or - * kexec_should_crash() failed for all running tasks. - */ - cpumask_clear_cpu(cpu, &cpus_in_sr); + /* No response, kdump image may not have been loaded */ local_irq_restore(flags); return; } + mdelay(1); - cpu_relax(); - } - if (cpu == crashing_cpu) { - /* - * Panic CPU will enter this func only via soft-reset. - * Wait until all secondary CPUs entered and - * then start kexec boot. - */ - crash_soft_reset_check(cpu); - cpumask_set_cpu(crashing_cpu, &cpus_in_crash); - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(1, 0); - machine_kexec(kexec_crash_image); - /* NOTREACHED */ } + crash_ipi_callback(regs); } @@ -211,7 +204,7 @@ void crash_kexec_secondary(struct pt_regs *regs) static void crash_kexec_prepare_cpus(int cpu) { /* - * move the secondarys to us so that we can copy + * move the secondaries to us so that we can copy * the new kernel 0-0x100 safely * * do this if kexec in setup.c ? @@ -225,7 +218,6 @@ static void crash_kexec_prepare_cpus(int cpu) void crash_kexec_secondary(struct pt_regs *regs) { - cpumask_clear(&cpus_in_sr); } #endif /* CONFIG_SMP */ @@ -236,7 +228,7 @@ static void crash_kexec_wait_realmode(int cpu) unsigned int msecs; int i; - msecs = 10000; + msecs = REAL_MODE_TIMEOUT; for (i=0; i < nr_cpu_ids && msecs > 0; i++) { if (i == cpu) continue; @@ -308,22 +300,11 @@ int crash_shutdown_unregister(crash_shutdown_t handler) } EXPORT_SYMBOL(crash_shutdown_unregister); -static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; -static int crash_shutdown_cpu = -1; - -static int handle_fault(struct pt_regs *regs) -{ - if (crash_shutdown_cpu == smp_processor_id()) - longjmp(crash_shutdown_buf, 1); - return 0; -} - void default_machine_crash_shutdown(struct pt_regs *regs) { unsigned int i; int (*old_handler)(struct pt_regs *regs); - /* * This function is only called after the system * has panicked or is otherwise in a critical state. @@ -341,15 +322,26 @@ void default_machine_crash_shutdown(struct pt_regs *regs) * such that another IPI will not be sent. */ crashing_cpu = smp_processor_id(); - crash_save_cpu(regs, crashing_cpu); + + /* + * If we came in via system reset, wait a while for the secondary + * CPUs to enter. + */ + if (TRAP(regs) == 0x100) + mdelay(PRIMARY_TIMEOUT); + crash_kexec_prepare_cpus(crashing_cpu); - cpumask_set_cpu(crashing_cpu, &cpus_in_crash); + + crash_save_cpu(regs, crashing_cpu); + + time_to_dump = 1; + crash_kexec_wait_realmode(crashing_cpu); machine_kexec_mask_interrupts(); /* - * Call registered shutdown routines savely. Swap out + * Call registered shutdown routines safely. Swap out * __debugger_fault_handler, and replace on exit. */ old_handler = __debugger_fault_handler; diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 424afb6b8fb..b3ba5163eae 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -28,7 +28,7 @@ #define DBG(fmt...) #endif -#ifndef CONFIG_RELOCATABLE +#ifndef CONFIG_NONSTATIC_KERNEL void __init reserve_kdump_trampoline(void) { memblock_reserve(0, KDUMP_RESERVE_LIMIT); @@ -67,7 +67,7 @@ void __init setup_kdump_trampoline(void) DBG(" <- setup_kdump_trampoline()\n"); } -#endif /* CONFIG_RELOCATABLE */ +#endif /* CONFIG_NONSTATIC_KERNEL */ static int __init parse_savemaxmem(char *p) { diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index cf9c69b9189..d4be7bb3dbd 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -65,7 +65,7 @@ BEGIN_FTR_SECTION lbz r0,PACAPROCSTART(r13) cmpwi r0,0x80 bne 1f - li r0,0 + li r0,1 stb r0,PACAPROCSTART(r13) b kvm_start_guest 1: diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index b725dab0f88..7dd2981bcc5 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -64,6 +64,35 @@ _ENTRY(_start); mr r31,r3 /* save device tree ptr */ li r24,0 /* CPU number */ +#ifdef CONFIG_RELOCATABLE +/* + * Relocate ourselves to the current runtime address. + * This is called only by the Boot CPU. + * "relocate" is called with our current runtime virutal + * address. + * r21 will be loaded with the physical runtime address of _stext + */ + bl 0f /* Get our runtime address */ +0: mflr r21 /* Make it accessible */ + addis r21,r21,(_stext - 0b)@ha + addi r21,r21,(_stext - 0b)@l /* Get our current runtime base */ + + /* + * We have the runtime (virutal) address of our base. + * We calculate our shift of offset from a 256M page. + * We could map the 256M page we belong to at PAGE_OFFSET and + * get going from there. + */ + lis r4,KERNELBASE@h + ori r4,r4,KERNELBASE@l + rlwinm r6,r21,0,4,31 /* r6 = PHYS_START % 256M */ + rlwinm r5,r4,0,4,31 /* r5 = KERNELBASE % 256M */ + subf r3,r5,r6 /* r3 = r6 - r5 */ + add r3,r4,r3 /* Required Virutal Address */ + + bl relocate +#endif + bl init_cpu_state /* @@ -88,6 +117,65 @@ _ENTRY(_start); #ifdef CONFIG_RELOCATABLE /* + * Relocatable kernel support based on processing of dynamic + * relocation entries. + * + * r25 will contain RPN/ERPN for the start address of memory + * r21 will contain the current offset of _stext + */ + lis r3,kernstart_addr@ha + la r3,kernstart_addr@l(r3) + + /* + * Compute the kernstart_addr. + * kernstart_addr => (r6,r8) + * kernstart_addr & ~0xfffffff => (r6,r7) + */ + rlwinm r6,r25,0,28,31 /* ERPN. Bits 32-35 of Address */ + rlwinm r7,r25,0,0,3 /* RPN - assuming 256 MB page size */ + rlwinm r8,r21,0,4,31 /* r8 = (_stext & 0xfffffff) */ + or r8,r7,r8 /* Compute the lower 32bit of kernstart_addr */ + + /* Store kernstart_addr */ + stw r6,0(r3) /* higher 32bit */ + stw r8,4(r3) /* lower 32bit */ + + /* + * Compute the virt_phys_offset : + * virt_phys_offset = stext.run - kernstart_addr + * + * stext.run = (KERNELBASE & ~0xfffffff) + (kernstart_addr & 0xfffffff) + * When we relocate, we have : + * + * (kernstart_addr & 0xfffffff) = (stext.run & 0xfffffff) + * + * hence: + * virt_phys_offset = (KERNELBASE & ~0xfffffff) - (kernstart_addr & ~0xfffffff) + * + */ + + /* KERNELBASE&~0xfffffff => (r4,r5) */ + li r4, 0 /* higer 32bit */ + lis r5,KERNELBASE@h + rlwinm r5,r5,0,0,3 /* Align to 256M, lower 32bit */ + + /* + * 64bit subtraction. + */ + subfc r5,r7,r5 + subfe r4,r6,r4 + + /* Store virt_phys_offset */ + lis r3,virt_phys_offset@ha + la r3,virt_phys_offset@l(r3) + + stw r4,0(r3) + stw r5,4(r3) + +#elif defined(CONFIG_DYNAMIC_MEMSTART) + /* + * Mapping based, page aligned dynamic kernel loading. + * * r25 will contain RPN/ERPN for the start address of memory * * Add the difference between KERNELBASE and PAGE_OFFSET to the @@ -732,6 +820,8 @@ _GLOBAL(init_cpu_state) /* We use the PVR to differenciate 44x cores from 476 */ mfspr r3,SPRN_PVR srwi r3,r3,16 + cmplwi cr0,r3,PVR_476FPE@h + beq head_start_47x cmplwi cr0,r3,PVR_476@h beq head_start_47x cmplwi cr0,r3,PVR_476_ISS@h @@ -800,12 +890,29 @@ skpinv: addi r4,r4,1 /* Increment */ /* * Configure and load pinned entry into TLB slot 63. */ +#ifdef CONFIG_NONSTATIC_KERNEL + /* + * In case of a NONSTATIC_KERNEL we reuse the TLB XLAT + * entries of the initial mapping set by the boot loader. + * The XLAT entry is stored in r25 + */ + + /* Read the XLAT entry for our current mapping */ + tlbre r25,r23,PPC44x_TLB_XLAT + + lis r3,KERNELBASE@h + ori r3,r3,KERNELBASE@l + + /* Use our current RPN entry */ + mr r4,r25 +#else lis r3,PAGE_OFFSET@h ori r3,r3,PAGE_OFFSET@l /* Kernel is at the base of RAM */ li r4, 0 /* Load the kernel physical address */ +#endif /* Load the kernel PID = 0 */ li r0,0 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 9f5d210ddf3..d5d78c4ceef 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -197,7 +197,7 @@ _ENTRY(__early_start) bl early_init -#ifdef CONFIG_RELOCATABLE +#ifdef CONFIG_DYNAMIC_MEMSTART lis r3,kernstart_addr@ha la r3,kernstart_addr@l(r3) #ifdef CONFIG_PHYS_64BIT diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 9c3cd490b1b..7c66ce13da8 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -39,9 +39,13 @@ #define cpu_should_die() 0 #endif +unsigned long cpuidle_disable = IDLE_NO_OVERRIDE; +EXPORT_SYMBOL(cpuidle_disable); + static int __init powersave_off(char *arg) { ppc_md.power_save = NULL; + cpuidle_disable = IDLE_POWERSAVE_OFF; return 0; } __setup("powersave=off", powersave_off); @@ -113,6 +117,29 @@ void cpu_idle(void) } } + +/* + * cpu_idle_wait - Used to ensure that all the CPUs come out of the old + * idle loop and start using the new idle loop. + * Required while changing idle handler on SMP systems. + * Caller must have changed idle handler to the new value before the call. + * This window may be larger on shared systems. + */ +void cpu_idle_wait(void) +{ + int cpu; + smp_mb(); + + /* kick all the CPUs so that they exit out of old idle routine */ + get_online_cpus(); + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + smp_send_reschedule(cpu); + } + put_online_cpus(); +} +EXPORT_SYMBOL_GPL(cpu_idle_wait); + int powersave_nap; #ifdef CONFIG_SYSCTL diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 3a70845a51c..fcdff198da4 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -54,6 +54,7 @@ _GLOBAL(power7_idle) li r0,0 stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ stb r0,PACAHARDIRQEN(r13) + stb r0,PACA_NAPSTATELOST(r13) /* Continue saving state */ SAVE_GPR(2, r1) @@ -86,6 +87,9 @@ _GLOBAL(power7_wakeup_loss) rfid _GLOBAL(power7_wakeup_noloss) + lbz r0,PACA_NAPSTATELOST(r13) + cmpwi r0,0 + bne .power7_wakeup_loss ld r1,PACAR1(r13) ld r4,_MSR(r1) ld r5,_NIP(r1) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5c3c46948d9..701d4aceb4f 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -115,6 +115,15 @@ static inline notrace void set_soft_enabled(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } +static inline notrace void decrementer_check_overflow(void) +{ + u64 now = get_tb_or_rtc(); + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + + if (now >= *next_tb) + set_dec(1); +} + notrace void arch_local_irq_restore(unsigned long en) { /* @@ -164,24 +173,21 @@ notrace void arch_local_irq_restore(unsigned long en) */ local_paca->hard_enabled = en; -#ifndef CONFIG_BOOKE - /* On server, re-trigger the decrementer if it went negative since - * some processors only trigger on edge transitions of the sign bit. - * - * BookE has a level sensitive decrementer (latches in TSR) so we - * don't need that + /* + * Trigger the decrementer if we have a pending event. Some processors + * only trigger on edge transitions of the sign bit. We might also + * have disabled interrupts long enough that the decrementer wrapped + * to positive. */ - if ((int)mfspr(SPRN_DEC) < 0) - mtspr(SPRN_DEC, 1); -#endif /* CONFIG_BOOKE */ + decrementer_check_overflow(); /* * Force the delivery of pending soft-disabled interrupts on PS3. * Any HV call will have this side effect. */ if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { - u64 tmp; - lv1_get_version_info(&tmp); + u64 tmp, tmp2; + lv1_get_version_info(&tmp, &tmp2); } __hard_irq_enable(); diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index a2158a395d9..c957b1202bd 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -125,7 +125,7 @@ void __init reserve_crashkernel(void) crash_size = resource_size(&crashk_res); -#ifndef CONFIG_RELOCATABLE +#ifndef CONFIG_NONSTATIC_KERNEL if (crashk_res.start != KDUMP_KERNELBASE) printk("Crash kernel location must be 0x%x\n", KDUMP_KERNELBASE); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 458ed3bee66..fa4a573d671 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -214,7 +214,7 @@ char __devinit *pcibios_setup(char *str) * If the interrupt is used, then gets the interrupt line from the * openfirmware and sets it in the pci_dev and pci_config line. */ -int pci_read_irq_line(struct pci_dev *pci_dev) +static int pci_read_irq_line(struct pci_dev *pci_dev) { struct of_irq oirq; unsigned int virq; @@ -283,7 +283,6 @@ int pci_read_irq_line(struct pci_dev *pci_dev) return 0; } -EXPORT_SYMBOL(pci_read_irq_line); /* * Platform support for /proc/bus/pci/X/Y mmap()s, @@ -921,18 +920,22 @@ static void __devinit pcibios_fixup_resources(struct pci_dev *dev) struct resource *res = dev->resource + i; if (!res->flags) continue; - /* On platforms that have PCI_PROBE_ONLY set, we don't - * consider 0 as an unassigned BAR value. It's technically - * a valid value, but linux doesn't like it... so when we can - * re-assign things, we do so, but if we can't, we keep it - * around and hope for the best... + + /* If we're going to re-assign everything, we mark all resources + * as unset (and 0-base them). In addition, we mark BARs starting + * at 0 as unset as well, except if PCI_PROBE_ONLY is also set + * since in that case, we don't want to re-assign anything */ - if (res->start == 0 && !pci_has_flag(PCI_PROBE_ONLY)) { - pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] is unassigned\n", - pci_name(dev), i, - (unsigned long long)res->start, - (unsigned long long)res->end, - (unsigned int)res->flags); + if (pci_has_flag(PCI_REASSIGN_ALL_RSRC) || + (res->start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) { + /* Only print message if not re-assigning */ + if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) + pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] " + "is unassigned\n", + pci_name(dev), i, + (unsigned long long)res->start, + (unsigned long long)res->end, + (unsigned int)res->flags); res->end -= res->start; res->start = 0; res->flags |= IORESOURCE_UNSET; @@ -1042,6 +1045,16 @@ static void __devinit pcibios_fixup_bridge(struct pci_bus *bus) if (i >= 3 && bus->self->transparent) continue; + /* If we are going to re-assign everything, mark the resource + * as unset and move it down to 0 + */ + if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) { + res->flags |= IORESOURCE_UNSET; + res->end -= res->start; + res->start = 0; + continue; + } + pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x] fixup...\n", pci_name(dev), i, (unsigned long long)res->start,\ @@ -1262,18 +1275,15 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus) pci_bus_for_each_resource(bus, res, i) { if (!res || !res->flags || res->start > res->end || res->parent) continue; + + /* If the resource was left unset at this point, we clear it */ + if (res->flags & IORESOURCE_UNSET) + goto clear_resource; + if (bus->parent == NULL) pr = (res->flags & IORESOURCE_IO) ? &ioport_resource : &iomem_resource; else { - /* Don't bother with non-root busses when - * re-assigning all resources. We clear the - * resource flags as if they were colliding - * and as such ensure proper re-allocation - * later. - */ - if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) - goto clear_resource; pr = pci_find_parent_resource(bus->self, res); if (pr == res) { /* this happens when the generic PCI @@ -1304,9 +1314,9 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus) if (reparent_resources(pr, res) == 0) continue; } - printk(KERN_WARNING "PCI: Cannot allocate resource region " - "%d of PCI bridge %d, will remap\n", i, bus->number); -clear_resource: + pr_warning("PCI: Cannot allocate resource region " + "%d of PCI bridge %d, will remap\n", i, bus->number); + clear_resource: res->start = res->end = 0; res->flags = 0; } @@ -1451,16 +1461,11 @@ void __init pcibios_resource_survey(void) { struct pci_bus *b; - /* Allocate and assign resources. If we re-assign everything, then - * we skip the allocate phase - */ + /* Allocate and assign resources */ list_for_each_entry(b, &pci_root_buses, node) pcibios_allocate_bus_resources(b); - - if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) { - pcibios_allocate_resources(0); - pcibios_allocate_resources(1); - } + pcibios_allocate_resources(0); + pcibios_allocate_resources(1); /* Before we start assigning unassigned resource, we try to reserve * the low IO area and the VGA memory area if they intersect the @@ -1732,6 +1737,12 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose) if (mode == PCI_PROBE_NORMAL) hose->last_busno = bus->subordinate = pci_scan_child_bus(bus); + /* Platform gets a chance to do some global fixups before + * we proceed to resource allocation + */ + if (ppc_md.pcibios_fixup_phb) + ppc_md.pcibios_fixup_phb(hose); + /* Configure PCI Express settings */ if (bus && !pci_has_flag(PCI_PROBE_ONLY)) { struct pci_bus *child; @@ -1747,10 +1758,13 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose) static void fixup_hide_host_resource_fsl(struct pci_dev *dev) { int i, class = dev->class >> 8; + /* When configured as agent, programing interface = 1 */ + int prog_if = dev->class & 0xf; if ((class == PCI_CLASS_PROCESSOR_POWERPC || class == PCI_CLASS_BRIDGE_OTHER) && (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) && + (prog_if == 0) && (dev->bus->parent == NULL)) { for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { dev->resource[i].start = 0; diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 4e69deb89b3..dd9e4a04bf7 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -50,6 +50,9 @@ void * __devinit update_dn_pci_info(struct device_node *dn, void *data) dn->data = pdn; pdn->node = dn; pdn->phb = phb; +#ifdef CONFIG_PPC_POWERNV + pdn->pe_number = IODA_INVALID_PE; +#endif regs = of_get_property(dn, "reg", NULL); if (regs) { /* First register entry is addr (00BBSS00) */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6457574c0b2..ebe5766781a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -584,16 +584,32 @@ static struct regbit { unsigned long bit; const char *name; } msr_bits[] = { +#if defined(CONFIG_PPC64) && !defined(CONFIG_BOOKE) + {MSR_SF, "SF"}, + {MSR_HV, "HV"}, +#endif + {MSR_VEC, "VEC"}, + {MSR_VSX, "VSX"}, +#ifdef CONFIG_BOOKE + {MSR_CE, "CE"}, +#endif {MSR_EE, "EE"}, {MSR_PR, "PR"}, {MSR_FP, "FP"}, - {MSR_VEC, "VEC"}, - {MSR_VSX, "VSX"}, {MSR_ME, "ME"}, - {MSR_CE, "CE"}, +#ifdef CONFIG_BOOKE {MSR_DE, "DE"}, +#else + {MSR_SE, "SE"}, + {MSR_BE, "BE"}, +#endif {MSR_IR, "IR"}, {MSR_DR, "DR"}, + {MSR_PMM, "PMM"}, +#ifndef CONFIG_BOOKE + {MSR_RI, "RI"}, + {MSR_LE, "LE"}, +#endif {0, NULL} }; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index cc584865b3d..eca626ea3f2 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -742,7 +742,7 @@ static unsigned char ibm_architecture_vec[] = { W(0xffffffff), /* virt_base */ W(0xffffffff), /* virt_size */ W(0xffffffff), /* load_base */ - W(64), /* 64MB min RMA */ + W(256), /* 256MB min RMA */ W(0xffffffff), /* full client load */ 0, /* min RMA percentage of total RAM */ 48, /* max log_2(hash table size) */ @@ -1224,14 +1224,6 @@ static void __init prom_init_mem(void) RELOC(alloc_bottom) = PAGE_ALIGN((unsigned long)&RELOC(_end) + 0x4000); - /* Check if we have an initrd after the kernel, if we do move our bottom - * point to after it - */ - if (RELOC(prom_initrd_start)) { - if (RELOC(prom_initrd_end) > RELOC(alloc_bottom)) - RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end)); - } - /* * If prom_memory_limit is set we reduce the upper limits *except* for * alloc_top_high. This must be the real top of RAM so we can put @@ -1269,6 +1261,15 @@ static void __init prom_init_mem(void) RELOC(alloc_top) = RELOC(rmo_top); RELOC(alloc_top_high) = RELOC(ram_top); + /* + * Check if we have an initrd after the kernel but still inside + * the RMO. If we do move our bottom point to after it. + */ + if (RELOC(prom_initrd_start) && + RELOC(prom_initrd_start) < RELOC(rmo_top) && + RELOC(prom_initrd_end) > RELOC(alloc_bottom)) + RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end)); + prom_printf("memory layout at init:\n"); prom_printf(" memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit)); prom_printf(" alloc_bottom : %x\n", RELOC(alloc_bottom)); @@ -2079,7 +2080,7 @@ static void __init prom_check_displays(void) /* Setup a usable color table when the appropriate * method is available. Should update this to set-colors */ clut = RELOC(default_colors); - for (i = 0; i < 32; i++, clut += 3) + for (i = 0; i < 16; i++, clut += 3) if (prom_set_color(ih, i, clut[0], clut[1], clut[2]) != 0) break; @@ -2844,7 +2845,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, RELOC(of_platform) = prom_find_machine_type(); prom_printf("Detected machine type: %x\n", RELOC(of_platform)); -#ifndef CONFIG_RELOCATABLE +#ifndef CONFIG_NONSTATIC_KERNEL /* Bail if this is a kdump kernel. */ if (PHYSICAL_START > 0) prom_panic("Error: You can't boot a kdump kernel from OF!\n"); @@ -2969,9 +2970,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, /* * in case stdin is USB and still active on IBM machines... * Unfortunately quiesce crashes on some powermacs if we have - * closed stdin already (in particular the powerbook 101). + * closed stdin already (in particular the powerbook 101). It + * appears that the OPAL version of OFW doesn't like it either. */ - if (RELOC(of_platform) != PLATFORM_POWERMAC) + if (RELOC(of_platform) != PLATFORM_POWERMAC && + RELOC(of_platform) != PLATFORM_OPAL) prom_close_stdin(); /* @@ -2987,8 +2990,12 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * is common to us and kexec */ hdr = RELOC(dt_header_start); - prom_printf("returning from prom_init\n"); - prom_debug("->dt_header_start=0x%x\n", hdr); + + /* Don't print anything after quiesce under OPAL, it crashes OFW */ + if (RELOC(of_platform) != PLATFORM_OPAL) { + prom_printf("returning from prom_init\n"); + prom_debug("->dt_header_start=0x%x\n", hdr); + } #ifdef CONFIG_PPC32 reloc_got2(-offset); diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S new file mode 100644 index 00000000000..ef46ba6e094 --- /dev/null +++ b/arch/powerpc/kernel/reloc_32.S @@ -0,0 +1,208 @@ +/* + * Code to process dynamic relocations for PPC32. + * + * Copyrights (C) IBM Corporation, 2011. + * Author: Suzuki Poulose <suzuki@in.ibm.com> + * + * - Based on ppc64 code - reloc_64.S + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/ppc_asm.h> + +/* Dynamic section table entry tags */ +DT_RELA = 7 /* Tag for Elf32_Rela section */ +DT_RELASZ = 8 /* Size of the Rela relocs */ +DT_RELAENT = 9 /* Size of one Rela reloc entry */ + +STN_UNDEF = 0 /* Undefined symbol index */ +STB_LOCAL = 0 /* Local binding for the symbol */ + +R_PPC_ADDR16_LO = 4 /* Lower half of (S+A) */ +R_PPC_ADDR16_HI = 5 /* Upper half of (S+A) */ +R_PPC_ADDR16_HA = 6 /* High Adjusted (S+A) */ +R_PPC_RELATIVE = 22 + +/* + * r3 = desired final address + */ + +_GLOBAL(relocate) + + mflr r0 /* Save our LR */ + bl 0f /* Find our current runtime address */ +0: mflr r12 /* Make it accessible */ + mtlr r0 + + lwz r11, (p_dyn - 0b)(r12) + add r11, r11, r12 /* runtime address of .dynamic section */ + lwz r9, (p_rela - 0b)(r12) + add r9, r9, r12 /* runtime address of .rela.dyn section */ + lwz r10, (p_st - 0b)(r12) + add r10, r10, r12 /* runtime address of _stext section */ + lwz r13, (p_sym - 0b)(r12) + add r13, r13, r12 /* runtime address of .dynsym section */ + + /* + * Scan the dynamic section for RELA, RELASZ entries + */ + li r6, 0 + li r7, 0 + li r8, 0 +1: lwz r5, 0(r11) /* ELF_Dyn.d_tag */ + cmpwi r5, 0 /* End of ELF_Dyn[] */ + beq eodyn + cmpwi r5, DT_RELA + bne relasz + lwz r7, 4(r11) /* r7 = rela.link */ + b skip +relasz: + cmpwi r5, DT_RELASZ + bne relaent + lwz r8, 4(r11) /* r8 = Total Rela relocs size */ + b skip +relaent: + cmpwi r5, DT_RELAENT + bne skip + lwz r6, 4(r11) /* r6 = Size of one Rela reloc */ +skip: + addi r11, r11, 8 + b 1b +eodyn: /* End of Dyn Table scan */ + + /* Check if we have found all the entries */ + cmpwi r7, 0 + beq done + cmpwi r8, 0 + beq done + cmpwi r6, 0 + beq done + + + /* + * Work out the current offset from the link time address of .rela + * section. + * cur_offset[r7] = rela.run[r9] - rela.link [r7] + * _stext.link[r12] = _stext.run[r10] - cur_offset[r7] + * final_offset[r3] = _stext.final[r3] - _stext.link[r12] + */ + subf r7, r7, r9 /* cur_offset */ + subf r12, r7, r10 + subf r3, r12, r3 /* final_offset */ + + subf r8, r6, r8 /* relaz -= relaent */ + /* + * Scan through the .rela table and process each entry + * r9 - points to the current .rela table entry + * r13 - points to the symbol table + */ + + /* + * Check if we have a relocation based on symbol + * r5 will hold the value of the symbol. + */ +applyrela: + lwz r4, 4(r9) /* r4 = rela.r_info */ + srwi r5, r4, 8 /* ELF32_R_SYM(r_info) */ + cmpwi r5, STN_UNDEF /* sym == STN_UNDEF ? */ + beq get_type /* value = 0 */ + /* Find the value of the symbol at index(r5) */ + slwi r5, r5, 4 /* r5 = r5 * sizeof(Elf32_Sym) */ + add r12, r13, r5 /* r12 = &__dyn_sym[Index] */ + + /* + * GNU ld has a bug, where dynamic relocs based on + * STB_LOCAL symbols, the value should be assumed + * to be zero. - Alan Modra + */ + /* XXX: Do we need to check if we are using GNU ld ? */ + lbz r5, 12(r12) /* r5 = dyn_sym[Index].st_info */ + extrwi r5, r5, 4, 24 /* r5 = ELF32_ST_BIND(r5) */ + cmpwi r5, STB_LOCAL /* st_value = 0, ld bug */ + beq get_type /* We have r5 = 0 */ + lwz r5, 4(r12) /* r5 = __dyn_sym[Index].st_value */ + +get_type: + /* Load the relocation type to r4 */ + extrwi r4, r4, 8, 24 /* r4 = ELF32_R_TYPE(r_info) = ((char*)r4)[3] */ + + /* R_PPC_RELATIVE */ + cmpwi r4, R_PPC_RELATIVE + bne hi16 + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 /* final addend */ + stwx r0, r4, r7 /* memory[r4+r7]) = (u32)r0 */ + b nxtrela /* continue */ + + /* R_PPC_ADDR16_HI */ +hi16: + cmpwi r4, R_PPC_ADDR16_HI + bne ha16 + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 + add r0, r0, r5 /* r0 = (S+A+Offset) */ + extrwi r0, r0, 16, 0 /* r0 = (r0 >> 16) */ + b store_half + + /* R_PPC_ADDR16_HA */ +ha16: + cmpwi r4, R_PPC_ADDR16_HA + bne lo16 + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 + add r0, r0, r5 /* r0 = (S+A+Offset) */ + extrwi r5, r0, 1, 16 /* Extract bit 16 */ + extrwi r0, r0, 16, 0 /* r0 = (r0 >> 16) */ + add r0, r0, r5 /* Add it to r0 */ + b store_half + + /* R_PPC_ADDR16_LO */ +lo16: + cmpwi r4, R_PPC_ADDR16_LO + bne nxtrela + lwz r4, 0(r9) /* r_offset */ + lwz r0, 8(r9) /* r_addend */ + add r0, r0, r3 + add r0, r0, r5 /* r0 = (S+A+Offset) */ + extrwi r0, r0, 16, 16 /* r0 &= 0xffff */ + /* Fall through to */ + + /* Store half word */ +store_half: + sthx r0, r4, r7 /* memory[r4+r7] = (u16)r0 */ + +nxtrela: + /* + * We have to flush the modified instructions to the + * main storage from the d-cache. And also, invalidate the + * cached instructions in i-cache which has been modified. + * + * We delay the sync / isync operation till the end, since + * we won't be executing the modified instructions until + * we return from here. + */ + dcbst r4,r7 + sync /* Ensure the data is flushed before icbi */ + icbi r4,r7 + cmpwi r8, 0 /* relasz = 0 ? */ + ble done + add r9, r9, r6 /* move to next entry in the .rela table */ + subf r8, r6, r8 /* relasz -= relaent */ + b applyrela + +done: + sync /* Wait for the flush to finish */ + isync /* Discard prefetched instructions */ + blr + +p_dyn: .long __dynamic_start - 0b +p_rela: .long __rela_dyn_start - 0b +p_sym: .long __dynamic_symtab - 0b +p_st: .long _stext - 0b diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index e037c7494fd..4174b4b2324 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -568,6 +568,12 @@ static void rtas_flash_firmware(int reboot_type) } /* + * Just before starting the firmware flash, cancel the event scan work + * to avoid any soft lockup issues. + */ + rtas_cancel_event_scan(); + + /* * NOTE: the "first" block must be under 4GB, so we create * an entry with no data blocks in the reserved buffer in * the kernel data segment. diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 481ef064c8f..1045ff49cc6 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -472,6 +472,13 @@ static void start_event_scan(void) &event_scan_work, event_scan_delay); } +/* Cancel the rtas event scan work */ +void rtas_cancel_event_scan(void) +{ + cancel_delayed_work_sync(&event_scan_work); +} +EXPORT_SYMBOL_GPL(rtas_cancel_event_scan); + static int __init rtas_init(void) { struct proc_dir_entry *entry; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index fb9bb46e7e8..4cb8f1e9d04 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -35,6 +35,8 @@ #include <linux/pci.h> #include <linux/lockdep.h> #include <linux/memblock.h> +#include <linux/hugetlb.h> + #include <asm/io.h> #include <asm/kdump.h> #include <asm/prom.h> @@ -64,6 +66,7 @@ #include <asm/mmu_context.h> #include <asm/code-patching.h> #include <asm/kvm_ppc.h> +#include <asm/hugetlb.h> #include "setup.h" @@ -217,6 +220,13 @@ void __init early_setup(unsigned long dt_ptr) /* Initialize the hash table or TLB handling */ early_init_mmu(); + /* + * Reserve any gigantic pages requested on the command line. + * memblock needs to have been initialized by the time this is + * called since this will reserve memory. + */ + reserve_hugetlb_gpages(); + DBG(" <- early_setup()\n"); } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 6df70907d60..f0abe92f63f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -187,7 +187,8 @@ int smp_request_message_ipi(int virq, int msg) return 1; } #endif - err = request_irq(virq, smp_ipi_action[msg], IRQF_PERCPU, + err = request_irq(virq, smp_ipi_action[msg], + IRQF_PERCPU | IRQF_NO_THREAD, smp_ipi_name[msg], 0); WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n", virq, smp_ipi_name[msg], err); diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index ce035c1905f..6fdf5ffe8c4 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -18,6 +18,7 @@ #include <asm/machdep.h> #include <asm/smp.h> #include <asm/pmc.h> +#include <asm/system.h> #include "cacheinfo.h" @@ -51,6 +52,7 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev, return -EINVAL; per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze; + update_smt_snooze_delay(snooze); return count; } @@ -177,11 +179,13 @@ SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); SYSFS_PMCSETUP(purr, SPRN_PURR); SYSFS_PMCSETUP(spurr, SPRN_SPURR); SYSFS_PMCSETUP(dscr, SPRN_DSCR); +SYSFS_PMCSETUP(pir, SPRN_PIR); static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra); static SYSDEV_ATTR(spurr, 0600, show_spurr, NULL); static SYSDEV_ATTR(dscr, 0600, show_dscr, store_dscr); static SYSDEV_ATTR(purr, 0600, show_purr, store_purr); +static SYSDEV_ATTR(pir, 0400, show_pir, NULL); unsigned long dscr_default = 0; EXPORT_SYMBOL(dscr_default); @@ -392,6 +396,9 @@ static void __cpuinit register_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_DSCR)) sysdev_create_file(s, &attr_dscr); + + if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) + sysdev_create_file(s, &attr_pir); #endif /* CONFIG_PPC64 */ cacheinfo_cpu_online(cpu); @@ -462,6 +469,9 @@ static void unregister_cpu_online(unsigned int cpu) if (cpu_has_feature(CPU_FTR_DSCR)) sysdev_remove_file(s, &attr_dscr); + + if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2)) + sysdev_remove_file(s, &attr_pir); #endif /* CONFIG_PPC64 */ cacheinfo_cpu_offline(cpu); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 522bb1dfc35..567dd7c3ac2 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -86,8 +86,6 @@ static struct clocksource clocksource_rtc = { .rating = 400, .flags = CLOCK_SOURCE_IS_CONTINUOUS, .mask = CLOCKSOURCE_MASK(64), - .shift = 22, - .mult = 0, /* To be filled in */ .read = rtc_read, }; @@ -97,8 +95,6 @@ static struct clocksource clocksource_timebase = { .rating = 400, .flags = CLOCK_SOURCE_IS_CONTINUOUS, .mask = CLOCKSOURCE_MASK(64), - .shift = 22, - .mult = 0, /* To be filled in */ .read = timebase_read, }; @@ -110,22 +106,16 @@ static void decrementer_set_mode(enum clock_event_mode mode, struct clock_event_device *dev); static struct clock_event_device decrementer_clockevent = { - .name = "decrementer", - .rating = 200, - .shift = 0, /* To be filled in */ - .mult = 0, /* To be filled in */ - .irq = 0, - .set_next_event = decrementer_set_next_event, - .set_mode = decrementer_set_mode, - .features = CLOCK_EVT_FEAT_ONESHOT, + .name = "decrementer", + .rating = 200, + .irq = 0, + .set_next_event = decrementer_set_next_event, + .set_mode = decrementer_set_mode, + .features = CLOCK_EVT_FEAT_ONESHOT, }; -struct decrementer_clock { - struct clock_event_device event; - u64 next_tb; -}; - -static DEFINE_PER_CPU(struct decrementer_clock, decrementers); +DEFINE_PER_CPU(u64, decrementers_next_tb); +static DEFINE_PER_CPU(struct clock_event_device, decrementers); #ifdef CONFIG_PPC_ISERIES static unsigned long __initdata iSeries_recal_titan; @@ -168,13 +158,13 @@ EXPORT_SYMBOL_GPL(ppc_tb_freq); #ifdef CONFIG_VIRT_CPU_ACCOUNTING /* * Factors for converting from cputime_t (timebase ticks) to - * jiffies, milliseconds, seconds, and clock_t (1/USER_HZ seconds). + * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds). * These are all stored as 0.64 fixed-point binary fractions. */ u64 __cputime_jiffies_factor; EXPORT_SYMBOL(__cputime_jiffies_factor); -u64 __cputime_msec_factor; -EXPORT_SYMBOL(__cputime_msec_factor); +u64 __cputime_usec_factor; +EXPORT_SYMBOL(__cputime_usec_factor); u64 __cputime_sec_factor; EXPORT_SYMBOL(__cputime_sec_factor); u64 __cputime_clockt_factor; @@ -192,8 +182,8 @@ static void calc_cputime_factors(void) div128_by_32(HZ, 0, tb_ticks_per_sec, &res); __cputime_jiffies_factor = res.result_low; - div128_by_32(1000, 0, tb_ticks_per_sec, &res); - __cputime_msec_factor = res.result_low; + div128_by_32(1000000, 0, tb_ticks_per_sec, &res); + __cputime_usec_factor = res.result_low; div128_by_32(1, 0, tb_ticks_per_sec, &res); __cputime_sec_factor = res.result_low; div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res); @@ -441,7 +431,7 @@ EXPORT_SYMBOL(profile_pc); /* * This function recalibrates the timebase based on the 49-bit time-of-day * value in the Titan chip. The Titan is much more accurate than the value - * returned by the service processor for the timebase frequency. + * returned by the service processor for the timebase frequency. */ static int __init iSeries_tb_recal(void) @@ -576,9 +566,8 @@ void arch_irq_work_raise(void) void timer_interrupt(struct pt_regs * regs) { struct pt_regs *old_regs; - struct decrementer_clock *decrementer = &__get_cpu_var(decrementers); - struct clock_event_device *evt = &decrementer->event; - u64 now; + u64 *next_tb = &__get_cpu_var(decrementers_next_tb); + struct clock_event_device *evt = &__get_cpu_var(decrementers); /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. @@ -613,16 +602,9 @@ void timer_interrupt(struct pt_regs * regs) get_lppaca()->int_dword.fields.decr_int = 0; #endif - now = get_tb_or_rtc(); - if (now >= decrementer->next_tb) { - decrementer->next_tb = ~(u64)0; - if (evt->event_handler) - evt->event_handler(evt); - } else { - now = decrementer->next_tb - now; - if (now <= DECREMENTER_MAX) - set_dec((int)now); - } + *next_tb = ~(u64)0; + if (evt->event_handler) + evt->event_handler(evt); #ifdef CONFIG_PPC_ISERIES if (firmware_has_feature(FW_FEATURE_ISERIES) && hvlpevent_is_pending()) @@ -650,9 +632,9 @@ static void generic_suspend_disable_irqs(void) * with suspending. */ - set_dec(0x7fffffff); + set_dec(DECREMENTER_MAX); local_irq_disable(); - set_dec(0x7fffffff); + set_dec(DECREMENTER_MAX); } static void generic_suspend_enable_irqs(void) @@ -824,9 +806,8 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, ++vdso_data->tb_update_count; smp_mb(); - /* XXX this assumes clock->shift == 22 */ - /* 4611686018 ~= 2^(20+64-22) / 1e9 */ - new_tb_to_xs = (u64) mult * 4611686018ULL; + /* 19342813113834067 ~= 2^(20+64) / 1e9 */ + new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift); new_stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC; do_div(new_stamp_xsec, 1000000000); new_stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC; @@ -877,9 +858,7 @@ static void __init clocksource_init(void) else clock = &clocksource_timebase; - clock->mult = clocksource_hz2mult(tb_ticks_per_sec, clock->shift); - - if (clocksource_register(clock)) { + if (clocksource_register_hz(clock, tb_ticks_per_sec)) { printk(KERN_ERR "clocksource: %s is already registered\n", clock->name); return; @@ -892,7 +871,7 @@ static void __init clocksource_init(void) static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { - __get_cpu_var(decrementers).next_tb = get_tb_or_rtc() + evt; + __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt; set_dec(evt); return 0; } @@ -904,34 +883,9 @@ static void decrementer_set_mode(enum clock_event_mode mode, decrementer_set_next_event(DECREMENTER_MAX, dev); } -static inline uint64_t div_sc64(unsigned long ticks, unsigned long nsec, - int shift) -{ - uint64_t tmp = ((uint64_t)ticks) << shift; - - do_div(tmp, nsec); - return tmp; -} - -static void __init setup_clockevent_multiplier(unsigned long hz) -{ - u64 mult, shift = 32; - - while (1) { - mult = div_sc64(hz, NSEC_PER_SEC, shift); - if (mult && (mult >> 32UL) == 0UL) - break; - - shift--; - } - - decrementer_clockevent.shift = shift; - decrementer_clockevent.mult = mult; -} - static void register_decrementer_clockevent(int cpu) { - struct clock_event_device *dec = &per_cpu(decrementers, cpu).event; + struct clock_event_device *dec = &per_cpu(decrementers, cpu); *dec = decrementer_clockevent; dec->cpumask = cpumask_of(cpu); @@ -946,7 +900,8 @@ static void __init init_decrementer_clockevent(void) { int cpu = smp_processor_id(); - setup_clockevent_multiplier(ppc_tb_freq); + clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4); + decrementer_clockevent.max_delta_ns = clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent); decrementer_clockevent.min_delta_ns = @@ -1014,10 +969,10 @@ void __init time_init(void) boot_tb = get_tb_or_rtc(); /* If platform provided a timezone (pmac), we correct the time */ - if (timezone_offset) { + if (timezone_offset) { sys_tz.tz_minuteswest = -timezone_offset / 60; sys_tz.tz_dsttime = 0; - } + } vdso_data->tb_update_count = 0; vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5459d148a0f..c091527efd8 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -98,18 +98,14 @@ static void pmac_backlight_unblank(void) static inline void pmac_backlight_unblank(void) { } #endif -int die(const char *str, struct pt_regs *regs, long err) +static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; +static int die_counter; + +static unsigned __kprobes long oops_begin(struct pt_regs *regs) { - static struct { - raw_spinlock_t lock; - u32 lock_owner; - int lock_owner_depth; - } die = { - .lock = __RAW_SPIN_LOCK_UNLOCKED(die.lock), - .lock_owner = -1, - .lock_owner_depth = 0 - }; - static int die_counter; + int cpu; unsigned long flags; if (debugger(regs)) @@ -117,66 +113,109 @@ int die(const char *str, struct pt_regs *regs, long err) oops_enter(); - if (die.lock_owner != raw_smp_processor_id()) { - console_verbose(); - raw_spin_lock_irqsave(&die.lock, flags); - die.lock_owner = smp_processor_id(); - die.lock_owner_depth = 0; - bust_spinlocks(1); - if (machine_is(powermac)) - pmac_backlight_unblank(); - } else { - local_save_flags(flags); + /* racy, but better than risking deadlock. */ + raw_local_irq_save(flags); + cpu = smp_processor_id(); + if (!arch_spin_trylock(&die_lock)) { + if (cpu == die_owner) + /* nested oops. should stop eventually */; + else + arch_spin_lock(&die_lock); } + die_nest_count++; + die_owner = cpu; + console_verbose(); + bust_spinlocks(1); + if (machine_is(powermac)) + pmac_backlight_unblank(); + return flags; +} - if (++die.lock_owner_depth < 3) { - printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP NR_CPUS=%d ", NR_CPUS); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC "); -#endif -#ifdef CONFIG_NUMA - printk("NUMA "); -#endif - printk("%s\n", ppc_md.name ? ppc_md.name : ""); +static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, + int signr) +{ + bust_spinlocks(0); + die_owner = -1; + add_taint(TAINT_DIE); + die_nest_count--; + oops_exit(); + printk("\n"); + if (!die_nest_count) + /* Nest count reaches zero, release the lock. */ + arch_spin_unlock(&die_lock); + raw_local_irq_restore(flags); - if (notify_die(DIE_OOPS, str, regs, err, 255, - SIGSEGV) == NOTIFY_STOP) - return 1; + /* + * A system reset (0x100) is a request to dump, so we always send + * it through the crashdump code. + */ + if (kexec_should_crash(current) || (TRAP(regs) == 0x100)) { + crash_kexec(regs); - print_modules(); - show_regs(regs); - } else { - printk("Recursive die() failure, output suppressed\n"); + /* + * We aren't the primary crash CPU. We need to send it + * to a holding pattern to avoid it ending up in the panic + * code. + */ + crash_kexec_secondary(regs); } - bust_spinlocks(0); - die.lock_owner = -1; - add_taint(TAINT_DIE); - raw_spin_unlock_irqrestore(&die.lock, flags); + if (!signr) + return; - if (kexec_should_crash(current) || - kexec_sr_activated(smp_processor_id())) - crash_kexec(regs); - crash_kexec_secondary(regs); + /* + * While our oops output is serialised by a spinlock, output + * from panic() called below can race and corrupt it. If we + * know we are going to panic, delay for 1 second so we have a + * chance to get clean backtraces from all CPUs that are oopsing. + */ + if (in_interrupt() || panic_on_oops || !current->pid || + is_global_init(current)) { + mdelay(MSEC_PER_SEC); + } if (in_interrupt()) panic("Fatal exception in interrupt"); - if (panic_on_oops) panic("Fatal exception"); + do_exit(signr); +} - oops_exit(); - do_exit(err); +static int __kprobes __die(const char *str, struct pt_regs *regs, long err) +{ + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); +#endif +#ifdef CONFIG_SMP + printk("SMP NR_CPUS=%d ", NR_CPUS); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC "); +#endif +#ifdef CONFIG_NUMA + printk("NUMA "); +#endif + printk("%s\n", ppc_md.name ? ppc_md.name : ""); + + if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP) + return 1; + + print_modules(); + show_regs(regs); return 0; } +void die(const char *str, struct pt_regs *regs, long err) +{ + unsigned long flags = oops_begin(regs); + + if (__die(str, regs, err)) + err = 0; + oops_end(flags, regs, err); +} + void user_single_step_siginfo(struct task_struct *tsk, struct pt_regs *regs, siginfo_t *info) { @@ -195,10 +234,11 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) "at %016lx nip %016lx lr %016lx code %x\n"; if (!user_mode(regs)) { - if (die("Exception in kernel mode", regs, signr)) - return; - } else if (show_unhandled_signals && - unhandled_signal(current, signr)) { + die("Exception in kernel mode", regs, signr); + return; + } + + if (show_unhandled_signals && unhandled_signal(current, signr)) { printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, signr, addr, regs->nip, regs->link, code); @@ -220,25 +260,8 @@ void system_reset_exception(struct pt_regs *regs) return; } -#ifdef CONFIG_KEXEC - cpumask_set_cpu(smp_processor_id(), &cpus_in_sr); -#endif - die("System Reset", regs, SIGABRT); - /* - * Some CPUs when released from the debugger will execute this path. - * These CPUs entered the debugger via a soft-reset. If the CPU was - * hung before entering the debugger it will return to the hung - * state when exiting this function. This causes a problem in - * kdump since the hung CPU(s) will not respond to the IPI sent - * from kdump. To prevent the problem we call crash_kexec_secondary() - * here. If a kdump had not been initiated or we exit the debugger - * with the "exit and recover" command (x) crash_kexec_secondary() - * will return after 5ms and the CPU returns to its previous state. - */ - crash_kexec_secondary(regs); - /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) panic("Unrecoverable System Reset"); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 920276c0f6a..710a54005df 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -170,7 +170,13 @@ SECTIONS } #ifdef CONFIG_RELOCATABLE . = ALIGN(8); - .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) { *(.dynsym) } + .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) + { +#ifdef CONFIG_RELOCATABLE_PPC32 + __dynamic_symtab = .; +#endif + *(.dynsym) + } .dynstr : AT(ADDR(.dynstr) - LOAD_OFFSET) { *(.dynstr) } .dynamic : AT(ADDR(.dynamic) - LOAD_OFFSET) { |