diff options
Diffstat (limited to 'arch/powerpc/kernel')
86 files changed, 2806 insertions, 1560 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 36c30f31ec9..9aab3631257 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -29,18 +29,23 @@ endif obj-y := cputable.o ptrace.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o \ init_task.o process.o systbl.o idle.o \ - signal.o sysfs.o cacheinfo.o -obj-y += vdso32/ + signal.o sysfs.o cacheinfo.o time.o \ + prom.o traps.o setup-common.o \ + udbg.o misc.o io.o dma.o \ + misc_$(CONFIG_WORD_SIZE).o vdso32/ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ paca.o nvram_64.o firmware.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o +obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power7.o obj64-$(CONFIG_RELOCATABLE) += reloc_64.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o +obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o +obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o obj-$(CONFIG_PPC_CLOCK) += clock.o procfs-y := proc_powerpc.o @@ -73,16 +78,12 @@ obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o dbell.o obj-$(CONFIG_PPC_BOOK3E_64) += dbell.o extra-y := head_$(CONFIG_WORD_SIZE).o -extra-$(CONFIG_PPC_BOOK3E_32) := head_new_booke.o extra-$(CONFIG_40x) := head_40x.o extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o extra-y += vmlinux.lds -obj-y += time.o prom.o traps.o setup-common.o \ - udbg.o misc.o io.o dma.o \ - misc_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC32) += entry_32.o setup_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o obj-$(CONFIG_KGDB) += kgdb.o @@ -104,6 +105,8 @@ obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o +obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o + obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index bd0df2e6aa8..6887661ac07 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -74,6 +74,7 @@ int main(void) DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); DEFINE(SIGSEGV, SIGSEGV); DEFINE(NMI_MASK, NMI_MASK); + DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr)); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); #endif /* CONFIG_PPC64 */ @@ -209,7 +210,6 @@ int main(void) DEFINE(RTASENTRY, offsetof(struct rtas_t, entry)); /* Interrupt register frame */ - DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD); DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 625942ae558..60b3e377b1e 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -99,7 +99,7 @@ void __init btext_prepare_BAT(void) /* This function can be used to enable the early boot text when doing * OF booting or within bootx init. It must be followed by a btext_unmap() - * call before the logical address becomes unuseable + * call before the logical address becomes unusable */ void __init btext_setup_display(int width, int height, int depth, int pitch, unsigned long address) diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index 55cba4a8a95..f8cd9fba4d3 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -18,7 +18,7 @@ #include <asm/mmu.h> _GLOBAL(__setup_cpu_603) - mflr r4 + mflr r5 BEGIN_MMU_FTR_SECTION li r10,0 mtspr SPRN_SPRG_603_LRU,r10 /* init SW LRU tracking */ @@ -27,60 +27,60 @@ BEGIN_FTR_SECTION bl __init_fpu_registers END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE) bl setup_common_caches - mtlr r4 + mtlr r5 blr _GLOBAL(__setup_cpu_604) - mflr r4 + mflr r5 bl setup_common_caches bl setup_604_hid0 - mtlr r4 + mtlr r5 blr _GLOBAL(__setup_cpu_750) - mflr r4 + mflr r5 bl __init_fpu_registers bl setup_common_caches bl setup_750_7400_hid0 - mtlr r4 + mtlr r5 blr _GLOBAL(__setup_cpu_750cx) - mflr r4 + mflr r5 bl __init_fpu_registers bl setup_common_caches bl setup_750_7400_hid0 bl setup_750cx - mtlr r4 + mtlr r5 blr _GLOBAL(__setup_cpu_750fx) - mflr r4 + mflr r5 bl __init_fpu_registers bl setup_common_caches bl setup_750_7400_hid0 bl setup_750fx - mtlr r4 + mtlr r5 blr _GLOBAL(__setup_cpu_7400) - mflr r4 + mflr r5 bl __init_fpu_registers bl setup_7400_workarounds bl setup_common_caches bl setup_750_7400_hid0 - mtlr r4 + mtlr r5 blr _GLOBAL(__setup_cpu_7410) - mflr r4 + mflr r5 bl __init_fpu_registers bl setup_7410_workarounds bl setup_common_caches bl setup_750_7400_hid0 li r3,0 mtspr SPRN_L2CR2,r3 - mtlr r4 + mtlr r5 blr _GLOBAL(__setup_cpu_745x) - mflr r4 + mflr r5 bl setup_common_caches bl setup_745x_specifics - mtlr r4 + mtlr r5 blr /* Enable caches for 603's, 604, 750 & 7400 */ @@ -194,10 +194,10 @@ setup_750cx: cror 4*cr0+eq,4*cr0+eq,4*cr1+eq cror 4*cr0+eq,4*cr0+eq,4*cr2+eq bnelr - lwz r6,CPU_SPEC_FEATURES(r5) + lwz r6,CPU_SPEC_FEATURES(r4) li r7,CPU_FTR_CAN_NAP andc r6,r6,r7 - stw r6,CPU_SPEC_FEATURES(r5) + stw r6,CPU_SPEC_FEATURES(r4) blr /* 750fx specific @@ -225,12 +225,12 @@ BEGIN_FTR_SECTION andis. r11,r11,L3CR_L3E@h beq 1f END_FTR_SECTION_IFSET(CPU_FTR_L3CR) - lwz r6,CPU_SPEC_FEATURES(r5) + lwz r6,CPU_SPEC_FEATURES(r4) andi. r0,r6,CPU_FTR_L3_DISABLE_NAP beq 1f li r7,CPU_FTR_CAN_NAP andc r6,r6,r7 - stw r6,CPU_SPEC_FEATURES(r5) + stw r6,CPU_SPEC_FEATURES(r4) 1: mfspr r11,SPRN_HID0 diff --git a/arch/powerpc/kernel/cpu_setup_a2.S b/arch/powerpc/kernel/cpu_setup_a2.S new file mode 100644 index 00000000000..7f818feaa7a --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_a2.S @@ -0,0 +1,114 @@ +/* + * A2 specific assembly support code + * + * Copyright 2009 Ben Herrenschmidt, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/asm-offsets.h> +#include <asm/ppc_asm.h> +#include <asm/ppc-opcode.h> +#include <asm/processor.h> +#include <asm/reg_a2.h> +#include <asm/reg.h> +#include <asm/thread_info.h> + +/* + * Disable thdid and class fields in ERATs to bump PID to full 14 bits capacity. + * This also prevents external LPID accesses but that isn't a problem when not a + * guest. Under PV, this setting will be ignored and MMUCR will return the right + * number of PID bits we can use. + */ +#define MMUCR1_EXTEND_PID \ + (MMUCR1_ICTID | MMUCR1_ITTID | MMUCR1_DCTID | \ + MMUCR1_DTTID | MMUCR1_DCCD) + +/* + * Use extended PIDs if enabled. + * Don't clear the ERATs on context sync events and enable I & D LRU. + * Enable ERAT back invalidate when tlbwe overwrites an entry. + */ +#define INITIAL_MMUCR1 \ + (MMUCR1_EXTEND_PID | MMUCR1_CSINV_NEVER | MMUCR1_IRRE | \ + MMUCR1_DRRE | MMUCR1_TLBWE_BINV) + +_GLOBAL(__setup_cpu_a2) + /* Some of these are actually thread local and some are + * core local but doing it always won't hurt + */ + +#ifdef CONFIG_PPC_WSP_COPRO + /* Make sure ACOP starts out as zero */ + li r3,0 + mtspr SPRN_ACOP,r3 + + /* Enable icswx instruction */ + mfspr r3,SPRN_A2_CCR2 + ori r3,r3,A2_CCR2_ENABLE_ICSWX + mtspr SPRN_A2_CCR2,r3 + + /* Unmask all CTs in HACOP */ + li r3,-1 + mtspr SPRN_HACOP,r3 +#endif /* CONFIG_PPC_WSP_COPRO */ + + /* Enable doorbell */ + mfspr r3,SPRN_A2_CCR2 + oris r3,r3,A2_CCR2_ENABLE_PC@h + mtspr SPRN_A2_CCR2,r3 + isync + + /* Setup CCR0 to disable power saving for now as it's busted + * in the current implementations. Setup CCR1 to wake on + * interrupts normally (we write the default value but who + * knows what FW may have clobbered...) + */ + li r3,0 + mtspr SPRN_A2_CCR0, r3 + LOAD_REG_IMMEDIATE(r3,0x0f0f0f0f) + mtspr SPRN_A2_CCR1, r3 + + /* Initialise MMUCR1 */ + lis r3,INITIAL_MMUCR1@h + ori r3,r3,INITIAL_MMUCR1@l + mtspr SPRN_MMUCR1,r3 + + /* Set MMUCR2 to enable 4K, 64K, 1M, 16M and 1G pages */ + LOAD_REG_IMMEDIATE(r3, 0x000a7531) + mtspr SPRN_MMUCR2,r3 + + /* Set MMUCR3 to write all thids bit to the TLB */ + LOAD_REG_IMMEDIATE(r3, 0x0000000f) + mtspr SPRN_MMUCR3,r3 + + /* Don't do ERAT stuff if running guest mode */ + mfmsr r3 + andis. r0,r3,MSR_GS@h + bne 1f + + /* Now set the I-ERAT watermark to 15 */ + lis r4,(MMUCR0_TLBSEL_I|MMUCR0_ECL)@h + mtspr SPRN_MMUCR0, r4 + li r4,A2_IERAT_SIZE-1 + PPC_ERATWE(r4,r4,3) + + /* Now set the D-ERAT watermark to 31 */ + lis r4,(MMUCR0_TLBSEL_D|MMUCR0_ECL)@h + mtspr SPRN_MMUCR0, r4 + li r4,A2_DERAT_SIZE-1 + PPC_ERATWE(r4,r4,3) + + /* And invalidate the beast just in case. That won't get rid of + * a bolted entry though it will be in LRU and so will go away eventually + * but let's not bother for now + */ + PPC_ERATILX(0,0,0) +1: + blr + +_GLOBAL(__restore_cpu_a2) + b __setup_cpu_a2 diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 894e64fa481..8053db02b85 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -64,6 +64,12 @@ _GLOBAL(__setup_cpu_e500v2) bl __e500_icache_setup bl __e500_dcache_setup bl __setup_e500_ivors +#ifdef CONFIG_FSL_RIO + /* Ensure that RFXE is set */ + mfspr r3,SPRN_HID1 + oris r3,r3,HID1_RFXE@h + mtspr SPRN_HID1,r3 +#endif mtlr r4 blr _GLOBAL(__setup_cpu_e500mc) @@ -82,6 +88,9 @@ _GLOBAL(__setup_cpu_e5500) bl __e500_dcache_setup #ifdef CONFIG_PPC_BOOK3E_64 bl .__setup_base_ivors + bl .setup_perfmon_ivor + bl .setup_doorbell_ivors + bl .setup_ehv_ivors #else bl __setup_e500mc_ivors #endif diff --git a/arch/powerpc/kernel/cpu_setup_power7.S b/arch/powerpc/kernel/cpu_setup_power7.S new file mode 100644 index 00000000000..4f9a93fcfe0 --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_power7.S @@ -0,0 +1,91 @@ +/* + * This file contains low level CPU setup functions. + * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/cache.h> + +/* Entry: r3 = crap, r4 = ptr to cputable entry + * + * Note that we can be called twice for pseudo-PVRs + */ +_GLOBAL(__setup_cpu_power7) + mflr r11 + bl __init_hvmode_206 + mtlr r11 + beqlr + li r0,0 + mtspr SPRN_LPID,r0 + bl __init_LPCR + bl __init_TLB + mtlr r11 + blr + +_GLOBAL(__restore_cpu_power7) + mflr r11 + mfmsr r3 + rldicl. r0,r3,4,63 + beqlr + li r0,0 + mtspr SPRN_LPID,r0 + bl __init_LPCR + bl __init_TLB + mtlr r11 + blr + +__init_hvmode_206: + /* Disable CPU_FTR_HVMODE_206 and exit if MSR:HV is not set */ + mfmsr r3 + rldicl. r0,r3,4,63 + bnelr + ld r5,CPU_SPEC_FEATURES(r4) + LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE_206) + xor r5,r5,r6 + std r5,CPU_SPEC_FEATURES(r4) + blr + +__init_LPCR: + /* Setup a sane LPCR: + * + * LPES = 0b01 (HSRR0/1 used for 0x500) + * PECE = 0b111 + * DPFD = 4 + * + * Other bits untouched for now + */ + mfspr r3,SPRN_LPCR + ori r3,r3,(LPCR_LPES0|LPCR_LPES1) + xori r3,r3, LPCR_LPES0 + ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) + li r5,7 + sldi r5,r5,LPCR_DPFD_SH + andc r3,r3,r5 + li r5,4 + sldi r5,r5,LPCR_DPFD_SH + or r3,r3,r5 + mtspr SPRN_LPCR,r3 + isync + blr + +__init_TLB: + /* Clear the TLB */ + li r6,128 + mtctr r6 + li r7,0xc00 /* IS field = 0b11 */ + ptesync +2: tlbiel r7 + addi r7,r7,0x1000 + bdnz 2b + ptesync +1: blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 96a908f1cd8..34d2722b945 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -62,10 +62,12 @@ extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_a2(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_pa6t(void); extern void __restore_cpu_ppc970(void); extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power7(void); +extern void __restore_cpu_a2(void); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -116,7 +118,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power3", .oprofile_type = PPC_OPROFILE_RS64, - .machine_check = machine_check_generic, .platform = "power3", }, { /* Power3+ */ @@ -132,7 +133,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power3", .oprofile_type = PPC_OPROFILE_RS64, - .machine_check = machine_check_generic, .platform = "power3", }, { /* Northstar */ @@ -148,7 +148,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/rs64", .oprofile_type = PPC_OPROFILE_RS64, - .machine_check = machine_check_generic, .platform = "rs64", }, { /* Pulsar */ @@ -164,7 +163,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/rs64", .oprofile_type = PPC_OPROFILE_RS64, - .machine_check = machine_check_generic, .platform = "rs64", }, { /* I-star */ @@ -180,7 +178,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/rs64", .oprofile_type = PPC_OPROFILE_RS64, - .machine_check = machine_check_generic, .platform = "rs64", }, { /* S-star */ @@ -196,7 +193,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/rs64", .oprofile_type = PPC_OPROFILE_RS64, - .machine_check = machine_check_generic, .platform = "rs64", }, { /* Power4 */ @@ -205,14 +201,13 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4 (gp)", .cpu_features = CPU_FTRS_POWER4, .cpu_user_features = COMMON_USER_POWER4, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER4, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power4", .oprofile_type = PPC_OPROFILE_POWER4, - .machine_check = machine_check_generic, .platform = "power4", }, { /* Power4+ */ @@ -221,14 +216,13 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4+ (gq)", .cpu_features = CPU_FTRS_POWER4, .cpu_user_features = COMMON_USER_POWER4, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER4, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power4", .oprofile_type = PPC_OPROFILE_POWER4, - .machine_check = machine_check_generic, .platform = "power4", }, { /* PPC970 */ @@ -238,7 +232,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -247,7 +241,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_restore = __restore_cpu_ppc970, .oprofile_cpu_type = "ppc64/970", .oprofile_type = PPC_OPROFILE_POWER4, - .machine_check = machine_check_generic, .platform = "ppc970", }, { /* PPC970FX */ @@ -257,7 +250,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -266,7 +259,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_restore = __restore_cpu_ppc970, .oprofile_cpu_type = "ppc64/970", .oprofile_type = PPC_OPROFILE_POWER4, - .machine_check = machine_check_generic, .platform = "ppc970", }, { /* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */ @@ -285,7 +277,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_restore = __restore_cpu_ppc970, .oprofile_cpu_type = "ppc64/970MP", .oprofile_type = PPC_OPROFILE_POWER4, - .machine_check = machine_check_generic, .platform = "ppc970", }, { /* PPC970MP */ @@ -295,7 +286,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -304,7 +295,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_restore = __restore_cpu_ppc970, .oprofile_cpu_type = "ppc64/970MP", .oprofile_type = PPC_OPROFILE_POWER4, - .machine_check = machine_check_generic, .platform = "ppc970", }, { /* PPC970GX */ @@ -314,7 +304,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_PPC970, .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PPC970, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -322,7 +312,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_ppc970, .oprofile_cpu_type = "ppc64/970", .oprofile_type = PPC_OPROFILE_POWER4, - .machine_check = machine_check_generic, .platform = "ppc970", }, { /* Power5 GR */ @@ -331,7 +320,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER5 (gr)", .cpu_features = CPU_FTRS_POWER5, .cpu_user_features = COMMON_USER_POWER5, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -343,7 +332,6 @@ static struct cpu_spec __initdata cpu_specs[] = { */ .oprofile_mmcra_sihv = MMCRA_SIHV, .oprofile_mmcra_sipr = MMCRA_SIPR, - .machine_check = machine_check_generic, .platform = "power5", }, { /* Power5++ */ @@ -352,7 +340,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER5+ (gs)", .cpu_features = CPU_FTRS_POWER5, .cpu_user_features = COMMON_USER_POWER5_PLUS, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -360,7 +348,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_POWER4, .oprofile_mmcra_sihv = MMCRA_SIHV, .oprofile_mmcra_sipr = MMCRA_SIPR, - .machine_check = machine_check_generic, .platform = "power5+", }, { /* Power5 GS */ @@ -369,7 +356,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER5+ (gs)", .cpu_features = CPU_FTRS_POWER5, .cpu_user_features = COMMON_USER_POWER5_PLUS, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -378,7 +365,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_type = PPC_OPROFILE_POWER4, .oprofile_mmcra_sihv = MMCRA_SIHV, .oprofile_mmcra_sipr = MMCRA_SIPR, - .machine_check = machine_check_generic, .platform = "power5+", }, { /* POWER6 in P5+ mode; 2.04-compliant processor */ @@ -387,10 +373,9 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER5+", .cpu_features = CPU_FTRS_POWER5, .cpu_user_features = COMMON_USER_POWER5_PLUS, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER5, .icache_bsize = 128, .dcache_bsize = 128, - .machine_check = machine_check_generic, .oprofile_cpu_type = "ppc64/ibm-compat-v1", .oprofile_type = PPC_OPROFILE_POWER4, .platform = "power5+", @@ -402,7 +387,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_POWER6, .cpu_user_features = COMMON_USER_POWER6 | PPC_FEATURE_POWER6_EXT, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER6, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, @@ -413,7 +398,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_mmcra_sipr = POWER6_MMCRA_SIPR, .oprofile_mmcra_clear = POWER6_MMCRA_THRM | POWER6_MMCRA_OTHER, - .machine_check = machine_check_generic, .platform = "power6x", }, { /* 2.05-compliant processor, i.e. Power6 "architected" mode */ @@ -422,10 +406,9 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER6 (architected)", .cpu_features = CPU_FTRS_POWER6, .cpu_user_features = COMMON_USER_POWER6, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_POWER6, .icache_bsize = 128, .dcache_bsize = 128, - .machine_check = machine_check_generic, .oprofile_cpu_type = "ppc64/ibm-compat-v1", .oprofile_type = PPC_OPROFILE_POWER4, .platform = "power6", @@ -436,13 +419,13 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER7 (architected)", .cpu_features = CPU_FTRS_POWER7, .cpu_user_features = COMMON_USER_POWER7, - .mmu_features = MMU_FTR_HPTE_TABLE | - MMU_FTR_TLBIE_206, + .mmu_features = MMU_FTRS_POWER7, .icache_bsize = 128, .dcache_bsize = 128, - .machine_check = machine_check_generic, .oprofile_type = PPC_OPROFILE_POWER4, .oprofile_cpu_type = "ppc64/ibm-compat-v1", + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, .platform = "power7", }, { /* Power7 */ @@ -451,21 +434,33 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER7 (raw)", .cpu_features = CPU_FTRS_POWER7, .cpu_user_features = COMMON_USER_POWER7, - .mmu_features = MMU_FTR_HPTE_TABLE | - MMU_FTR_TLBIE_206, + .mmu_features = MMU_FTRS_POWER7, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power7", + .oprofile_type = PPC_OPROFILE_POWER4, .cpu_setup = __setup_cpu_power7, .cpu_restore = __restore_cpu_power7, + .platform = "power7", + }, + { /* Power7+ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004A0000, + .cpu_name = "POWER7+ (raw)", + .cpu_features = CPU_FTRS_POWER7, + .cpu_user_features = COMMON_USER_POWER7, + .mmu_features = MMU_FTRS_POWER7, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/power7", .oprofile_type = PPC_OPROFILE_POWER4, - .oprofile_mmcra_sihv = POWER6_MMCRA_SIHV, - .oprofile_mmcra_sipr = POWER6_MMCRA_SIPR, - .oprofile_mmcra_clear = POWER6_MMCRA_THRM | - POWER6_MMCRA_OTHER, - .platform = "power7", + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, + .platform = "power7+", }, { /* Cell Broadband Engine */ .pvr_mask = 0xffff0000, @@ -475,14 +470,13 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_user_features = COMMON_USER_PPC64 | PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_SMT, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_CELL, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 4, .pmc_type = PPC_PMC_IBM, .oprofile_cpu_type = "ppc64/cell-be", .oprofile_type = PPC_OPROFILE_CELL, - .machine_check = machine_check_generic, .platform = "ppc-cell-be", }, { /* PA Semi PA6T */ @@ -491,7 +485,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "PA6T", .cpu_features = CPU_FTRS_PA6T, .cpu_user_features = COMMON_USER_PA6T, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_PA6T, .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 6, @@ -500,7 +494,6 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_restore = __restore_cpu_pa6t, .oprofile_cpu_type = "ppc64/pa6t", .oprofile_type = PPC_OPROFILE_PA6T, - .machine_check = machine_check_generic, .platform = "pa6t", }, { /* default match */ @@ -509,12 +502,11 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4 (compatible)", .cpu_features = CPU_FTRS_COMPATIBLE, .cpu_user_features = COMMON_USER_PPC64, - .mmu_features = MMU_FTR_HPTE_TABLE, + .mmu_features = MMU_FTRS_DEFAULT_HPTE_ARCH_V2, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .machine_check = machine_check_generic, .platform = "power4", } #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -1824,11 +1816,11 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, - { /* 476 core */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x11a50000, + { /* 476 DD2 core */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x11a52080, .cpu_name = "476", - .cpu_features = CPU_FTRS_47X, + .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2, .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, .mmu_features = MMU_FTR_TYPE_47x | @@ -1852,6 +1844,20 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_47x, .platform = "ppc470", }, + { /* 476 others */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x11a50000, + .cpu_name = "476", + .cpu_features = CPU_FTRS_47X, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | + MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, @@ -1972,7 +1978,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .pvr_mask = 0xffff0000, .pvr_value = 0x80240000, .cpu_name = "e5500", - .cpu_features = CPU_FTRS_E500MC, + .cpu_features = CPU_FTRS_E5500, .cpu_user_features = COMMON_USER_BOOKE, .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX, @@ -2004,7 +2010,22 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC32 */ #endif /* CONFIG_E500 */ -#ifdef CONFIG_PPC_BOOK3E_64 +#ifdef CONFIG_PPC_A2 + { /* Standard A2 (>= DD2) + FPU core */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00480000, + .cpu_name = "A2 (>= DD2)", + .cpu_features = CPU_FTRS_A2, + .cpu_user_features = COMMON_USER_PPC64, + .mmu_features = MMU_FTRS_A2, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 0, + .cpu_setup = __setup_cpu_a2, + .cpu_restore = __restore_cpu_a2, + .machine_check = machine_check_generic, + .platform = "ppca2", + }, { /* This is a default entry to get going, to be replaced by * a real one at some stage */ @@ -2025,7 +2046,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "power6", }, -#endif +#endif /* CONFIG_PPC_A2 */ }; static struct cpu_spec the_cpu_spec; @@ -2089,8 +2110,8 @@ static void __init setup_cpu_spec(unsigned long offset, struct cpu_spec *s) * pointer on ppc64 and booke as we are running at 0 in real mode * on ppc64 and reloc_offset is always 0 on booke. */ - if (s->cpu_setup) { - s->cpu_setup(offset, s); + if (t->cpu_setup) { + t->cpu_setup(offset, t); } #endif /* CONFIG_PPC64 || CONFIG_BOOKE */ } diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 832c8c4db25..4e6ee944495 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -48,7 +48,7 @@ int crashing_cpu = -1; static cpumask_t cpus_in_crash = CPU_MASK_NONE; cpumask_t cpus_in_sr = CPU_MASK_NONE; -#define CRASH_HANDLER_MAX 2 +#define CRASH_HANDLER_MAX 3 /* NULL terminated list of shutdown handles */ static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; static DEFINE_SPINLOCK(crash_handlers_lock); @@ -64,9 +64,9 @@ void crash_ipi_callback(struct pt_regs *regs) return; hard_irq_disable(); - if (!cpu_isset(cpu, cpus_in_crash)) + if (!cpumask_test_cpu(cpu, &cpus_in_crash)) crash_save_cpu(regs, cpu); - cpu_set(cpu, cpus_in_crash); + cpumask_set_cpu(cpu, &cpus_in_crash); /* * Entered via soft-reset - could be the kdump @@ -77,8 +77,8 @@ void crash_ipi_callback(struct pt_regs *regs) * Tell the kexec CPU that entered via soft-reset and ready * to go down. */ - if (cpu_isset(cpu, cpus_in_sr)) { - cpu_clear(cpu, cpus_in_sr); + if (cpumask_test_cpu(cpu, &cpus_in_sr)) { + cpumask_clear_cpu(cpu, &cpus_in_sr); atomic_inc(&enter_on_soft_reset); } @@ -87,7 +87,7 @@ void crash_ipi_callback(struct pt_regs *regs) * This barrier is needed to make sure that all CPUs are stopped. * If not, soft-reset will be invoked to bring other CPUs. */ - while (!cpu_isset(crashing_cpu, cpus_in_crash)) + while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash)) cpu_relax(); if (ppc_md.kexec_cpu_down) @@ -109,7 +109,7 @@ static void crash_soft_reset_check(int cpu) { unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ - cpu_clear(cpu, cpus_in_sr); + cpumask_clear_cpu(cpu, &cpus_in_sr); while (atomic_read(&enter_on_soft_reset) != ncpus) cpu_relax(); } @@ -125,14 +125,14 @@ static void crash_kexec_prepare_cpus(int cpu) smp_wmb(); /* - * FIXME: Until we will have the way to stop other CPUSs reliabally, + * FIXME: Until we will have the way to stop other CPUs reliably, * the crash CPU will send an IPI and wait for other CPUs to * respond. * Delay of at least 10 seconds. */ printk(KERN_EMERG "Sending IPI to other cpus...\n"); msecs = 10000; - while ((cpus_weight(cpus_in_crash) < ncpus) && (--msecs > 0)) { + while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) { cpu_relax(); mdelay(1); } @@ -144,52 +144,24 @@ static void crash_kexec_prepare_cpus(int cpu) * user to do soft reset such that we get all. * Soft-reset will be used until better mechanism is implemented. */ - if (cpus_weight(cpus_in_crash) < ncpus) { + if (cpumask_weight(&cpus_in_crash) < ncpus) { printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n", - ncpus - cpus_weight(cpus_in_crash)); + ncpus - cpumask_weight(&cpus_in_crash)); printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n"); - cpus_in_sr = CPU_MASK_NONE; + cpumask_clear(&cpus_in_sr); atomic_set(&enter_on_soft_reset, 0); - while (cpus_weight(cpus_in_crash) < ncpus) + while (cpumask_weight(&cpus_in_crash) < ncpus) cpu_relax(); } /* * Make sure all CPUs are entered via soft-reset if the kdump is * invoked using soft-reset. */ - if (cpu_isset(cpu, cpus_in_sr)) + if (cpumask_test_cpu(cpu, &cpus_in_sr)) crash_soft_reset_check(cpu); /* Leave the IPI callback set */ } -/* wait for all the CPUs to hit real mode but timeout if they don't come in */ -#ifdef CONFIG_PPC_STD_MMU_64 -static void crash_kexec_wait_realmode(int cpu) -{ - unsigned int msecs; - int i; - - msecs = 10000; - for (i=0; i < NR_CPUS && msecs > 0; i++) { - if (i == cpu) - continue; - - while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { - barrier(); - if (!cpu_possible(i)) { - break; - } - if (!cpu_online(i)) { - break; - } - msecs--; - mdelay(1); - } - } - mb(); -} -#endif - /* * This function will be called by secondary cpus or by kexec cpu * if soft-reset is activated to stop some CPUs. @@ -210,7 +182,7 @@ void crash_kexec_secondary(struct pt_regs *regs) * exited using 'x'(exit and recover) or * kexec_should_crash() failed for all running tasks. */ - cpu_clear(cpu, cpus_in_sr); + cpumask_clear_cpu(cpu, &cpus_in_sr); local_irq_restore(flags); return; } @@ -224,7 +196,7 @@ void crash_kexec_secondary(struct pt_regs *regs) * then start kexec boot. */ crash_soft_reset_check(cpu); - cpu_set(crashing_cpu, cpus_in_crash); + cpumask_set_cpu(crashing_cpu, &cpus_in_crash); if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(1, 0); machine_kexec(kexec_crash_image); @@ -233,7 +205,8 @@ void crash_kexec_secondary(struct pt_regs *regs) crash_ipi_callback(regs); } -#else +#else /* ! CONFIG_SMP */ + static void crash_kexec_prepare_cpus(int cpu) { /* @@ -251,75 +224,39 @@ static void crash_kexec_prepare_cpus(int cpu) void crash_kexec_secondary(struct pt_regs *regs) { - cpus_in_sr = CPU_MASK_NONE; + cpumask_clear(&cpus_in_sr); } -#endif -#ifdef CONFIG_SPU_BASE +#endif /* CONFIG_SMP */ -#include <asm/spu.h> -#include <asm/spu_priv1.h> - -struct crash_spu_info { - struct spu *spu; - u32 saved_spu_runcntl_RW; - u32 saved_spu_status_R; - u32 saved_spu_npc_RW; - u64 saved_mfc_sr1_RW; - u64 saved_mfc_dar; - u64 saved_mfc_dsisr; -}; - -#define CRASH_NUM_SPUS 16 /* Enough for current hardware */ -static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS]; - -static void crash_kexec_stop_spus(void) +/* wait for all the CPUs to hit real mode but timeout if they don't come in */ +#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64) +static void crash_kexec_wait_realmode(int cpu) { - struct spu *spu; + unsigned int msecs; int i; - u64 tmp; - - for (i = 0; i < CRASH_NUM_SPUS; i++) { - if (!crash_spu_info[i].spu) - continue; - - spu = crash_spu_info[i].spu; - - crash_spu_info[i].saved_spu_runcntl_RW = - in_be32(&spu->problem->spu_runcntl_RW); - crash_spu_info[i].saved_spu_status_R = - in_be32(&spu->problem->spu_status_R); - crash_spu_info[i].saved_spu_npc_RW = - in_be32(&spu->problem->spu_npc_RW); - - crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu); - crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu); - tmp = spu_mfc_sr1_get(spu); - crash_spu_info[i].saved_mfc_sr1_RW = tmp; - - tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; - spu_mfc_sr1_set(spu, tmp); - __delay(200); - } -} - -void crash_register_spus(struct list_head *list) -{ - struct spu *spu; - - list_for_each_entry(spu, list, full_list) { - if (WARN_ON(spu->number >= CRASH_NUM_SPUS)) + msecs = 10000; + for (i=0; i < nr_cpu_ids && msecs > 0; i++) { + if (i == cpu) continue; - crash_spu_info[spu->number].spu = spu; + while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) { + barrier(); + if (!cpu_possible(i)) { + break; + } + if (!cpu_online(i)) { + break; + } + msecs--; + mdelay(1); + } } + mb(); } - #else -static inline void crash_kexec_stop_spus(void) -{ -} -#endif /* CONFIG_SPU_BASE */ +static inline void crash_kexec_wait_realmode(int cpu) {} +#endif /* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */ /* * Register a function to be called on shutdown. Only use this if you @@ -409,10 +346,8 @@ void default_machine_crash_shutdown(struct pt_regs *regs) crashing_cpu = smp_processor_id(); crash_save_cpu(regs, crashing_cpu); crash_kexec_prepare_cpus(crashing_cpu); - cpu_set(crashing_cpu, cpus_in_crash); -#if defined(CONFIG_PPC_STD_MMU_64) && defined(CONFIG_SMP) + cpumask_set_cpu(crashing_cpu, &cpus_in_crash); crash_kexec_wait_realmode(crashing_cpu); -#endif machine_kexec_mask_interrupts(); @@ -439,8 +374,6 @@ void default_machine_crash_shutdown(struct pt_regs *regs) crash_shutdown_cpu = -1; __debugger_fault_handler = old_handler; - crash_kexec_stop_spus(); - if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(1, 0); } diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 8e05c16344e..424afb6b8fb 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -19,6 +19,7 @@ #include <asm/prom.h> #include <asm/firmware.h> #include <asm/uaccess.h> +#include <asm/rtas.h> #ifdef DEBUG #include <asm/udbg.h> @@ -27,9 +28,6 @@ #define DBG(fmt...) #endif -/* Stores the physical address of elf header of crash image. */ -unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; - #ifndef CONFIG_RELOCATABLE void __init reserve_kdump_trampoline(void) { @@ -71,20 +69,6 @@ void __init setup_kdump_trampoline(void) } #endif /* CONFIG_RELOCATABLE */ -/* - * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by - * is_kdump_kernel() to determine if we are booting after a panic. Hence - * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE. - */ -static int __init parse_elfcorehdr(char *p) -{ - if (p) - elfcorehdr_addr = memparse(p, &p); - - return 1; -} -__setup("elfcorehdr=", parse_elfcorehdr); - static int __init parse_savemaxmem(char *p) { if (p) @@ -141,3 +125,35 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, return csize; } + +#ifdef CONFIG_PPC_RTAS +/* + * The crashkernel region will almost always overlap the RTAS region, so + * we have to be careful when shrinking the crashkernel region. + */ +void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) +{ + unsigned long addr; + const u32 *basep, *sizep; + unsigned int rtas_start = 0, rtas_end = 0; + + basep = of_get_property(rtas.dev, "linux,rtas-base", NULL); + sizep = of_get_property(rtas.dev, "rtas-size", NULL); + + if (basep && sizep) { + rtas_start = *basep; + rtas_end = *basep + *sizep; + } + + for (addr = begin; addr < end; addr += PAGE_SIZE) { + /* Does this page overlap with the RTAS region? */ + if (addr <= rtas_end && ((addr + PAGE_SIZE) > rtas_start)) + continue; + + ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT)); + init_page_count(pfn_to_page(addr >> PAGE_SHIFT)); + free_page((unsigned long)__va(addr)); + totalram_pages++; + } +} +#endif diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 3307a52d797..2cc451aaaca 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -13,84 +13,35 @@ #include <linux/kernel.h> #include <linux/smp.h> #include <linux/threads.h> -#include <linux/percpu.h> +#include <linux/hardirq.h> #include <asm/dbell.h> #include <asm/irq_regs.h> #ifdef CONFIG_SMP -struct doorbell_cpu_info { - unsigned long messages; /* current messages bits */ - unsigned int tag; /* tag value */ -}; - -static DEFINE_PER_CPU(struct doorbell_cpu_info, doorbell_cpu_info); - void doorbell_setup_this_cpu(void) { - struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); + unsigned long tag = mfspr(SPRN_PIR) & 0x3fff; - info->messages = 0; - info->tag = mfspr(SPRN_PIR) & 0x3fff; + smp_muxed_ipi_set_data(smp_processor_id(), tag); } -void doorbell_message_pass(int target, int msg) +void doorbell_cause_ipi(int cpu, unsigned long data) { - struct doorbell_cpu_info *info; - int i; - - if (target < NR_CPUS) { - info = &per_cpu(doorbell_cpu_info, target); - set_bit(msg, &info->messages); - ppc_msgsnd(PPC_DBELL, 0, info->tag); - } - else if (target == MSG_ALL_BUT_SELF) { - for_each_online_cpu(i) { - if (i == smp_processor_id()) - continue; - info = &per_cpu(doorbell_cpu_info, i); - set_bit(msg, &info->messages); - ppc_msgsnd(PPC_DBELL, 0, info->tag); - } - } - else { /* target == MSG_ALL */ - for_each_online_cpu(i) { - info = &per_cpu(doorbell_cpu_info, i); - set_bit(msg, &info->messages); - } - ppc_msgsnd(PPC_DBELL, PPC_DBELL_MSG_BRDCAST, 0); - } + ppc_msgsnd(PPC_DBELL, 0, data); } void doorbell_exception(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); - struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); - int msg; - /* Warning: regs can be NULL when called from irq enable */ + irq_enter(); - if (!info->messages || (num_online_cpus() < 2)) - goto out; + smp_ipi_demux(); - for (msg = 0; msg < 4; msg++) - if (test_and_clear_bit(msg, &info->messages)) - smp_message_recv(msg); - -out: + irq_exit(); set_irq_regs(old_regs); } - -void doorbell_check_self(void) -{ - struct doorbell_cpu_info *info = &__get_cpu_var(doorbell_cpu_info); - - if (!info->messages) - return; - - ppc_msgsnd(PPC_DBELL, 0, info->tag); -} - #else /* CONFIG_SMP */ void doorbell_exception(struct pt_regs *regs) { diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 6e54a0fd31a..e7554154a6d 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -19,7 +19,7 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size, - dma_handle, device_to_mask(dev), flag, + dma_handle, dev->coherent_dma_mask, flag, dev_to_node(dev)); } diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index cf02cad62d9..d238c082c3c 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -179,3 +179,21 @@ static int __init dma_init(void) return 0; } fs_initcall(dma_init); + +int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t handle, size_t size) +{ + unsigned long pfn; + +#ifdef CONFIG_NOT_COHERENT_CACHE + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr); +#else + pfn = page_to_pfn(virt_to_page(cpu_addr)); +#endif + return remap_pfn_range(vma, vma->vm_start, + pfn + vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} +EXPORT_SYMBOL_GPL(dma_mmap_coherent); diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c index 7c07de0d894..b150b510510 100644 --- a/arch/powerpc/kernel/e500-pmu.c +++ b/arch/powerpc/kernel/e500-pmu.c @@ -126,4 +126,4 @@ static int init_e500_pmu(void) return register_fsl_emb_pmu(&e500_pmu); } -arch_initcall(init_e500_pmu); +early_initcall(init_e500_pmu); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index ed4aeb96398..56212bc0ab0 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -31,6 +31,7 @@ #include <asm/asm-offsets.h> #include <asm/unistd.h> #include <asm/ftrace.h> +#include <asm/ptrace.h> #undef SHOW_SYSCALLS #undef SHOW_SYSCALLS_TASK @@ -879,7 +880,18 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) */ andi. r10,r9,MSR_EE beq 1f + /* + * Since the ftrace irqsoff latency trace checks CALLER_ADDR1, + * which is the stack frame here, we need to force a stack frame + * in case we came from user space. + */ + stwu r1,-32(r1) + mflr r0 + stw r0,4(r1) + stwu r1,-32(r1) bl trace_hardirqs_on + lwz r1,0(r1) + lwz r1,0(r1) lwz r9,_MSR(r1) 1: #endif /* CONFIG_TRACE_IRQFLAGS */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index d82878c4daa..d834425186a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -421,6 +421,12 @@ BEGIN_FTR_SECTION std r24,THREAD_VRSAVE(r3) END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_PPC64 +BEGIN_FTR_SECTION + mfspr r25,SPRN_DSCR + std r25,THREAD_DSCR(r3) +END_FTR_SECTION_IFSET(CPU_FTR_DSCR) +#endif and. r0,r0,r22 beq+ 1f andc r22,r22,r0 @@ -462,10 +468,10 @@ BEGIN_FTR_SECTION FTR_SECTION_ELSE_NESTED(95) clrrdi r6,r8,40 /* get its 1T ESID */ clrrdi r9,r1,40 /* get current sp 1T ESID */ - ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_1T_SEGMENT, 95) + ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_1T_SEGMENT, 95) FTR_SECTION_ELSE b 2f -ALT_FTR_SECTION_END_IFSET(CPU_FTR_SLB) +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_SLB) clrldi. r0,r6,2 /* is new ESID c00000000? */ cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */ cror eq,4*cr1+eq,eq @@ -479,7 +485,7 @@ BEGIN_FTR_SECTION li r9,MMU_SEGSIZE_1T /* insert B field */ oris r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h rldimi r7,r9,SLB_VSID_SSIZE_SHIFT,0 -END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) /* Update the last bolted SLB. No write barriers are needed * here, provided we only update the current CPU's SLB shadow @@ -491,7 +497,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT) std r7,SLBSHADOW_STACKVSID(r9) /* Save VSID */ std r0,SLBSHADOW_STACKESID(r9) /* Save ESID */ - /* No need to check for CPU_FTR_NO_SLBIE_B here, since when + /* No need to check for MMU_FTR_NO_SLBIE_B here, since when * we have 1TB segments, the only CPUs known to have the errata * only support less than 1TB of system memory and we'll never * actually hit this code path. @@ -522,6 +528,15 @@ BEGIN_FTR_SECTION mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_PPC64 +BEGIN_FTR_SECTION + ld r0,THREAD_DSCR(r4) + cmpd r0,r25 + beq 1f + mtspr SPRN_DSCR,r0 +1: +END_FTR_SECTION_IFSET(CPU_FTR_DSCR) +#endif /* r3-r13 are destroyed -- Cort */ REST_8GPRS(14, r1) @@ -838,7 +853,7 @@ _GLOBAL(enter_rtas) _STATIC(rtas_return_loc) /* relocation is off at this point */ - mfspr r4,SPRN_SPRG_PACA /* Get PACA */ + GET_PACA(r4) clrldi r4,r4,2 /* convert to realmode address */ bcl 20,31,$+4 @@ -869,7 +884,7 @@ _STATIC(rtas_restore_regs) REST_8GPRS(14, r1) /* Restore the non-volatiles */ REST_10GPRS(22, r1) /* ditto */ - mfspr r13,SPRN_SPRG_PACA + GET_PACA(r13) ld r4,_CCR(r1) mtcr r4 diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 5c43063d250..d24d4400cc7 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -17,6 +17,7 @@ #include <asm/cputable.h> #include <asm/setup.h> #include <asm/thread_info.h> +#include <asm/reg_a2.h> #include <asm/exception-64e.h> #include <asm/bug.h> #include <asm/irqflags.h> @@ -252,9 +253,6 @@ exception_marker: .balign 0x1000 .globl interrupt_base_book3e interrupt_base_book3e: /* fake trap */ - /* Note: If real debug exceptions are supported by the HW, the vector - * below will have to be patched up to point to an appropriate handler - */ EXCEPTION_STUB(0x000, machine_check) /* 0x0200 */ EXCEPTION_STUB(0x020, critical_input) /* 0x0580 */ EXCEPTION_STUB(0x040, debug_crit) /* 0x0d00 */ @@ -271,8 +269,13 @@ interrupt_base_book3e: /* fake trap */ EXCEPTION_STUB(0x1a0, watchdog) /* 0x09f0 */ EXCEPTION_STUB(0x1c0, data_tlb_miss) EXCEPTION_STUB(0x1e0, instruction_tlb_miss) + EXCEPTION_STUB(0x260, perfmon) EXCEPTION_STUB(0x280, doorbell) EXCEPTION_STUB(0x2a0, doorbell_crit) + EXCEPTION_STUB(0x2c0, guest_doorbell) + EXCEPTION_STUB(0x2e0, guest_doorbell_crit) + EXCEPTION_STUB(0x300, hypercall) + EXCEPTION_STUB(0x320, ehpriv) .globl interrupt_end_book3e interrupt_end_book3e: @@ -379,7 +382,7 @@ interrupt_end_book3e: mfspr r13,SPRN_SPRG_PACA /* get our PACA */ b system_call_common -/* Auxillary Processor Unavailable Interrupt */ +/* Auxiliary Processor Unavailable Interrupt */ START_EXCEPTION(ap_unavailable); NORMAL_EXCEPTION_PROLOG(0xf20, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_KEEP) @@ -454,6 +457,70 @@ interrupt_end_book3e: kernel_dbg_exc: b . /* NYI */ +/* Debug exception as a debug interrupt*/ + START_EXCEPTION(debug_debug); + DBG_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS) + + /* + * If there is a single step or branch-taken exception in an + * exception entry sequence, it was probably meant to apply to + * the code where the exception occurred (since exception entry + * doesn't turn off DE automatically). We simulate the effect + * of turning off DE on entry to an exception handler by turning + * off DE in the DSRR1 value and clearing the debug status. + */ + + mfspr r14,SPRN_DBSR /* check single-step/branch taken */ + andis. r15,r14,DBSR_IC@h + beq+ 1f + + LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e) + LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e) + cmpld cr0,r10,r14 + cmpld cr1,r10,r15 + blt+ cr0,1f + bge+ cr1,1f + + /* here it looks like we got an inappropriate debug exception. */ + lis r14,DBSR_IC@h /* clear the IC event */ + rlwinm r11,r11,0,~MSR_DE /* clear DE in the DSRR1 value */ + mtspr SPRN_DBSR,r14 + mtspr SPRN_DSRR1,r11 + lwz r10,PACA_EXDBG+EX_CR(r13) /* restore registers */ + ld r1,PACA_EXDBG+EX_R1(r13) + ld r14,PACA_EXDBG+EX_R14(r13) + ld r15,PACA_EXDBG+EX_R15(r13) + mtcr r10 + ld r10,PACA_EXDBG+EX_R10(r13) /* restore registers */ + ld r11,PACA_EXDBG+EX_R11(r13) + mfspr r13,SPRN_SPRG_DBG_SCRATCH + rfdi + + /* Normal debug exception */ + /* XXX We only handle coming from userspace for now since we can't + * quite save properly an interrupted kernel state yet + */ +1: andi. r14,r11,MSR_PR; /* check for userspace again */ + beq kernel_dbg_exc; /* if from kernel mode */ + + /* Now we mash up things to make it look like we are coming on a + * normal exception + */ + mfspr r15,SPRN_SPRG_DBG_SCRATCH + mtspr SPRN_SPRG_GEN_SCRATCH,r15 + mfspr r14,SPRN_DBSR + EXCEPTION_COMMON(0xd00, PACA_EXDBG, INTS_DISABLE_ALL) + std r14,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + mr r4,r14 + ld r14,PACA_EXDBG+EX_R14(r13) + ld r15,PACA_EXDBG+EX_R15(r13) + bl .save_nvgprs + bl .DebugException + b .ret_from_except + + MASKABLE_EXCEPTION(0x260, perfmon, .performance_monitor_exception, ACK_NONE) + /* Doorbell interrupt */ MASKABLE_EXCEPTION(0x2070, doorbell, .doorbell_exception, ACK_NONE) @@ -468,6 +535,11 @@ kernel_dbg_exc: // b ret_from_crit_except b . + MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, ACK_NONE) + MASKABLE_EXCEPTION(0x2e0, guest_doorbell_crit, .unknown_exception, ACK_NONE) + MASKABLE_EXCEPTION(0x310, hypercall, .unknown_exception, ACK_NONE) + MASKABLE_EXCEPTION(0x320, ehpriv, .unknown_exception, ACK_NONE) + /* * An interrupt came in while soft-disabled; clear EE in SRR1, @@ -587,7 +659,12 @@ fast_exception_return: BAD_STACK_TRAMPOLINE(0x000) BAD_STACK_TRAMPOLINE(0x100) BAD_STACK_TRAMPOLINE(0x200) +BAD_STACK_TRAMPOLINE(0x260) +BAD_STACK_TRAMPOLINE(0x2c0) +BAD_STACK_TRAMPOLINE(0x2e0) BAD_STACK_TRAMPOLINE(0x300) +BAD_STACK_TRAMPOLINE(0x310) +BAD_STACK_TRAMPOLINE(0x320) BAD_STACK_TRAMPOLINE(0x400) BAD_STACK_TRAMPOLINE(0x500) BAD_STACK_TRAMPOLINE(0x600) @@ -864,8 +941,23 @@ have_hes: * that will have to be made dependent on whether we are running under * a hypervisor I suppose. */ - ori r3,r3,MAS0_HES | MAS0_WQ_ALLWAYS - mtspr SPRN_MAS0,r3 + + /* BEWARE, MAGIC + * This code is called as an ordinary function on the boot CPU. But to + * avoid duplication, this code is also used in SCOM bringup of + * secondary CPUs. We read the code between the initial_tlb_code_start + * and initial_tlb_code_end labels one instruction at a time and RAM it + * into the new core via SCOM. That doesn't process branches, so there + * must be none between those two labels. It also means if this code + * ever takes any parameters, the SCOM code must also be updated to + * provide them. + */ + .globl a2_tlbinit_code_start +a2_tlbinit_code_start: + + ori r11,r3,MAS0_WQ_ALLWAYS + oris r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */ + mtspr SPRN_MAS0,r11 lis r3,(MAS1_VALID | MAS1_IPROT)@h ori r3,r3,BOOK3E_PAGESZ_1GB << MAS1_TSIZE_SHIFT mtspr SPRN_MAS1,r3 @@ -879,18 +971,86 @@ have_hes: /* Write the TLB entry */ tlbwe + .globl a2_tlbinit_after_linear_map +a2_tlbinit_after_linear_map: + /* Now we branch the new virtual address mapped by this entry */ LOAD_REG_IMMEDIATE(r3,1f) mtctr r3 bctr 1: /* We are now running at PAGE_OFFSET, clean the TLB of everything - * else (XXX we should scan for bolted crap from the firmware too) + * else (including IPROTed things left by firmware) + * r4 = TLBnCFG + * r3 = current address (more or less) */ + + li r5,0 + mtspr SPRN_MAS6,r5 + tlbsx 0,r3 + + rlwinm r9,r4,0,TLBnCFG_N_ENTRY + rlwinm r10,r4,8,0xff + addi r10,r10,-1 /* Get inner loop mask */ + + li r3,1 + + mfspr r5,SPRN_MAS1 + rlwinm r5,r5,0,(~(MAS1_VALID|MAS1_IPROT)) + + mfspr r6,SPRN_MAS2 + rldicr r6,r6,0,51 /* Extract EPN */ + + mfspr r7,SPRN_MAS0 + rlwinm r7,r7,0,0xffff0fff /* Clear HES and WQ */ + + rlwinm r8,r7,16,0xfff /* Extract ESEL */ + +2: add r4,r3,r8 + and r4,r4,r10 + + rlwimi r7,r4,16,MAS0_ESEL_MASK + + mtspr SPRN_MAS0,r7 + mtspr SPRN_MAS1,r5 + mtspr SPRN_MAS2,r6 + tlbwe + + addi r3,r3,1 + and. r4,r3,r10 + + bne 3f + addis r6,r6,(1<<30)@h +3: + cmpw r3,r9 + blt 2b + + .globl a2_tlbinit_after_iprot_flush +a2_tlbinit_after_iprot_flush: + +#ifdef CONFIG_PPC_EARLY_DEBUG_WSP + /* Now establish early debug mappings if applicable */ + /* Restore the MAS0 we used for linear mapping load */ + mtspr SPRN_MAS0,r11 + + lis r3,(MAS1_VALID | MAS1_IPROT)@h + ori r3,r3,(BOOK3E_PAGESZ_4K << MAS1_TSIZE_SHIFT) + mtspr SPRN_MAS1,r3 + LOAD_REG_IMMEDIATE(r3, WSP_UART_VIRT | MAS2_I | MAS2_G) + mtspr SPRN_MAS2,r3 + LOAD_REG_IMMEDIATE(r3, WSP_UART_PHYS | MAS3_SR | MAS3_SW) + mtspr SPRN_MAS7_MAS3,r3 + /* re-use the MAS8 value from the linear mapping */ + tlbwe +#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ + PPC_TLBILX(0,0,0) sync isync + .globl a2_tlbinit_code_end +a2_tlbinit_code_end: + /* We translate LR and return */ mflr r3 tovirt(r3,r3) @@ -1040,3 +1200,33 @@ _GLOBAL(__setup_base_ivors) sync blr + +_GLOBAL(setup_perfmon_ivor) + SET_IVOR(35, 0x260) /* Performance Monitor */ + blr + +_GLOBAL(setup_doorbell_ivors) + SET_IVOR(36, 0x280) /* Processor Doorbell */ + SET_IVOR(37, 0x2a0) /* Processor Doorbell Crit */ + + /* Check MMUCFG[LPIDSIZE] to determine if we have category E.HV */ + mfspr r10,SPRN_MMUCFG + rlwinm. r10,r10,0,MMUCFG_LPIDSIZE + beqlr + + SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */ + SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */ + blr + +_GLOBAL(setup_ehv_ivors) + /* + * We may be running as a guest and lack E.HV even on a chip + * that normally has it. + */ + mfspr r10,SPRN_MMUCFG + rlwinm. r10,r10,0,MMUCFG_LPIDSIZE + beqlr + + SET_IVOR(40, 0x300) /* Embedded Hypervisor System Call */ + SET_IVOR(41, 0x320) /* Embedded Hypervisor Privilege */ + blr diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 9f8b01d6466..a85f4874cba 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -5,7 +5,7 @@ * handling and other fixed offset specific things. * * This file is meant to be #included from head_64.S due to - * position dependant assembly. + * position dependent assembly. * * Most of this originates from head_64.S and thus has the same * copyright history. @@ -13,6 +13,7 @@ */ #include <asm/exception-64s.h> +#include <asm/ptrace.h> /* * We layout physical memory as follows: @@ -36,23 +37,51 @@ .globl __start_interrupts __start_interrupts: - STD_EXCEPTION_PSERIES(0x100, system_reset) + .globl system_reset_pSeries; +system_reset_pSeries: + HMT_MEDIUM; + DO_KVM 0x100; + SET_SCRATCH0(r13) +#ifdef CONFIG_PPC_P7_NAP +BEGIN_FTR_SECTION + /* Running native on arch 2.06 or later, check if we are + * waking up from nap. We only handle no state loss and + * supervisor state loss. We do -not- handle hypervisor + * state loss at this time. + */ + mfspr r13,SPRN_SRR1 + rlwinm r13,r13,47-31,30,31 + cmpwi cr0,r13,1 + bne 1f + b .power7_wakeup_noloss +1: cmpwi cr0,r13,2 + bne 1f + b .power7_wakeup_loss + /* Total loss of HV state is fatal, we could try to use the + * PIR to locate a PACA, then use an emergency stack etc... + * but for now, let's just stay stuck here + */ +1: cmpwi cr0,r13,3 + beq . +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE_206) +#endif /* CONFIG_PPC_P7_NAP */ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD) . = 0x200 _machine_check_pSeries: HMT_MEDIUM DO_KVM 0x200 - mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD) . = 0x300 .globl data_access_pSeries data_access_pSeries: HMT_MEDIUM DO_KVM 0x300 - mtspr SPRN_SPRG_SCRATCH0,r13 + SET_SCRATCH0(r13) BEGIN_FTR_SECTION - mfspr r13,SPRN_SPRG_PACA + GET_PACA(r13) std r9,PACA_EXSLB+EX_R9(r13) std r10,PACA_EXSLB+EX_R10(r13) mfspr r10,SPRN_DAR @@ -66,22 +95,22 @@ BEGIN_FTR_SECTION std r11,PACA_EXGEN+EX_R11(r13) ld r11,PACA_EXSLB+EX_R9(r13) std r12,PACA_EXGEN+EX_R12(r13) - mfspr r12,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r12) std r10,PACA_EXGEN+EX_R10(r13) std r11,PACA_EXGEN+EX_R9(r13) std r12,PACA_EXGEN+EX_R13(r13) - EXCEPTION_PROLOG_PSERIES_1(data_access_common) + EXCEPTION_PROLOG_PSERIES_1(data_access_common, EXC_STD) FTR_SECTION_ELSE - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common) -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD) +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_SLB) . = 0x380 .globl data_access_slb_pSeries data_access_slb_pSeries: HMT_MEDIUM DO_KVM 0x380 - mtspr SPRN_SPRG_SCRATCH0,r13 - mfspr r13,SPRN_SPRG_PACA /* get paca address into r13 */ + SET_SCRATCH0(r13) + GET_PACA(r13) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_DAR std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ @@ -94,7 +123,7 @@ data_access_slb_pSeries: std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - mfspr r10,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r10) std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ #ifndef CONFIG_RELOCATABLE @@ -112,15 +141,15 @@ data_access_slb_pSeries: bctr #endif - STD_EXCEPTION_PSERIES(0x400, instruction_access) + STD_EXCEPTION_PSERIES(0x400, 0x400, instruction_access) . = 0x480 .globl instruction_access_slb_pSeries instruction_access_slb_pSeries: HMT_MEDIUM DO_KVM 0x480 - mtspr SPRN_SPRG_SCRATCH0,r13 - mfspr r13,SPRN_SPRG_PACA /* get paca address into r13 */ + SET_SCRATCH0(r13) + GET_PACA(r13) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ @@ -133,7 +162,7 @@ instruction_access_slb_pSeries: std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - mfspr r10,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r10) std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ #ifndef CONFIG_RELOCATABLE @@ -146,13 +175,29 @@ instruction_access_slb_pSeries: bctr #endif - MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt) - STD_EXCEPTION_PSERIES(0x600, alignment) - STD_EXCEPTION_PSERIES(0x700, program_check) - STD_EXCEPTION_PSERIES(0x800, fp_unavailable) - MASKABLE_EXCEPTION_PSERIES(0x900, decrementer) - STD_EXCEPTION_PSERIES(0xa00, trap_0a) - STD_EXCEPTION_PSERIES(0xb00, trap_0b) + /* We open code these as we can't have a ". = x" (even with + * x = "." within a feature section + */ + . = 0x500; + .globl hardware_interrupt_pSeries; + .globl hardware_interrupt_hv; +hardware_interrupt_pSeries: +hardware_interrupt_hv: + BEGIN_FTR_SECTION + _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD) + FTR_SECTION_ELSE + _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV) + ALT_FTR_SECTION_END_IFCLR(CPU_FTR_HVMODE_206) + + STD_EXCEPTION_PSERIES(0x600, 0x600, alignment) + STD_EXCEPTION_PSERIES(0x700, 0x700, program_check) + STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable) + + MASKABLE_EXCEPTION_PSERIES(0x900, 0x900, decrementer) + MASKABLE_EXCEPTION_HV(0x980, 0x980, decrementer) + + STD_EXCEPTION_PSERIES(0xa00, 0xa00, trap_0a) + STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b) . = 0xc00 .globl system_call_pSeries @@ -164,13 +209,13 @@ BEGIN_FTR_SECTION beq- 1f END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) mr r9,r13 - mfspr r13,SPRN_SPRG_PACA + GET_PACA(r13) mfspr r11,SPRN_SRR0 - ld r12,PACAKBASE(r13) - ld r10,PACAKMSR(r13) - LOAD_HANDLER(r12, system_call_entry) - mtspr SPRN_SRR0,r12 mfspr r12,SPRN_SRR1 + ld r10,PACAKBASE(r13) + LOAD_HANDLER(r10, system_call_entry) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) mtspr SPRN_SRR1,r10 rfid b . /* prevent speculative execution */ @@ -182,8 +227,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) rfid /* return to userspace */ b . - STD_EXCEPTION_PSERIES(0xd00, single_step) - STD_EXCEPTION_PSERIES(0xe00, trap_0e) + STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step) + + /* At 0xe??? we have a bunch of hypervisor exceptions, we branch + * out of line to handle them + */ + . = 0xe00 + b h_data_storage_hv + . = 0xe20 + b h_instr_storage_hv + . = 0xe40 + b emulation_assist_hv + . = 0xe50 + b hmi_exception_hv + . = 0xe60 + b hmi_exception_hv /* We need to deal with the Altivec unavailable exception * here which is at 0xf20, thus in the middle of the @@ -192,39 +250,42 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) */ performance_monitor_pSeries_1: . = 0xf00 - DO_KVM 0xf00 b performance_monitor_pSeries altivec_unavailable_pSeries_1: . = 0xf20 - DO_KVM 0xf20 b altivec_unavailable_pSeries vsx_unavailable_pSeries_1: . = 0xf40 - DO_KVM 0xf40 b vsx_unavailable_pSeries #ifdef CONFIG_CBE_RAS - HSTD_EXCEPTION_PSERIES(0x1200, cbe_system_error) + STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) #endif /* CONFIG_CBE_RAS */ - STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint) + STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint) #ifdef CONFIG_CBE_RAS - HSTD_EXCEPTION_PSERIES(0x1600, cbe_maintenance) + STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance) #endif /* CONFIG_CBE_RAS */ - STD_EXCEPTION_PSERIES(0x1700, altivec_assist) + STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist) #ifdef CONFIG_CBE_RAS - HSTD_EXCEPTION_PSERIES(0x1800, cbe_thermal) + STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal) #endif /* CONFIG_CBE_RAS */ . = 0x3000 -/*** pSeries interrupt support ***/ +/*** Out of line interrupts support ***/ + + /* moved from 0xe00 */ + STD_EXCEPTION_HV(., 0xe00, h_data_storage) + STD_EXCEPTION_HV(., 0xe20, h_instr_storage) + STD_EXCEPTION_HV(., 0xe40, emulation_assist) + STD_EXCEPTION_HV(., 0xe60, hmi_exception) /* need to flush cache ? */ /* moved from 0xf00 */ - STD_EXCEPTION_PSERIES(., performance_monitor) - STD_EXCEPTION_PSERIES(., altivec_unavailable) - STD_EXCEPTION_PSERIES(., vsx_unavailable) + STD_EXCEPTION_PSERIES(., 0xf00, performance_monitor) + STD_EXCEPTION_PSERIES(., 0xf20, altivec_unavailable) + STD_EXCEPTION_PSERIES(., 0xf40, vsx_unavailable) /* * An interrupt came in while soft-disabled; clear EE in SRR1, @@ -239,17 +300,30 @@ masked_interrupt: rotldi r10,r10,16 mtspr SPRN_SRR1,r10 ld r10,PACA_EXGEN+EX_R10(r13) - mfspr r13,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r13) rfid b . +masked_Hinterrupt: + stb r10,PACAHARDIRQEN(r13) + mtcrf 0x80,r9 + ld r9,PACA_EXGEN+EX_R9(r13) + mfspr r10,SPRN_HSRR1 + rldicl r10,r10,48,1 /* clear MSR_EE */ + rotldi r10,r10,16 + mtspr SPRN_HSRR1,r10 + ld r10,PACA_EXGEN+EX_R10(r13) + GET_SCRATCH0(r13) + hrfid + b . + .align 7 do_stab_bolted_pSeries: std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - mfspr r10,SPRN_SPRG_SCRATCH0 + GET_SCRATCH0(r10) std r10,PACA_EXSLB+EX_R13(r13) - EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted) + EXCEPTION_PROLOG_PSERIES_1(.do_stab_bolted, EXC_STD) #ifdef CONFIG_PPC_PSERIES /* @@ -259,15 +333,15 @@ do_stab_bolted_pSeries: .align 7 system_reset_fwnmi: HMT_MEDIUM - mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD) .globl machine_check_fwnmi .align 7 machine_check_fwnmi: HMT_MEDIUM - mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common, EXC_STD) #endif /* CONFIG_PPC_PSERIES */ @@ -281,7 +355,7 @@ slb_miss_user_pseries: std r10,PACA_EXGEN+EX_R10(r13) std r11,PACA_EXGEN+EX_R11(r13) std r12,PACA_EXGEN+EX_R12(r13) - mfspr r10,SPRG_SCRATCH0 + GET_SCRATCH0(r10) ld r11,PACA_EXSLB+EX_R9(r13) ld r12,PACA_EXSLB+EX_R3(r13) std r10,PACA_EXGEN+EX_R13(r13) @@ -341,6 +415,8 @@ machine_check_common: STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception) + STD_EXCEPTION_COMMON(0xe40, emulation_assist, .program_check_exception) + STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception) STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception) STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception) #ifdef CONFIG_ALTIVEC @@ -385,9 +461,24 @@ bad_stack: std r12,_XER(r1) SAVE_GPR(0,r1) SAVE_GPR(2,r1) - SAVE_4GPRS(3,r1) - SAVE_2GPRS(7,r1) - SAVE_10GPRS(12,r1) + ld r10,EX_R3(r3) + std r10,GPR3(r1) + SAVE_GPR(4,r1) + SAVE_4GPRS(5,r1) + ld r9,EX_R9(r3) + ld r10,EX_R10(r3) + SAVE_2GPRS(9,r1) + ld r9,EX_R11(r3) + ld r10,EX_R12(r3) + ld r11,EX_R13(r3) + std r9,GPR11(r1) + std r10,GPR12(r1) + std r11,GPR13(r1) +BEGIN_FTR_SECTION + ld r10,EX_CFAR(r3) + std r10,ORIG_GPR3(r1) +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + SAVE_8GPRS(14,r1) SAVE_10GPRS(22,r1) lhz r12,PACA_TRAP_SAVE(r13) std r12,_TRAP(r1) @@ -396,6 +487,9 @@ bad_stack: li r12,0 std r12,0(r11) ld r2,PACATOC(r13) + ld r11,exception_marker@toc(r2) + std r12,RESULT(r1) + std r11,STACK_FRAME_OVERHEAD-16(r1) 1: addi r3,r1,STACK_FRAME_OVERHEAD bl .kernel_bad_stack b 1b @@ -418,6 +512,19 @@ data_access_common: li r5,0x300 b .do_hash_page /* Try to handle as hpte fault */ + .align 7 + .globl h_data_storage_common +h_data_storage_common: + mfspr r10,SPRN_HDAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,SPRN_HDSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN) + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .unknown_exception + b .ret_from_except + .align 7 .globl instruction_access_common instruction_access_common: @@ -427,6 +534,8 @@ instruction_access_common: li r5,0x400 b .do_hash_page /* Try to handle as hpte fault */ + STD_EXCEPTION_COMMON(0xe20, h_instr_storage, .unknown_exception) + /* * Here is the common SLB miss user that is used when going to virtual * mode for SLB misses, that is currently not used @@ -749,7 +858,7 @@ _STATIC(do_hash_page) BEGIN_FTR_SECTION andis. r0,r4,0x0020 /* Is it a segment table fault? */ bne- do_ste_alloc /* If so handle it */ -END_FTR_SECTION_IFCLR(CPU_FTR_SLB) +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB) clrrdi r11,r1,THREAD_SHIFT lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ @@ -976,20 +1085,6 @@ _GLOBAL(do_stab_bolted) rfid b . /* prevent speculative execution */ -/* - * Space for CPU0's segment table. - * - * On iSeries, the hypervisor must fill in at least one entry before - * we get control (with relocate on). The address is given to the hv - * as a page number (see xLparMap below), so this must be at a - * fixed address (the linker can't compute (u64)&initial_stab >> - * PAGE_SHIFT). - */ - . = STAB0_OFFSET /* 0x6000 */ - .globl initial_stab -initial_stab: - .space 4096 - #ifdef CONFIG_PPC_PSERIES /* * Data area reserved for FWNMI option. @@ -1026,3 +1121,17 @@ xLparMap: #ifdef CONFIG_PPC_PSERIES . = 0x8000 #endif /* CONFIG_PPC_PSERIES */ + +/* + * Space for CPU0's segment table. + * + * On iSeries, the hypervisor must fill in at least one entry before + * we get control (with relocate on). The address is given to the hv + * as a page number (see xLparMap above), so this must be at a + * fixed address (the linker can't compute (u64)&initial_stab >> + * PAGE_SHIFT). + */ + . = STAB0_OFFSET /* 0x8000 */ + .globl initial_stab +initial_stab: + .space 4096 diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index e86c040ae58..de369558bf0 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -23,6 +23,7 @@ #include <asm/thread_info.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> +#include <asm/ptrace.h> #ifdef CONFIG_VSX #define REST_32FPVSRS(n,c,base) \ diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 98c4b29a56f..ba250d505e0 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -805,19 +805,6 @@ _ENTRY(copy_and_flush) blr #ifdef CONFIG_SMP -#ifdef CONFIG_GEMINI - .globl __secondary_start_gemini -__secondary_start_gemini: - mfspr r4,SPRN_HID0 - ori r4,r4,HID0_ICFI - li r3,0 - ori r3,r3,HID0_ICE - andc r4,r4,r3 - mtspr SPRN_HID0,r4 - sync - b __secondary_start -#endif /* CONFIG_GEMINI */ - .globl __secondary_start_mpc86xx __secondary_start_mpc86xx: mfspr r3, SPRN_PIR diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 8278e8bad5a..a91626d87fc 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -40,6 +40,7 @@ #include <asm/thread_info.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> +#include <asm/ptrace.h> /* As with the other PowerPC ports, it is expected that when code * execution begins here, the following registers contain valid, yet @@ -765,7 +766,7 @@ DataAccess: * miss get to this point to load the TLB. * r10 - TLB_TAG value * r11 - Linux PTE - * r12, r9 - avilable to use + * r12, r9 - available to use * PID - loaded with proper value when we get here * Upon exit, we reload everything and RFI. * Actually, it will fit now, but oh well.....a common place diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 562305b40a8..5e12b741ba5 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -37,6 +37,7 @@ #include <asm/thread_info.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> +#include <asm/ptrace.h> #include <asm/synch.h> #include "head_booke.h" @@ -177,7 +178,7 @@ interrupt_base: NORMAL_EXCEPTION_PROLOG EXC_XFER_EE_LITE(0x0c00, DoSyscall) - /* Auxillary Processor Unavailable Interrupt */ + /* Auxiliary Processor Unavailable Interrupt */ EXCEPTION(0x2020, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) /* Decrementer Interrupt */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index f0dd577e4a5..ba504099844 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -38,8 +38,9 @@ #include <asm/page_64.h> #include <asm/irqflags.h> #include <asm/kvm_book3s_asm.h> +#include <asm/ptrace.h> -/* The physical memory is layed out such that the secondary processor +/* The physical memory is laid out such that the secondary processor * spin code sits at 0x0000...0x00ff. On server, the vectors follow * using the layout described in exceptions-64s.S */ @@ -96,7 +97,7 @@ __secondary_hold_acknowledge: .llong hvReleaseData-KERNELBASE #endif /* CONFIG_PPC_ISERIES */ -#ifdef CONFIG_CRASH_DUMP +#ifdef CONFIG_RELOCATABLE /* This flag is set to 1 by a loader if the kernel should run * at the loaded address instead of the linked address. This * is used by kexec-tools to keep the the kdump kernel in the @@ -146,6 +147,8 @@ __secondary_hold: mtctr r4 mr r3,r24 li r4,0 + /* Make sure that patched code is visible */ + isync bctr #else BUG_OPCODE @@ -215,19 +218,25 @@ generic_secondary_common_init: */ LOAD_REG_ADDR(r13, paca) /* Load paca pointer */ ld r13,0(r13) /* Get base vaddr of paca array */ +#ifndef CONFIG_SMP + addi r13,r13,PACA_SIZE /* know r13 if used accidentally */ + b .kexec_wait /* wait for next kernel if !SMP */ +#else + LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */ + lwz r7,0(r7) /* also the max paca allocated */ li r5,0 /* logical cpu id */ 1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ cmpw r6,r24 /* Compare to our id */ beq 2f addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */ addi r5,r5,1 - cmpwi r5,NR_CPUS + cmpw r5,r7 /* Check if more pacas exist */ blt 1b mr r3,r24 /* not found, copy phys to r3 */ b .kexec_wait /* next kernel might do better */ -2: mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG */ +2: SET_PACA(r13) #ifdef CONFIG_PPC_BOOK3E addi r12,r13,PACA_EXTLB /* and TLB exc frame in another */ mtspr SPRN_SPRG_TLB_EXFRAME,r12 @@ -235,34 +244,39 @@ generic_secondary_common_init: /* From now on, r24 is expected to be logical cpuid */ mr r24,r5 -3: HMT_LOW - lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ - /* start. */ - -#ifndef CONFIG_SMP - b 3b /* Never go on non-SMP */ -#else - cmpwi 0,r23,0 - beq 3b /* Loop until told to go */ - - sync /* order paca.run and cur_cpu_spec */ /* See if we need to call a cpu state restore handler */ LOAD_REG_ADDR(r23, cur_cpu_spec) ld r23,0(r23) ld r23,CPU_SPEC_RESTORE(r23) cmpdi 0,r23,0 - beq 4f + beq 3f ld r23,0(r23) mtctr r23 bctrl -4: /* Create a temp kernel stack for use before relocation is on. */ +3: LOAD_REG_ADDR(r3, boot_cpu_count) /* Decrement boot_cpu_count */ + lwarx r4,0,r3 + subi r4,r4,1 + stwcx. r4,0,r3 + bne 3b + isync + +4: HMT_LOW + lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ + /* start. */ + cmpwi 0,r23,0 + beq 4b /* Loop until told to go */ + + sync /* order paca.run and cur_cpu_spec */ + isync /* In case code patching happened */ + + /* Create a temp kernel stack for use before relocation is on. */ ld r1,PACAEMERGSP(r13) subi r1,r1,STACK_FRAME_OVERHEAD b __secondary_start -#endif +#endif /* SMP */ /* * Turn the MMU off. @@ -384,12 +398,10 @@ _STATIC(__after_prom_start) /* process relocations for the final address of the kernel */ lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */ sldi r25,r25,32 -#ifdef CONFIG_CRASH_DUMP lwz r7,__run_at_load-_stext(r26) - cmplwi cr0,r7,1 /* kdump kernel ? - stay where we are */ + cmplwi cr0,r7,1 /* flagged to stay where we are ? */ bne 1f add r25,r25,r26 -#endif 1: mr r3,r25 bl .relocate #endif @@ -535,7 +547,14 @@ _GLOBAL(pmac_secondary_start) ld r4,0(r4) /* Get base vaddr of paca array */ mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ add r13,r13,r4 /* for this processor. */ - mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG*/ + SET_PACA(r13) /* Save vaddr of paca in an SPRG*/ + + /* Mark interrupts soft and hard disabled (they might be enabled + * in the PACA when doing hotplug) + */ + li r0,0 + stb r0,PACASOFTIRQEN(r13) + stb r0,PACAHARDIRQEN(r13) /* Create a temp kernel stack for use before relocation is on. */ ld r1,PACAEMERGSP(r13) @@ -639,7 +658,7 @@ _GLOBAL(enable_64b_mode) oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */ mtmsr r11 #else /* CONFIG_PPC_BOOK3E */ - li r12,(MSR_SF | MSR_ISF)@highest + li r12,(MSR_64BIT | MSR_ISF)@highest sldi r12,r12,48 or r11,r11,r12 mtmsrd r11 diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 1f1a04b5c2a..1cbf64e6b41 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -29,6 +29,7 @@ #include <asm/thread_info.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> +#include <asm/ptrace.h> /* Macro to make the code more readable. */ #ifdef CONFIG_8xx_CPU6 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 529b817f473..5ecf54cfa7d 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -41,6 +41,7 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/cache.h> +#include <asm/ptrace.h> #include "head_booke.h" /* As with the other PowerPC ports, it is expected that when code @@ -325,7 +326,7 @@ interrupt_base: NORMAL_EXCEPTION_PROLOG EXC_XFER_EE_LITE(0x0c00, DoSyscall) - /* Auxillary Processor Unavailable Interrupt */ + /* Auxiliary Processor Unavailable Interrupt */ EXCEPTION(0x2900, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) /* Decrementer Interrupt */ diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index f62efdfd176..28581f1ad2c 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -201,13 +201,14 @@ int ibmebus_register_driver(struct of_platform_driver *drv) /* If the driver uses devices that ibmebus doesn't know, add them */ ibmebus_create_devices(drv->driver.of_match_table); - return of_register_driver(drv, &ibmebus_bus_type); + drv->driver.bus = &ibmebus_bus_type; + return driver_register(&drv->driver); } EXPORT_SYMBOL(ibmebus_register_driver); void ibmebus_unregister_driver(struct of_platform_driver *drv) { - of_unregister_driver(drv); + driver_unregister(&drv->driver); } EXPORT_SYMBOL(ibmebus_unregister_driver); @@ -308,15 +309,410 @@ static ssize_t ibmebus_store_remove(struct bus_type *bus, } } + static struct bus_attribute ibmebus_bus_attrs[] = { __ATTR(probe, S_IWUSR, NULL, ibmebus_store_probe), __ATTR(remove, S_IWUSR, NULL, ibmebus_store_remove), __ATTR_NULL }; +static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv) +{ + const struct of_device_id *matches = drv->of_match_table; + + if (!matches) + return 0; + + return of_match_device(matches, dev) != NULL; +} + +static int ibmebus_bus_device_probe(struct device *dev) +{ + int error = -ENODEV; + struct of_platform_driver *drv; + struct platform_device *of_dev; + const struct of_device_id *match; + + drv = to_of_platform_driver(dev->driver); + of_dev = to_platform_device(dev); + + if (!drv->probe) + return error; + + of_dev_get(of_dev); + + match = of_match_device(drv->driver.of_match_table, dev); + if (match) + error = drv->probe(of_dev, match); + if (error) + of_dev_put(of_dev); + + return error; +} + +static int ibmebus_bus_device_remove(struct device *dev) +{ + struct platform_device *of_dev = to_platform_device(dev); + struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + + if (dev->driver && drv->remove) + drv->remove(of_dev); + return 0; +} + +static void ibmebus_bus_device_shutdown(struct device *dev) +{ + struct platform_device *of_dev = to_platform_device(dev); + struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + + if (dev->driver && drv->shutdown) + drv->shutdown(of_dev); +} + +/* + * ibmebus_bus_device_attrs + */ +static ssize_t devspec_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct platform_device *ofdev; + + ofdev = to_platform_device(dev); + return sprintf(buf, "%s\n", ofdev->dev.of_node->full_name); +} + +static ssize_t name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct platform_device *ofdev; + + ofdev = to_platform_device(dev); + return sprintf(buf, "%s\n", ofdev->dev.of_node->name); +} + +static ssize_t modalias_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t len = of_device_get_modalias(dev, buf, PAGE_SIZE - 2); + buf[len] = '\n'; + buf[len+1] = 0; + return len+1; +} + +struct device_attribute ibmebus_bus_device_attrs[] = { + __ATTR_RO(devspec), + __ATTR_RO(name), + __ATTR_RO(modalias), + __ATTR_NULL +}; + +#ifdef CONFIG_PM_SLEEP +static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg) +{ + struct platform_device *of_dev = to_platform_device(dev); + struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + int ret = 0; + + if (dev->driver && drv->suspend) + ret = drv->suspend(of_dev, mesg); + return ret; +} + +static int ibmebus_bus_legacy_resume(struct device *dev) +{ + struct platform_device *of_dev = to_platform_device(dev); + struct of_platform_driver *drv = to_of_platform_driver(dev->driver); + int ret = 0; + + if (dev->driver && drv->resume) + ret = drv->resume(of_dev); + return ret; +} + +static int ibmebus_bus_pm_prepare(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (drv && drv->pm && drv->pm->prepare) + ret = drv->pm->prepare(dev); + + return ret; +} + +static void ibmebus_bus_pm_complete(struct device *dev) +{ + struct device_driver *drv = dev->driver; + + if (drv && drv->pm && drv->pm->complete) + drv->pm->complete(dev); +} + +#ifdef CONFIG_SUSPEND + +static int ibmebus_bus_pm_suspend(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->suspend) + ret = drv->pm->suspend(dev); + } else { + ret = ibmebus_bus_legacy_suspend(dev, PMSG_SUSPEND); + } + + return ret; +} + +static int ibmebus_bus_pm_suspend_noirq(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->suspend_noirq) + ret = drv->pm->suspend_noirq(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_resume(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->resume) + ret = drv->pm->resume(dev); + } else { + ret = ibmebus_bus_legacy_resume(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_resume_noirq(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->resume_noirq) + ret = drv->pm->resume_noirq(dev); + } + + return ret; +} + +#else /* !CONFIG_SUSPEND */ + +#define ibmebus_bus_pm_suspend NULL +#define ibmebus_bus_pm_resume NULL +#define ibmebus_bus_pm_suspend_noirq NULL +#define ibmebus_bus_pm_resume_noirq NULL + +#endif /* !CONFIG_SUSPEND */ + +#ifdef CONFIG_HIBERNATE_CALLBACKS + +static int ibmebus_bus_pm_freeze(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->freeze) + ret = drv->pm->freeze(dev); + } else { + ret = ibmebus_bus_legacy_suspend(dev, PMSG_FREEZE); + } + + return ret; +} + +static int ibmebus_bus_pm_freeze_noirq(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->freeze_noirq) + ret = drv->pm->freeze_noirq(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_thaw(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->thaw) + ret = drv->pm->thaw(dev); + } else { + ret = ibmebus_bus_legacy_resume(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_thaw_noirq(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->thaw_noirq) + ret = drv->pm->thaw_noirq(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_poweroff(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->poweroff) + ret = drv->pm->poweroff(dev); + } else { + ret = ibmebus_bus_legacy_suspend(dev, PMSG_HIBERNATE); + } + + return ret; +} + +static int ibmebus_bus_pm_poweroff_noirq(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->poweroff_noirq) + ret = drv->pm->poweroff_noirq(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_restore(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->restore) + ret = drv->pm->restore(dev); + } else { + ret = ibmebus_bus_legacy_resume(dev); + } + + return ret; +} + +static int ibmebus_bus_pm_restore_noirq(struct device *dev) +{ + struct device_driver *drv = dev->driver; + int ret = 0; + + if (!drv) + return 0; + + if (drv->pm) { + if (drv->pm->restore_noirq) + ret = drv->pm->restore_noirq(dev); + } + + return ret; +} + +#else /* !CONFIG_HIBERNATE_CALLBACKS */ + +#define ibmebus_bus_pm_freeze NULL +#define ibmebus_bus_pm_thaw NULL +#define ibmebus_bus_pm_poweroff NULL +#define ibmebus_bus_pm_restore NULL +#define ibmebus_bus_pm_freeze_noirq NULL +#define ibmebus_bus_pm_thaw_noirq NULL +#define ibmebus_bus_pm_poweroff_noirq NULL +#define ibmebus_bus_pm_restore_noirq NULL + +#endif /* !CONFIG_HIBERNATE_CALLBACKS */ + +static struct dev_pm_ops ibmebus_bus_dev_pm_ops = { + .prepare = ibmebus_bus_pm_prepare, + .complete = ibmebus_bus_pm_complete, + .suspend = ibmebus_bus_pm_suspend, + .resume = ibmebus_bus_pm_resume, + .freeze = ibmebus_bus_pm_freeze, + .thaw = ibmebus_bus_pm_thaw, + .poweroff = ibmebus_bus_pm_poweroff, + .restore = ibmebus_bus_pm_restore, + .suspend_noirq = ibmebus_bus_pm_suspend_noirq, + .resume_noirq = ibmebus_bus_pm_resume_noirq, + .freeze_noirq = ibmebus_bus_pm_freeze_noirq, + .thaw_noirq = ibmebus_bus_pm_thaw_noirq, + .poweroff_noirq = ibmebus_bus_pm_poweroff_noirq, + .restore_noirq = ibmebus_bus_pm_restore_noirq, +}; + +#define IBMEBUS_BUS_PM_OPS_PTR (&ibmebus_bus_dev_pm_ops) + +#else /* !CONFIG_PM_SLEEP */ + +#define IBMEBUS_BUS_PM_OPS_PTR NULL + +#endif /* !CONFIG_PM_SLEEP */ + struct bus_type ibmebus_bus_type = { + .name = "ibmebus", .uevent = of_device_uevent, - .bus_attrs = ibmebus_bus_attrs + .bus_attrs = ibmebus_bus_attrs, + .match = ibmebus_bus_bus_match, + .probe = ibmebus_bus_device_probe, + .remove = ibmebus_bus_device_remove, + .shutdown = ibmebus_bus_device_shutdown, + .dev_attrs = ibmebus_bus_device_attrs, + .pm = IBMEBUS_BUS_PM_OPS_PTR, }; EXPORT_SYMBOL(ibmebus_bus_type); @@ -326,7 +722,7 @@ static int __init ibmebus_bus_init(void) printk(KERN_INFO "IBM eBus Device Driver\n"); - err = of_bus_type_init(&ibmebus_bus_type, "ibmebus"); + err = bus_register(&ibmebus_bus_type); if (err) { printk(KERN_ERR "%s: failed to register IBM eBus.\n", __func__); diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index 5328709eeed..ba319547860 100644 --- a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S @@ -53,24 +53,3 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) isync b 1b -_GLOBAL(power4_cpu_offline_powersave) - /* Go to NAP now */ - mfmsr r7 - rldicl r0,r7,48,1 - rotldi r0,r0,16 - mtmsrd r0,1 /* hard-disable interrupts */ - li r0,1 - li r6,0 - stb r0,PACAHARDIRQEN(r13) /* we'll hard-enable shortly */ - stb r6,PACASOFTIRQEN(r13) /* soft-disable irqs */ -BEGIN_FTR_SECTION - DSSALL - sync -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - ori r7,r7,MSR_EE - oris r7,r7,MSR_POW@h - sync - isync - mtmsrd r7 - isync - blr diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S new file mode 100644 index 00000000000..f8f0bc7f1d4 --- /dev/null +++ b/arch/powerpc/kernel/idle_power7.S @@ -0,0 +1,97 @@ +/* + * This file contains the power_save function for 970-family CPUs. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/threads.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ppc-opcode.h> + +#undef DEBUG + + .text + +_GLOBAL(power7_idle) + /* Now check if user or arch enabled NAP mode */ + LOAD_REG_ADDRBASE(r3,powersave_nap) + lwz r4,ADDROFF(powersave_nap)(r3) + cmpwi 0,r4,0 + beqlr + + /* NAP is a state loss, we create a regs frame on the + * stack, fill it up with the state we care about and + * stick a pointer to it in PACAR1. We really only + * need to save PC, some CR bits and the NV GPRs, + * but for now an interrupt frame will do. + */ + mflr r0 + std r0,16(r1) + stdu r1,-INT_FRAME_SIZE(r1) + std r0,_LINK(r1) + std r0,_NIP(r1) + +#ifndef CONFIG_SMP + /* Make sure FPU, VSX etc... are flushed as we may lose + * state when going to nap mode + */ + bl .discard_lazy_cpu_state +#endif /* CONFIG_SMP */ + + /* Hard disable interrupts */ + mfmsr r9 + rldicl r9,r9,48,1 + rotldi r9,r9,16 + mtmsrd r9,1 /* hard-disable interrupts */ + li r0,0 + stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ + stb r0,PACAHARDIRQEN(r13) + + /* Continue saving state */ + SAVE_GPR(2, r1) + SAVE_NVGPRS(r1) + mfcr r3 + std r3,_CCR(r1) + std r9,_MSR(r1) + std r1,PACAR1(r13) + + /* Magic NAP mode enter sequence */ + std r0,0(r1) + ptesync + ld r0,0(r1) +1: cmp cr0,r0,r0 + bne 1b + PPC_NAP + b . + +_GLOBAL(power7_wakeup_loss) + GET_PACA(r13) + ld r1,PACAR1(r13) + REST_NVGPRS(r1) + REST_GPR(2, r1) + ld r3,_CCR(r1) + ld r4,_MSR(r1) + ld r5,_NIP(r1) + addi r1,r1,INT_FRAME_SIZE + mtcr r3 + mtspr SPRN_SRR1,r4 + mtspr SPRN_SRR0,r5 + rfid + +_GLOBAL(power7_wakeup_noloss) + GET_PACA(r13) + ld r1,PACAR1(r13) + ld r4,_MSR(r1) + ld r5,_NIP(r1) + addi r1,r1,INT_FRAME_SIZE + mtspr SPRN_SRR1,r4 + mtspr SPRN_SRR0,r5 + rfid diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c new file mode 100644 index 00000000000..ffafaea3d26 --- /dev/null +++ b/arch/powerpc/kernel/io-workarounds.c @@ -0,0 +1,188 @@ +/* + * Support PCI IO workaround + * + * Copyright (C) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org> + * IBM, Corp. + * (C) Copyright 2007-2008 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#undef DEBUG + +#include <linux/kernel.h> + +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/pgtable.h> +#include <asm/ppc-pci.h> +#include <asm/io-workarounds.h> + +#define IOWA_MAX_BUS 8 + +static struct iowa_bus iowa_busses[IOWA_MAX_BUS]; +static unsigned int iowa_bus_count; + +static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr) +{ + int i, j; + struct resource *res; + unsigned long vstart, vend; + + for (i = 0; i < iowa_bus_count; i++) { + struct iowa_bus *bus = &iowa_busses[i]; + struct pci_controller *phb = bus->phb; + + if (vaddr) { + vstart = (unsigned long)phb->io_base_virt; + vend = vstart + phb->pci_io_size - 1; + if ((vaddr >= vstart) && (vaddr <= vend)) + return bus; + } + + if (paddr) + for (j = 0; j < 3; j++) { + res = &phb->mem_resources[j]; + if (paddr >= res->start && paddr <= res->end) + return bus; + } + } + + return NULL; +} + +struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) +{ + struct iowa_bus *bus; + int token; + + token = PCI_GET_ADDR_TOKEN(addr); + + if (token && token <= iowa_bus_count) + bus = &iowa_busses[token - 1]; + else { + unsigned long vaddr, paddr; + pte_t *ptep; + + vaddr = (unsigned long)PCI_FIX_ADDR(addr); + if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) + return NULL; + + ptep = find_linux_pte(init_mm.pgd, vaddr); + if (ptep == NULL) + paddr = 0; + else + paddr = pte_pfn(*ptep) << PAGE_SHIFT; + bus = iowa_pci_find(vaddr, paddr); + + if (bus == NULL) + return NULL; + } + + return bus; +} + +struct iowa_bus *iowa_pio_find_bus(unsigned long port) +{ + unsigned long vaddr = (unsigned long)pci_io_base + port; + return iowa_pci_find(vaddr, 0); +} + + +#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \ +static ret iowa_##name at \ +{ \ + struct iowa_bus *bus; \ + bus = iowa_##space##_find_bus(aa); \ + if (bus && bus->ops && bus->ops->name) \ + return bus->ops->name al; \ + return __do_##name al; \ +} + +#define DEF_PCI_AC_NORET(name, at, al, space, aa) \ +static void iowa_##name at \ +{ \ + struct iowa_bus *bus; \ + bus = iowa_##space##_find_bus(aa); \ + if (bus && bus->ops && bus->ops->name) { \ + bus->ops->name al; \ + return; \ + } \ + __do_##name al; \ +} + +#include <asm/io-defs.h> + +#undef DEF_PCI_AC_RET +#undef DEF_PCI_AC_NORET + +static const struct ppc_pci_io __devinitconst iowa_pci_io = { + +#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) .name = iowa_##name, +#define DEF_PCI_AC_NORET(name, at, al, space, aa) .name = iowa_##name, + +#include <asm/io-defs.h> + +#undef DEF_PCI_AC_RET +#undef DEF_PCI_AC_NORET + +}; + +static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size, + unsigned long flags, void *caller) +{ + struct iowa_bus *bus; + void __iomem *res = __ioremap_caller(addr, size, flags, caller); + int busno; + + bus = iowa_pci_find(0, (unsigned long)addr); + if (bus != NULL) { + busno = bus - iowa_busses; + PCI_SET_ADDR_TOKEN(res, busno + 1); + } + return res; +} + +/* Enable IO workaround */ +static void __devinit io_workaround_init(void) +{ + static int io_workaround_inited; + + if (io_workaround_inited) + return; + ppc_pci_io = iowa_pci_io; + ppc_md.ioremap = iowa_ioremap; + io_workaround_inited = 1; +} + +/* Register new bus to support workaround */ +void __devinit iowa_register_bus(struct pci_controller *phb, + struct ppc_pci_io *ops, + int (*initfunc)(struct iowa_bus *, void *), void *data) +{ + struct iowa_bus *bus; + struct device_node *np = phb->dn; + + io_workaround_init(); + + if (iowa_bus_count >= IOWA_MAX_BUS) { + pr_err("IOWA:Too many pci bridges, " + "workarounds disabled for %s\n", np->full_name); + return; + } + + bus = &iowa_busses[iowa_bus_count]; + bus->phb = phb; + bus->ops = ops; + bus->private = data; + + if (initfunc) + if ((*initfunc)(bus, data)) + return; + + iowa_bus_count++; + + pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name); +} + diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index d5839179ec7..961bb03413f 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -311,8 +311,9 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, /* Handle failure */ if (unlikely(entry == DMA_ERROR_CODE)) { if (printk_ratelimit()) - printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %lx" - " npages %lx\n", tbl, vaddr, npages); + dev_info(dev, "iommu_alloc failed, tbl %p " + "vaddr %lx npages %lu\n", tbl, vaddr, + npages); goto failure; } @@ -579,9 +580,9 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, attrs); if (dma_handle == DMA_ERROR_CODE) { if (printk_ratelimit()) { - printk(KERN_INFO "iommu_alloc failed, " - "tbl %p vaddr %p npages %d\n", - tbl, vaddr, npages); + dev_info(dev, "iommu_alloc failed, tbl %p " + "vaddr %p npages %d\n", tbl, vaddr, + npages); } } else dma_handle |= (uaddr & ~IOMMU_PAGE_MASK); @@ -627,7 +628,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, * the tce tables. */ if (order >= IOMAP_MAX_ORDER) { - printk("iommu_alloc_consistent size too large: 0x%lx\n", size); + dev_info(dev, "iommu_alloc_consistent size too large: 0x%lx\n", + size); return NULL; } diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index ce557f6f00f..a24d37d4cf5 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -66,7 +66,6 @@ #include <asm/ptrace.h> #include <asm/machdep.h> #include <asm/udbg.h> -#include <asm/dbell.h> #include <asm/smp.h> #ifdef CONFIG_PPC64 @@ -160,7 +159,8 @@ notrace void arch_local_irq_restore(unsigned long en) #if defined(CONFIG_BOOKE) && defined(CONFIG_SMP) /* Check for pending doorbell interrupts and resend to ourself */ - doorbell_check_self(); + if (cpu_has_feature(CPU_FTR_DBELL)) + smp_muxed_ipi_resend(); #endif /* @@ -195,7 +195,7 @@ notrace void arch_local_irq_restore(unsigned long en) EXPORT_SYMBOL(arch_local_irq_restore); #endif /* CONFIG_PPC64 */ -static int show_other_interrupts(struct seq_file *p, int prec) +int arch_show_interrupts(struct seq_file *p, int prec) { int j; @@ -231,63 +231,6 @@ static int show_other_interrupts(struct seq_file *p, int prec) return 0; } -int show_interrupts(struct seq_file *p, void *v) -{ - unsigned long flags, any_count = 0; - int i = *(loff_t *) v, j, prec; - struct irqaction *action; - struct irq_desc *desc; - - if (i > nr_irqs) - return 0; - - for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec) - j *= 10; - - if (i == nr_irqs) - return show_other_interrupts(p, prec); - - /* print header */ - if (i == 0) { - seq_printf(p, "%*s", prec + 8, ""); - for_each_online_cpu(j) - seq_printf(p, "CPU%-8d", j); - seq_putc(p, '\n'); - } - - desc = irq_to_desc(i); - if (!desc) - return 0; - - raw_spin_lock_irqsave(&desc->lock, flags); - for_each_online_cpu(j) - any_count |= kstat_irqs_cpu(i, j); - action = desc->action; - if (!action && !any_count) - goto out; - - seq_printf(p, "%*d: ", prec, i); - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); - - if (desc->chip) - seq_printf(p, " %-16s", desc->chip->name); - else - seq_printf(p, " %-16s", "None"); - seq_printf(p, " %-8s", (desc->status & IRQ_LEVEL) ? "Level" : "Edge"); - - if (action) { - seq_printf(p, " %s", action->name); - while ((action = action->next) != NULL) - seq_printf(p, ", %s", action->name); - } - - seq_putc(p, '\n'); -out: - raw_spin_unlock_irqrestore(&desc->lock, flags); - return 0; -} - /* * /proc/stat helpers */ @@ -303,30 +246,37 @@ u64 arch_irq_stat_cpu(unsigned int cpu) } #ifdef CONFIG_HOTPLUG_CPU -void fixup_irqs(const struct cpumask *map) +void migrate_irqs(void) { struct irq_desc *desc; unsigned int irq; static int warned; cpumask_var_t mask; + const struct cpumask *map = cpu_online_mask; alloc_cpumask_var(&mask, GFP_KERNEL); for_each_irq(irq) { + struct irq_data *data; + struct irq_chip *chip; + desc = irq_to_desc(irq); if (!desc) continue; - if (desc->status & IRQ_PER_CPU) + data = irq_desc_get_irq_data(desc); + if (irqd_is_per_cpu(data)) continue; - cpumask_and(mask, desc->affinity, map); + chip = irq_data_get_irq_chip(data); + + cpumask_and(mask, data->affinity, map); if (cpumask_any(mask) >= nr_cpu_ids) { printk("Breaking affinity for irq %i\n", irq); cpumask_copy(mask, map); } - if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, mask); + if (chip->irq_set_affinity) + chip->irq_set_affinity(data, mask, true); else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } @@ -447,24 +397,28 @@ struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly; void exc_lvl_ctx_init(void) { struct thread_info *tp; - int i, hw_cpu; + int i, cpu_nr; for_each_possible_cpu(i) { - hw_cpu = get_hard_smp_processor_id(i); - memset((void *)critirq_ctx[hw_cpu], 0, THREAD_SIZE); - tp = critirq_ctx[hw_cpu]; - tp->cpu = i; +#ifdef CONFIG_PPC64 + cpu_nr = i; +#else + cpu_nr = get_hard_smp_processor_id(i); +#endif + memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE); + tp = critirq_ctx[cpu_nr]; + tp->cpu = cpu_nr; tp->preempt_count = 0; #ifdef CONFIG_BOOKE - memset((void *)dbgirq_ctx[hw_cpu], 0, THREAD_SIZE); - tp = dbgirq_ctx[hw_cpu]; - tp->cpu = i; + memset((void *)dbgirq_ctx[cpu_nr], 0, THREAD_SIZE); + tp = dbgirq_ctx[cpu_nr]; + tp->cpu = cpu_nr; tp->preempt_count = 0; - memset((void *)mcheckirq_ctx[hw_cpu], 0, THREAD_SIZE); - tp = mcheckirq_ctx[hw_cpu]; - tp->cpu = i; + memset((void *)mcheckirq_ctx[cpu_nr], 0, THREAD_SIZE); + tp = mcheckirq_ctx[cpu_nr]; + tp->cpu = cpu_nr; tp->preempt_count = HARDIRQ_OFFSET; #endif } @@ -527,20 +481,41 @@ void do_softirq(void) * IRQ controller and virtual interrupts */ +/* The main irq map itself is an array of NR_IRQ entries containing the + * associate host and irq number. An entry with a host of NULL is free. + * An entry can be allocated if it's free, the allocator always then sets + * hwirq first to the host's invalid irq number and then fills ops. + */ +struct irq_map_entry { + irq_hw_number_t hwirq; + struct irq_host *host; +}; + static LIST_HEAD(irq_hosts); static DEFINE_RAW_SPINLOCK(irq_big_lock); -static unsigned int revmap_trees_allocated; static DEFINE_MUTEX(revmap_trees_mutex); -struct irq_map_entry irq_map[NR_IRQS]; +static struct irq_map_entry irq_map[NR_IRQS]; static unsigned int irq_virq_count = NR_IRQS; static struct irq_host *irq_default_host; +irq_hw_number_t irqd_to_hwirq(struct irq_data *d) +{ + return irq_map[d->irq].hwirq; +} +EXPORT_SYMBOL_GPL(irqd_to_hwirq); + irq_hw_number_t virq_to_hw(unsigned int virq) { return irq_map[virq].hwirq; } EXPORT_SYMBOL_GPL(virq_to_hw); +bool virq_is_host(unsigned int virq, struct irq_host *host) +{ + return irq_map[virq].host == host; +} +EXPORT_SYMBOL_GPL(virq_is_host); + static int default_irq_host_match(struct irq_host *h, struct device_node *np) { return h->of_node != NULL && h->of_node == np; @@ -561,7 +536,7 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, /* Allocate structure and revmap table if using linear mapping */ if (revmap_type == IRQ_HOST_MAP_LINEAR) size += revmap_arg * sizeof(unsigned int); - host = zalloc_maybe_bootmem(size, GFP_KERNEL); + host = kzalloc(size, GFP_KERNEL); if (host == NULL) return NULL; @@ -611,14 +586,14 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, irq_map[i].host = host; smp_wmb(); - /* Clear norequest flags */ - irq_to_desc(i)->status &= ~IRQ_NOREQUEST; - /* Legacy flags are left to default at this point, * one can then use irq_create_mapping() to * explicitly change them */ ops->map(host, i, i); + + /* Clear norequest flags */ + irq_clear_status_flags(i, IRQ_NOREQUEST); } break; case IRQ_HOST_MAP_LINEAR: @@ -629,6 +604,9 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, smp_wmb(); host->revmap_data.linear.revmap = rmap; break; + case IRQ_HOST_MAP_TREE: + INIT_RADIX_TREE(&host->revmap_data.tree, GFP_KERNEL); + break; default: break; } @@ -678,17 +656,14 @@ void irq_set_virq_count(unsigned int count) static int irq_setup_virq(struct irq_host *host, unsigned int virq, irq_hw_number_t hwirq) { - struct irq_desc *desc; + int res; - desc = irq_to_desc_alloc_node(virq, 0); - if (!desc) { + res = irq_alloc_desc_at(virq, 0); + if (res != virq) { pr_debug("irq: -> allocating desc failed\n"); goto error; } - /* Clear IRQ_NOREQUEST flag */ - desc->status &= ~IRQ_NOREQUEST; - /* map it */ smp_wmb(); irq_map[virq].hwirq = hwirq; @@ -696,11 +671,15 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq, if (host->ops->map(host, virq, hwirq)) { pr_debug("irq: -> mapping failed, freeing\n"); - goto error; + goto errdesc; } + irq_clear_status_flags(virq, IRQ_NOREQUEST); + return 0; +errdesc: + irq_free_descs(virq, 1); error: irq_free_virt(virq, 1); return -1; @@ -753,8 +732,6 @@ unsigned int irq_create_mapping(struct irq_host *host, */ virq = irq_find_mapping(host, hwirq); if (virq != NO_IRQ) { - if (host->ops->remap) - host->ops->remap(host, virq, hwirq); pr_debug("irq: -> existing mapping on virq %d\n", virq); return virq; } @@ -820,8 +797,8 @@ unsigned int irq_create_of_mapping(struct device_node *controller, /* Set type if specified and different than the current one */ if (type != IRQ_TYPE_NONE && - type != (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK)) - set_irq_type(virq, type); + type != (irqd_get_trigger_type(irq_get_irq_data(virq)))) + irq_set_irq_type(virq, type); return virq; } EXPORT_SYMBOL_GPL(irq_create_of_mapping); @@ -835,16 +812,17 @@ void irq_dispose_mapping(unsigned int virq) return; host = irq_map[virq].host; - WARN_ON (host == NULL); - if (host == NULL) + if (WARN_ON(host == NULL)) return; /* Never unmap legacy interrupts */ if (host->revmap_type == IRQ_HOST_MAP_LEGACY) return; + irq_set_status_flags(virq, IRQ_NOREQUEST); + /* remove chip and handler */ - set_irq_chip_and_handler(virq, NULL, NULL); + irq_set_chip_and_handler(virq, NULL, NULL); /* Make sure it's completed */ synchronize_irq(virq); @@ -862,13 +840,6 @@ void irq_dispose_mapping(unsigned int virq) host->revmap_data.linear.revmap[hwirq] = NO_IRQ; break; case IRQ_HOST_MAP_TREE: - /* - * Check if radix tree allocated yet, if not then nothing to - * remove. - */ - smp_rmb(); - if (revmap_trees_allocated < 1) - break; mutex_lock(&revmap_trees_mutex); radix_tree_delete(&host->revmap_data.tree, hwirq); mutex_unlock(&revmap_trees_mutex); @@ -879,9 +850,7 @@ void irq_dispose_mapping(unsigned int virq) smp_mb(); irq_map[virq].hwirq = host->inval_irq; - /* Set some flags */ - irq_to_desc(virq)->status |= IRQ_NOREQUEST; - + irq_free_descs(virq, 1); /* Free it */ irq_free_virt(virq, 1); } @@ -926,16 +895,9 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host, struct irq_map_entry *ptr; unsigned int virq; - WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE); - - /* - * Check if the radix tree exists and has bee initialized. - * If not, we fallback to slow mode - */ - if (revmap_trees_allocated < 2) + if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_TREE)) return irq_find_mapping(host, hwirq); - /* Now try to resolve */ /* * No rcu_read_lock(ing) needed, the ptr returned can't go under us * as it's referencing an entry in the static irq_map table. @@ -958,16 +920,7 @@ unsigned int irq_radix_revmap_lookup(struct irq_host *host, void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq, irq_hw_number_t hwirq) { - - WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE); - - /* - * Check if the radix tree exists yet. - * If not, then the irq will be inserted into the tree when it gets - * initialized. - */ - smp_rmb(); - if (revmap_trees_allocated < 1) + if (WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE)) return; if (virq != NO_IRQ) { @@ -983,7 +936,8 @@ unsigned int irq_linear_revmap(struct irq_host *host, { unsigned int *revmap; - WARN_ON(host->revmap_type != IRQ_HOST_MAP_LINEAR); + if (WARN_ON_ONCE(host->revmap_type != IRQ_HOST_MAP_LINEAR)) + return irq_find_mapping(host, hwirq); /* Check revmap bounds */ if (unlikely(hwirq >= host->revmap_data.linear.size)) @@ -1074,71 +1028,9 @@ void irq_free_virt(unsigned int virq, unsigned int count) int arch_early_irq_init(void) { - struct irq_desc *desc; - int i; - - for (i = 0; i < NR_IRQS; i++) { - desc = irq_to_desc(i); - if (desc) - desc->status |= IRQ_NOREQUEST; - } - - return 0; -} - -int arch_init_chip_data(struct irq_desc *desc, int node) -{ - desc->status |= IRQ_NOREQUEST; return 0; } -/* We need to create the radix trees late */ -static int irq_late_init(void) -{ - struct irq_host *h; - unsigned int i; - - /* - * No mutual exclusion with respect to accessors of the tree is needed - * here as the synchronization is done via the state variable - * revmap_trees_allocated. - */ - list_for_each_entry(h, &irq_hosts, link) { - if (h->revmap_type == IRQ_HOST_MAP_TREE) - INIT_RADIX_TREE(&h->revmap_data.tree, GFP_KERNEL); - } - - /* - * Make sure the radix trees inits are visible before setting - * the flag - */ - smp_wmb(); - revmap_trees_allocated = 1; - - /* - * Insert the reverse mapping for those interrupts already present - * in irq_map[]. - */ - mutex_lock(&revmap_trees_mutex); - for (i = 0; i < irq_virq_count; i++) { - if (irq_map[i].host && - (irq_map[i].host->revmap_type == IRQ_HOST_MAP_TREE)) - radix_tree_insert(&irq_map[i].host->revmap_data.tree, - irq_map[i].hwirq, &irq_map[i]); - } - mutex_unlock(&revmap_trees_mutex); - - /* - * Make sure the radix trees insertions are visible before setting - * the flag - */ - smp_wmb(); - revmap_trees_allocated = 2; - - return 0; -} -arch_initcall(irq_late_init); - #ifdef CONFIG_VIRQ_DEBUG static int virq_debug_show(struct seq_file *m, void *private) { @@ -1146,10 +1038,11 @@ static int virq_debug_show(struct seq_file *m, void *private) struct irq_desc *desc; const char *p; static const char none[] = "none"; + void *data; int i; - seq_printf(m, "%-5s %-7s %-15s %s\n", "virq", "hwirq", - "chip name", "host name"); + seq_printf(m, "%-5s %-7s %-15s %-18s %s\n", "virq", "hwirq", + "chip name", "chip data", "host name"); for (i = 1; i < nr_irqs; i++) { desc = irq_to_desc(i); @@ -1159,15 +1052,21 @@ static int virq_debug_show(struct seq_file *m, void *private) raw_spin_lock_irqsave(&desc->lock, flags); if (desc->action && desc->action->handler) { + struct irq_chip *chip; + seq_printf(m, "%5d ", i); - seq_printf(m, "0x%05lx ", virq_to_hw(i)); + seq_printf(m, "0x%05lx ", irq_map[i].hwirq); - if (desc->chip && desc->chip->name) - p = desc->chip->name; + chip = irq_desc_get_chip(desc); + if (chip && chip->name) + p = chip->name; else p = none; seq_printf(m, "%-15s ", p); + data = irq_desc_get_chip_data(desc); + seq_printf(m, "0x%16p ", data); + if (irq_map[i].host && irq_map[i].host->of_node) p = irq_map[i].host->of_node->full_name; else diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 7f61a3ac787..bd9d35f59cf 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -109,7 +109,7 @@ static int kgdb_call_nmi_hook(struct pt_regs *regs) #ifdef CONFIG_SMP void kgdb_roundup_cpus(unsigned long flags) { - smp_send_debugger_break(MSG_ALL_BUT_SELF); + smp_send_debugger_break(); } #endif @@ -194,40 +194,6 @@ static int kgdb_dabr_match(struct pt_regs *regs) ptr = (unsigned long *)ptr32; \ } while (0) - -void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) -{ - unsigned long *ptr = gdb_regs; - int reg; - - memset(gdb_regs, 0, NUMREGBYTES); - - for (reg = 0; reg < 32; reg++) - PACK64(ptr, regs->gpr[reg]); - -#ifdef CONFIG_FSL_BOOKE -#ifdef CONFIG_SPE - for (reg = 0; reg < 32; reg++) - PACK64(ptr, current->thread.evr[reg]); -#else - ptr += 32; -#endif -#else - /* fp registers not used by kernel, leave zero */ - ptr += 32 * 8 / sizeof(long); -#endif - - PACK64(ptr, regs->nip); - PACK64(ptr, regs->msr); - PACK32(ptr, regs->ccr); - PACK64(ptr, regs->link); - PACK64(ptr, regs->ctr); - PACK32(ptr, regs->xer); - - BUG_ON((unsigned long)ptr > - (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); -} - void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) { struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp + @@ -271,44 +237,140 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); } -#define UNPACK64(dest, ptr) do { dest = *(ptr++); } while (0) +#define GDB_SIZEOF_REG sizeof(unsigned long) +#define GDB_SIZEOF_REG_U32 sizeof(u32) -#define UNPACK32(dest, ptr) do { \ - u32 *ptr32; \ - ptr32 = (u32 *)ptr; \ - dest = *(ptr32++); \ - ptr = (unsigned long *)ptr32; \ - } while (0) +#ifdef CONFIG_FSL_BOOKE +#define GDB_SIZEOF_FLOAT_REG sizeof(unsigned long) +#else +#define GDB_SIZEOF_FLOAT_REG sizeof(u64) +#endif -void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) +struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { - unsigned long *ptr = gdb_regs; - int reg; - - for (reg = 0; reg < 32; reg++) - UNPACK64(regs->gpr[reg], ptr); + { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[0]) }, + { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[1]) }, + { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[2]) }, + { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[3]) }, + { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[4]) }, + { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[5]) }, + { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[6]) }, + { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[7]) }, + { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[8]) }, + { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[9]) }, + { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[10]) }, + { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[11]) }, + { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[12]) }, + { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[13]) }, + { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[14]) }, + { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[15]) }, + { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[16]) }, + { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[17]) }, + { "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[18]) }, + { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[19]) }, + { "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[20]) }, + { "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[21]) }, + { "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[22]) }, + { "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[23]) }, + { "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[24]) }, + { "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[25]) }, + { "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[26]) }, + { "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[27]) }, + { "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[28]) }, + { "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[29]) }, + { "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[30]) }, + { "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[31]) }, + + { "f0", GDB_SIZEOF_FLOAT_REG, 0 }, + { "f1", GDB_SIZEOF_FLOAT_REG, 1 }, + { "f2", GDB_SIZEOF_FLOAT_REG, 2 }, + { "f3", GDB_SIZEOF_FLOAT_REG, 3 }, + { "f4", GDB_SIZEOF_FLOAT_REG, 4 }, + { "f5", GDB_SIZEOF_FLOAT_REG, 5 }, + { "f6", GDB_SIZEOF_FLOAT_REG, 6 }, + { "f7", GDB_SIZEOF_FLOAT_REG, 7 }, + { "f8", GDB_SIZEOF_FLOAT_REG, 8 }, + { "f9", GDB_SIZEOF_FLOAT_REG, 9 }, + { "f10", GDB_SIZEOF_FLOAT_REG, 10 }, + { "f11", GDB_SIZEOF_FLOAT_REG, 11 }, + { "f12", GDB_SIZEOF_FLOAT_REG, 12 }, + { "f13", GDB_SIZEOF_FLOAT_REG, 13 }, + { "f14", GDB_SIZEOF_FLOAT_REG, 14 }, + { "f15", GDB_SIZEOF_FLOAT_REG, 15 }, + { "f16", GDB_SIZEOF_FLOAT_REG, 16 }, + { "f17", GDB_SIZEOF_FLOAT_REG, 17 }, + { "f18", GDB_SIZEOF_FLOAT_REG, 18 }, + { "f19", GDB_SIZEOF_FLOAT_REG, 19 }, + { "f20", GDB_SIZEOF_FLOAT_REG, 20 }, + { "f21", GDB_SIZEOF_FLOAT_REG, 21 }, + { "f22", GDB_SIZEOF_FLOAT_REG, 22 }, + { "f23", GDB_SIZEOF_FLOAT_REG, 23 }, + { "f24", GDB_SIZEOF_FLOAT_REG, 24 }, + { "f25", GDB_SIZEOF_FLOAT_REG, 25 }, + { "f26", GDB_SIZEOF_FLOAT_REG, 26 }, + { "f27", GDB_SIZEOF_FLOAT_REG, 27 }, + { "f28", GDB_SIZEOF_FLOAT_REG, 28 }, + { "f29", GDB_SIZEOF_FLOAT_REG, 29 }, + { "f30", GDB_SIZEOF_FLOAT_REG, 30 }, + { "f31", GDB_SIZEOF_FLOAT_REG, 31 }, + + { "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, nip) }, + { "msr", GDB_SIZEOF_REG, offsetof(struct pt_regs, msr) }, + { "cr", GDB_SIZEOF_REG_U32, offsetof(struct pt_regs, ccr) }, + { "lr", GDB_SIZEOF_REG, offsetof(struct pt_regs, link) }, + { "ctr", GDB_SIZEOF_REG_U32, offsetof(struct pt_regs, ctr) }, + { "xer", GDB_SIZEOF_REG, offsetof(struct pt_regs, xer) }, +}; -#ifdef CONFIG_FSL_BOOKE -#ifdef CONFIG_SPE - for (reg = 0; reg < 32; reg++) - UNPACK64(current->thread.evr[reg], ptr); +char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return NULL; + + if (regno < 32 || regno >= 64) + /* First 0 -> 31 gpr registers*/ + /* pc, msr, ls... registers 64 -> 69 */ + memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, + dbg_reg_def[regno].size); + + if (regno >= 32 && regno < 64) { + /* FP registers 32 -> 63 */ +#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE) + if (current) + memcpy(mem, ¤t->thread.evr[regno-32], + dbg_reg_def[regno].size); #else - ptr += 32; + /* fp registers not used by kernel, leave zero */ + memset(mem, 0, dbg_reg_def[regno].size); #endif + } + + return dbg_reg_def[regno].name; +} + +int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return -EINVAL; + + if (regno < 32 || regno >= 64) + /* First 0 -> 31 gpr registers*/ + /* pc, msr, ls... registers 64 -> 69 */ + memcpy((void *)regs + dbg_reg_def[regno].offset, mem, + dbg_reg_def[regno].size); + + if (regno >= 32 && regno < 64) { + /* FP registers 32 -> 63 */ +#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE) + memcpy(¤t->thread.evr[regno-32], mem, + dbg_reg_def[regno].size); #else - /* fp registers not used by kernel, leave zero */ - ptr += 32 * 8 / sizeof(int); + /* fp registers not used by kernel, leave zero */ + return 0; #endif + } - UNPACK64(regs->nip, ptr); - UNPACK64(regs->msr, ptr); - UNPACK32(regs->ccr, ptr); - UNPACK64(regs->link, ptr); - UNPACK64(regs->ctr, ptr); - UNPACK32(regs->xer, ptr); - - BUG_ON((unsigned long)ptr > - (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); + return 0; } void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 428d0e538ae..b06bdae0406 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -127,7 +127,7 @@ static void kvm_patch_ins_nop(u32 *inst) static void kvm_patch_ins_b(u32 *inst, int addr) { -#ifdef CONFIG_RELOCATABLE +#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC_BOOK3S) /* On relocatable kernels interrupts handlers and our code can be in different regions, so we don't patch them */ diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S index 2a2f3c3f6d8..97ec8557f97 100644 --- a/arch/powerpc/kernel/l2cr_6xx.S +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -151,7 +151,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) /**** Might be a good idea to set L2DO here - to prevent instructions from getting into the cache. But since we invalidate the next time we enable the cache it doesn't really matter. - Don't do this unless you accomodate all processor variations. + Don't do this unless you accommodate all processor variations. The bit moved on the 7450..... ****/ diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index c834757bebc..2b97b80d6d7 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -330,9 +330,11 @@ void __init find_legacy_serial_ports(void) if (!parent) continue; if (of_match_node(legacy_serial_parents, parent) != NULL) { - index = add_legacy_soc_port(np, np); - if (index >= 0 && np == stdout) - legacy_serial_console = index; + if (of_device_is_available(np)) { + index = add_legacy_soc_port(np, np); + if (index >= 0 && np == stdout) + legacy_serial_console = index; + } } of_node_put(parent); } diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 16468362ad5..84daabe2fcb 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -132,34 +132,6 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v) /* * Methods used to fetch LPAR data when running on a pSeries platform. */ -/** - * h_get_mpp - * H_GET_MPP hcall returns info in 7 parms - */ -int h_get_mpp(struct hvcall_mpp_data *mpp_data) -{ - int rc; - unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; - - rc = plpar_hcall9(H_GET_MPP, retbuf); - - mpp_data->entitled_mem = retbuf[0]; - mpp_data->mapped_mem = retbuf[1]; - - mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; - mpp_data->pool_num = retbuf[2] & 0xffff; - - mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff; - mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff; - mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff; - - mpp_data->pool_size = retbuf[4]; - mpp_data->loan_request = retbuf[5]; - mpp_data->backing_mem = retbuf[6]; - - return rc; -} -EXPORT_SYMBOL(h_get_mpp); struct hvcall_ppp_data { u64 entitlement; @@ -262,7 +234,7 @@ static void parse_ppp_data(struct seq_file *m) seq_printf(m, "system_active_processors=%d\n", ppp_data.active_system_procs); - /* pool related entries are apropriate for shared configs */ + /* pool related entries are appropriate for shared configs */ if (lppaca_of(0).shared_proc) { unsigned long pool_idle_time, pool_procs; @@ -345,6 +317,30 @@ static void parse_mpp_data(struct seq_file *m) seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem); } +/** + * parse_mpp_x_data + * Parse out data returned from h_get_mpp_x + */ +static void parse_mpp_x_data(struct seq_file *m) +{ + struct hvcall_mpp_x_data mpp_x_data; + + if (!firmware_has_feature(FW_FEATURE_XCMO)) + return; + if (h_get_mpp_x(&mpp_x_data)) + return; + + seq_printf(m, "coalesced_bytes=%ld\n", mpp_x_data.coalesced_bytes); + + if (mpp_x_data.pool_coalesced_bytes) + seq_printf(m, "pool_coalesced_bytes=%ld\n", + mpp_x_data.pool_coalesced_bytes); + if (mpp_x_data.pool_purr_cycles) + seq_printf(m, "coalesce_pool_purr=%ld\n", mpp_x_data.pool_purr_cycles); + if (mpp_x_data.pool_spurr_cycles) + seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles); +} + #define SPLPAR_CHARACTERISTICS_TOKEN 20 #define SPLPAR_MAXLENGTH 1026*(sizeof(char)) @@ -520,6 +516,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) parse_system_parameter_string(m); parse_ppp_data(m); parse_mpp_data(m); + parse_mpp_x_data(m); pseries_cmo_data(m); splpar_dispatch_data(m); diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index df7e20c191c..7ee50f0547c 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -15,6 +15,7 @@ #include <linux/memblock.h> #include <linux/of.h> #include <linux/irq.h> +#include <linux/ftrace.h> #include <asm/machdep.h> #include <asm/prom.h> @@ -25,29 +26,29 @@ void machine_kexec_mask_interrupts(void) { for_each_irq(i) { struct irq_desc *desc = irq_to_desc(i); + struct irq_chip *chip; - if (!desc || !desc->chip) + if (!desc) continue; - if (desc->chip->eoi && - desc->status & IRQ_INPROGRESS) - desc->chip->eoi(i); + chip = irq_desc_get_chip(desc); + if (!chip) + continue; + + if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) + chip->irq_eoi(&desc->irq_data); - if (desc->chip->mask) - desc->chip->mask(i); + if (chip->irq_mask) + chip->irq_mask(&desc->irq_data); - if (desc->chip->disable && - !(desc->status & IRQ_DISABLED)) - desc->chip->disable(i); + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) + chip->irq_disable(&desc->irq_data); } } void machine_crash_shutdown(struct pt_regs *regs) { - if (ppc_md.machine_crash_shutdown) - ppc_md.machine_crash_shutdown(regs); - else - default_machine_crash_shutdown(regs); + default_machine_crash_shutdown(regs); } /* @@ -65,8 +66,6 @@ int machine_kexec_prepare(struct kimage *image) void machine_kexec_cleanup(struct kimage *image) { - if (ppc_md.machine_kexec_cleanup) - ppc_md.machine_kexec_cleanup(image); } void arch_crash_save_vmcoreinfo(void) @@ -87,11 +86,17 @@ void arch_crash_save_vmcoreinfo(void) */ void machine_kexec(struct kimage *image) { + int save_ftrace_enabled; + + save_ftrace_enabled = __ftrace_enabled_save(); + if (ppc_md.machine_kexec) ppc_md.machine_kexec(image); else default_machine_kexec(image); + __ftrace_enabled_restore(save_ftrace_enabled); + /* Fall back to normal restart if we're still alive. */ machine_restart(NULL); for(;;); diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 2d29752cbe1..b69463ec201 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -122,8 +122,3 @@ _GLOBAL(longjmp) mtlr r0 mr r3,r4 blr - -_GLOBAL(__setup_cpu_power7) -_GLOBAL(__restore_cpu_power7) - /* place holder */ - blr diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index a7a570dcdd5..998a1002860 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -30,6 +30,7 @@ #include <asm/processor.h> #include <asm/kexec.h> #include <asm/bug.h> +#include <asm/ptrace.h> .text @@ -693,6 +694,17 @@ _GLOBAL(kernel_thread) addi r1,r1,16 blr +#ifdef CONFIG_SMP +_GLOBAL(start_secondary_resume) + /* Reset stack */ + rlwinm r1,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + li r3,0 + stw r3,0(r1) /* Zero the stack frame pointer */ + bl start_secondary + b . +#endif /* CONFIG_SMP */ + /* * This routine is just here to keep GCC happy - sigh... */ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index e5144906a56..e89df59cdc5 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -25,6 +25,7 @@ #include <asm/cputable.h> #include <asm/thread_info.h> #include <asm/kexec.h> +#include <asm/ptrace.h> .text @@ -461,7 +462,8 @@ _GLOBAL(disable_kernel_fp) * wait for the flag to change, indicating this kernel is going away but * the slave code for the next one is at addresses 0 to 100. * - * This is used by all slaves. + * This is used by all slaves, even those that did not find a matching + * paca in the secondary startup code. * * Physical (hardware) cpu id should be in r3. */ @@ -470,10 +472,6 @@ _GLOBAL(kexec_wait) 1: mflr r5 addi r5,r5,kexec_flag-1b - li r4,KEXEC_STATE_REAL_MODE - stb r4,PACAKEXECSTATE(r13) - SYNC - 99: HMT_LOW #ifdef CONFIG_KEXEC /* use no memory without kexec */ lwz r4,0(r5) @@ -498,11 +496,17 @@ kexec_flag: * * get phys id from paca * switch to real mode + * mark the paca as no longer used * join other cpus in kexec_wait(phys_id) */ _GLOBAL(kexec_smp_wait) lhz r3,PACAHWCPUID(r13) bl real_mode + + li r4,KEXEC_STATE_REAL_MODE + stb r4,PACAKEXECSTATE(r13) + SYNC + b .kexec_wait /* diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c index 09d72028f31..2cc5e0301d0 100644 --- a/arch/powerpc/kernel/mpc7450-pmu.c +++ b/arch/powerpc/kernel/mpc7450-pmu.c @@ -414,4 +414,4 @@ static int init_mpc7450_pmu(void) return register_power_pmu(&mpc7450_pmu); } -arch_initcall(init_mpc7450_pmu); +early_initcall(init_mpc7450_pmu); diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 9cf197f01e9..bec1e930ed7 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -34,15 +34,26 @@ #undef DEBUG_NVRAM -static struct nvram_partition * nvram_part; -static long nvram_error_log_index = -1; -static long nvram_error_log_size = 0; +#define NVRAM_HEADER_LEN sizeof(struct nvram_header) +#define NVRAM_BLOCK_LEN NVRAM_HEADER_LEN + +/* If change this size, then change the size of NVNAME_LEN */ +struct nvram_header { + unsigned char signature; + unsigned char checksum; + unsigned short length; + /* Terminating null required only for names < 12 chars. */ + char name[12]; +}; -struct err_log_info { - int error_type; - unsigned int seq_num; +struct nvram_partition { + struct list_head partition; + struct nvram_header header; + unsigned int index; }; +static LIST_HEAD(nvram_partitions); + static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin) { int size; @@ -186,14 +197,12 @@ static struct miscdevice nvram_dev = { #ifdef DEBUG_NVRAM static void __init nvram_print_partitions(char * label) { - struct list_head * p; struct nvram_partition * tmp_part; printk(KERN_WARNING "--------%s---------\n", label); printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n"); - list_for_each(p, &nvram_part->partition) { - tmp_part = list_entry(p, struct nvram_partition, partition); - printk(KERN_WARNING "%4d \t%02x\t%02x\t%d\t%s\n", + list_for_each_entry(tmp_part, &nvram_partitions, partition) { + printk(KERN_WARNING "%4d \t%02x\t%02x\t%d\t%12s\n", tmp_part->index, tmp_part->header.signature, tmp_part->header.checksum, tmp_part->header.length, tmp_part->header.name); @@ -228,95 +237,136 @@ static unsigned char __init nvram_checksum(struct nvram_header *p) return c_sum; } -static int __init nvram_remove_os_partition(void) +/* + * Per the criteria passed via nvram_remove_partition(), should this + * partition be removed? 1=remove, 0=keep + */ +static int nvram_can_remove_partition(struct nvram_partition *part, + const char *name, int sig, const char *exceptions[]) +{ + if (part->header.signature != sig) + return 0; + if (name) { + if (strncmp(name, part->header.name, 12)) + return 0; + } else if (exceptions) { + const char **except; + for (except = exceptions; *except; except++) { + if (!strncmp(*except, part->header.name, 12)) + return 0; + } + } + return 1; +} + +/** + * nvram_remove_partition - Remove one or more partitions in nvram + * @name: name of the partition to remove, or NULL for a + * signature only match + * @sig: signature of the partition(s) to remove + * @exceptions: When removing all partitions with a matching signature, + * leave these alone. + */ + +int __init nvram_remove_partition(const char *name, int sig, + const char *exceptions[]) { - struct list_head *i; - struct list_head *j; - struct nvram_partition * part; - struct nvram_partition * cur_part; + struct nvram_partition *part, *prev, *tmp; int rc; - list_for_each(i, &nvram_part->partition) { - part = list_entry(i, struct nvram_partition, partition); - if (part->header.signature != NVRAM_SIG_OS) + list_for_each_entry(part, &nvram_partitions, partition) { + if (!nvram_can_remove_partition(part, name, sig, exceptions)) continue; - - /* Make os partition a free partition */ + + /* Make partition a free partition */ part->header.signature = NVRAM_SIG_FREE; - sprintf(part->header.name, "wwwwwwwwwwww"); + strncpy(part->header.name, "wwwwwwwwwwww", 12); part->header.checksum = nvram_checksum(&part->header); - - /* Merge contiguous free partitions backwards */ - list_for_each_prev(j, &part->partition) { - cur_part = list_entry(j, struct nvram_partition, partition); - if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) { - break; - } - - part->header.length += cur_part->header.length; - part->header.checksum = nvram_checksum(&part->header); - part->index = cur_part->index; - - list_del(&cur_part->partition); - kfree(cur_part); - j = &part->partition; /* fixup our loop */ - } - - /* Merge contiguous free partitions forwards */ - list_for_each(j, &part->partition) { - cur_part = list_entry(j, struct nvram_partition, partition); - if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) { - break; - } - - part->header.length += cur_part->header.length; - part->header.checksum = nvram_checksum(&part->header); - - list_del(&cur_part->partition); - kfree(cur_part); - j = &part->partition; /* fixup our loop */ - } - rc = nvram_write_header(part); if (rc <= 0) { - printk(KERN_ERR "nvram_remove_os_partition: nvram_write failed (%d)\n", rc); + printk(KERN_ERR "nvram_remove_partition: nvram_write failed (%d)\n", rc); return rc; } + } + /* Merge contiguous ones */ + prev = NULL; + list_for_each_entry_safe(part, tmp, &nvram_partitions, partition) { + if (part->header.signature != NVRAM_SIG_FREE) { + prev = NULL; + continue; + } + if (prev) { + prev->header.length += part->header.length; + prev->header.checksum = nvram_checksum(&part->header); + rc = nvram_write_header(part); + if (rc <= 0) { + printk(KERN_ERR "nvram_remove_partition: nvram_write failed (%d)\n", rc); + return rc; + } + list_del(&part->partition); + kfree(part); + } else + prev = part; } return 0; } -/* nvram_create_os_partition +/** + * nvram_create_partition - Create a partition in nvram + * @name: name of the partition to create + * @sig: signature of the partition to create + * @req_size: size of data to allocate in bytes + * @min_size: minimum acceptable size (0 means req_size) * - * Create a OS linux partition to buffer error logs. - * Will create a partition starting at the first free - * space found if space has enough room. + * Returns a negative error code or a positive nvram index + * of the beginning of the data area of the newly created + * partition. If you provided a min_size smaller than req_size + * you need to query for the actual size yourself after the + * call using nvram_partition_get_size(). */ -static int __init nvram_create_os_partition(void) +loff_t __init nvram_create_partition(const char *name, int sig, + int req_size, int min_size) { struct nvram_partition *part; struct nvram_partition *new_part; struct nvram_partition *free_part = NULL; - int seq_init[2] = { 0, 0 }; + static char nv_init_vals[16]; loff_t tmp_index; long size = 0; int rc; - + + /* Convert sizes from bytes to blocks */ + req_size = _ALIGN_UP(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN; + min_size = _ALIGN_UP(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN; + + /* If no minimum size specified, make it the same as the + * requested size + */ + if (min_size == 0) + min_size = req_size; + if (min_size > req_size) + return -EINVAL; + + /* Now add one block to each for the header */ + req_size += 1; + min_size += 1; + /* Find a free partition that will give us the maximum needed size If can't find one that will give us the minimum size needed */ - list_for_each_entry(part, &nvram_part->partition, partition) { + list_for_each_entry(part, &nvram_partitions, partition) { if (part->header.signature != NVRAM_SIG_FREE) continue; - if (part->header.length >= NVRAM_MAX_REQ) { - size = NVRAM_MAX_REQ; + if (part->header.length >= req_size) { + size = req_size; free_part = part; break; } - if (!size && part->header.length >= NVRAM_MIN_REQ) { - size = NVRAM_MIN_REQ; + if (part->header.length > size && + part->header.length >= min_size) { + size = part->header.length; free_part = part; } } @@ -326,136 +376,95 @@ static int __init nvram_create_os_partition(void) /* Create our OS partition */ new_part = kmalloc(sizeof(*new_part), GFP_KERNEL); if (!new_part) { - printk(KERN_ERR "nvram_create_os_partition: kmalloc failed\n"); + pr_err("nvram_create_os_partition: kmalloc failed\n"); return -ENOMEM; } new_part->index = free_part->index; - new_part->header.signature = NVRAM_SIG_OS; + new_part->header.signature = sig; new_part->header.length = size; - strcpy(new_part->header.name, "ppc64,linux"); + strncpy(new_part->header.name, name, 12); new_part->header.checksum = nvram_checksum(&new_part->header); rc = nvram_write_header(new_part); if (rc <= 0) { - printk(KERN_ERR "nvram_create_os_partition: nvram_write_header " - "failed (%d)\n", rc); - return rc; - } - - /* make sure and initialize to zero the sequence number and the error - type logged */ - tmp_index = new_part->index + NVRAM_HEADER_LEN; - rc = ppc_md.nvram_write((char *)&seq_init, sizeof(seq_init), &tmp_index); - if (rc <= 0) { - printk(KERN_ERR "nvram_create_os_partition: nvram_write " + pr_err("nvram_create_os_partition: nvram_write_header " "failed (%d)\n", rc); return rc; } - - nvram_error_log_index = new_part->index + NVRAM_HEADER_LEN; - nvram_error_log_size = ((part->header.length - 1) * - NVRAM_BLOCK_LEN) - sizeof(struct err_log_info); - list_add_tail(&new_part->partition, &free_part->partition); - if (free_part->header.length <= size) { + /* Adjust or remove the partition we stole the space from */ + if (free_part->header.length > size) { + free_part->index += size * NVRAM_BLOCK_LEN; + free_part->header.length -= size; + free_part->header.checksum = nvram_checksum(&free_part->header); + rc = nvram_write_header(free_part); + if (rc <= 0) { + pr_err("nvram_create_os_partition: nvram_write_header " + "failed (%d)\n", rc); + return rc; + } + } else { list_del(&free_part->partition); kfree(free_part); - return 0; } - /* Adjust the partition we stole the space from */ - free_part->index += size * NVRAM_BLOCK_LEN; - free_part->header.length -= size; - free_part->header.checksum = nvram_checksum(&free_part->header); - - rc = nvram_write_header(free_part); - if (rc <= 0) { - printk(KERN_ERR "nvram_create_os_partition: nvram_write_header " - "failed (%d)\n", rc); - return rc; + /* Clear the new partition */ + for (tmp_index = new_part->index + NVRAM_HEADER_LEN; + tmp_index < ((size - 1) * NVRAM_BLOCK_LEN); + tmp_index += NVRAM_BLOCK_LEN) { + rc = ppc_md.nvram_write(nv_init_vals, NVRAM_BLOCK_LEN, &tmp_index); + if (rc <= 0) { + pr_err("nvram_create_partition: nvram_write failed (%d)\n", rc); + return rc; + } } - - return 0; + + return new_part->index + NVRAM_HEADER_LEN; } - -/* nvram_setup_partition - * - * This will setup the partition we need for buffering the - * error logs and cleanup partitions if needed. - * - * The general strategy is the following: - * 1.) If there is ppc64,linux partition large enough then use it. - * 2.) If there is not a ppc64,linux partition large enough, search - * for a free partition that is large enough. - * 3.) If there is not a free partition large enough remove - * _all_ OS partitions and consolidate the space. - * 4.) Will first try getting a chunk that will satisfy the maximum - * error log size (NVRAM_MAX_REQ). - * 5.) If the max chunk cannot be allocated then try finding a chunk - * that will satisfy the minum needed (NVRAM_MIN_REQ). +/** + * nvram_get_partition_size - Get the data size of an nvram partition + * @data_index: This is the offset of the start of the data of + * the partition. The same value that is returned by + * nvram_create_partition(). */ -static int __init nvram_setup_partition(void) +int nvram_get_partition_size(loff_t data_index) { - struct list_head * p; - struct nvram_partition * part; - int rc; - - /* For now, we don't do any of this on pmac, until I - * have figured out if it's worth killing some unused stuffs - * in our nvram, as Apple defined partitions use pretty much - * all of the space - */ - if (machine_is(powermac)) - return -ENOSPC; - - /* see if we have an OS partition that meets our needs. - will try getting the max we need. If not we'll delete - partitions and try again. */ - list_for_each(p, &nvram_part->partition) { - part = list_entry(p, struct nvram_partition, partition); - if (part->header.signature != NVRAM_SIG_OS) - continue; + struct nvram_partition *part; + + list_for_each_entry(part, &nvram_partitions, partition) { + if (part->index + NVRAM_HEADER_LEN == data_index) + return (part->header.length - 1) * NVRAM_BLOCK_LEN; + } + return -1; +} - if (strcmp(part->header.name, "ppc64,linux")) - continue; - if (part->header.length >= NVRAM_MIN_REQ) { - /* found our partition */ - nvram_error_log_index = part->index + NVRAM_HEADER_LEN; - nvram_error_log_size = ((part->header.length - 1) * - NVRAM_BLOCK_LEN) - sizeof(struct err_log_info); - return 0; +/** + * nvram_find_partition - Find an nvram partition by signature and name + * @name: Name of the partition or NULL for any name + * @sig: Signature to test against + * @out_size: if non-NULL, returns the size of the data part of the partition + */ +loff_t nvram_find_partition(const char *name, int sig, int *out_size) +{ + struct nvram_partition *p; + + list_for_each_entry(p, &nvram_partitions, partition) { + if (p->header.signature == sig && + (!name || !strncmp(p->header.name, name, 12))) { + if (out_size) + *out_size = (p->header.length - 1) * + NVRAM_BLOCK_LEN; + return p->index + NVRAM_HEADER_LEN; } } - - /* try creating a partition with the free space we have */ - rc = nvram_create_os_partition(); - if (!rc) { - return 0; - } - - /* need to free up some space */ - rc = nvram_remove_os_partition(); - if (rc) { - return rc; - } - - /* create a partition in this new space */ - rc = nvram_create_os_partition(); - if (rc) { - printk(KERN_ERR "nvram_create_os_partition: Could not find a " - "NVRAM partition large enough\n"); - return rc; - } - return 0; } - -static int __init nvram_scan_partitions(void) +int __init nvram_scan_partitions(void) { loff_t cur_index = 0; struct nvram_header phead; @@ -465,7 +474,7 @@ static int __init nvram_scan_partitions(void) int total_size; int err; - if (ppc_md.nvram_size == NULL) + if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0) return -ENODEV; total_size = ppc_md.nvram_size(); @@ -512,12 +521,16 @@ static int __init nvram_scan_partitions(void) memcpy(&tmp_part->header, &phead, NVRAM_HEADER_LEN); tmp_part->index = cur_index; - list_add_tail(&tmp_part->partition, &nvram_part->partition); + list_add_tail(&tmp_part->partition, &nvram_partitions); cur_index += phead.length * NVRAM_BLOCK_LEN; } err = 0; +#ifdef DEBUG_NVRAM + nvram_print_partitions("NVRAM Partitions"); +#endif + out: kfree(header); return err; @@ -525,9 +538,10 @@ static int __init nvram_scan_partitions(void) static int __init nvram_init(void) { - int error; int rc; + BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16); + if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0) return -ENODEV; @@ -537,29 +551,6 @@ static int __init nvram_init(void) return rc; } - /* initialize our anchor for the nvram partition list */ - nvram_part = kmalloc(sizeof(struct nvram_partition), GFP_KERNEL); - if (!nvram_part) { - printk(KERN_ERR "nvram_init: Failed kmalloc\n"); - return -ENOMEM; - } - INIT_LIST_HEAD(&nvram_part->partition); - - /* Get all the NVRAM partitions */ - error = nvram_scan_partitions(); - if (error) { - printk(KERN_ERR "nvram_init: Failed nvram_scan_partitions\n"); - return error; - } - - if(nvram_setup_partition()) - printk(KERN_WARNING "nvram_init: Could not find nvram partition" - " for nvram buffered error logging.\n"); - -#ifdef DEBUG_NVRAM - nvram_print_partitions("NVRAM Partitions"); -#endif - return rc; } @@ -568,135 +559,6 @@ void __exit nvram_cleanup(void) misc_deregister( &nvram_dev ); } - -#ifdef CONFIG_PPC_PSERIES - -/* nvram_write_error_log - * - * We need to buffer the error logs into nvram to ensure that we have - * the failure information to decode. If we have a severe error there - * is no way to guarantee that the OS or the machine is in a state to - * get back to user land and write the error to disk. For example if - * the SCSI device driver causes a Machine Check by writing to a bad - * IO address, there is no way of guaranteeing that the device driver - * is in any state that is would also be able to write the error data - * captured to disk, thus we buffer it in NVRAM for analysis on the - * next boot. - * - * In NVRAM the partition containing the error log buffer will looks like: - * Header (in bytes): - * +-----------+----------+--------+------------+------------------+ - * | signature | checksum | length | name | data | - * |0 |1 |2 3|4 15|16 length-1| - * +-----------+----------+--------+------------+------------------+ - * - * The 'data' section would look like (in bytes): - * +--------------+------------+-----------------------------------+ - * | event_logged | sequence # | error log | - * |0 3|4 7|8 nvram_error_log_size-1| - * +--------------+------------+-----------------------------------+ - * - * event_logged: 0 if event has not been logged to syslog, 1 if it has - * sequence #: The unique sequence # for each event. (until it wraps) - * error log: The error log from event_scan - */ -int nvram_write_error_log(char * buff, int length, - unsigned int err_type, unsigned int error_log_cnt) -{ - int rc; - loff_t tmp_index; - struct err_log_info info; - - if (nvram_error_log_index == -1) { - return -ESPIPE; - } - - if (length > nvram_error_log_size) { - length = nvram_error_log_size; - } - - info.error_type = err_type; - info.seq_num = error_log_cnt; - - tmp_index = nvram_error_log_index; - - rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index); - if (rc <= 0) { - printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc); - return rc; - } - - rc = ppc_md.nvram_write(buff, length, &tmp_index); - if (rc <= 0) { - printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc); - return rc; - } - - return 0; -} - -/* nvram_read_error_log - * - * Reads nvram for error log for at most 'length' - */ -int nvram_read_error_log(char * buff, int length, - unsigned int * err_type, unsigned int * error_log_cnt) -{ - int rc; - loff_t tmp_index; - struct err_log_info info; - - if (nvram_error_log_index == -1) - return -1; - - if (length > nvram_error_log_size) - length = nvram_error_log_size; - - tmp_index = nvram_error_log_index; - - rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index); - if (rc <= 0) { - printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); - return rc; - } - - rc = ppc_md.nvram_read(buff, length, &tmp_index); - if (rc <= 0) { - printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); - return rc; - } - - *error_log_cnt = info.seq_num; - *err_type = info.error_type; - - return 0; -} - -/* This doesn't actually zero anything, but it sets the event_logged - * word to tell that this event is safely in syslog. - */ -int nvram_clear_error_log(void) -{ - loff_t tmp_index; - int clear_word = ERR_FLAG_ALREADY_LOGGED; - int rc; - - if (nvram_error_log_index == -1) - return -1; - - tmp_index = nvram_error_log_index; - - rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index); - if (rc <= 0) { - printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc); - return rc; - } - - return 0; -} - -#endif /* CONFIG_PPC_PSERIES */ - module_init(nvram_init); module_exit(nvram_cleanup); MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index b2c363ef38a..24582181b6e 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -36,8 +36,7 @@ * lacking some bits needed here. */ -static int __devinit of_pci_phb_probe(struct platform_device *dev, - const struct of_device_id *match) +static int __devinit of_pci_phb_probe(struct platform_device *dev) { struct pci_controller *phb; @@ -74,7 +73,7 @@ static int __devinit of_pci_phb_probe(struct platform_device *dev, #endif /* CONFIG_EEH */ /* Scan the bus */ - pcibios_scan_phb(phb, dev->dev.of_node); + pcibios_scan_phb(phb); if (phb->bus == NULL) return -ENXIO; @@ -104,7 +103,7 @@ static struct of_device_id of_pci_phb_ids[] = { {} }; -static struct of_platform_driver of_pci_phb_driver = { +static struct platform_driver of_pci_phb_driver = { .probe = of_pci_phb_probe, .driver = { .name = "of-pci", @@ -115,7 +114,7 @@ static struct of_platform_driver of_pci_phb_driver = { static __init int of_pci_phb_init(void) { - return of_register_platform_driver(&of_pci_phb_driver); + return platform_driver_register(&of_pci_phb_driver); } device_initcall(of_pci_phb_init); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index ebf9846f3c3..efeb8818418 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -7,7 +7,7 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/threads.h> +#include <linux/smp.h> #include <linux/module.h> #include <linux/memblock.h> @@ -27,20 +27,6 @@ extern unsigned long __toc_start; #ifdef CONFIG_PPC_BOOK3S /* - * We only have to have statically allocated lppaca structs on - * legacy iSeries, which supports at most 64 cpus. - */ -#ifdef CONFIG_PPC_ISERIES -#if NR_CPUS < 64 -#define NR_LPPACAS NR_CPUS -#else -#define NR_LPPACAS 64 -#endif -#else /* not iSeries */ -#define NR_LPPACAS 1 -#endif - -/* * The structure which the hypervisor knows about - this structure * should not cross a page boundary. The vpa_init/register_vpa call * is now known to fail if the lppaca structure crosses a page @@ -170,18 +156,29 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) /* Put the paca pointer into r13 and SPRG_PACA */ void setup_paca(struct paca_struct *new_paca) { + /* Setup r13 */ local_paca = new_paca; - mtspr(SPRN_SPRG_PACA, local_paca); + #ifdef CONFIG_PPC_BOOK3E + /* On Book3E, initialize the TLB miss exception frames */ mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); +#else + /* In HV mode, we setup both HPACA and PACA to avoid problems + * if we do a GET_PACA() before the feature fixups have been + * applied + */ + if (cpu_has_feature(CPU_FTR_HVMODE_206)) + mtspr(SPRN_SPRG_HPACA, local_paca); #endif + mtspr(SPRN_SPRG_PACA, local_paca); + } static int __initdata paca_size; void __init allocate_pacas(void) { - int nr_cpus, cpu, limit; + int cpu, limit; /* * We can't take SLB misses on the paca, and we want to access them @@ -193,23 +190,18 @@ void __init allocate_pacas(void) if (firmware_has_feature(FW_FEATURE_ISERIES)) limit = min(limit, HvPagesToMap * HVPAGESIZE); - nr_cpus = NR_CPUS; - /* On iSeries we know we can never have more than 64 cpus */ - if (firmware_has_feature(FW_FEATURE_ISERIES)) - nr_cpus = min(64, nr_cpus); - - paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpus); + paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit)); memset(paca, 0, paca_size); printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n", - paca_size, nr_cpus, paca); + paca_size, nr_cpu_ids, paca); - allocate_lppacas(nr_cpus, limit); + allocate_lppacas(nr_cpu_ids, limit); /* Can't use for_each_*_cpu, as they aren't functional yet */ - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_cpu_ids; cpu++) initialise_paca(&paca[cpu], cpu); } @@ -217,7 +209,7 @@ void __init free_unused_pacas(void) { int new_size; - new_size = PAGE_ALIGN(sizeof(struct paca_struct) * num_possible_cpus()); + new_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids); if (new_size >= paca_size) return; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 10a44e68ef1..893af2a9cd0 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -22,6 +22,7 @@ #include <linux/init.h> #include <linux/bootmem.h> #include <linux/of_address.h> +#include <linux/of_pci.h> #include <linux/mm.h> #include <linux/list.h> #include <linux/syscalls.h> @@ -260,7 +261,7 @@ int pci_read_irq_line(struct pci_dev *pci_dev) virq = irq_create_mapping(NULL, line); if (virq != NO_IRQ) - set_irq_type(virq, IRQ_TYPE_LEVEL_LOW); + irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW); } else { pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n", oirq.size, oirq.specifier[0], oirq.specifier[1], @@ -1687,13 +1688,8 @@ int early_find_capability(struct pci_controller *hose, int bus, int devfn, /** * pci_scan_phb - Given a pci_controller, setup and scan the PCI bus * @hose: Pointer to the PCI host controller instance structure - * @sysdata: value to use for sysdata pointer. ppc32 and ppc64 differ here - * - * Note: the 'data' pointer is a temporary measure. As 32 and 64 bit - * pci code gets merged, this parameter should become unnecessary because - * both will use the same value. */ -void __devinit pcibios_scan_phb(struct pci_controller *hose, void *sysdata) +void __devinit pcibios_scan_phb(struct pci_controller *hose) { struct pci_bus *bus; struct device_node *node = hose->dn; @@ -1703,13 +1699,13 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose, void *sysdata) node ? node->full_name : "<NO NAME>"); /* Create an empty bus for the toplevel */ - bus = pci_create_bus(hose->parent, hose->first_busno, hose->ops, - sysdata); + bus = pci_create_bus(hose->parent, hose->first_busno, hose->ops, hose); if (bus == NULL) { pr_err("Failed to create bus for PCI domain %04x\n", hose->global_number); return; } + bus->dev.of_node = of_node_get(node); bus->secondary = hose->first_busno; hose->bus = bus; diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index e7db5b48004..bedb370459f 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -381,7 +381,7 @@ static int __init pcibios_init(void) if (pci_assign_all_buses) hose->first_busno = next_busno; hose->last_busno = 0xff; - pcibios_scan_phb(hose, hose); + pcibios_scan_phb(hose); pci_bus_add_devices(hose->bus); if (pci_assign_all_buses || next_busno <= hose->last_busno) next_busno = hose->last_busno + pcibios_assign_bus_offset; diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index d43fc65749c..fc6452b6be9 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -64,7 +64,7 @@ static int __init pcibios_init(void) /* Scan all of the recorded PCI controllers. */ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { - pcibios_scan_phb(hose, hose->dn); + pcibios_scan_phb(hose); pci_bus_add_devices(hose->bus); } @@ -193,8 +193,7 @@ int __devinit pcibios_map_io_space(struct pci_bus *bus) hose->io_resource.start += io_virt_offset; hose->io_resource.end += io_virt_offset; - pr_debug(" hose->io_resource=0x%016llx...0x%016llx\n", - hose->io_resource.start, hose->io_resource.end); + pr_debug(" hose->io_resource=%pR\n", &hose->io_resource); return 0; } @@ -243,10 +242,10 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus, break; bus = NULL; } - if (bus == NULL || bus->sysdata == NULL) + if (bus == NULL || bus->dev.of_node == NULL) return -ENODEV; - hose_node = (struct device_node *)bus->sysdata; + hose_node = bus->dev.of_node; hose = PCI_DN(hose_node)->phb; switch (which) { diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index d56b35ee7f7..6baabc13306 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -43,10 +43,9 @@ void * __devinit update_dn_pci_info(struct device_node *dn, void *data) const u32 *regs; struct pci_dn *pdn; - pdn = alloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL); + pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL); if (pdn == NULL) return NULL; - memset(pdn, 0, sizeof(*pdn)); dn->data = pdn; pdn->node = dn; pdn->phb = phb; @@ -161,7 +160,7 @@ static void *is_devfn_node(struct device_node *dn, void *data) /* * This is the "slow" path for looking up a device_node from a * pci_dev. It will hunt for the device under its parent's - * phb and then update sysdata for a future fastpath. + * phb and then update of_node pointer. * * It may also do fixups on the actual device since this happens * on the first read/write. @@ -170,16 +169,22 @@ static void *is_devfn_node(struct device_node *dn, void *data) * In this case it may probe for real hardware ("just in case") * and add a device_node to the device tree if necessary. * + * Is this function necessary anymore now that dev->dev.of_node is + * used to store the node pointer? + * */ struct device_node *fetch_dev_dn(struct pci_dev *dev) { - struct device_node *orig_dn = dev->sysdata; + struct pci_controller *phb = dev->sysdata; struct device_node *dn; unsigned long searchval = (dev->bus->number << 8) | dev->devfn; - dn = traverse_pci_devices(orig_dn, is_devfn_node, (void *)searchval); + if (WARN_ON(!phb)) + return NULL; + + dn = traverse_pci_devices(phb->dn, is_devfn_node, (void *)searchval); if (dn) - dev->sysdata = dn; + dev->dev.of_node = dn; return dn; } EXPORT_SYMBOL(fetch_dev_dn); diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index e751506323b..1e89a72fd03 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -135,7 +135,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, pr_debug(" create device, devfn: %x, type: %s\n", devfn, type); dev->bus = bus; - dev->sysdata = node; + dev->dev.of_node = of_node_get(node); dev->dev.parent = bus->bridge; dev->dev.bus = &pci_bus_type; dev->devfn = devfn; @@ -238,7 +238,7 @@ void __devinit of_scan_pci_bridge(struct device_node *node, bus->primary = dev->bus->number; bus->subordinate = busrange[1]; bus->bridge_ctl = 0; - bus->sysdata = node; + bus->dev.of_node = of_node_get(node); /* parse ranges property */ /* PCI #address-cells == 3 and #size-cells == 2 always */ diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 3129c855933..822f63008ae 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -398,6 +398,25 @@ static int check_excludes(struct perf_event **ctrs, unsigned int cflags[], return 0; } +static u64 check_and_compute_delta(u64 prev, u64 val) +{ + u64 delta = (val - prev) & 0xfffffffful; + + /* + * POWER7 can roll back counter values, if the new value is smaller + * than the previous value it will cause the delta and the counter to + * have bogus values unless we rolled a counter over. If a coutner is + * rolled back, it will be smaller, but within 256, which is the maximum + * number of events to rollback at once. If we dectect a rollback + * return 0. This can lead to a small lack of precision in the + * counters. + */ + if (prev > val && (prev - val) < 256) + delta = 0; + + return delta; +} + static void power_pmu_read(struct perf_event *event) { s64 val, delta, prev; @@ -416,10 +435,11 @@ static void power_pmu_read(struct perf_event *event) prev = local64_read(&event->hw.prev_count); barrier(); val = read_pmc(event->hw.idx); + delta = check_and_compute_delta(prev, val); + if (!delta) + return; } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev); - /* The counters are only 32 bits wide */ - delta = (val - prev) & 0xfffffffful; local64_add(delta, &event->count); local64_sub(delta, &event->hw.period_left); } @@ -449,8 +469,9 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw, val = (event->hw.idx == 5) ? pmc5 : pmc6; prev = local64_read(&event->hw.prev_count); event->hw.idx = 0; - delta = (val - prev) & 0xfffffffful; - local64_add(delta, &event->count); + delta = check_and_compute_delta(prev, val); + if (delta) + local64_add(delta, &event->count); } } @@ -458,14 +479,16 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw, unsigned long pmc5, unsigned long pmc6) { struct perf_event *event; - u64 val; + u64 val, prev; int i; for (i = 0; i < cpuhw->n_limited; ++i) { event = cpuhw->limited_counter[i]; event->hw.idx = cpuhw->limited_hwidx[i]; val = (event->hw.idx == 5) ? pmc5 : pmc6; - local64_set(&event->hw.prev_count, val); + prev = local64_read(&event->hw.prev_count); + if (check_and_compute_delta(prev, val)) + local64_set(&event->hw.prev_count, val); perf_event_update_userpage(event); } } @@ -759,7 +782,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) /* * If group events scheduling transaction was started, - * skip the schedulability test here, it will be peformed + * skip the schedulability test here, it will be performed * at commit time(->commit_txn) as a whole */ if (cpuhw->group_flag & PERF_EVENT_TXN) @@ -1197,7 +1220,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); - delta = (val - prev) & 0xfffffffful; + delta = check_and_compute_delta(prev, val); local64_add(delta, &event->count); /* @@ -1212,6 +1235,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (left <= 0) left = period; record = 1; + event->hw.last_period = event->hw.sample_period; } if (left < 0x80000000LL) val = 0x80000000LL - left; @@ -1268,6 +1292,28 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs) return ip; } +static bool pmc_overflow(unsigned long val) +{ + if ((int)val < 0) + return true; + + /* + * Events on POWER7 can roll back if a speculative event doesn't + * eventually complete. Unfortunately in some rare cases they will + * raise a performance monitor exception. We need to catch this to + * ensure we reset the PMC. In all cases the PMC will be 256 or less + * cycles from overflow. + * + * We only do this if the first pass fails to find any overflowing + * PMCs because a user might set a period of less than 256 and we + * don't want to mistakenly reset them. + */ + if (__is_processor(PV_POWER7) && ((0x80000000 - val) <= 256)) + return true; + + return false; +} + /* * Performance monitor interrupt stuff */ @@ -1315,7 +1361,7 @@ static void perf_event_interrupt(struct pt_regs *regs) if (is_limited_pmc(i + 1)) continue; val = read_pmc(i + 1); - if ((int)val < 0) + if (pmc_overflow(val)) write_pmc(i + 1, 0); } } @@ -1379,7 +1425,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ - perf_pmu_register(&power_pmu); + perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW); perf_cpu_notifier(power_pmu_notifier); return 0; diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 7ecca59ddf7..b0dc8f7069c 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -596,6 +596,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (left <= 0) left = period; record = 1; + event->hw.last_period = event->hw.sample_period; } if (left < 0x80000000LL) val = 0x80000000LL - left; @@ -681,7 +682,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) pr_info("%s performance monitor hardware support registered\n", pmu->name); - perf_pmu_register(&fsl_emb_pmu); + perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW); return 0; } diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 2a361cdda63..ead8b3c2649 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -613,4 +613,4 @@ static int init_power4_pmu(void) return register_power_pmu(&power4_pmu); } -arch_initcall(init_power4_pmu); +early_initcall(init_power4_pmu); diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 199de527d41..eca0ac595cb 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -682,4 +682,4 @@ static int init_power5p_pmu(void) return register_power_pmu(&power5p_pmu); } -arch_initcall(init_power5p_pmu); +early_initcall(init_power5p_pmu); diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 98b6a729a9d..d5ff0f64a5e 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -621,4 +621,4 @@ static int init_power5_pmu(void) return register_power_pmu(&power5_pmu); } -arch_initcall(init_power5_pmu); +early_initcall(init_power5_pmu); diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index 84a607bda8f..31603927e37 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -544,4 +544,4 @@ static int init_power6_pmu(void) return register_power_pmu(&power6_pmu); } -arch_initcall(init_power6_pmu); +early_initcall(init_power6_pmu); diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 852f7b7f6b4..593740fcb79 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -369,4 +369,4 @@ static int init_power7_pmu(void) return register_power_pmu(&power7_pmu); } -arch_initcall(init_power7_pmu); +early_initcall(init_power7_pmu); diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 3fee685de4d..9a6e093858f 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -494,4 +494,4 @@ static int init_ppc970_pmu(void) return register_power_pmu(&ppc970_pmu); } -arch_initcall(init_ppc970_pmu); +early_initcall(init_ppc970_pmu); diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index ab3e392ac63..7d28f540200 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -54,7 +54,6 @@ extern void single_step_exception(struct pt_regs *regs); extern int sys_sigreturn(struct pt_regs *regs); EXPORT_SYMBOL(clear_pages); -EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(ISA_DMA_THRESHOLD); EXPORT_SYMBOL(DMA_MODE_READ); EXPORT_SYMBOL(DMA_MODE_WRITE); @@ -88,9 +87,7 @@ EXPORT_SYMBOL(__copy_tofrom_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__strncpy_from_user); EXPORT_SYMBOL(__strnlen_user); -#ifdef CONFIG_PPC64 -EXPORT_SYMBOL(copy_4K_page); -#endif +EXPORT_SYMBOL(copy_page); #if defined(CONFIG_PCI) && defined(CONFIG_PPC32) EXPORT_SYMBOL(isa_io_base); @@ -186,3 +183,10 @@ EXPORT_SYMBOL(__mtdcr); EXPORT_SYMBOL(__mfdcr); #endif EXPORT_SYMBOL(empty_zero_page); + +#ifdef CONFIG_PPC64 +EXPORT_SYMBOL(__arch_hweight8); +EXPORT_SYMBOL(__arch_hweight16); +EXPORT_SYMBOL(__arch_hweight32); +EXPORT_SYMBOL(__arch_hweight64); +#endif diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S index 5113bd2285e..1b1787d5289 100644 --- a/arch/powerpc/kernel/ppc_save_regs.S +++ b/arch/powerpc/kernel/ppc_save_regs.S @@ -11,10 +11,11 @@ #include <asm/processor.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> +#include <asm/ptrace.h> /* * Grab the register values as they are now. - * This won't do a particularily good job because we really + * This won't do a particularly good job because we really * want our caller's caller's registers, and our caller has * already executed its prologue. * ToDo: We could reach back into the caller's save area to do diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 84906d3fc86..095043d7994 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -353,6 +353,7 @@ static void switch_booke_debug_regs(struct thread_struct *new_thread) prime_debug_regs(new_thread); } #else /* !CONFIG_PPC_ADV_DEBUG_REGS */ +#ifndef CONFIG_HAVE_HW_BREAKPOINT static void set_debug_reg_defaults(struct thread_struct *thread) { if (thread->dabr) { @@ -360,6 +361,7 @@ static void set_debug_reg_defaults(struct thread_struct *thread) set_dabr(0); } } +#endif /* !CONFIG_HAVE_HW_BREAKPOINT */ #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ int set_dabr(unsigned long dabr) @@ -631,7 +633,7 @@ void show_regs(struct pt_regs * regs) #ifdef CONFIG_PPC_ADV_DEBUG_REGS printk("DEAR: "REG", ESR: "REG"\n", regs->dar, regs->dsisr); #else - printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr); + printk("DAR: "REG", DSISR: %08lx\n", regs->dar, regs->dsisr); #endif printk("TASK = %p[%d] '%s' THREAD: %p", current, task_pid_nr(current), current->comm, task_thread_info(current)); @@ -670,11 +672,11 @@ void flush_thread(void) { discard_lazy_cpu_state(); -#ifdef CONFIG_HAVE_HW_BREAKPOINTS +#ifdef CONFIG_HAVE_HW_BREAKPOINT flush_ptrace_hw_breakpoint(current); -#else /* CONFIG_HAVE_HW_BREAKPOINTS */ +#else /* CONFIG_HAVE_HW_BREAKPOINT */ set_debug_reg_defaults(¤t->thread); -#endif /* CONFIG_HAVE_HW_BREAKPOINTS */ +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ } void @@ -700,6 +702,8 @@ void prepare_to_copy(struct task_struct *tsk) /* * Copy a thread.. */ +extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */ + int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) @@ -753,11 +757,11 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, _ALIGN_UP(sizeof(struct thread_info), 16); #ifdef CONFIG_PPC_STD_MMU_64 - if (cpu_has_feature(CPU_FTR_SLB)) { + if (mmu_has_feature(MMU_FTR_SLB)) { unsigned long sp_vsid; unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; - if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T) << SLB_VSID_SHIFT_1T; else @@ -767,6 +771,20 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, p->thread.ksp_vsid = sp_vsid; } #endif /* CONFIG_PPC_STD_MMU_64 */ +#ifdef CONFIG_PPC64 + if (cpu_has_feature(CPU_FTR_DSCR)) { + if (current->thread.dscr_inherit) { + p->thread.dscr_inherit = 1; + p->thread.dscr = current->thread.dscr; + } else if (0 != dscr_default) { + p->thread.dscr_inherit = 1; + p->thread.dscr = dscr_default; + } else { + p->thread.dscr_inherit = 0; + p->thread.dscr = 0; + } + } +#endif /* * The PPC64 ABI makes use of a TOC to contain function @@ -1216,11 +1234,11 @@ void __ppc64_runlatch_off(void) static struct kmem_cache *thread_info_cache; -struct thread_info *alloc_thread_info(struct task_struct *tsk) +struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node) { struct thread_info *ti; - ti = kmem_cache_alloc(thread_info_cache, GFP_KERNEL); + ti = kmem_cache_alloc_node(thread_info_cache, GFP_KERNEL, node); if (unlikely(ti == NULL)) return NULL; #ifdef CONFIG_DEBUG_STACK_USAGE diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 9e3132db718..48aeb55faae 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -68,6 +68,7 @@ int __initdata iommu_force_on; unsigned long tce_alloc_start, tce_alloc_end; u64 ppc64_rma_size; #endif +static phys_addr_t first_memblock_size; static int __init early_parse_mem(char *p) { @@ -97,7 +98,7 @@ static void __init move_device_tree(void) start = __pa(initial_boot_params); size = be32_to_cpu(initial_boot_params->totalsize); - if ((memory_limit && (start + size) > memory_limit) || + if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) || overlaps_crashkernel(start, size)) { p = __va(memblock_alloc(size, PAGE_SIZE)); memcpy(p, initial_boot_params, size); @@ -123,18 +124,19 @@ static void __init move_device_tree(void) */ static struct ibm_pa_feature { unsigned long cpu_features; /* CPU_FTR_xxx bit */ + unsigned long mmu_features; /* MMU_FTR_xxx bit */ unsigned int cpu_user_ftrs; /* PPC_FEATURE_xxx bit */ unsigned char pabyte; /* byte number in ibm,pa-features */ unsigned char pabit; /* bit number (big-endian) */ unsigned char invert; /* if 1, pa bit set => clear feature */ } ibm_pa_features[] __initdata = { - {0, PPC_FEATURE_HAS_MMU, 0, 0, 0}, - {0, PPC_FEATURE_HAS_FPU, 0, 1, 0}, - {CPU_FTR_SLB, 0, 0, 2, 0}, - {CPU_FTR_CTRL, 0, 0, 3, 0}, - {CPU_FTR_NOEXECUTE, 0, 0, 6, 0}, - {CPU_FTR_NODSISRALIGN, 0, 1, 1, 1}, - {CPU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, + {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0}, + {0, 0, PPC_FEATURE_HAS_FPU, 0, 1, 0}, + {0, MMU_FTR_SLB, 0, 0, 2, 0}, + {CPU_FTR_CTRL, 0, 0, 0, 3, 0}, + {CPU_FTR_NOEXECUTE, 0, 0, 0, 6, 0}, + {CPU_FTR_NODSISRALIGN, 0, 0, 1, 1, 1}, + {0, MMU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0}, }; @@ -166,9 +168,11 @@ static void __init scan_features(unsigned long node, unsigned char *ftrs, if (bit ^ fp->invert) { cur_cpu_spec->cpu_features |= fp->cpu_features; cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs; + cur_cpu_spec->mmu_features |= fp->mmu_features; } else { cur_cpu_spec->cpu_features &= ~fp->cpu_features; cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs; + cur_cpu_spec->mmu_features &= ~fp->mmu_features; } } } @@ -268,13 +272,13 @@ static int __init early_init_dt_scan_cpus(unsigned long node, const char *uname, int depth, void *data) { - static int logical_cpuid = 0; char *type = of_get_flat_dt_prop(node, "device_type", NULL); const u32 *prop; const u32 *intserv; int i, nthreads; unsigned long len; - int found = 0; + int found = -1; + int found_thread = 0; /* We are scanning "cpu" nodes only */ if (type == NULL || strcmp(type, "cpu") != 0) @@ -298,11 +302,10 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * version 2 of the kexec param format adds the phys cpuid of * booted proc. */ - if (initial_boot_params && initial_boot_params->version >= 2) { - if (intserv[i] == - initial_boot_params->boot_cpuid_phys) { - found = 1; - break; + if (initial_boot_params->version >= 2) { + if (intserv[i] == initial_boot_params->boot_cpuid_phys) { + found = boot_cpu_count; + found_thread = i; } } else { /* @@ -311,23 +314,20 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * off secondary threads. */ if (of_get_flat_dt_prop(node, - "linux,boot-cpu", NULL) != NULL) { - found = 1; - break; - } + "linux,boot-cpu", NULL) != NULL) + found = boot_cpu_count; } - #ifdef CONFIG_SMP /* logical cpu id is always 0 on UP kernels */ - logical_cpuid++; + boot_cpu_count++; #endif } - if (found) { - DBG("boot cpu: logical %d physical %d\n", logical_cpuid, - intserv[i]); - boot_cpuid = logical_cpuid; - set_hard_smp_processor_id(boot_cpuid, intserv[i]); + if (found >= 0) { + DBG("boot cpu: logical %d physical %d\n", found, + intserv[found_thread]); + boot_cpuid = found; + set_hard_smp_processor_id(found, intserv[found_thread]); /* * PAPR defines "logical" PVR values for cpus that @@ -509,19 +509,22 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) size = 0x80000000ul - base; } #endif - - /* First MEMBLOCK added, do some special initializations */ - if (memstart_addr == ~(phys_addr_t)0) - setup_initial_memory_limit(base, size); - memstart_addr = min((u64)memstart_addr, base); + /* Keep track of the beginning of memory -and- the size of + * the very first block in the device-tree as it represents + * the RMA on ppc64 server + */ + if (base < memstart_addr) { + memstart_addr = base; + first_memblock_size = size; + } /* Add the chunk to the MEMBLOCK list */ memblock_add(base, size); } -u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align) +void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) { - return memblock_alloc(size, align); + return __va(memblock_alloc(size, align)); } #ifdef CONFIG_BLK_DEV_INITRD @@ -683,7 +686,7 @@ void __init early_init_devtree(void *params) #endif #ifdef CONFIG_PHYP_DUMP - /* scan tree to see if dump occured during last boot */ + /* scan tree to see if dump occurred during last boot */ of_scan_flat_dt(early_init_dt_scan_phyp_dump, NULL); #endif @@ -698,6 +701,7 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); + setup_initial_memory_limit(memstart_addr, first_memblock_size); /* Save command line for /proc/cmdline and then parse parameters */ strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); @@ -739,7 +743,7 @@ void __init early_init_devtree(void *params) DBG("Scanning CPUs ...\n"); - /* Retreive CPU related informations from the flat tree + /* Retrieve CPU related informations from the flat tree * (altivec support, boot CPU ID, ...) */ of_scan_flat_dt(early_init_dt_scan_cpus, NULL); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 941ff4dbc56..c016033ba78 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -335,6 +335,7 @@ static void __init prom_printf(const char *format, ...) const char *p, *q, *s; va_list args; unsigned long v; + long vs; struct prom_t *_prom = &RELOC(prom); va_start(args, format); @@ -368,12 +369,35 @@ static void __init prom_printf(const char *format, ...) v = va_arg(args, unsigned long); prom_print_hex(v); break; + case 'd': + ++q; + vs = va_arg(args, int); + if (vs < 0) { + prom_print(RELOC("-")); + vs = -vs; + } + prom_print_dec(vs); + break; case 'l': ++q; - if (*q == 'u') { /* '%lu' */ + if (*q == 0) + break; + else if (*q == 'x') { + ++q; + v = va_arg(args, unsigned long); + prom_print_hex(v); + } else if (*q == 'u') { /* '%lu' */ ++q; v = va_arg(args, unsigned long); prom_print_dec(v); + } else if (*q == 'd') { /* %ld */ + ++q; + vs = va_arg(args, long); + if (vs < 0) { + prom_print(RELOC("-")); + vs = -vs; + } + prom_print_dec(vs); } break; } @@ -676,8 +700,10 @@ static void __init early_cmdline_parse(void) #endif /* CONFIG_PCI_MSI */ #ifdef CONFIG_PPC_SMLPAR #define OV5_CMO 0x80 /* Cooperative Memory Overcommitment */ +#define OV5_XCMO 0x40 /* Page Coalescing */ #else #define OV5_CMO 0x00 +#define OV5_XCMO 0x00 #endif #define OV5_TYPE1_AFFINITY 0x80 /* Type 1 NUMA affinity */ @@ -732,7 +758,7 @@ static unsigned char ibm_architecture_vec[] = { OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | OV5_DONATE_DEDICATE_CPU | OV5_MSI, 0, - OV5_CMO, + OV5_CMO | OV5_XCMO, OV5_TYPE1_AFFINITY, 0, 0, diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c index 88334af038e..47187cc2cf0 100644 --- a/arch/powerpc/kernel/prom_parse.c +++ b/arch/powerpc/kernel/prom_parse.c @@ -2,95 +2,11 @@ #include <linux/kernel.h> #include <linux/string.h> -#include <linux/pci_regs.h> #include <linux/module.h> #include <linux/ioport.h> #include <linux/etherdevice.h> #include <linux/of_address.h> #include <asm/prom.h> -#include <asm/pci-bridge.h> - -#ifdef CONFIG_PCI -int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq) -{ - struct device_node *dn, *ppnode; - struct pci_dev *ppdev; - u32 lspec; - u32 laddr[3]; - u8 pin; - int rc; - - /* Check if we have a device node, if yes, fallback to standard OF - * parsing - */ - dn = pci_device_to_OF_node(pdev); - if (dn) { - rc = of_irq_map_one(dn, 0, out_irq); - if (!rc) - return rc; - } - - /* Ok, we don't, time to have fun. Let's start by building up an - * interrupt spec. we assume #interrupt-cells is 1, which is standard - * for PCI. If you do different, then don't use that routine. - */ - rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin); - if (rc != 0) - return rc; - /* No pin, exit */ - if (pin == 0) - return -ENODEV; - - /* Now we walk up the PCI tree */ - lspec = pin; - for (;;) { - /* Get the pci_dev of our parent */ - ppdev = pdev->bus->self; - - /* Ouch, it's a host bridge... */ - if (ppdev == NULL) { -#ifdef CONFIG_PPC64 - ppnode = pci_bus_to_OF_node(pdev->bus); -#else - struct pci_controller *host; - host = pci_bus_to_host(pdev->bus); - ppnode = host ? host->dn : NULL; -#endif - /* No node for host bridge ? give up */ - if (ppnode == NULL) - return -EINVAL; - } else - /* We found a P2P bridge, check if it has a node */ - ppnode = pci_device_to_OF_node(ppdev); - - /* Ok, we have found a parent with a device-node, hand over to - * the OF parsing code. - * We build a unit address from the linux device to be used for - * resolution. Note that we use the linux bus number which may - * not match your firmware bus numbering. - * Fortunately, in most cases, interrupt-map-mask doesn't include - * the bus number as part of the matching. - * You should still be careful about that though if you intend - * to rely on this function (you ship a firmware that doesn't - * create device nodes for all PCI devices). - */ - if (ppnode) - break; - - /* We can only get here if we hit a P2P bridge with no node, - * let's do standard swizzling and try again - */ - lspec = pci_swizzle_interrupt_pin(pdev, lspec); - pdev = ppdev; - } - - laddr[0] = (pdev->bus->number << 16) - | (pdev->devfn << 8); - laddr[1] = laddr[2] = 0; - return of_irq_map_raw(ppnode, &lspec, 1, laddr, out_irq); -} -EXPORT_SYMBOL_GPL(of_irq_map_pci); -#endif /* CONFIG_PCI */ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, unsigned long *busno, unsigned long *phys, unsigned long *size) @@ -117,41 +33,3 @@ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop, cells = prop ? *(u32 *)prop : of_n_size_cells(dn); *size = of_read_number(dma_window, cells); } - -/** - * Search the device tree for the best MAC address to use. 'mac-address' is - * checked first, because that is supposed to contain to "most recent" MAC - * address. If that isn't set, then 'local-mac-address' is checked next, - * because that is the default address. If that isn't set, then the obsolete - * 'address' is checked, just in case we're using an old device tree. - * - * Note that the 'address' property is supposed to contain a virtual address of - * the register set, but some DTS files have redefined that property to be the - * MAC address. - * - * All-zero MAC addresses are rejected, because those could be properties that - * exist in the device tree, but were not set by U-Boot. For example, the - * DTS could define 'mac-address' and 'local-mac-address', with zero MAC - * addresses. Some older U-Boots only initialized 'local-mac-address'. In - * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists - * but is all zeros. -*/ -const void *of_get_mac_address(struct device_node *np) -{ - struct property *pp; - - pp = of_find_property(np, "mac-address", NULL); - if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value)) - return pp->value; - - pp = of_find_property(np, "local-mac-address", NULL); - if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value)) - return pp->value; - - pp = of_find_property(np, "address", NULL); - if (pp && (pp->length == 6) && is_valid_ether_addr(pp->value)) - return pp->value; - - return NULL; -} -EXPORT_SYMBOL(of_get_mac_address); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 286d9783d93..a6ae1cfad86 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -229,12 +229,16 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - int ret; + int i, ret; if (target->thread.regs == NULL) return -EIO; - CHECK_FULL_REGS(target->thread.regs); + if (!FULL_REGS(target->thread.regs)) { + /* We have a partial register set. Fill 14-31 with bogus values */ + for (i = 14; i < 32; i++) + target->thread.regs->gpr[i] = NV_REG_POISON; + } ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, target->thread.regs, @@ -459,7 +463,7 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset, #ifdef CONFIG_VSX /* * Currently to set and and get all the vsx state, you need to call - * the fp and VMX calls aswell. This only get/sets the lower 32 + * the fp and VMX calls as well. This only get/sets the lower 32 * 128bit VSX registers. */ @@ -641,11 +645,16 @@ static int gpr32_get(struct task_struct *target, compat_ulong_t *k = kbuf; compat_ulong_t __user *u = ubuf; compat_ulong_t reg; + int i; if (target->thread.regs == NULL) return -EIO; - CHECK_FULL_REGS(target->thread.regs); + if (!FULL_REGS(target->thread.regs)) { + /* We have a partial register set. Fill 14-31 with bogus values */ + for (i = 14; i < 32; i++) + target->thread.regs->gpr[i] = NV_REG_POISON; + } pos /= sizeof(reg); count /= sizeof(reg); @@ -924,12 +933,16 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, if (data && !(data & DABR_TRANSLATION)) return -EIO; #ifdef CONFIG_HAVE_HW_BREAKPOINT + if (ptrace_get_breakpoints(task) < 0) + return -ESRCH; + bp = thread->ptrace_bps[0]; if ((!data) || !(data & (DABR_DATA_WRITE | DABR_DATA_READ))) { if (bp) { unregister_hw_breakpoint(bp); thread->ptrace_bps[0] = NULL; } + ptrace_put_breakpoints(task); return 0; } if (bp) { @@ -939,9 +952,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, (DABR_DATA_WRITE | DABR_DATA_READ), &attr.bp_type); ret = modify_user_hw_breakpoint(bp, &attr); - if (ret) + if (ret) { + ptrace_put_breakpoints(task); return ret; + } thread->ptrace_bps[0] = bp; + ptrace_put_breakpoints(task); thread->dabr = data; return 0; } @@ -956,9 +972,12 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, ptrace_triggered, task); if (IS_ERR(bp)) { thread->ptrace_bps[0] = NULL; + ptrace_put_breakpoints(task); return PTR_ERR(bp); } + ptrace_put_breakpoints(task); + #endif /* CONFIG_HAVE_HW_BREAKPOINT */ /* Move contents to the DABR register */ @@ -1316,6 +1335,10 @@ static int set_dac_range(struct task_struct *child, static long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) { +#ifndef CONFIG_PPC_ADV_DEBUG_REGS + unsigned long dabr; +#endif + if (bp_info->version != 1) return -ENOTSUPP; #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -1353,11 +1376,10 @@ static long ppc_set_hwdebug(struct task_struct *child, /* * We only support one data breakpoint */ - if (((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0) || - ((bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0) || - (bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_WRITE) || - (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) || - (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)) + if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 || + (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 || + bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT || + bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE) return -EINVAL; if (child->thread.dabr) @@ -1366,7 +1388,14 @@ static long ppc_set_hwdebug(struct task_struct *child, if ((unsigned long)bp_info->addr >= TASK_SIZE) return -EIO; - child->thread.dabr = (unsigned long)bp_info->addr; + dabr = (unsigned long)bp_info->addr & ~7UL; + dabr |= DABR_TRANSLATION; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ) + dabr |= DABR_DATA_READ; + if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE) + dabr |= DABR_DATA_WRITE; + + child->thread.dabr = dabr; return 1; #endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */ @@ -1406,37 +1435,42 @@ static long ppc_del_hwdebug(struct task_struct *child, long addr, long data) * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls, * we mark them as obsolete now, they will be removed in a future version */ -static long arch_ptrace_old(struct task_struct *child, long request, long addr, - long data) +static long arch_ptrace_old(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { + void __user *datavp = (void __user *) data; + switch (request) { case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */ return copy_regset_to_user(child, &user_ppc_native_view, REGSET_GPR, 0, 32 * sizeof(long), - (void __user *) data); + datavp); case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */ return copy_regset_from_user(child, &user_ppc_native_view, REGSET_GPR, 0, 32 * sizeof(long), - (const void __user *) data); + datavp); case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. */ return copy_regset_to_user(child, &user_ppc_native_view, REGSET_FPR, 0, 32 * sizeof(double), - (void __user *) data); + datavp); case PPC_PTRACE_SETFPREGS: /* Set FPRs 0 - 31. */ return copy_regset_from_user(child, &user_ppc_native_view, REGSET_FPR, 0, 32 * sizeof(double), - (const void __user *) data); + datavp); } return -EPERM; } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { int ret = -EPERM; + void __user *datavp = (void __user *) data; + unsigned long __user *datalp = datavp; switch (request) { /* read the word at location addr in the USER area. */ @@ -1446,11 +1480,11 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = -EIO; /* convert to index and check */ #ifdef CONFIG_PPC32 - index = (unsigned long) addr >> 2; + index = addr >> 2; if ((addr & 3) || (index > PT_FPSCR) || (child->thread.regs == NULL)) #else - index = (unsigned long) addr >> 3; + index = addr >> 3; if ((addr & 7) || (index > PT_FPSCR)) #endif break; @@ -1463,7 +1497,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) tmp = ((unsigned long *)child->thread.fpr) [TS_FPRWIDTH * (index - PT_FPR0)]; } - ret = put_user(tmp,(unsigned long __user *) data); + ret = put_user(tmp, datalp); break; } @@ -1474,11 +1508,11 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = -EIO; /* convert to index and check */ #ifdef CONFIG_PPC32 - index = (unsigned long) addr >> 2; + index = addr >> 2; if ((addr & 3) || (index > PT_FPSCR) || (child->thread.regs == NULL)) #else - index = (unsigned long) addr >> 3; + index = addr >> 3; if ((addr & 7) || (index > PT_FPSCR)) #endif break; @@ -1525,11 +1559,11 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) dbginfo.features = 0; #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ - if (!access_ok(VERIFY_WRITE, data, + if (!access_ok(VERIFY_WRITE, datavp, sizeof(struct ppc_debug_info))) return -EFAULT; - ret = __copy_to_user((struct ppc_debug_info __user *)data, - &dbginfo, sizeof(struct ppc_debug_info)) ? + ret = __copy_to_user(datavp, &dbginfo, + sizeof(struct ppc_debug_info)) ? -EFAULT : 0; break; } @@ -1537,11 +1571,10 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) case PPC_PTRACE_SETHWDEBUG: { struct ppc_hw_breakpoint bp_info; - if (!access_ok(VERIFY_READ, data, + if (!access_ok(VERIFY_READ, datavp, sizeof(struct ppc_hw_breakpoint))) return -EFAULT; - ret = __copy_from_user(&bp_info, - (struct ppc_hw_breakpoint __user *)data, + ret = __copy_from_user(&bp_info, datavp, sizeof(struct ppc_hw_breakpoint)) ? -EFAULT : 0; if (!ret) @@ -1560,11 +1593,9 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) if (addr > 0) break; #ifdef CONFIG_PPC_ADV_DEBUG_REGS - ret = put_user(child->thread.dac1, - (unsigned long __user *)data); + ret = put_user(child->thread.dac1, datalp); #else - ret = put_user(child->thread.dabr, - (unsigned long __user *)data); + ret = put_user(child->thread.dabr, datalp); #endif break; } @@ -1580,7 +1611,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) return copy_regset_to_user(child, &user_ppc_native_view, REGSET_GPR, 0, sizeof(struct pt_regs), - (void __user *) data); + datavp); #ifdef CONFIG_PPC64 case PTRACE_SETREGS64: @@ -1589,19 +1620,19 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) return copy_regset_from_user(child, &user_ppc_native_view, REGSET_GPR, 0, sizeof(struct pt_regs), - (const void __user *) data); + datavp); case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */ return copy_regset_to_user(child, &user_ppc_native_view, REGSET_FPR, 0, sizeof(elf_fpregset_t), - (void __user *) data); + datavp); case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */ return copy_regset_from_user(child, &user_ppc_native_view, REGSET_FPR, 0, sizeof(elf_fpregset_t), - (const void __user *) data); + datavp); #ifdef CONFIG_ALTIVEC case PTRACE_GETVRREGS: @@ -1609,40 +1640,40 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) REGSET_VMX, 0, (33 * sizeof(vector128) + sizeof(u32)), - (void __user *) data); + datavp); case PTRACE_SETVRREGS: return copy_regset_from_user(child, &user_ppc_native_view, REGSET_VMX, 0, (33 * sizeof(vector128) + sizeof(u32)), - (const void __user *) data); + datavp); #endif #ifdef CONFIG_VSX case PTRACE_GETVSRREGS: return copy_regset_to_user(child, &user_ppc_native_view, REGSET_VSX, 0, 32 * sizeof(double), - (void __user *) data); + datavp); case PTRACE_SETVSRREGS: return copy_regset_from_user(child, &user_ppc_native_view, REGSET_VSX, 0, 32 * sizeof(double), - (const void __user *) data); + datavp); #endif #ifdef CONFIG_SPE case PTRACE_GETEVRREGS: /* Get the child spe register state. */ return copy_regset_to_user(child, &user_ppc_native_view, REGSET_SPE, 0, 35 * sizeof(u32), - (void __user *) data); + datavp); case PTRACE_SETEVRREGS: /* Set the child spe register state. */ return copy_regset_from_user(child, &user_ppc_native_view, REGSET_SPE, 0, 35 * sizeof(u32), - (const void __user *) data); + datavp); #endif /* Old reverse args ptrace callss */ diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 8a6daf4129f..69c4be917d0 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -280,7 +280,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, /* We only support one DABR and no IABRS at the moment */ if (addr > 0) break; +#ifdef CONFIG_PPC_ADV_DEBUG_REGS + ret = put_user(child->thread.dac1, (u32 __user *)data); +#else ret = put_user(child->thread.dabr, (u32 __user *)data); +#endif break; } @@ -312,6 +316,9 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, case PTRACE_SET_DEBUGREG: case PTRACE_SYSCALL: case PTRACE_CONT: + case PPC_PTRACE_GETHWDBGINFO: + case PPC_PTRACE_SETHWDEBUG: + case PPC_PTRACE_DELHWDEBUG: ret = arch_ptrace(child, request, addr, data); break; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 8fe8bc61c10..271ff6318ed 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -41,6 +41,8 @@ #include <asm/atomic.h> #include <asm/time.h> #include <asm/mmu.h> +#include <asm/topology.h> +#include <asm/pSeries_reconfig.h> struct rtas_t rtas = { .lock = __ARCH_SPIN_LOCK_UNLOCKED @@ -493,7 +495,7 @@ unsigned int rtas_busy_delay(int status) might_sleep(); ms = rtas_busy_delay_time(status); - if (ms) + if (ms && need_resched()) msleep(ms); return ms; @@ -713,6 +715,7 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w int cpu; slb_set_size(SLB_MIN_SIZE); + stop_topology_update(); printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id()); while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) && @@ -728,6 +731,8 @@ static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_w rc = atomic_read(&data->error); atomic_set(&data->error, rc); + start_topology_update(); + pSeries_coalesce_init(); if (wake_when_done) { atomic_set(&data->done, 1); diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 2b442e6c21e..bf5f5ce3a7b 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -256,31 +256,16 @@ static ssize_t rtas_flash_read(struct file *file, char __user *buf, struct proc_dir_entry *dp = PDE(file->f_path.dentry->d_inode); struct rtas_update_flash_t *uf; char msg[RTAS_MSG_MAXLEN]; - int msglen; - uf = (struct rtas_update_flash_t *) dp->data; + uf = dp->data; if (!strcmp(dp->name, FIRMWARE_FLASH_NAME)) { get_flash_status_msg(uf->status, msg); } else { /* FIRMWARE_UPDATE_NAME */ sprintf(msg, "%d\n", uf->status); } - msglen = strlen(msg); - if (msglen > count) - msglen = count; - - if (ppos && *ppos != 0) - return 0; /* be cheap */ - - if (!access_ok(VERIFY_WRITE, buf, msglen)) - return -EINVAL; - if (copy_to_user(buf, msg, msglen)) - return -EFAULT; - - if (ppos) - *ppos = msglen; - return msglen; + return simple_read_from_buffer(buf, count, ppos, msg, strlen(msg)); } /* constructor for flash_block_cache */ @@ -394,26 +379,13 @@ static ssize_t manage_flash_read(struct file *file, char __user *buf, char msg[RTAS_MSG_MAXLEN]; int msglen; - args_buf = (struct rtas_manage_flash_t *) dp->data; + args_buf = dp->data; if (args_buf == NULL) return 0; msglen = sprintf(msg, "%d\n", args_buf->status); - if (msglen > count) - msglen = count; - if (ppos && *ppos != 0) - return 0; /* be cheap */ - - if (!access_ok(VERIFY_WRITE, buf, msglen)) - return -EINVAL; - - if (copy_to_user(buf, msg, msglen)) - return -EFAULT; - - if (ppos) - *ppos = msglen; - return msglen; + return simple_read_from_buffer(buf, count, ppos, msg, msglen); } static ssize_t manage_flash_write(struct file *file, const char __user *buf, @@ -495,24 +467,11 @@ static ssize_t validate_flash_read(struct file *file, char __user *buf, char msg[RTAS_MSG_MAXLEN]; int msglen; - args_buf = (struct rtas_validate_flash_t *) dp->data; + args_buf = dp->data; - if (ppos && *ppos != 0) - return 0; /* be cheap */ - msglen = get_validate_flash_msg(args_buf, msg); - if (msglen > count) - msglen = count; - - if (!access_ok(VERIFY_WRITE, buf, msglen)) - return -EINVAL; - - if (copy_to_user(buf, msg, msglen)) - return -EFAULT; - if (ppos) - *ppos = msglen; - return msglen; + return simple_read_from_buffer(buf, count, ppos, msg, msglen); } static ssize_t validate_flash_write(struct file *file, const char __user *buf, diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 0438f819fe6..67f6c3b5135 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -160,7 +160,7 @@ static int log_rtas_len(char * buf) /* rtas fixed header */ len = 8; err = (struct rtas_error_log *)buf; - if (err->extended_log_length) { + if (err->extended && err->extended_log_length) { /* extended header */ len += err->extended_log_length; @@ -412,7 +412,8 @@ static void rtas_event_scan(struct work_struct *w) get_online_cpus(); - cpu = cpumask_next(smp_processor_id(), cpu_online_mask); + /* raw_ OK because just using CPU as starting point. */ + cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); if (cpu >= nr_cpu_ids) { cpu = cpumask_first(cpu_online_mask); @@ -464,7 +465,7 @@ static void start_event_scan(void) pr_debug("rtasd: will sleep for %d milliseconds\n", (30000 / rtas_event_scan_rate)); - /* Retreive errors from nvram if any */ + /* Retrieve errors from nvram if any */ retreive_nvram_error_log(); schedule_delayed_work_on(cpumask_first(cpu_online_mask), diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9d4882a4664..79fca2651b6 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -381,7 +381,7 @@ static void __init cpu_init_thread_core_maps(int tpc) int i; threads_per_core = tpc; - threads_core_mask = CPU_MASK_NONE; + cpumask_clear(&threads_core_mask); /* This implementation only supports power of 2 number of threads * for simplicity and performance @@ -390,7 +390,7 @@ static void __init cpu_init_thread_core_maps(int tpc) BUG_ON(tpc != (1 << threads_shift)); for (i = 0; i < tpc; i++) - cpu_set(i, threads_core_mask); + cpumask_set_cpu(i, &threads_core_mask); printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n", tpc, tpc > 1 ? "s" : ""); @@ -404,7 +404,7 @@ static void __init cpu_init_thread_core_maps(int tpc) * cpu_present_mask * * Having the possible map set up early allows us to restrict allocations - * of things like irqstacks to num_possible_cpus() rather than NR_CPUS. + * of things like irqstacks to nr_cpu_ids rather than NR_CPUS. * * We do not initialize the online map here; cpus set their own bits in * cpu_online_mask as they come up. @@ -424,7 +424,7 @@ void __init smp_setup_cpu_maps(void) DBG("smp_setup_cpu_maps()\n"); - while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) { + while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) { const int *intserv; int j, len; @@ -443,7 +443,7 @@ void __init smp_setup_cpu_maps(void) intserv = &cpu; /* assume logical == phys */ } - for (j = 0; j < nthreads && cpu < NR_CPUS; j++) { + for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { DBG(" thread %d -> cpu %d (hard id %d)\n", j, cpu, intserv[j]); set_cpu_present(cpu, true); @@ -483,12 +483,12 @@ void __init smp_setup_cpu_maps(void) if (cpu_has_feature(CPU_FTR_SMT)) maxcpus *= nthreads; - if (maxcpus > NR_CPUS) { + if (maxcpus > nr_cpu_ids) { printk(KERN_WARNING "Partition configured for %d cpus, " "operating system maximum is %d.\n", - maxcpus, NR_CPUS); - maxcpus = NR_CPUS; + maxcpus, nr_cpu_ids); + maxcpus = nr_cpu_ids; } else printk(KERN_INFO "Partition configured for %d cpus.\n", maxcpus); @@ -509,6 +509,9 @@ void __init smp_setup_cpu_maps(void) */ cpu_init_thread_core_maps(nthreads); + /* Now that possible cpus are set, set nr_cpu_ids for later use */ + setup_nr_cpu_ids(); + free_unused_pacas(); } #endif /* CONFIG_SMP */ @@ -599,6 +602,10 @@ int check_legacy_ioport(unsigned long base_port) * name instead */ if (!np) np = of_find_node_by_name(NULL, "8042"); + if (np) { + of_i8042_kbd_irq = 1; + of_i8042_aux_irq = 12; + } break; case FDC_BASE: /* FDC1 */ np = of_find_node_by_type(NULL, "fdc"); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 1d2fbc90530..620d792b52e 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -48,6 +48,7 @@ extern void bootx_init(unsigned long r4, unsigned long phys); int boot_cpuid = -1; EXPORT_SYMBOL_GPL(boot_cpuid); +int __initdata boot_cpu_count; int boot_cpuid_phys; int smp_hw_index[NR_CPUS]; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2a178b0ebcd..a88bf2713d4 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -62,6 +62,7 @@ #include <asm/udbg.h> #include <asm/kexec.h> #include <asm/mmu_context.h> +#include <asm/code-patching.h> #include "setup.h" @@ -72,6 +73,7 @@ #endif int boot_cpuid = 0; +int __initdata boot_cpu_count; u64 ppc64_pft_size; /* Pick defaults since we might want to patch instructions @@ -233,6 +235,7 @@ void early_setup_secondary(void) void smp_release_cpus(void) { unsigned long *ptr; + int i; DBG(" -> smp_release_cpus()\n"); @@ -245,7 +248,16 @@ void smp_release_cpus(void) ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop - PHYSICAL_START); *ptr = __pa(generic_secondary_smp_init); - mb(); + + /* And wait a bit for them to catch up */ + for (i = 0; i < 100000; i++) { + mb(); + HMT_low(); + if (boot_cpu_count == 0) + break; + udelay(1); + } + DBG("boot_cpu_count = %d\n", boot_cpu_count); DBG(" <- smp_release_cpus()\n"); } @@ -423,22 +435,35 @@ void __init setup_system(void) DBG(" <- setup_system()\n"); } -static u64 slb0_limit(void) +/* This returns the limit below which memory accesses to the linear + * mapping are guarnateed not to cause a TLB or SLB miss. This is + * used to allocate interrupt or emergency stacks for which our + * exception entry path doesn't deal with being interrupted. + */ +static u64 safe_stack_limit(void) { - if (cpu_has_feature(CPU_FTR_1T_SEGMENT)) { +#ifdef CONFIG_PPC_BOOK3E + /* Freescale BookE bolts the entire linear mapping */ + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) + return linear_map_top; + /* Other BookE, we assume the first GB is bolted */ + return 1ul << 30; +#else + /* BookS, the first segment is bolted */ + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) return 1UL << SID_SHIFT_1T; - } return 1UL << SID_SHIFT; +#endif } static void __init irqstack_early_init(void) { - u64 limit = slb0_limit(); + u64 limit = safe_stack_limit(); unsigned int i; /* - * interrupt stacks must be under 256MB, we cannot afford to take - * SLB misses on them. + * Interrupt stacks must be in the first segment since we + * cannot afford to take SLB misses on them. */ for_each_possible_cpu(i) { softirq_ctx[i] = (struct thread_info *) @@ -453,6 +478,9 @@ static void __init irqstack_early_init(void) #ifdef CONFIG_PPC_BOOK3E static void __init exc_lvl_early_init(void) { + extern unsigned int interrupt_base_book3e; + extern unsigned int exc_debug_debug_book3e; + unsigned int i; for_each_possible_cpu(i) { @@ -463,6 +491,10 @@ static void __init exc_lvl_early_init(void) mcheckirq_ctx[i] = (struct thread_info *) __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); } + + if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) + patch_branch(&interrupt_base_book3e + (0x040 / 4) + 1, + (unsigned long)&exc_debug_debug_book3e, 0); } #else #define exc_lvl_early_init() @@ -486,7 +518,7 @@ static void __init emergency_stack_init(void) * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. */ - limit = min(slb0_limit(), ppc64_rma_size); + limit = min(safe_stack_limit(), ppc64_rma_size); for_each_possible_cpu(i) { unsigned long sp; @@ -497,9 +529,8 @@ static void __init emergency_stack_init(void) } /* - * Called into from start_kernel, after lock_kernel has been called. - * Initializes bootmem, which is unsed to manage page allocation until - * mem_init is called. + * Called into from start_kernel this initializes bootmem, which is used + * to manage page allocation until mem_init is called. */ void __init setup_arch(char **cmdline_p) { diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 27c4a4584f8..da989fff19c 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -381,7 +381,7 @@ badframe: regs, uc, &uc->uc_mcontext); #endif if (show_unhandled_signals && printk_ratelimit()) - printk(regs->msr & MSR_SF ? fmt64 : fmt32, + printk(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, "rt_sigreturn", (long)uc, regs->nip, regs->link); @@ -469,7 +469,7 @@ badframe: regs, frame, newsp); #endif if (show_unhandled_signals && printk_ratelimit()) - printk(regs->msr & MSR_SF ? fmt64 : fmt32, + printk(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, "setup_rt_frame", (long)frame, regs->nip, regs->link); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 68034bbf2e4..4a6f2ec7e76 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -57,6 +57,25 @@ #define DBG(fmt...) #endif + +/* Store all idle threads, this can be reused instead of creating +* a new thread. Also avoids complicated thread destroy functionality +* for idle threads. +*/ +#ifdef CONFIG_HOTPLUG_CPU +/* + * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is + * removed after init for !CONFIG_HOTPLUG_CPU. + */ +static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); +#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) +#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) +#else +static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; +#define get_idle_for_cpu(x) (idle_thread_array[(x)]) +#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) +#endif + struct thread_info *secondary_ti; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); @@ -76,7 +95,7 @@ int smt_enabled_at_boot = 1; static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL; #ifdef CONFIG_PPC64 -void __devinit smp_generic_kick_cpu(int nr) +int __devinit smp_generic_kick_cpu(int nr) { BUG_ON(nr < 0 || nr >= NR_CPUS); @@ -87,37 +106,10 @@ void __devinit smp_generic_kick_cpu(int nr) */ paca[nr].cpu_start = 1; smp_mb(); -} -#endif -void smp_message_recv(int msg) -{ - switch(msg) { - case PPC_MSG_CALL_FUNCTION: - generic_smp_call_function_interrupt(); - break; - case PPC_MSG_RESCHEDULE: - /* we notice need_resched on exit */ - break; - case PPC_MSG_CALL_FUNC_SINGLE: - generic_smp_call_function_single_interrupt(); - break; - case PPC_MSG_DEBUGGER_BREAK: - if (crash_ipi_function_ptr) { - crash_ipi_function_ptr(get_irq_regs()); - break; - } -#ifdef CONFIG_DEBUGGER - debugger_ipi(get_irq_regs()); - break; -#endif /* CONFIG_DEBUGGER */ - /* FALLTHROUGH */ - default: - printk("SMP %d: smp_message_recv(): unknown msg %d\n", - smp_processor_id(), msg); - break; - } + return 0; } +#endif static irqreturn_t call_function_action(int irq, void *data) { @@ -127,7 +119,7 @@ static irqreturn_t call_function_action(int irq, void *data) static irqreturn_t reschedule_action(int irq, void *data) { - /* we just need the return path side effect of checking need_resched */ + scheduler_ipi(); return IRQ_HANDLED; } @@ -137,9 +129,17 @@ static irqreturn_t call_function_single_action(int irq, void *data) return IRQ_HANDLED; } -static irqreturn_t debug_ipi_action(int irq, void *data) +irqreturn_t debug_ipi_action(int irq, void *data) { - smp_message_recv(PPC_MSG_DEBUGGER_BREAK); + if (crash_ipi_function_ptr) { + crash_ipi_function_ptr(get_irq_regs()); + return IRQ_HANDLED; + } + +#ifdef CONFIG_DEBUGGER + debugger_ipi(get_irq_regs()); +#endif /* CONFIG_DEBUGGER */ + return IRQ_HANDLED; } @@ -178,6 +178,66 @@ int smp_request_message_ipi(int virq, int msg) return err; } +#ifdef CONFIG_PPC_SMP_MUXED_IPI +struct cpu_messages { + int messages; /* current messages */ + unsigned long data; /* data for cause ipi */ +}; +static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message); + +void smp_muxed_ipi_set_data(int cpu, unsigned long data) +{ + struct cpu_messages *info = &per_cpu(ipi_message, cpu); + + info->data = data; +} + +void smp_muxed_ipi_message_pass(int cpu, int msg) +{ + struct cpu_messages *info = &per_cpu(ipi_message, cpu); + char *message = (char *)&info->messages; + + message[msg] = 1; + mb(); + smp_ops->cause_ipi(cpu, info->data); +} + +void smp_muxed_ipi_resend(void) +{ + struct cpu_messages *info = &__get_cpu_var(ipi_message); + + if (info->messages) + smp_ops->cause_ipi(smp_processor_id(), info->data); +} + +irqreturn_t smp_ipi_demux(void) +{ + struct cpu_messages *info = &__get_cpu_var(ipi_message); + unsigned int all; + + mb(); /* order any irq clear */ + + do { + all = xchg_local(&info->messages, 0); + +#ifdef __BIG_ENDIAN + if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION))) + generic_smp_call_function_interrupt(); + if (all & (1 << (24 - 8 * PPC_MSG_RESCHEDULE))) + scheduler_ipi(); + if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNC_SINGLE))) + generic_smp_call_function_single_interrupt(); + if (all & (1 << (24 - 8 * PPC_MSG_DEBUGGER_BREAK))) + debug_ipi_action(0, NULL); +#else +#error Unsupported ENDIAN +#endif + } while (info->messages); + + return IRQ_HANDLED; +} +#endif /* CONFIG_PPC_SMP_MUXED_IPI */ + void smp_send_reschedule(int cpu) { if (likely(smp_ops)) @@ -197,11 +257,18 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION); } -#ifdef CONFIG_DEBUGGER -void smp_send_debugger_break(int cpu) +#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) +void smp_send_debugger_break(void) { - if (likely(smp_ops)) - smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); + int cpu; + int me = raw_smp_processor_id(); + + if (unlikely(!smp_ops)) + return; + + for_each_online_cpu(cpu) + if (cpu != me) + smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); } #endif @@ -209,9 +276,9 @@ void smp_send_debugger_break(int cpu) void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { crash_ipi_function_ptr = crash_ipi_callback; - if (crash_ipi_callback && smp_ops) { + if (crash_ipi_callback) { mb(); - smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_DEBUGGER_BREAK); + smp_send_debugger_break(); } } #endif @@ -238,23 +305,6 @@ static void __devinit smp_store_cpu_info(int id) per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR); } -static void __init smp_create_idle(unsigned int cpu) -{ - struct task_struct *p; - - /* create a process for the processor */ - p = fork_idle(cpu); - if (IS_ERR(p)) - panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p)); -#ifdef CONFIG_PPC64 - paca[cpu].__current = p; - paca[cpu].kstack = (unsigned long) task_thread_info(p) - + THREAD_SIZE - STACK_FRAME_OVERHEAD; -#endif - current_set[cpu] = task_thread_info(p); - task_thread_info(p)->cpu = cpu; -} - void __init smp_prepare_cpus(unsigned int max_cpus) { unsigned int cpu; @@ -288,10 +338,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) max_cpus = NR_CPUS; else max_cpus = 1; - - for_each_possible_cpu(cpu) - if (cpu != boot_cpuid) - smp_create_idle(cpu); } void __devinit smp_prepare_boot_cpu(void) @@ -305,7 +351,7 @@ void __devinit smp_prepare_boot_cpu(void) #ifdef CONFIG_HOTPLUG_CPU /* State of each CPU during hotplug phases */ -DEFINE_PER_CPU(int, cpu_state) = { 0 }; +static DEFINE_PER_CPU(int, cpu_state) = { 0 }; int generic_cpu_disable(void) { @@ -317,30 +363,8 @@ int generic_cpu_disable(void) set_cpu_online(cpu, false); #ifdef CONFIG_PPC64 vdso_data->processorCount--; - fixup_irqs(cpu_online_mask); -#endif - return 0; -} - -int generic_cpu_enable(unsigned int cpu) -{ - /* Do the normal bootup if we haven't - * already bootstrapped. */ - if (system_state != SYSTEM_RUNNING) - return -ENOSYS; - - /* get the target out of it's holding state */ - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; - smp_wmb(); - - while (!cpu_online(cpu)) - cpu_relax(); - -#ifdef CONFIG_PPC64 - fixup_irqs(cpu_online_mask); - /* counter the irq disable in fixup_irqs */ - local_irq_enable(); #endif + migrate_irqs(); return 0; } @@ -362,37 +386,89 @@ void generic_mach_cpu_die(void) unsigned int cpu; local_irq_disable(); + idle_task_exit(); cpu = smp_processor_id(); printk(KERN_DEBUG "CPU%d offline\n", cpu); __get_cpu_var(cpu_state) = CPU_DEAD; smp_wmb(); while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) cpu_relax(); - set_cpu_online(cpu, true); - local_irq_enable(); +} + +void generic_set_cpu_dead(unsigned int cpu) +{ + per_cpu(cpu_state, cpu) = CPU_DEAD; } #endif -static int __devinit cpu_enable(unsigned int cpu) +struct create_idle { + struct work_struct work; + struct task_struct *idle; + struct completion done; + int cpu; +}; + +static void __cpuinit do_fork_idle(struct work_struct *work) { - if (smp_ops && smp_ops->cpu_enable) - return smp_ops->cpu_enable(cpu); + struct create_idle *c_idle = + container_of(work, struct create_idle, work); - return -ENOSYS; + c_idle->idle = fork_idle(c_idle->cpu); + complete(&c_idle->done); } -int __cpuinit __cpu_up(unsigned int cpu) +static int __cpuinit create_idle(unsigned int cpu) { - int c; + struct thread_info *ti; + struct create_idle c_idle = { + .cpu = cpu, + .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), + }; + INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle); - secondary_ti = current_set[cpu]; - if (!cpu_enable(cpu)) - return 0; + c_idle.idle = get_idle_for_cpu(cpu); + + /* We can't use kernel_thread since we must avoid to + * reschedule the child. We use a workqueue because + * we want to fork from a kernel thread, not whatever + * userspace process happens to be trying to online us. + */ + if (!c_idle.idle) { + schedule_work(&c_idle.work); + wait_for_completion(&c_idle.done); + } else + init_idle(c_idle.idle, cpu); + if (IS_ERR(c_idle.idle)) { + pr_err("Failed fork for CPU %u: %li", cpu, PTR_ERR(c_idle.idle)); + return PTR_ERR(c_idle.idle); + } + ti = task_thread_info(c_idle.idle); + +#ifdef CONFIG_PPC64 + paca[cpu].__current = c_idle.idle; + paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD; +#endif + ti->cpu = cpu; + current_set[cpu] = ti; + + return 0; +} + +int __cpuinit __cpu_up(unsigned int cpu) +{ + int rc, c; if (smp_ops == NULL || (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu))) return -EINVAL; + /* Make sure we have an idle thread */ + rc = create_idle(cpu); + if (rc) + return rc; + + secondary_ti = current_set[cpu]; + /* Make sure callin-map entry is 0 (can be leftover a CPU * hotplug */ @@ -406,7 +482,11 @@ int __cpuinit __cpu_up(unsigned int cpu) /* wake up cpus */ DBG("smp: kicking cpu %d\n", cpu); - smp_ops->kick_cpu(cpu); + rc = smp_ops->kick_cpu(cpu); + if (rc) { + pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc); + return rc; + } /* * wait to see if the cpu made a callin (is actually up). @@ -466,6 +546,19 @@ out: return id; } +/* Helper routines for cpu to core mapping */ +int cpu_core_index_of_thread(int cpu) +{ + return cpu >> threads_shift; +} +EXPORT_SYMBOL_GPL(cpu_core_index_of_thread); + +int cpu_first_thread_of_core(int core) +{ + return core << threads_shift; +} +EXPORT_SYMBOL_GPL(cpu_first_thread_of_core); + /* Must be called when no change can occur to cpu_present_mask, * i.e. during cpu online or offline. */ @@ -489,7 +582,7 @@ static struct device_node *cpu_to_l2cache(int cpu) } /* Activate a secondary processor. */ -int __devinit start_secondary(void *unused) +void __devinit start_secondary(void *unused) { unsigned int cpu = smp_processor_id(); struct device_node *l2_cache; @@ -510,11 +603,15 @@ int __devinit start_secondary(void *unused) secondary_cpu_time_init(); +#ifdef CONFIG_PPC64 + if (system_state == SYSTEM_RUNNING) + vdso_data->processorCount++; +#endif ipi_call_lock(); notify_cpu_starting(cpu); set_cpu_online(cpu, true); /* Update sibling maps */ - base = cpu_first_thread_in_core(cpu); + base = cpu_first_thread_sibling(cpu); for (i = 0; i < threads_per_core; i++) { if (cpu_is_offline(base + i)) continue; @@ -545,7 +642,8 @@ int __devinit start_secondary(void *unused) local_irq_enable(); cpu_idle(); - return 0; + + BUG(); } int setup_profiling_timer(unsigned int multiplier) @@ -562,7 +660,7 @@ void __init smp_cpus_done(unsigned int max_cpus) * se we pin us down to CPU 0 for a short while */ alloc_cpumask_var(&old_mask, GFP_NOWAIT); - cpumask_copy(old_mask, ¤t->cpus_allowed); + cpumask_copy(old_mask, tsk_cpus_allowed(current)); set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid)); if (smp_ops && smp_ops->setup_cpu) @@ -572,7 +670,11 @@ void __init smp_cpus_done(unsigned int max_cpus) free_cpumask_var(old_mask); + if (smp_ops && smp_ops->bringup_done) + smp_ops->bringup_done(); + dump_numa_cpu_topology(); + } int arch_sd_sibling_asym_packing(void) @@ -600,7 +702,7 @@ int __cpu_disable(void) return err; /* Update sibling maps */ - base = cpu_first_thread_in_core(cpu); + base = cpu_first_thread_sibling(cpu); for (i = 0; i < threads_per_core; i++) { cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i)); cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu)); @@ -647,5 +749,9 @@ void cpu_die(void) { if (ppc_md.cpu_die) ppc_md.cpu_die(); + + /* If we return, we re-enter start_secondary */ + start_secondary_resume(); } + #endif diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index b0754e23743..ba4dee3d233 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -143,7 +143,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) /* Disable MSR:DR to make sure we don't take a TLB or * hash miss during the copy, as our hash table will - * for a while be unuseable. For .text, we assume we are + * for a while be unusable. For .text, we assume we are * covered by a BAT. This works only for non-G5 at this * point. G5 will need a better approach, possibly using * a small temporary hash table filled with large mappings, diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index b1b6043a56c..4e5bf1edc0f 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -23,7 +23,6 @@ #include <linux/resource.h> #include <linux/times.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/sem.h> #include <linux/msg.h> #include <linux/shm.h> diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index c0d8c2006bf..f0f2199e64e 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -182,6 +182,41 @@ static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra); static SYSDEV_ATTR(spurr, 0600, show_spurr, NULL); static SYSDEV_ATTR(dscr, 0600, show_dscr, store_dscr); static SYSDEV_ATTR(purr, 0600, show_purr, store_purr); + +unsigned long dscr_default = 0; +EXPORT_SYMBOL(dscr_default); + +static ssize_t show_dscr_default(struct sysdev_class *class, + struct sysdev_class_attribute *attr, char *buf) +{ + return sprintf(buf, "%lx\n", dscr_default); +} + +static ssize_t __used store_dscr_default(struct sysdev_class *class, + struct sysdev_class_attribute *attr, const char *buf, + size_t count) +{ + unsigned long val; + int ret = 0; + + ret = sscanf(buf, "%lx", &val); + if (ret != 1) + return -EINVAL; + dscr_default = val; + + return count; +} + +static SYSDEV_CLASS_ATTR(dscr_default, 0600, + show_dscr_default, store_dscr_default); + +static void sysfs_create_dscr_default(void) +{ + int err = 0; + if (cpu_has_feature(CPU_FTR_DSCR)) + err = sysfs_create_file(&cpu_sysdev_class.kset.kobj, + &attr_dscr_default.attr); +} #endif /* CONFIG_PPC64 */ #ifdef HAS_PPC_PMC_PA6T @@ -617,6 +652,9 @@ static int __init topology_init(void) if (cpu_online(cpu)) register_cpu_online(cpu); } +#ifdef CONFIG_PPC64 + sysfs_create_dscr_default(); +#endif /* CONFIG_PPC64 */ return 0; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 010406958d9..f33acfd872a 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -155,7 +155,7 @@ EXPORT_SYMBOL_GPL(rtc_lock); static u64 tb_to_ns_scale __read_mostly; static unsigned tb_to_ns_shift __read_mostly; -static unsigned long boot_tb __read_mostly; +static u64 boot_tb __read_mostly; extern struct timezone sys_tz; static long timezone_offset; @@ -229,6 +229,9 @@ static u64 scan_dispatch_log(u64 stop_tb) u64 stolen = 0; u64 dtb; + if (!dtl) + return 0; + if (i == vpa->dtl_idx) return 0; while (i < vpa->dtl_idx) { @@ -265,11 +268,26 @@ void accumulate_stolen_time(void) { u64 sst, ust; - sst = scan_dispatch_log(get_paca()->starttime_user); - ust = scan_dispatch_log(get_paca()->starttime); - get_paca()->system_time -= sst; - get_paca()->user_time -= ust; - get_paca()->stolen_time += ust + sst; + u8 save_soft_enabled = local_paca->soft_enabled; + u8 save_hard_enabled = local_paca->hard_enabled; + + /* We are called early in the exception entry, before + * soft/hard_enabled are sync'ed to the expected state + * for the exception. We are hard disabled but the PACA + * needs to reflect that so various debug stuff doesn't + * complain + */ + local_paca->soft_enabled = 0; + local_paca->hard_enabled = 0; + + sst = scan_dispatch_log(local_paca->starttime_user); + ust = scan_dispatch_log(local_paca->starttime); + local_paca->system_time -= sst; + local_paca->user_time -= ust; + local_paca->stolen_time += ust + sst; + + local_paca->soft_enabled = save_soft_enabled; + local_paca->hard_enabled = save_hard_enabled; } static inline u64 calculate_stolen_time(u64 stop_tb) @@ -341,7 +359,7 @@ void account_system_vtime(struct task_struct *tsk) } get_paca()->user_time_scaled += user_scaled; - if (in_irq() || idle_task(smp_processor_id()) != tsk) { + if (in_interrupt() || idle_task(smp_processor_id()) != tsk) { account_system_time(tsk, 0, delta, sys_scaled); if (stolen) account_steal_time(stolen); @@ -562,14 +580,21 @@ void timer_interrupt(struct pt_regs * regs) struct clock_event_device *evt = &decrementer->event; u64 now; + /* Ensure a positive value is written to the decrementer, or else + * some CPUs will continue to take decrementer exceptions. + */ + set_dec(DECREMENTER_MAX); + + /* Some implementations of hotplug will get timer interrupts while + * offline, just ignore these + */ + if (!cpu_online(smp_processor_id())) + return; + trace_timer_interrupt_entry(regs); __get_cpu_var(irq_stat).timer_irqs++; - /* Ensure a positive value is written to the decrementer, or else - * some CPUs will continuue to take decrementer exceptions */ - set_dec(DECREMENTER_MAX); - #if defined(CONFIG_PPC32) && defined(CONFIG_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1b2cdc8eec9..b13306b0d92 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -143,7 +143,6 @@ int die(const char *str, struct pt_regs *regs, long err) #endif printk("%s\n", ppc_md.name ? ppc_md.name : ""); - sysfs_printk_last_file(); if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP) return 1; @@ -199,7 +198,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) } else if (show_unhandled_signals && unhandled_signal(current, signr) && printk_ratelimit()) { - printk(regs->msr & MSR_SF ? fmt64 : fmt32, + printk(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, signr, addr, regs->nip, regs->link, code); } @@ -221,7 +220,7 @@ void system_reset_exception(struct pt_regs *regs) } #ifdef CONFIG_KEXEC - cpu_set(smp_processor_id(), cpus_in_sr); + cpumask_set_cpu(smp_processor_id(), &cpus_in_sr); #endif die("System Reset", regs, SIGABRT); @@ -626,12 +625,6 @@ void machine_check_exception(struct pt_regs *regs) if (recover > 0) return; - if (user_mode(regs)) { - regs->msr |= MSR_RI; - _exception(SIGBUS, regs, BUS_ADRERR, regs->nip); - return; - } - #if defined(CONFIG_8xx) && defined(CONFIG_PCI) /* the qspan pci read routines can cause machine checks -- Cort * @@ -643,16 +636,12 @@ void machine_check_exception(struct pt_regs *regs) return; #endif - if (debugger_fault_handler(regs)) { - regs->msr |= MSR_RI; + if (debugger_fault_handler(regs)) return; - } if (check_io_access(regs)) return; - if (debugger_fault_handler(regs)) - return; die("Machine check", regs, SIGBUS); /* Must die if the interrupt is not recoverable */ @@ -919,6 +908,26 @@ static int emulate_instruction(struct pt_regs *regs) return emulate_isel(regs, instword); } +#ifdef CONFIG_PPC64 + /* Emulate the mfspr rD, DSCR. */ + if (((instword & PPC_INST_MFSPR_DSCR_MASK) == PPC_INST_MFSPR_DSCR) && + cpu_has_feature(CPU_FTR_DSCR)) { + PPC_WARN_EMULATED(mfdscr, regs); + rd = (instword >> 21) & 0x1f; + regs->gpr[rd] = mfspr(SPRN_DSCR); + return 0; + } + /* Emulate the mtspr DSCR, rD. */ + if (((instword & PPC_INST_MTSPR_DSCR_MASK) == PPC_INST_MTSPR_DSCR) && + cpu_has_feature(CPU_FTR_DSCR)) { + PPC_WARN_EMULATED(mtdscr, regs); + rd = (instword >> 21) & 0x1f; + mtspr(SPRN_DSCR, regs->gpr[rd]); + current->thread.dscr_inherit = 1; + return 0; + } +#endif + return -EINVAL; } @@ -969,7 +978,7 @@ void __kprobes program_check_exception(struct pt_regs *regs) * ESR_DST (!?) or 0. In the process of chasing this with the * hardware people - not sure if it can happen on any illegal * instruction or only on FP instructions, whether there is a - * pattern to occurences etc. -dgibson 31/Mar/2003 */ + * pattern to occurrences etc. -dgibson 31/Mar/2003 */ switch (do_mathemu(regs)) { case 0: emulate_single_step(regs); @@ -1516,6 +1525,10 @@ struct ppc_emulated ppc_emulated = { #ifdef CONFIG_VSX WARN_EMULATED_SETUP(vsx), #endif +#ifdef CONFIG_PPC64 + WARN_EMULATED_SETUP(mfdscr), + WARN_EMULATED_SETUP(mtdscr), +#endif }; u32 ppc_warn_emulated; diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index e39cad83c88..23d65abbedc 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -62,6 +62,8 @@ void __init udbg_early_init(void) udbg_init_cpm(); #elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO) udbg_init_usbgecko(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_WSP) + udbg_init_wsp(); #endif #ifdef CONFIG_PPC_EARLY_DEBUG diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index b4b167b3364..6837f839ab7 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -1,5 +1,5 @@ /* - * udbg for NS16550 compatable serial ports + * udbg for NS16550 compatible serial ports * * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp * @@ -11,6 +11,7 @@ #include <linux/types.h> #include <asm/udbg.h> #include <asm/io.h> +#include <asm/reg_a2.h> extern u8 real_readb(volatile u8 __iomem *addr); extern void real_writeb(u8 data, volatile u8 __iomem *addr); @@ -298,3 +299,53 @@ void __init udbg_init_40x_realmode(void) udbg_getc_poll = NULL; } #endif /* CONFIG_PPC_EARLY_DEBUG_40x */ + +#ifdef CONFIG_PPC_EARLY_DEBUG_WSP +static void udbg_wsp_flush(void) +{ + if (udbg_comport) { + while ((readb(&udbg_comport->lsr) & LSR_THRE) == 0) + /* wait for idle */; + } +} + +static void udbg_wsp_putc(char c) +{ + if (udbg_comport) { + if (c == '\n') + udbg_wsp_putc('\r'); + udbg_wsp_flush(); + writeb(c, &udbg_comport->thr); eieio(); + } +} + +static int udbg_wsp_getc(void) +{ + if (udbg_comport) { + while ((readb(&udbg_comport->lsr) & LSR_DR) == 0) + ; /* wait for char */ + return readb(&udbg_comport->rbr); + } + return -1; +} + +static int udbg_wsp_getc_poll(void) +{ + if (udbg_comport) + if (readb(&udbg_comport->lsr) & LSR_DR) + return readb(&udbg_comport->rbr); + return -1; +} + +void __init udbg_init_wsp(void) +{ + udbg_comport = (struct NS16550 __iomem *)WSP_UART_VIRT; + + udbg_init_uart(udbg_comport, 57600, 50000000); + + udbg_putc = udbg_wsp_putc; + udbg_flush = udbg_wsp_flush; + udbg_getc = udbg_wsp_getc; + udbg_getc_poll = udbg_wsp_getc_poll; +} +#endif /* CONFIG_PPC_EARLY_DEBUG_WSP */ diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index fd8728729ab..142ab1008c3 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -820,17 +820,17 @@ static int __init vdso_init(void) } arch_initcall(vdso_init); -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return 0; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { return 0; } -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } diff --git a/arch/powerpc/kernel/vdso32/sigtramp.S b/arch/powerpc/kernel/vdso32/sigtramp.S index 68d49dd71dc..cf0c9c9c24f 100644 --- a/arch/powerpc/kernel/vdso32/sigtramp.S +++ b/arch/powerpc/kernel/vdso32/sigtramp.S @@ -19,7 +19,7 @@ /* The nop here is a hack. The dwarf2 unwind routines subtract 1 from the return address to get an address in the middle of the presumed - call instruction. Since we don't have a call here, we artifically + call instruction. Since we don't have a call here, we artificially extend the range covered by the unwind info by adding a nop before the real start. */ nop diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S index 59eb59bb408..45ea281e9a2 100644 --- a/arch/powerpc/kernel/vdso64/sigtramp.S +++ b/arch/powerpc/kernel/vdso64/sigtramp.S @@ -20,7 +20,7 @@ /* The nop here is a hack. The dwarf2 unwind routines subtract 1 from the return address to get an address in the middle of the presumed - call instruction. Since we don't have a call here, we artifically + call instruction. Since we don't have a call here, we artificially extend the range covered by the unwind info by padding before the real start. */ nop diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index fe460482fa6..4d5a3edff49 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -5,6 +5,7 @@ #include <asm/cputable.h> #include <asm/thread_info.h> #include <asm/page.h> +#include <asm/ptrace.h> /* * load_up_altivec(unused, unused, tsk) @@ -101,7 +102,7 @@ _GLOBAL(giveup_altivec) MTMSRD(r5) /* enable use of VMX now */ isync PPC_LCMPI 0,r3,0 - beqlr- /* if no previous owner, done */ + beqlr /* if no previous owner, done */ addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r5,PT_REGS(r3) PPC_LCMPI 0,r5,0 diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index d692989a431..1b695fdc362 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -238,9 +238,7 @@ static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size) * memory in this pool does not change. */ if (spare_needed && reserve_freed) { - tmp = min(spare_needed, min(reserve_freed, - (viodev->cmo.entitled - - VIO_CMO_MIN_ENT))); + tmp = min3(spare_needed, reserve_freed, (viodev->cmo.entitled - VIO_CMO_MIN_ENT)); vio_cmo.spare += tmp; viodev->cmo.entitled -= tmp; @@ -602,6 +600,11 @@ static void vio_dma_iommu_unmap_sg(struct device *dev, vio_cmo_dealloc(viodev, alloc_size); } +static int vio_dma_iommu_dma_supported(struct device *dev, u64 mask) +{ + return dma_iommu_ops.dma_supported(dev, mask); +} + struct dma_map_ops vio_dma_mapping_ops = { .alloc_coherent = vio_dma_iommu_alloc_coherent, .free_coherent = vio_dma_iommu_free_coherent, @@ -609,6 +612,7 @@ struct dma_map_ops vio_dma_mapping_ops = { .unmap_sg = vio_dma_iommu_unmap_sg, .map_page = vio_dma_iommu_map_page, .unmap_page = vio_dma_iommu_unmap_page, + .dma_supported = vio_dma_iommu_dma_supported, }; @@ -860,8 +864,7 @@ static void vio_cmo_bus_remove(struct vio_dev *viodev) static void vio_cmo_set_dma_ops(struct vio_dev *viodev) { - vio_dma_mapping_ops.dma_supported = dma_iommu_ops.dma_supported; - viodev->dev.archdata.dma_ops = &vio_dma_mapping_ops; + set_dma_ops(&viodev->dev, &vio_dma_mapping_ops); } /** @@ -1246,7 +1249,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) if (firmware_has_feature(FW_FEATURE_CMO)) vio_cmo_set_dma_ops(viodev); else - viodev->dev.archdata.dma_ops = &dma_iommu_ops; + set_dma_ops(&viodev->dev, &dma_iommu_ops); set_iommu_table_base(&viodev->dev, vio_build_iommu_table(viodev)); set_dev_node(&viodev->dev, of_node_to_nid(of_node)); @@ -1254,6 +1257,10 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) viodev->dev.parent = &vio_bus_device.dev; viodev->dev.bus = &vio_bus_type; viodev->dev.release = vio_dev_release; + /* needed to ensure proper operation of coherent allocations + * later, in case driver doesn't set it explicitly */ + dma_set_mask(&viodev->dev, DMA_BIT_MASK(64)); + dma_set_coherent_mask(&viodev->dev, DMA_BIT_MASK(64)); /* register with generic device framework */ if (device_register(&viodev->dev)) { diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8a0deefac08..b9150f07d26 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -160,7 +160,7 @@ SECTIONS INIT_RAM_FS } - PERCPU(PAGE_SIZE) + PERCPU(L1_CACHE_BYTES, PAGE_SIZE) . = ALIGN(8); .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) { |
