Diffstat (limited to 'arch/powerpc/kernel')
26 files changed, 3752 insertions, 486 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 572d4f5eaac..b3ae2993efb 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -13,12 +13,15 @@ endif obj-y := semaphore.o cputable.o ptrace.o syscalls.o \ signal_32.o pmc.o obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \ - ptrace32.o systbl.o + signal_64.o ptrace32.o systbl.o obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o obj-$(CONFIG_POWER4) += idle_power4.o obj-$(CONFIG_PPC_OF) += of_device.o obj-$(CONFIG_PPC_RTAS) += rtas.o +obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o +obj-$(CONFIG_RTAS_PROC) += rtas-proc.o obj-$(CONFIG_IBMVIO) += vio.o +obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o ifeq ($(CONFIG_PPC_MERGE),y) @@ -38,6 +41,7 @@ obj-$(CONFIG_PPC_OF) += prom_init.o obj-$(CONFIG_MODULES) += ppc_ksyms.o obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_6xx) += idle_6xx.o +obj-$(CONFIG_SMP) += smp.o ifeq ($(CONFIG_PPC_ISERIES),y) $(obj)/head_64.o: $(obj)/lparmap.s @@ -46,8 +50,9 @@ endif else # stuff used from here for ARCH=ppc or ARCH=ppc64 +smpobj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PPC64) += traps.o process.o init_task.o time.o \ - setup-common.o + setup-common.o $(smpobj-y) endif diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 330cd783206..b7575725199 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -40,7 +40,7 @@ #ifdef CONFIG_PPC64 #include <asm/paca.h> #include <asm/lppaca.h> -#include <asm/iSeries/HvLpEvent.h> +#include <asm/iseries/hv_lp_event.h> #include <asm/cache.h> #include <asm/systemcfg.h> #include <asm/compat.h> @@ -125,6 +125,9 @@ int main(void) DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); +#ifdef CONFIG_PPC_64K_PAGES + DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir)); +#endif #ifdef CONFIG_HUGETLB_PAGE DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index b91345fa080..cc4e9eb1c13 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -240,7 +240,7 @@ struct cpu_spec cpu_specs[] = { .oprofile_model = &op_model_power4, #endif }, - { /* Power5 */ + { /* Power5 GR */ .pvr_mask = 0xffff0000, .pvr_value = 0x003a0000, .cpu_name = "POWER5 (gr)", @@ -255,7 +255,7 @@ struct cpu_spec cpu_specs[] = { .oprofile_model = &op_model_power4, #endif }, - { /* Power5 */ + { /* Power5 GS */ .pvr_mask = 0xffff0000, .pvr_value = 0x003b0000, .cpu_name = "POWER5 (gs)", @@ -929,6 +929,16 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, }, + { /* 440SPe Rev. A */ + .pvr_mask = 0xff000fff, + .pvr_value = 0x53000890, + .cpu_name = "440SPe Rev. 
A", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .icache_bsize = 32, + .dcache_bsize = 32, + }, #endif /* CONFIG_44x */ #ifdef CONFIG_FSL_BOOKE { /* e200z5 */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 147215a0d6c..16ab40daa73 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -35,7 +35,7 @@ #include <asm/cputable.h> #include <asm/setup.h> #include <asm/hvcall.h> -#include <asm/iSeries/LparMap.h> +#include <asm/iseries/lpar_map.h> #include <asm/thread_info.h> #ifdef CONFIG_PPC_ISERIES @@ -195,11 +195,11 @@ exception_marker: #define EX_R12 24 #define EX_R13 32 #define EX_SRR0 40 -#define EX_R3 40 /* SLB miss saves R3, but not SRR0 */ #define EX_DAR 48 -#define EX_LR 48 /* SLB miss saves LR, but not DAR */ #define EX_DSISR 56 #define EX_CCR 60 +#define EX_R3 64 +#define EX_LR 72 #define EXCEPTION_PROLOG_PSERIES(area, label) \ mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \ @@ -419,17 +419,22 @@ data_access_slb_pSeries: mtspr SPRN_SPRG1,r13 RUNLATCH_ON(r13) mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r3,SPRN_DAR std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ + mfcr r9 +#ifdef __DISABLED__ + /* Keep that around for when we re-implement dynamic VSIDs */ + cmpdi r3,0 + bge slb_miss_user_pseries +#endif /* __DISABLED__ */ std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - std r3,PACA_EXSLB+EX_R3(r13) - mfspr r9,SPRN_SPRG1 - std r9,PACA_EXSLB+EX_R13(r13) - mfcr r9 + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ - mfspr r3,SPRN_DAR - b .do_slb_miss /* Rel. branch works in real mode */ + b .slb_miss_realmode /* Rel. branch works in real mode */ STD_EXCEPTION_PSERIES(0x400, instruction_access) @@ -440,17 +445,22 @@ instruction_access_slb_pSeries: mtspr SPRN_SPRG1,r13 RUNLATCH_ON(r13) mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ + mfcr r9 +#ifdef __DISABLED__ + /* Keep that around for when we re-implement dynamic VSIDs */ + cmpdi r3,0 + bge slb_miss_user_pseries +#endif /* __DISABLED__ */ std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - std r3,PACA_EXSLB+EX_R3(r13) - mfspr r9,SPRN_SPRG1 - std r9,PACA_EXSLB+EX_R13(r13) - mfcr r9 + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ - mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ - b .do_slb_miss /* Rel. branch works in real mode */ + b .slb_miss_realmode /* Rel. 
branch works in real mode */ STD_EXCEPTION_PSERIES(0x500, hardware_interrupt) STD_EXCEPTION_PSERIES(0x600, alignment) @@ -509,6 +519,38 @@ _GLOBAL(do_stab_bolted_pSeries) EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) /* + * We have some room here we use that to put + * the peries slb miss user trampoline code so it's reasonably + * away from slb_miss_user_common to avoid problems with rfid + * + * This is used for when the SLB miss handler has to go virtual, + * which doesn't happen for now anymore but will once we re-implement + * dynamic VSIDs for shared page tables + */ +#ifdef __DISABLED__ +slb_miss_user_pseries: + std r10,PACA_EXGEN+EX_R10(r13) + std r11,PACA_EXGEN+EX_R11(r13) + std r12,PACA_EXGEN+EX_R12(r13) + mfspr r10,SPRG1 + ld r11,PACA_EXSLB+EX_R9(r13) + ld r12,PACA_EXSLB+EX_R3(r13) + std r10,PACA_EXGEN+EX_R13(r13) + std r11,PACA_EXGEN+EX_R9(r13) + std r12,PACA_EXGEN+EX_R3(r13) + clrrdi r12,r13,32 + mfmsr r10 + mfspr r11,SRR0 /* save SRR0 */ + ori r12,r12,slb_miss_user_common@l /* virt addr of handler */ + ori r10,r10,MSR_IR|MSR_DR|MSR_RI + mtspr SRR0,r12 + mfspr r12,SRR1 /* and SRR1 */ + mtspr SRR1,r10 + rfid + b . /* prevent spec. execution */ +#endif /* __DISABLED__ */ + +/* * Vectors for the FWNMI option. Share common code. */ .globl system_reset_fwnmi @@ -559,22 +601,59 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) .globl data_access_slb_iSeries data_access_slb_iSeries: mtspr SPRN_SPRG1,r13 /* save r13 */ - EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) - ld r12,PACALPPACA+LPPACASRR1(r13) mfspr r3,SPRN_DAR - b .do_slb_miss + std r9,PACA_EXSLB+EX_R9(r13) + mfcr r9 +#ifdef __DISABLED__ + cmpdi r3,0 + bge slb_miss_user_iseries +#endif + std r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) + ld r12,PACALPPACA+LPPACASRR1(r13); + b .slb_miss_realmode STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN) .globl instruction_access_slb_iSeries instruction_access_slb_iSeries: mtspr SPRN_SPRG1,r13 /* save r13 */ - EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) - ld r12,PACALPPACA+LPPACASRR1(r13) - ld r3,PACALPPACA+LPPACASRR0(r13) - b .do_slb_miss + ld r3,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ + std r9,PACA_EXSLB+EX_R9(r13) + mfcr r9 +#ifdef __DISABLED__ + cmpdi r3,0 + bge .slb_miss_user_iseries +#endif + std r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) + ld r12,PACALPPACA+LPPACASRR1(r13); + b .slb_miss_realmode + +#ifdef __DISABLED__ +slb_miss_user_iseries: + std r10,PACA_EXGEN+EX_R10(r13) + std r11,PACA_EXGEN+EX_R11(r13) + std r12,PACA_EXGEN+EX_R12(r13) + mfspr r10,SPRG1 + ld r11,PACA_EXSLB+EX_R9(r13) + ld r12,PACA_EXSLB+EX_R3(r13) + std r10,PACA_EXGEN+EX_R13(r13) + std r11,PACA_EXGEN+EX_R9(r13) + std r12,PACA_EXGEN+EX_R3(r13) + EXCEPTION_PROLOG_ISERIES_2 + b slb_miss_user_common +#endif MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt) STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN) @@ -809,6 +888,126 @@ instruction_access_common: li r5,0x400 b .do_hash_page /* Try to handle as hpte fault */ +/* + * Here is the common SLB miss user that is used when going to virtual + * mode for SLB misses, that is currently not used + */ +#ifdef __DISABLED__ + .align 7 + .globl slb_miss_user_common +slb_miss_user_common: + 
mflr r10 + std r3,PACA_EXGEN+EX_DAR(r13) + stw r9,PACA_EXGEN+EX_CCR(r13) + std r10,PACA_EXGEN+EX_LR(r13) + std r11,PACA_EXGEN+EX_SRR0(r13) + bl .slb_allocate_user + + ld r10,PACA_EXGEN+EX_LR(r13) + ld r3,PACA_EXGEN+EX_R3(r13) + lwz r9,PACA_EXGEN+EX_CCR(r13) + ld r11,PACA_EXGEN+EX_SRR0(r13) + mtlr r10 + beq- slb_miss_fault + + andi. r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- unrecov_user_slb + mfmsr r10 + +.machine push +.machine "power4" + mtcrf 0x80,r9 +.machine pop + + clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */ + mtmsrd r10,1 + + mtspr SRR0,r11 + mtspr SRR1,r12 + + ld r9,PACA_EXGEN+EX_R9(r13) + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + ld r12,PACA_EXGEN+EX_R12(r13) + ld r13,PACA_EXGEN+EX_R13(r13) + rfid + b . + +slb_miss_fault: + EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN) + ld r4,PACA_EXGEN+EX_DAR(r13) + li r5,0 + std r4,_DAR(r1) + std r5,_DSISR(r1) + b .handle_page_fault + +unrecov_user_slb: + EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN) + DISABLE_INTS + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + +#endif /* __DISABLED__ */ + + +/* + * r13 points to the PACA, r9 contains the saved CR, + * r12 contain the saved SRR1, SRR0 is still ready for return + * r3 has the faulting address + * r9 - r13 are saved in paca->exslb. + * r3 is saved in paca->slb_r3 + * We assume we aren't going to take any exceptions during this procedure. + */ +_GLOBAL(slb_miss_realmode) + mflr r10 + + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + + bl .slb_allocate_realmode + + /* All done -- return from exception. */ + + ld r10,PACA_EXSLB+EX_LR(r13) + ld r3,PACA_EXSLB+EX_R3(r13) + lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ +#ifdef CONFIG_PPC_ISERIES + ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ +#endif /* CONFIG_PPC_ISERIES */ + + mtlr r10 + + andi. r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- unrecov_slb + +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + +#ifdef CONFIG_PPC_ISERIES + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 +#endif /* CONFIG_PPC_ISERIES */ + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + rfid + b . /* prevent speculative execution */ + +unrecov_slb: + EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) + DISABLE_INTS + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + .align 7 .globl hardware_interrupt_common .globl hardware_interrupt_entry @@ -1139,62 +1338,6 @@ _GLOBAL(do_stab_bolted) b . /* prevent speculative execution */ /* - * r13 points to the PACA, r9 contains the saved CR, - * r11 and r12 contain the saved SRR0 and SRR1. - * r3 has the faulting address - * r9 - r13 are saved in paca->exslb. - * r3 is saved in paca->slb_r3 - * We assume we aren't going to take any exceptions during this procedure. - */ -_GLOBAL(do_slb_miss) - mflr r10 - - stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ - std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ - - bl .slb_allocate /* handle it */ - - /* All done -- return from exception. */ - - ld r10,PACA_EXSLB+EX_LR(r13) - ld r3,PACA_EXSLB+EX_R3(r13) - lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ -#ifdef CONFIG_PPC_ISERIES - ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ -#endif /* CONFIG_PPC_ISERIES */ - - mtlr r10 - - andi. 
r10,r12,MSR_RI /* check for unrecoverable exception */ - beq- unrecov_slb - -.machine push -.machine "power4" - mtcrf 0x80,r9 - mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ -.machine pop - -#ifdef CONFIG_PPC_ISERIES - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 -#endif /* CONFIG_PPC_ISERIES */ - ld r9,PACA_EXSLB+EX_R9(r13) - ld r10,PACA_EXSLB+EX_R10(r13) - ld r11,PACA_EXSLB+EX_R11(r13) - ld r12,PACA_EXSLB+EX_R12(r13) - ld r13,PACA_EXSLB+EX_R13(r13) - rfid - b . /* prevent speculative execution */ - -unrecov_slb: - EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) - DISABLE_INTS - bl .save_nvgprs -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .unrecoverable_exception - b 1b - -/* * Space for CPU0's segment table. * * On iSeries, the hypervisor must fill in at least one entry before @@ -1569,7 +1712,10 @@ _GLOBAL(__secondary_start) #endif /* Initialize the first segment table (or SLB) entry */ ld r3,PACASTABVIRT(r13) /* get addr of segment table */ +BEGIN_FTR_SECTION bl .stab_initialize +END_FTR_SECTION_IFCLR(CPU_FTR_SLB) + bl .slb_initialize /* Initialize the kernel stack. Just a repeat for iSeries. */ LOADADDR(r3,current_set) @@ -1914,24 +2060,6 @@ _GLOBAL(hmt_start_secondary) blr #endif -#if defined(CONFIG_KEXEC) || defined(CONFIG_SMP) -_GLOBAL(smp_release_cpus) - /* All secondary cpus are spinning on a common - * spinloop, release them all now so they can start - * to spin on their individual paca spinloops. - * For non SMP kernels, the secondary cpus never - * get out of the common spinloop. - * XXX This does nothing useful on iSeries, secondaries are - * already waiting on their paca. - */ - li r3,1 - LOADADDR(r5,__secondary_hold_spinloop) - std r3,0(r5) - sync - blr -#endif /* CONFIG_SMP */ - - /* * We put a few things here that have to be page-aligned. * This stuff goes at the beginning of the bss, which is page-aligned. diff --git a/arch/powerpc/kernel/lparmap.c b/arch/powerpc/kernel/lparmap.c index b81de286df5..5a05a797485 100644 --- a/arch/powerpc/kernel/lparmap.c +++ b/arch/powerpc/kernel/lparmap.c @@ -8,7 +8,7 @@ */ #include <asm/mmu.h> #include <asm/page.h> -#include <asm/iSeries/LparMap.h> +#include <asm/iseries/lpar_map.h> const struct LparMap __attribute__((__section__(".text"))) xLparMap = { .xNumberEsids = HvEsidsToMap, @@ -25,7 +25,7 @@ const struct LparMap __attribute__((__section__(".text"))) xLparMap = { .xRanges = { { .xPages = HvPagesToMap, .xOffset = 0, - .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - PAGE_SHIFT), + .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - HW_PAGE_SHIFT), }, }, }; diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index b3e95ff0dba..ae1433da09b 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -604,6 +604,76 @@ _GLOBAL(real_writeb) #endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */ /* + * SCOM access functions for 970 (FX only for now) + * + * unsigned long scom970_read(unsigned int address); + * void scom970_write(unsigned int address, unsigned long value); + * + * The address passed in is the 24 bits register address. This code + * is 970 specific and will not check the status bits, so you should + * know what you are doing. + */ +_GLOBAL(scom970_read) + /* interrupts off */ + mfmsr r4 + ori r0,r4,MSR_EE + xori r0,r0,MSR_EE + mtmsrd r0,1 + + /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + * (including parity). 
On current CPUs they must be 0'd, + * and finally or in RW bit + */ + rlwinm r3,r3,8,0,15 + ori r3,r3,0x8000 + + /* do the actual scom read */ + sync + mtspr SPRN_SCOMC,r3 + isync + mfspr r3,SPRN_SCOMD + isync + mfspr r0,SPRN_SCOMC + isync + + /* XXX: fixup result on some buggy 970's (ouch ! we lost a bit, bah + * that's the best we can do). Not implemented yet as we don't use + * the scom on any of the bogus CPUs yet, but may have to be done + * ultimately + */ + + /* restore interrupts */ + mtmsrd r4,1 + blr + + +_GLOBAL(scom970_write) + /* interrupts off */ + mfmsr r5 + ori r0,r5,MSR_EE + xori r0,r0,MSR_EE + mtmsrd r0,1 + + /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + * (including parity). On current CPUs they must be 0'd. + */ + + rlwinm r3,r3,8,0,15 + + sync + mtspr SPRN_SCOMD,r4 /* write data */ + isync + mtspr SPRN_SCOMC,r3 /* write command */ + isync + mfspr 3,SPRN_SCOMC + isync + + /* restore interrupts */ + mtmsrd r5,1 + blr + + +/* * Create a kernel thread * kernel_thread(fn, arg, flags) */ diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/kernel/ppc32.h new file mode 100644 index 00000000000..90e56277179 --- /dev/null +++ b/arch/powerpc/kernel/ppc32.h @@ -0,0 +1,138 @@ +#ifndef _PPC64_PPC32_H +#define _PPC64_PPC32_H + +#include <linux/compat.h> +#include <asm/siginfo.h> +#include <asm/signal.h> + +/* + * Data types and macros for providing 32b PowerPC support. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* These are here to support 32-bit syscalls on a 64-bit kernel. */ + +typedef struct compat_siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[SI_PAD_SIZE32]; + + /* kill() */ + struct { + compat_pid_t _pid; /* sender's pid */ + compat_uid_t _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + compat_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + compat_sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ + } _timer; + + /* POSIX.1b signals */ + struct { + compat_pid_t _pid; /* sender's pid */ + compat_uid_t _uid; /* sender's uid */ + compat_sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + compat_pid_t _pid; /* which child */ + compat_uid_t _uid; /* sender's uid */ + int _status; /* exit code */ + compat_clock_t _utime; + compat_clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */ + struct { + unsigned int _addr; /* faulting insn/memory ref. 
*/ + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + } _sifields; +} compat_siginfo_t; + +#define __old_sigaction32 old_sigaction32 + +struct __old_sigaction32 { + compat_uptr_t sa_handler; + compat_old_sigset_t sa_mask; + unsigned int sa_flags; + compat_uptr_t sa_restorer; /* not used by Linux/SPARC yet */ +}; + + + +struct sigaction32 { + compat_uptr_t sa_handler; /* Really a pointer, but need to deal with 32 bits */ + unsigned int sa_flags; + compat_uptr_t sa_restorer; /* Another 32 bit pointer */ + compat_sigset_t sa_mask; /* A 32 bit mask */ +}; + +typedef struct sigaltstack_32 { + unsigned int ss_sp; + int ss_flags; + compat_size_t ss_size; +} stack_32_t; + +struct pt_regs32 { + unsigned int gpr[32]; + unsigned int nip; + unsigned int msr; + unsigned int orig_gpr3; /* Used for restarting system calls */ + unsigned int ctr; + unsigned int link; + unsigned int xer; + unsigned int ccr; + unsigned int mq; /* 601 only (not used at present) */ + unsigned int trap; /* Reason for being here */ + unsigned int dar; /* Fault registers */ + unsigned int dsisr; + unsigned int result; /* Result of a system call */ +}; + +struct sigcontext32 { + unsigned int _unused[4]; + int signal; + compat_uptr_t handler; + unsigned int oldmask; + compat_uptr_t regs; /* 4 byte pointer to the pt_regs32 structure. */ +}; + +struct mcontext32 { + elf_gregset_t32 mc_gregs; + elf_fpregset_t mc_fregs; + unsigned int mc_pad[2]; + elf_vrregset_t32 mc_vregs __attribute__((__aligned__(16))); +}; + +struct ucontext32 { + unsigned int uc_flags; + unsigned int uc_link; + stack_32_t uc_stack; + int uc_pad[7]; + compat_uptr_t uc_regs; /* points to uc_mcontext field */ + compat_sigset_t uc_sigmask; /* mask last for extensibility */ + /* glibc has 1024-bit signal masks, ours are 64-bit */ + int uc_maskext[30]; + int uc_pad2[3]; + struct mcontext32 uc_mcontext; +}; + +#endif /* _PPC64_PPC32_H */ diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 8bc540337ba..47d6f7e2ea9 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -81,15 +81,6 @@ EXPORT_SYMBOL(_prep_type); EXPORT_SYMBOL(ucSystemType); #endif -#if !defined(__INLINE_BITOPS) -EXPORT_SYMBOL(set_bit); -EXPORT_SYMBOL(clear_bit); -EXPORT_SYMBOL(change_bit); -EXPORT_SYMBOL(test_and_set_bit); -EXPORT_SYMBOL(test_and_clear_bit); -EXPORT_SYMBOL(test_and_change_bit); -#endif /* __INLINE_BITOPS */ - EXPORT_SYMBOL(strcpy); EXPORT_SYMBOL(strncpy); EXPORT_SYMBOL(strcat); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8f85dabe4df..de69fb37c73 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -46,9 +46,9 @@ #include <asm/processor.h> #include <asm/mmu.h> #include <asm/prom.h> +#include <asm/machdep.h> #ifdef CONFIG_PPC64 #include <asm/firmware.h> -#include <asm/plpar_wrappers.h> #include <asm/time.h> #endif @@ -201,27 +201,13 @@ int dump_spe(struct pt_regs *regs, elf_vrregset_t *evrregs) } #endif /* CONFIG_SPE */ -static void set_dabr_spr(unsigned long val) -{ - mtspr(SPRN_DABR, val); -} - int set_dabr(unsigned long dabr) { - int ret = 0; - -#ifdef CONFIG_PPC64 - if (firmware_has_feature(FW_FEATURE_XDABR)) { - /* We want to catch accesses from kernel and userspace */ - unsigned long flags = H_DABRX_KERNEL|H_DABRX_USER; - ret = plpar_set_xdabr(dabr, flags); - } else if (firmware_has_feature(FW_FEATURE_DABR)) { - ret = plpar_set_dabr(dabr); - } else -#endif - set_dabr_spr(dabr); + if 
(ppc_md.set_dabr) + return ppc_md.set_dabr(dabr); - return ret; + mtspr(SPRN_DABR, dabr); + return 0; } #ifdef CONFIG_PPC64 @@ -566,12 +552,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_SLB)) { unsigned long sp_vsid = get_kernel_vsid(sp); + unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; sp_vsid <<= SLB_VSID_SHIFT; - sp_vsid |= SLB_VSID_KERNEL; - if (cpu_has_feature(CPU_FTR_16M_PAGE)) - sp_vsid |= SLB_VSID_L; - + sp_vsid |= SLB_VSID_KERNEL | llp; p->thread.ksp_vsid = sp_vsid; } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 2eccd0e159e..f645adb5753 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -724,10 +724,10 @@ static inline char *find_flat_dt_string(u32 offset) * used to extract the memory informations at boot before we can * unflatten the tree */ -static int __init scan_flat_dt(int (*it)(unsigned long node, - const char *uname, int depth, - void *data), - void *data) +int __init of_scan_flat_dt(int (*it)(unsigned long node, + const char *uname, int depth, + void *data), + void *data) { unsigned long p = ((unsigned long)initial_boot_params) + initial_boot_params->off_dt_struct; @@ -784,8 +784,8 @@ static int __init scan_flat_dt(int (*it)(unsigned long node, * This function can be used within scan_flattened_dt callback to get * access to properties */ -static void* __init get_flat_dt_prop(unsigned long node, const char *name, - unsigned long *size) +void* __init of_get_flat_dt_prop(unsigned long node, const char *name, + unsigned long *size) { unsigned long p = node; @@ -1087,7 +1087,7 @@ void __init unflatten_device_tree(void) static int __init early_init_dt_scan_cpus(unsigned long node, const char *uname, int depth, void *data) { - char *type = get_flat_dt_prop(node, "device_type", NULL); + char *type = of_get_flat_dt_prop(node, "device_type", NULL); u32 *prop; unsigned long size = 0; @@ -1095,19 +1095,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (type == NULL || strcmp(type, "cpu") != 0) return 0; -#ifdef CONFIG_PPC_PSERIES - /* On LPAR, look for the first ibm,pft-size property for the hash table size - */ - if (systemcfg->platform == PLATFORM_PSERIES_LPAR && ppc64_pft_size == 0) { - u32 *pft_size; - pft_size = get_flat_dt_prop(node, "ibm,pft-size", NULL); - if (pft_size != NULL) { - /* pft_size[0] is the NUMA CEC cookie */ - ppc64_pft_size = pft_size[1]; - } - } -#endif - boot_cpuid = 0; boot_cpuid_phys = 0; if (initial_boot_params && initial_boot_params->version >= 2) { @@ -1117,8 +1104,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, boot_cpuid_phys = initial_boot_params->boot_cpuid_phys; } else { /* Check if it's the boot-cpu, set it's hw index now */ - if (get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) { - prop = get_flat_dt_prop(node, "reg", NULL); + if (of_get_flat_dt_prop(node, + "linux,boot-cpu", NULL) != NULL) { + prop = of_get_flat_dt_prop(node, "reg", NULL); if (prop != NULL) boot_cpuid_phys = *prop; } @@ -1127,14 +1115,14 @@ static int __init early_init_dt_scan_cpus(unsigned long node, #ifdef CONFIG_ALTIVEC /* Check if we have a VMX and eventually update CPU features */ - prop = (u32 *)get_flat_dt_prop(node, "ibm,vmx", &size); + prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", &size); if (prop && (*prop) > 0) { cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; } /* Same goes for Apple's "altivec" property */ - prop = 
(u32 *)get_flat_dt_prop(node, "altivec", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "altivec", NULL); if (prop) { cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; @@ -1147,7 +1135,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * this by looking at the size of the ibm,ppc-interrupt-server#s * property */ - prop = (u32 *)get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", + prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &size); cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; if (prop && ((size / sizeof(u32)) > 1)) @@ -1170,7 +1158,7 @@ static int __init early_init_dt_scan_chosen(unsigned long node, return 0; /* get platform type */ - prop = (u32 *)get_flat_dt_prop(node, "linux,platform", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL); if (prop == NULL) return 0; #ifdef CONFIG_PPC64 @@ -1183,21 +1171,21 @@ static int __init early_init_dt_scan_chosen(unsigned long node, #ifdef CONFIG_PPC64 /* check if iommu is forced on or off */ - if (get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) + if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) iommu_is_off = 1; - if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL) + if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL) iommu_force_on = 1; #endif - lprop = get_flat_dt_prop(node, "linux,memory-limit", NULL); + lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL); if (lprop) memory_limit = *lprop; #ifdef CONFIG_PPC64 - lprop = get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); + lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); if (lprop) tce_alloc_start = *lprop; - lprop = get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); + lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); if (lprop) tce_alloc_end = *lprop; #endif @@ -1209,9 +1197,9 @@ static int __init early_init_dt_scan_chosen(unsigned long node, { u64 *basep, *entryp; - basep = get_flat_dt_prop(node, "linux,rtas-base", NULL); - entryp = get_flat_dt_prop(node, "linux,rtas-entry", NULL); - prop = get_flat_dt_prop(node, "linux,rtas-size", NULL); + basep = of_get_flat_dt_prop(node, "linux,rtas-base", NULL); + entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL); + prop = of_get_flat_dt_prop(node, "linux,rtas-size", NULL); if (basep && entryp && prop) { rtas.base = *basep; rtas.entry = *entryp; @@ -1232,11 +1220,11 @@ static int __init early_init_dt_scan_root(unsigned long node, if (depth != 0) return 0; - prop = get_flat_dt_prop(node, "#size-cells", NULL); + prop = of_get_flat_dt_prop(node, "#size-cells", NULL); dt_root_size_cells = (prop == NULL) ? 1 : *prop; DBG("dt_root_size_cells = %x\n", dt_root_size_cells); - prop = get_flat_dt_prop(node, "#address-cells", NULL); + prop = of_get_flat_dt_prop(node, "#address-cells", NULL); dt_root_addr_cells = (prop == NULL) ? 
2 : *prop; DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells); @@ -1271,7 +1259,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp) static int __init early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data) { - char *type = get_flat_dt_prop(node, "device_type", NULL); + char *type = of_get_flat_dt_prop(node, "device_type", NULL); cell_t *reg, *endp; unsigned long l; @@ -1279,13 +1267,13 @@ static int __init early_init_dt_scan_memory(unsigned long node, if (type == NULL || strcmp(type, "memory") != 0) return 0; - reg = (cell_t *)get_flat_dt_prop(node, "reg", &l); + reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l); if (reg == NULL) return 0; endp = reg + (l / sizeof(cell_t)); - DBG("memory scan node %s ..., reg size %ld, data: %x %x %x %x, ...\n", + DBG("memory scan node %s, reg size %ld, data: %x %x %x %x,\n", uname, l, reg[0], reg[1], reg[2], reg[3]); while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { @@ -1343,12 +1331,12 @@ void __init early_init_devtree(void *params) * device-tree, including the platform type, initrd location and * size, TCE reserve, and more ... */ - scan_flat_dt(early_init_dt_scan_chosen, NULL); + of_scan_flat_dt(early_init_dt_scan_chosen, NULL); /* Scan memory nodes and rebuild LMBs */ lmb_init(); - scan_flat_dt(early_init_dt_scan_root, NULL); - scan_flat_dt(early_init_dt_scan_memory, NULL); + of_scan_flat_dt(early_init_dt_scan_root, NULL); + of_scan_flat_dt(early_init_dt_scan_memory, NULL); lmb_enforce_memory_limit(memory_limit); lmb_analyze(); #ifdef CONFIG_PPC64 @@ -1363,10 +1351,10 @@ void __init early_init_devtree(void *params) DBG("Scanning CPUs ...\n"); - /* Retreive hash table size from flattened tree plus other - * CPU related informations (altivec support, boot CPU ID, ...) + /* Retreive CPU related informations from the flat tree + * (altivec support, boot CPU ID, ...) */ - scan_flat_dt(early_init_dt_scan_cpus, NULL); + of_scan_flat_dt(early_init_dt_scan_cpus, NULL); DBG(" <- early_init_devtree()\n"); } @@ -1986,14 +1974,29 @@ EXPORT_SYMBOL(get_property); /* * Add a property to a node */ -void prom_add_property(struct device_node* np, struct property* prop) +int prom_add_property(struct device_node* np, struct property* prop) { - struct property **next = &np->properties; + struct property **next; prop->next = NULL; - while (*next) + write_lock(&devtree_lock); + next = &np->properties; + while (*next) { + if (strcmp(prop->name, (*next)->name) == 0) { + /* duplicate ! don't insert it */ + write_unlock(&devtree_lock); + return -1; + } next = &(*next)->next; + } *next = prop; + write_unlock(&devtree_lock); + + /* try to add to proc as well if it was initialized */ + if (np->pde) + proc_device_tree_add_prop(np->pde, prop); + + return 0; } /* I quickly hacked that one, check against spec ! 
*/ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 9750b3cd8ec..6dc33d19fc2 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -403,19 +403,19 @@ static int __init prom_next_node(phandle *nodep) } } -static int __init prom_getprop(phandle node, const char *pname, +static int inline prom_getprop(phandle node, const char *pname, void *value, size_t valuelen) { return call_prom("getprop", 4, 1, node, ADDR(pname), (u32)(unsigned long) value, (u32) valuelen); } -static int __init prom_getproplen(phandle node, const char *pname) +static int inline prom_getproplen(phandle node, const char *pname) { return call_prom("getproplen", 2, 1, node, ADDR(pname)); } -static int __init prom_setprop(phandle node, const char *pname, +static int inline prom_setprop(phandle node, const char *pname, void *value, size_t valuelen) { return call_prom("setprop", 4, 1, node, ADDR(pname), @@ -1408,8 +1408,9 @@ static int __init prom_find_machine_type(void) struct prom_t *_prom = &RELOC(prom); char compat[256]; int len, i = 0; +#ifdef CONFIG_PPC64 phandle rtas; - +#endif len = prom_getprop(_prom->root, "compatible", compat, sizeof(compat)-1); if (len > 0) { @@ -1872,7 +1873,7 @@ static void __init fixup_device_tree(void) if (prom_getprop(u3, "device-rev", &u3_rev, sizeof(u3_rev)) == PROM_ERROR) return; - if (u3_rev != 0x35 && u3_rev != 0x37) + if (u3_rev < 0x35 || u3_rev > 0x39) return; /* does it need fixup ? */ if (prom_getproplen(i2c, "interrupts") > 0) @@ -2000,7 +2001,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, #endif /* - * On pSeries and BPA, copy the CPU hold code + * Copy the CPU hold code */ if (RELOC(of_platform) != PLATFORM_POWERMAC) copy_and_flush(0, KERNELBASE + offset, 0x100, 0); diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 568ea335d61..3d2abd95c7a 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -248,46 +248,10 @@ void ptrace_disable(struct task_struct *child) clear_single_step(child); } -long sys_ptrace(long request, long pid, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; int ret = -EPERM; - lock_kernel(); - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. 
*/ @@ -540,10 +504,7 @@ long sys_ptrace(long request, long pid, long addr, long data) ret = ptrace_request(child, request, addr, data); break; } -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); + return ret; } diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c new file mode 100644 index 00000000000..5bdd5b079d9 --- /dev/null +++ b/arch/powerpc/kernel/rtas-proc.c @@ -0,0 +1,808 @@ +/* + * arch/ppc64/kernel/rtas-proc.c + * Copyright (C) 2000 Tilmann Bitterberg + * (tilmann@bitterberg.de) + * + * RTAS (Runtime Abstraction Services) stuff + * Intention is to provide a clean user interface + * to use the RTAS. + * + * TODO: + * Split off a header file and maybe move it to a different + * location. Write Documentation on what the /proc/rtas/ entries + * actually do. + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/ctype.h> +#include <linux/time.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/seq_file.h> +#include <linux/bitops.h> +#include <linux/rtc.h> + +#include <asm/uaccess.h> +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/machdep.h> /* for ppc_md */ +#include <asm/time.h> +#include <asm/systemcfg.h> + +/* Token for Sensors */ +#define KEY_SWITCH 0x0001 +#define ENCLOSURE_SWITCH 0x0002 +#define THERMAL_SENSOR 0x0003 +#define LID_STATUS 0x0004 +#define POWER_SOURCE 0x0005 +#define BATTERY_VOLTAGE 0x0006 +#define BATTERY_REMAINING 0x0007 +#define BATTERY_PERCENTAGE 0x0008 +#define EPOW_SENSOR 0x0009 +#define BATTERY_CYCLESTATE 0x000a +#define BATTERY_CHARGING 0x000b + +/* IBM specific sensors */ +#define IBM_SURVEILLANCE 0x2328 /* 9000 */ +#define IBM_FANRPM 0x2329 /* 9001 */ +#define IBM_VOLTAGE 0x232a /* 9002 */ +#define IBM_DRCONNECTOR 0x232b /* 9003 */ +#define IBM_POWERSUPPLY 0x232c /* 9004 */ + +/* Status return values */ +#define SENSOR_CRITICAL_HIGH 13 +#define SENSOR_WARNING_HIGH 12 +#define SENSOR_NORMAL 11 +#define SENSOR_WARNING_LOW 10 +#define SENSOR_CRITICAL_LOW 9 +#define SENSOR_SUCCESS 0 +#define SENSOR_HW_ERROR -1 +#define SENSOR_BUSY -2 +#define SENSOR_NOT_EXIST -3 +#define SENSOR_DR_ENTITY -9000 + +/* Location Codes */ +#define LOC_SCSI_DEV_ADDR 'A' +#define LOC_SCSI_DEV_LOC 'B' +#define LOC_CPU 'C' +#define LOC_DISKETTE 'D' +#define LOC_ETHERNET 'E' +#define LOC_FAN 'F' +#define LOC_GRAPHICS 'G' +/* reserved / not used 'H' */ +#define LOC_IO_ADAPTER 'I' +/* reserved / not used 'J' */ +#define LOC_KEYBOARD 'K' +#define LOC_LCD 'L' +#define LOC_MEMORY 'M' +#define LOC_NV_MEMORY 'N' +#define LOC_MOUSE 'O' +#define LOC_PLANAR 'P' +#define LOC_OTHER_IO 'Q' +#define LOC_PARALLEL 'R' +#define LOC_SERIAL 'S' +#define LOC_DEAD_RING 'T' +#define LOC_RACKMOUNTED 'U' /* for _u_nit is rack mounted */ +#define LOC_VOLTAGE 'V' +#define LOC_SWITCH_ADAPTER 'W' +#define LOC_OTHER 'X' +#define LOC_FIRMWARE 'Y' +#define LOC_SCSI 'Z' + +/* Tokens for indicators */ +#define TONE_FREQUENCY 0x0001 /* 0 - 1000 (HZ)*/ +#define TONE_VOLUME 0x0002 /* 0 - 100 (%) */ +#define SYSTEM_POWER_STATE 0x0003 +#define WARNING_LIGHT 0x0004 +#define DISK_ACTIVITY_LIGHT 0x0005 +#define HEX_DISPLAY_UNIT 0x0006 +#define BATTERY_WARNING_TIME 0x0007 +#define CONDITION_CYCLE_REQUEST 0x0008 +#define SURVEILLANCE_INDICATOR 0x2328 /* 9000 */ +#define DR_ACTION 0x2329 /* 9001 */ +#define DR_INDICATOR 0x232a /* 9002 */ +/* 9003 - 9004: Vendor specific */ +/* 9006 - 9999: Vendor specific */ + +/* other */ +#define 
MAX_SENSORS 17 /* I only know of 17 sensors */ +#define MAX_LINELENGTH 256 +#define SENSOR_PREFIX "ibm,sensor-" +#define cel_to_fahr(x) ((x*9/5)+32) + + +/* Globals */ +static struct rtas_sensors sensors; +static struct device_node *rtas_node = NULL; +static unsigned long power_on_time = 0; /* Save the time the user set */ +static char progress_led[MAX_LINELENGTH]; + +static unsigned long rtas_tone_frequency = 1000; +static unsigned long rtas_tone_volume = 0; + +/* ****************STRUCTS******************************************* */ +struct individual_sensor { + unsigned int token; + unsigned int quant; +}; + +struct rtas_sensors { + struct individual_sensor sensor[MAX_SENSORS]; + unsigned int quant; +}; + +/* ****************************************************************** */ +/* Declarations */ +static int ppc_rtas_sensors_show(struct seq_file *m, void *v); +static int ppc_rtas_clock_show(struct seq_file *m, void *v); +static ssize_t ppc_rtas_clock_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos); +static int ppc_rtas_progress_show(struct seq_file *m, void *v); +static ssize_t ppc_rtas_progress_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos); +static int ppc_rtas_poweron_show(struct seq_file *m, void *v); +static ssize_t ppc_rtas_poweron_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos); + +static ssize_t ppc_rtas_tone_freq_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos); +static int ppc_rtas_tone_freq_show(struct seq_file *m, void *v); +static ssize_t ppc_rtas_tone_volume_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos); +static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v); +static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v); + +static int sensors_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_sensors_show, NULL); +} + +struct file_operations ppc_rtas_sensors_operations = { + .open = sensors_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int poweron_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_poweron_show, NULL); +} + +struct file_operations ppc_rtas_poweron_operations = { + .open = poweron_open, + .read = seq_read, + .llseek = seq_lseek, + .write = ppc_rtas_poweron_write, + .release = single_release, +}; + +static int progress_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_progress_show, NULL); +} + +struct file_operations ppc_rtas_progress_operations = { + .open = progress_open, + .read = seq_read, + .llseek = seq_lseek, + .write = ppc_rtas_progress_write, + .release = single_release, +}; + +static int clock_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_clock_show, NULL); +} + +struct file_operations ppc_rtas_clock_operations = { + .open = clock_open, + .read = seq_read, + .llseek = seq_lseek, + .write = ppc_rtas_clock_write, + .release = single_release, +}; + +static int tone_freq_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_tone_freq_show, NULL); +} + +struct file_operations ppc_rtas_tone_freq_operations = { + .open = tone_freq_open, + .read = seq_read, + .llseek = seq_lseek, + .write = ppc_rtas_tone_freq_write, + .release = single_release, +}; + +static int tone_volume_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_tone_volume_show, 
NULL); +} + +struct file_operations ppc_rtas_tone_volume_operations = { + .open = tone_volume_open, + .read = seq_read, + .llseek = seq_lseek, + .write = ppc_rtas_tone_volume_write, + .release = single_release, +}; + +static int rmo_buf_open(struct inode *inode, struct file *file) +{ + return single_open(file, ppc_rtas_rmo_buf_show, NULL); +} + +struct file_operations ppc_rtas_rmo_buf_ops = { + .open = rmo_buf_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ppc_rtas_find_all_sensors(void); +static void ppc_rtas_process_sensor(struct seq_file *m, + struct individual_sensor *s, int state, int error, char *loc); +static char *ppc_rtas_process_error(int error); +static void get_location_code(struct seq_file *m, + struct individual_sensor *s, char *loc); +static void check_location_string(struct seq_file *m, char *c); +static void check_location(struct seq_file *m, char *c); + +static int __init proc_rtas_init(void) +{ + struct proc_dir_entry *entry; + + if (!(systemcfg->platform & PLATFORM_PSERIES)) + return 1; + + rtas_node = of_find_node_by_name(NULL, "rtas"); + if (rtas_node == NULL) + return 1; + + entry = create_proc_entry("ppc64/rtas/progress", S_IRUGO|S_IWUSR, NULL); + if (entry) + entry->proc_fops = &ppc_rtas_progress_operations; + + entry = create_proc_entry("ppc64/rtas/clock", S_IRUGO|S_IWUSR, NULL); + if (entry) + entry->proc_fops = &ppc_rtas_clock_operations; + + entry = create_proc_entry("ppc64/rtas/poweron", S_IWUSR|S_IRUGO, NULL); + if (entry) + entry->proc_fops = &ppc_rtas_poweron_operations; + + entry = create_proc_entry("ppc64/rtas/sensors", S_IRUGO, NULL); + if (entry) + entry->proc_fops = &ppc_rtas_sensors_operations; + + entry = create_proc_entry("ppc64/rtas/frequency", S_IWUSR|S_IRUGO, + NULL); + if (entry) + entry->proc_fops = &ppc_rtas_tone_freq_operations; + + entry = create_proc_entry("ppc64/rtas/volume", S_IWUSR|S_IRUGO, NULL); + if (entry) + entry->proc_fops = &ppc_rtas_tone_volume_operations; + + entry = create_proc_entry("ppc64/rtas/rmo_buffer", S_IRUSR, NULL); + if (entry) + entry->proc_fops = &ppc_rtas_rmo_buf_ops; + + return 0; +} + +__initcall(proc_rtas_init); + +static int parse_number(const char __user *p, size_t count, unsigned long *val) +{ + char buf[40]; + char *end; + + if (count > 39) + return -EINVAL; + + if (copy_from_user(buf, p, count)) + return -EFAULT; + + buf[count] = 0; + + *val = simple_strtoul(buf, &end, 10); + if (*end && *end != '\n') + return -EINVAL; + + return 0; +} + +/* ****************************************************************** */ +/* POWER-ON-TIME */ +/* ****************************************************************** */ +static ssize_t ppc_rtas_poweron_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + struct rtc_time tm; + unsigned long nowtime; + int error = parse_number(buf, count, &nowtime); + if (error) + return error; + + power_on_time = nowtime; /* save the time */ + + to_tm(nowtime, &tm); + + error = rtas_call(rtas_token("set-time-for-power-on"), 7, 1, NULL, + tm.tm_year, tm.tm_mon, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */); + if (error) + printk(KERN_WARNING "error: setting poweron time returned: %s\n", + ppc_rtas_process_error(error)); + return count; +} +/* ****************************************************************** */ +static int ppc_rtas_poweron_show(struct seq_file *m, void *v) +{ + if (power_on_time == 0) + seq_printf(m, "Power on time not set\n"); + else + seq_printf(m, "%lu\n",power_on_time); + return 
0; +} + +/* ****************************************************************** */ +/* PROGRESS */ +/* ****************************************************************** */ +static ssize_t ppc_rtas_progress_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + unsigned long hex; + + if (count >= MAX_LINELENGTH) + count = MAX_LINELENGTH -1; + if (copy_from_user(progress_led, buf, count)) { /* save the string */ + return -EFAULT; + } + progress_led[count] = 0; + + /* Lets see if the user passed hexdigits */ + hex = simple_strtoul(progress_led, NULL, 10); + + rtas_progress ((char *)progress_led, hex); + return count; + + /* clear the line */ + /* rtas_progress(" ", 0xffff);*/ +} +/* ****************************************************************** */ +static int ppc_rtas_progress_show(struct seq_file *m, void *v) +{ + if (progress_led) + seq_printf(m, "%s\n", progress_led); + return 0; +} + +/* ****************************************************************** */ +/* CLOCK */ +/* ****************************************************************** */ +static ssize_t ppc_rtas_clock_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + struct rtc_time tm; + unsigned long nowtime; + int error = parse_number(buf, count, &nowtime); + if (error) + return error; + + to_tm(nowtime, &tm); + error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, + tm.tm_year, tm.tm_mon, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, 0); + if (error) + printk(KERN_WARNING "error: setting the clock returned: %s\n", + ppc_rtas_process_error(error)); + return count; +} +/* ****************************************************************** */ +static int ppc_rtas_clock_show(struct seq_file *m, void *v) +{ + int ret[8]; + int error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + + if (error) { + printk(KERN_WARNING "error: reading the clock returned: %s\n", + ppc_rtas_process_error(error)); + seq_printf(m, "0"); + } else { + unsigned int year, mon, day, hour, min, sec; + year = ret[0]; mon = ret[1]; day = ret[2]; + hour = ret[3]; min = ret[4]; sec = ret[5]; + seq_printf(m, "%lu\n", + mktime(year, mon, day, hour, min, sec)); + } + return 0; +} + +/* ****************************************************************** */ +/* SENSOR STUFF */ +/* ****************************************************************** */ +static int ppc_rtas_sensors_show(struct seq_file *m, void *v) +{ + int i,j; + int state, error; + int get_sensor_state = rtas_token("get-sensor-state"); + + seq_printf(m, "RTAS (RunTime Abstraction Services) Sensor Information\n"); + seq_printf(m, "Sensor\t\tValue\t\tCondition\tLocation\n"); + seq_printf(m, "********************************************************\n"); + + if (ppc_rtas_find_all_sensors() != 0) { + seq_printf(m, "\nNo sensors are available\n"); + return 0; + } + + for (i=0; i<sensors.quant; i++) { + struct individual_sensor *p = &sensors.sensor[i]; + char rstr[64]; + char *loc; + int llen, offs; + + sprintf (rstr, SENSOR_PREFIX"%04d", p->token); + loc = (char *) get_property(rtas_node, rstr, &llen); + + /* A sensor may have multiple instances */ + for (j = 0, offs = 0; j <= p->quant; j++) { + error = rtas_call(get_sensor_state, 2, 2, &state, + p->token, j); + + ppc_rtas_process_sensor(m, p, state, error, loc); + seq_putc(m, '\n'); + if (loc) { + offs += strlen(loc) + 1; + loc += strlen(loc) + 1; + if (offs >= llen) + loc = NULL; + } + } + } + return 0; +} + +/* ****************************************************************** */ 
+ +static int ppc_rtas_find_all_sensors(void) +{ + unsigned int *utmp; + int len, i; + + utmp = (unsigned int *) get_property(rtas_node, "rtas-sensors", &len); + if (utmp == NULL) { + printk (KERN_ERR "error: could not get rtas-sensors\n"); + return 1; + } + + sensors.quant = len / 8; /* int + int */ + + for (i=0; i<sensors.quant; i++) { + sensors.sensor[i].token = *utmp++; + sensors.sensor[i].quant = *utmp++; + } + return 0; +} + +/* ****************************************************************** */ +/* + * Builds a string of what rtas returned + */ +static char *ppc_rtas_process_error(int error) +{ + switch (error) { + case SENSOR_CRITICAL_HIGH: + return "(critical high)"; + case SENSOR_WARNING_HIGH: + return "(warning high)"; + case SENSOR_NORMAL: + return "(normal)"; + case SENSOR_WARNING_LOW: + return "(warning low)"; + case SENSOR_CRITICAL_LOW: + return "(critical low)"; + case SENSOR_SUCCESS: + return "(read ok)"; + case SENSOR_HW_ERROR: + return "(hardware error)"; + case SENSOR_BUSY: + return "(busy)"; + case SENSOR_NOT_EXIST: + return "(non existent)"; + case SENSOR_DR_ENTITY: + return "(dr entity removed)"; + default: + return "(UNKNOWN)"; + } +} + +/* ****************************************************************** */ +/* + * Builds a string out of what the sensor said + */ + +static void ppc_rtas_process_sensor(struct seq_file *m, + struct individual_sensor *s, int state, int error, char *loc) +{ + /* Defined return vales */ + const char * key_switch[] = { "Off\t", "Normal\t", "Secure\t", + "Maintenance" }; + const char * enclosure_switch[] = { "Closed", "Open" }; + const char * lid_status[] = { " ", "Open", "Closed" }; + const char * power_source[] = { "AC\t", "Battery", + "AC & Battery" }; + const char * battery_remaining[] = { "Very Low", "Low", "Mid", "High" }; + const char * epow_sensor[] = { + "EPOW Reset", "Cooling warning", "Power warning", + "System shutdown", "System halt", "EPOW main enclosure", + "EPOW power off" }; + const char * battery_cyclestate[] = { "None", "In progress", + "Requested" }; + const char * battery_charging[] = { "Charging", "Discharching", + "No current flow" }; + const char * ibm_drconnector[] = { "Empty", "Present", "Unusable", + "Exchange" }; + + int have_strings = 0; + int num_states = 0; + int temperature = 0; + int unknown = 0; + + /* What kind of sensor do we have here? */ + + switch (s->token) { + case KEY_SWITCH: + seq_printf(m, "Key switch:\t"); + num_states = sizeof(key_switch) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", key_switch[state]); + have_strings = 1; + } + break; + case ENCLOSURE_SWITCH: + seq_printf(m, "Enclosure switch:\t"); + num_states = sizeof(enclosure_switch) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", + enclosure_switch[state]); + have_strings = 1; + } + break; + case THERMAL_SENSOR: + seq_printf(m, "Temp. 
(C/F):\t"); + temperature = 1; + break; + case LID_STATUS: + seq_printf(m, "Lid status:\t"); + num_states = sizeof(lid_status) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", lid_status[state]); + have_strings = 1; + } + break; + case POWER_SOURCE: + seq_printf(m, "Power source:\t"); + num_states = sizeof(power_source) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", + power_source[state]); + have_strings = 1; + } + break; + case BATTERY_VOLTAGE: + seq_printf(m, "Battery voltage:\t"); + break; + case BATTERY_REMAINING: + seq_printf(m, "Battery remaining:\t"); + num_states = sizeof(battery_remaining) / sizeof(char *); + if (state < num_states) + { + seq_printf(m, "%s\t", + battery_remaining[state]); + have_strings = 1; + } + break; + case BATTERY_PERCENTAGE: + seq_printf(m, "Battery percentage:\t"); + break; + case EPOW_SENSOR: + seq_printf(m, "EPOW Sensor:\t"); + num_states = sizeof(epow_sensor) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", epow_sensor[state]); + have_strings = 1; + } + break; + case BATTERY_CYCLESTATE: + seq_printf(m, "Battery cyclestate:\t"); + num_states = sizeof(battery_cyclestate) / + sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", + battery_cyclestate[state]); + have_strings = 1; + } + break; + case BATTERY_CHARGING: + seq_printf(m, "Battery Charging:\t"); + num_states = sizeof(battery_charging) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", + battery_charging[state]); + have_strings = 1; + } + break; + case IBM_SURVEILLANCE: + seq_printf(m, "Surveillance:\t"); + break; + case IBM_FANRPM: + seq_printf(m, "Fan (rpm):\t"); + break; + case IBM_VOLTAGE: + seq_printf(m, "Voltage (mv):\t"); + break; + case IBM_DRCONNECTOR: + seq_printf(m, "DR connector:\t"); + num_states = sizeof(ibm_drconnector) / sizeof(char *); + if (state < num_states) { + seq_printf(m, "%s\t", + ibm_drconnector[state]); + have_strings = 1; + } + break; + case IBM_POWERSUPPLY: + seq_printf(m, "Powersupply:\t"); + break; + default: + seq_printf(m, "Unknown sensor (type %d), ignoring it\n", + s->token); + unknown = 1; + have_strings = 1; + break; + } + if (have_strings == 0) { + if (temperature) { + seq_printf(m, "%4d /%4d\t", state, cel_to_fahr(state)); + } else + seq_printf(m, "%10d\t", state); + } + if (unknown == 0) { + seq_printf(m, "%s\t", ppc_rtas_process_error(error)); + get_location_code(m, s, loc); + } +} + +/* ****************************************************************** */ + +static void check_location(struct seq_file *m, char *c) +{ + switch (c[0]) { + case LOC_PLANAR: + seq_printf(m, "Planar #%c", c[1]); + break; + case LOC_CPU: + seq_printf(m, "CPU #%c", c[1]); + break; + case LOC_FAN: + seq_printf(m, "Fan #%c", c[1]); + break; + case LOC_RACKMOUNTED: + seq_printf(m, "Rack #%c", c[1]); + break; + case LOC_VOLTAGE: + seq_printf(m, "Voltage #%c", c[1]); + break; + case LOC_LCD: + seq_printf(m, "LCD #%c", c[1]); + break; + case '.': + seq_printf(m, "- %c", c[1]); + break; + default: + seq_printf(m, "Unknown location"); + break; + } +} + + +/* ****************************************************************** */ +/* + * Format: + * ${LETTER}${NUMBER}[[-/]${LETTER}${NUMBER} [ ... ] ] + * the '.' 
may be an abbrevation + */ +static void check_location_string(struct seq_file *m, char *c) +{ + while (*c) { + if (isalpha(*c) || *c == '.') + check_location(m, c); + else if (*c == '/' || *c == '-') + seq_printf(m, " at "); + c++; + } +} + + +/* ****************************************************************** */ + +static void get_location_code(struct seq_file *m, struct individual_sensor *s, char *loc) +{ + if (!loc || !*loc) { + seq_printf(m, "---");/* does not have a location */ + } else { + check_location_string(m, loc); + } + seq_putc(m, ' '); +} +/* ****************************************************************** */ +/* INDICATORS - Tone Frequency */ +/* ****************************************************************** */ +static ssize_t ppc_rtas_tone_freq_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + unsigned long freq; + int error = parse_number(buf, count, &freq); + if (error) + return error; + + rtas_tone_frequency = freq; /* save it for later */ + error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL, + TONE_FREQUENCY, 0, freq); + if (error) + printk(KERN_WARNING "error: setting tone frequency returned: %s\n", + ppc_rtas_process_error(error)); + return count; +} +/* ****************************************************************** */ +static int ppc_rtas_tone_freq_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%lu\n", rtas_tone_frequency); + return 0; +} +/* ****************************************************************** */ +/* INDICATORS - Tone Volume */ +/* ****************************************************************** */ +static ssize_t ppc_rtas_tone_volume_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + unsigned long volume; + int error = parse_number(buf, count, &volume); + if (error) + return error; + + if (volume > 100) + volume = 100; + + rtas_tone_volume = volume; /* save it for later */ + error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL, + TONE_VOLUME, 0, volume); + if (error) + printk(KERN_WARNING "error: setting tone volume returned: %s\n", + ppc_rtas_process_error(error)); + return count; +} +/* ****************************************************************** */ +static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%lu\n", rtas_tone_volume); + return 0; +} + +#define RMO_READ_BUF_MAX 30 + +/* RTAS Userspace access */ +static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_RMOBUF_MAX); + return 0; +} diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 4d22eeeeb91..9d4e07f6f1e 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -17,6 +17,7 @@ #include <linux/spinlock.h> #include <linux/module.h> #include <linux/init.h> +#include <linux/delay.h> #include <asm/prom.h> #include <asm/rtas.h> @@ -43,6 +44,13 @@ char rtas_data_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned; unsigned long rtas_rmo_buf; /* + * If non-NULL, this gets called when the kernel terminates. + * This is done like this so rtas_flash can be a module. + */ +void (*rtas_flash_term_hook)(int); +EXPORT_SYMBOL(rtas_flash_term_hook); + +/* * call_rtas_display_status and call_rtas_display_status_delay * are designed only for very early low-level debugging, which * is why the token is hard-coded to 10. 
@@ -76,7 +84,7 @@ void call_rtas_display_status_delay(unsigned char c) while (width-- > 0) call_rtas_display_status(' '); width = 16; - udelay(500000); + mdelay(500); pending_newline = 1; } else { if (pending_newline) { @@ -206,6 +214,7 @@ void rtas_progress(char *s, unsigned short hex) spin_unlock(&progress_lock); } +EXPORT_SYMBOL(rtas_progress); /* needed by rtas_flash module */ int rtas_token(const char *service) { @@ -492,6 +501,8 @@ int rtas_set_indicator(int indicator, int index, int new_value) void rtas_restart(char *cmd) { + if (rtas_flash_term_hook) + rtas_flash_term_hook(SYS_RESTART); printk("RTAS system-reboot returned %d\n", rtas_call(rtas_token("system-reboot"), 0, 1, NULL)); for (;;); @@ -499,6 +510,8 @@ void rtas_restart(char *cmd) void rtas_power_off(void) { + if (rtas_flash_term_hook) + rtas_flash_term_hook(SYS_POWER_OFF); /* allow power on only with power button press */ printk("RTAS power-off returned %d\n", rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); @@ -507,7 +520,12 @@ void rtas_power_off(void) void rtas_halt(void) { - rtas_power_off(); + if (rtas_flash_term_hook) + rtas_flash_term_hook(SYS_HALT); + /* allow power on only with power button press */ + printk("RTAS power-off returned %d\n", + rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); + for (;;); } /* Must be in the RMO region, so we place it here */ @@ -591,7 +609,6 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) return 0; } -#ifdef CONFIG_SMP /* This version can't take the spinlock, because it never returns */ struct rtas_args rtas_stop_self_args = { @@ -616,7 +633,6 @@ void rtas_stop_self(void) panic("Alas, I survived.\n"); } -#endif /* * Call early during boot, before mem init or bootmem, to retreive the RTAS diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c new file mode 100644 index 00000000000..50500093c97 --- /dev/null +++ b/arch/powerpc/kernel/rtas_flash.c @@ -0,0 +1,834 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * /proc/ppc64/rtas/firmware_flash interface + * + * This file implements a firmware_flash interface to pump a firmware + * image into the kernel. At reboot time rtas_restart() will see the + * firmware image and flash it as it reboots (see rtas.c). 
+ */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <asm/delay.h> +#include <asm/uaccess.h> +#include <asm/rtas.h> +#include <asm/abs_addr.h> + +#define MODULE_VERS "1.0" +#define MODULE_NAME "rtas_flash" + +#define FIRMWARE_FLASH_NAME "firmware_flash" +#define FIRMWARE_UPDATE_NAME "firmware_update" +#define MANAGE_FLASH_NAME "manage_flash" +#define VALIDATE_FLASH_NAME "validate_flash" + +/* General RTAS Status Codes */ +#define RTAS_RC_SUCCESS 0 +#define RTAS_RC_HW_ERR -1 +#define RTAS_RC_BUSY -2 + +/* Flash image status values */ +#define FLASH_AUTH -9002 /* RTAS Not Service Authority Partition */ +#define FLASH_NO_OP -1099 /* No operation initiated by user */ +#define FLASH_IMG_SHORT -1005 /* Flash image shorter than expected */ +#define FLASH_IMG_BAD_LEN -1004 /* Bad length value in flash list block */ +#define FLASH_IMG_NULL_DATA -1003 /* Bad data value in flash list block */ +#define FLASH_IMG_READY 0 /* Firmware img ready for flash on reboot */ + +/* Manage image status values */ +#define MANAGE_AUTH -9002 /* RTAS Not Service Authority Partition */ +#define MANAGE_ACTIVE_ERR -9001 /* RTAS Cannot Overwrite Active Img */ +#define MANAGE_NO_OP -1099 /* No operation initiated by user */ +#define MANAGE_PARAM_ERR -3 /* RTAS Parameter Error */ +#define MANAGE_HW_ERR -1 /* RTAS Hardware Error */ + +/* Validate image status values */ +#define VALIDATE_AUTH -9002 /* RTAS Not Service Authority Partition */ +#define VALIDATE_NO_OP -1099 /* No operation initiated by the user */ +#define VALIDATE_INCOMPLETE -1002 /* User copied < VALIDATE_BUF_SIZE */ +#define VALIDATE_READY -1001 /* Firmware image ready for validation */ +#define VALIDATE_PARAM_ERR -3 /* RTAS Parameter Error */ +#define VALIDATE_HW_ERR -1 /* RTAS Hardware Error */ +#define VALIDATE_TMP_UPDATE 0 /* Validate Return Status */ +#define VALIDATE_FLASH_AUTH 1 /* Validate Return Status */ +#define VALIDATE_INVALID_IMG 2 /* Validate Return Status */ +#define VALIDATE_CUR_UNKNOWN 3 /* Validate Return Status */ +#define VALIDATE_TMP_COMMIT_DL 4 /* Validate Return Status */ +#define VALIDATE_TMP_COMMIT 5 /* Validate Return Status */ +#define VALIDATE_TMP_UPDATE_DL 6 /* Validate Return Status */ + +/* ibm,manage-flash-image operation tokens */ +#define RTAS_REJECT_TMP_IMG 0 +#define RTAS_COMMIT_TMP_IMG 1 + +/* Array sizes */ +#define VALIDATE_BUF_SIZE 4096 +#define RTAS_MSG_MAXLEN 64 + +struct flash_block { + char *data; + unsigned long length; +}; + +/* This struct is very similar but not identical to + * that needed by the rtas flash update. + * All we need to do for rtas is rewrite num_blocks + * into a version/length and translate the pointers + * to absolute. + */ +#define FLASH_BLOCKS_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct flash_block)) +struct flash_block_list { + unsigned long num_blocks; + struct flash_block_list *next; + struct flash_block blocks[FLASH_BLOCKS_PER_NODE]; +}; +struct flash_block_list_header { /* just the header of flash_block_list */ + unsigned long num_blocks; + struct flash_block_list *next; +}; + +static struct flash_block_list_header rtas_firmware_flash_list = {0, NULL}; + +#define FLASH_BLOCK_LIST_VERSION (1UL) + +/* Local copy of the flash block list. + * We only allow one open of the flash proc file and create this + * list as we go. This list will be put in the + * rtas_firmware_flash_list var once it is fully read. 
+ * + * For convenience as we build the list we use virtual addrs, + * we do not fill in the version number, and the length field + * is treated as the number of entries currently in the block + * (i.e. not a byte count). This is all fixed on release. + */ + +/* Status int must be first member of struct */ +struct rtas_update_flash_t +{ + int status; /* Flash update status */ + struct flash_block_list *flist; /* Local copy of flash block list */ +}; + +/* Status int must be first member of struct */ +struct rtas_manage_flash_t +{ + int status; /* Returned status */ + unsigned int op; /* Reject or commit image */ +}; + +/* Status int must be first member of struct */ +struct rtas_validate_flash_t +{ + int status; /* Returned status */ + char buf[VALIDATE_BUF_SIZE]; /* Candidate image buffer */ + unsigned int buf_size; /* Size of image buf */ + unsigned int update_results; /* Update results token */ +}; + +static DEFINE_SPINLOCK(flash_file_open_lock); +static struct proc_dir_entry *firmware_flash_pde; +static struct proc_dir_entry *firmware_update_pde; +static struct proc_dir_entry *validate_pde; +static struct proc_dir_entry *manage_pde; + +/* Do simple sanity checks on the flash image. */ +static int flash_list_valid(struct flash_block_list *flist) +{ + struct flash_block_list *f; + int i; + unsigned long block_size, image_size; + + /* Paranoid self test here. We also collect the image size. */ + image_size = 0; + for (f = flist; f; f = f->next) { + for (i = 0; i < f->num_blocks; i++) { + if (f->blocks[i].data == NULL) { + return FLASH_IMG_NULL_DATA; + } + block_size = f->blocks[i].length; + if (block_size <= 0 || block_size > PAGE_SIZE) { + return FLASH_IMG_BAD_LEN; + } + image_size += block_size; + } + } + + if (image_size < (256 << 10)) { + if (image_size < 2) + return FLASH_NO_OP; + } + + printk(KERN_INFO "FLASH: flash image with %ld bytes stored for hardware flash on reboot\n", image_size); + + return FLASH_IMG_READY; +} + +static void free_flash_list(struct flash_block_list *f) +{ + struct flash_block_list *next; + int i; + + while (f) { + for (i = 0; i < f->num_blocks; i++) + free_page((unsigned long)(f->blocks[i].data)); + next = f->next; + free_page((unsigned long)f); + f = next; + } +} + +static int rtas_flash_release(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + struct rtas_update_flash_t *uf; + + uf = (struct rtas_update_flash_t *) dp->data; + if (uf->flist) { + /* File was opened in write mode for a new flash attempt */ + /* Clear saved list */ + if (rtas_firmware_flash_list.next) { + free_flash_list(rtas_firmware_flash_list.next); + rtas_firmware_flash_list.next = NULL; + } + + if (uf->status != FLASH_AUTH) + uf->status = flash_list_valid(uf->flist); + + if (uf->status == FLASH_IMG_READY) + rtas_firmware_flash_list.next = uf->flist; + else + free_flash_list(uf->flist); + + uf->flist = NULL; + } + + atomic_dec(&dp->count); + return 0; +} + +static void get_flash_status_msg(int status, char *buf) +{ + char *msg; + + switch (status) { + case FLASH_AUTH: + msg = "error: this partition does not have service authority\n"; + break; + case FLASH_NO_OP: + msg = "info: no firmware image for flash\n"; + break; + case FLASH_IMG_SHORT: + msg = "error: flash image short\n"; + break; + case FLASH_IMG_BAD_LEN: + msg = "error: internal error bad length\n"; + break; + case FLASH_IMG_NULL_DATA: + msg = "error: internal error null data\n"; + break; + case FLASH_IMG_READY: + msg = "ready: firmware image ready for flash on reboot\n"; + 
break; + default: + sprintf(buf, "error: unexpected status value %d\n", status); + return; + } + + strcpy(buf, msg); +} + +/* Reading the proc file will show status (not the firmware contents) */ +static ssize_t rtas_flash_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + struct rtas_update_flash_t *uf; + char msg[RTAS_MSG_MAXLEN]; + int msglen; + + uf = (struct rtas_update_flash_t *) dp->data; + + if (!strcmp(dp->name, FIRMWARE_FLASH_NAME)) { + get_flash_status_msg(uf->status, msg); + } else { /* FIRMWARE_UPDATE_NAME */ + sprintf(msg, "%d\n", uf->status); + } + msglen = strlen(msg); + if (msglen > count) + msglen = count; + + if (ppos && *ppos != 0) + return 0; /* be cheap */ + + if (!access_ok(VERIFY_WRITE, buf, msglen)) + return -EINVAL; + + if (copy_to_user(buf, msg, msglen)) + return -EFAULT; + + if (ppos) + *ppos = msglen; + return msglen; +} + +/* We could be much more efficient here. But to keep this function + * simple we allocate a page to the block list no matter how small the + * count is. If the system is low on memory it will be just as well + * that we fail.... + */ +static ssize_t rtas_flash_write(struct file *file, const char __user *buffer, + size_t count, loff_t *off) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + struct rtas_update_flash_t *uf; + char *p; + int next_free; + struct flash_block_list *fl; + + uf = (struct rtas_update_flash_t *) dp->data; + + if (uf->status == FLASH_AUTH || count == 0) + return count; /* discard data */ + + /* In the case that the image is not ready for flashing, the memory + * allocated for the block list will be freed upon the release of the + * proc file + */ + if (uf->flist == NULL) { + uf->flist = (struct flash_block_list *) get_zeroed_page(GFP_KERNEL); + if (!uf->flist) + return -ENOMEM; + } + + fl = uf->flist; + while (fl->next) + fl = fl->next; /* seek to last block_list for append */ + next_free = fl->num_blocks; + if (next_free == FLASH_BLOCKS_PER_NODE) { + /* Need to allocate another block_list */ + fl->next = (struct flash_block_list *)get_zeroed_page(GFP_KERNEL); + if (!fl->next) + return -ENOMEM; + fl = fl->next; + next_free = 0; + } + + if (count > PAGE_SIZE) + count = PAGE_SIZE; + p = (char *)get_zeroed_page(GFP_KERNEL); + if (!p) + return -ENOMEM; + + if(copy_from_user(p, buffer, count)) { + free_page((unsigned long)p); + return -EFAULT; + } + fl->blocks[next_free].data = p; + fl->blocks[next_free].length = count; + fl->num_blocks++; + + return count; +} + +static int rtas_excl_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *dp = PDE(inode); + + /* Enforce exclusive open with use count of PDE */ + spin_lock(&flash_file_open_lock); + if (atomic_read(&dp->count) > 1) { + spin_unlock(&flash_file_open_lock); + return -EBUSY; + } + + atomic_inc(&dp->count); + spin_unlock(&flash_file_open_lock); + + return 0; +} + +static int rtas_excl_release(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *dp = PDE(inode); + + atomic_dec(&dp->count); + + return 0; +} + +static void manage_flash(struct rtas_manage_flash_t *args_buf) +{ + unsigned int wait_time; + s32 rc; + + while (1) { + rc = rtas_call(rtas_token("ibm,manage-flash-image"), 1, + 1, NULL, args_buf->op); + if (rc == RTAS_RC_BUSY) + udelay(1); + else if (rtas_is_extended_busy(rc)) { + wait_time = rtas_extended_busy_delay_time(rc); + udelay(wait_time * 1000); + } else + break; + } + + args_buf->status = rc; +} + +static ssize_t 
manage_flash_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + struct rtas_manage_flash_t *args_buf; + char msg[RTAS_MSG_MAXLEN]; + int msglen; + + args_buf = (struct rtas_manage_flash_t *) dp->data; + if (args_buf == NULL) + return 0; + + msglen = sprintf(msg, "%d\n", args_buf->status); + if (msglen > count) + msglen = count; + + if (ppos && *ppos != 0) + return 0; /* be cheap */ + + if (!access_ok(VERIFY_WRITE, buf, msglen)) + return -EINVAL; + + if (copy_to_user(buf, msg, msglen)) + return -EFAULT; + + if (ppos) + *ppos = msglen; + return msglen; +} + +static ssize_t manage_flash_write(struct file *file, const char __user *buf, + size_t count, loff_t *off) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + struct rtas_manage_flash_t *args_buf; + const char reject_str[] = "0"; + const char commit_str[] = "1"; + char stkbuf[10]; + int op; + + args_buf = (struct rtas_manage_flash_t *) dp->data; + if ((args_buf->status == MANAGE_AUTH) || (count == 0)) + return count; + + op = -1; + if (buf) { + if (count > 9) count = 9; + if (copy_from_user (stkbuf, buf, count)) { + return -EFAULT; + } + if (strncmp(stkbuf, reject_str, strlen(reject_str)) == 0) + op = RTAS_REJECT_TMP_IMG; + else if (strncmp(stkbuf, commit_str, strlen(commit_str)) == 0) + op = RTAS_COMMIT_TMP_IMG; + } + + if (op == -1) /* buf is empty, or contains invalid string */ + return -EINVAL; + + args_buf->op = op; + manage_flash(args_buf); + + return count; +} + +static void validate_flash(struct rtas_validate_flash_t *args_buf) +{ + int token = rtas_token("ibm,validate-flash-image"); + unsigned int wait_time; + int update_results; + s32 rc; + + rc = 0; + while(1) { + spin_lock(&rtas_data_buf_lock); + memcpy(rtas_data_buf, args_buf->buf, VALIDATE_BUF_SIZE); + rc = rtas_call(token, 2, 2, &update_results, + (u32) __pa(rtas_data_buf), args_buf->buf_size); + memcpy(args_buf->buf, rtas_data_buf, VALIDATE_BUF_SIZE); + spin_unlock(&rtas_data_buf_lock); + + if (rc == RTAS_RC_BUSY) + udelay(1); + else if (rtas_is_extended_busy(rc)) { + wait_time = rtas_extended_busy_delay_time(rc); + udelay(wait_time * 1000); + } else + break; + } + + args_buf->status = rc; + args_buf->update_results = update_results; +} + +static int get_validate_flash_msg(struct rtas_validate_flash_t *args_buf, + char *msg) +{ + int n; + + if (args_buf->status >= VALIDATE_TMP_UPDATE) { + n = sprintf(msg, "%d\n", args_buf->update_results); + if ((args_buf->update_results >= VALIDATE_CUR_UNKNOWN) || + (args_buf->update_results == VALIDATE_TMP_UPDATE)) + n += sprintf(msg + n, "%s\n", args_buf->buf); + } else { + n = sprintf(msg, "%d\n", args_buf->status); + } + return n; +} + +static ssize_t validate_flash_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + struct rtas_validate_flash_t *args_buf; + char msg[RTAS_MSG_MAXLEN]; + int msglen; + + args_buf = (struct rtas_validate_flash_t *) dp->data; + + if (ppos && *ppos != 0) + return 0; /* be cheap */ + + msglen = get_validate_flash_msg(args_buf, msg); + if (msglen > count) + msglen = count; + + if (!access_ok(VERIFY_WRITE, buf, msglen)) + return -EINVAL; + + if (copy_to_user(buf, msg, msglen)) + return -EFAULT; + + if (ppos) + *ppos = msglen; + return msglen; +} + +static ssize_t validate_flash_write(struct file *file, const char __user *buf, + size_t count, loff_t *off) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + struct 
rtas_validate_flash_t *args_buf; + int rc; + + args_buf = (struct rtas_validate_flash_t *) dp->data; + + if (dp->data == NULL) { + dp->data = kmalloc(sizeof(struct rtas_validate_flash_t), + GFP_KERNEL); + if (dp->data == NULL) + return -ENOMEM; + } + + /* We are only interested in the first 4K of the + * candidate image */ + if ((*off >= VALIDATE_BUF_SIZE) || + (args_buf->status == VALIDATE_AUTH)) { + *off += count; + return count; + } + + if (*off + count >= VALIDATE_BUF_SIZE) { + count = VALIDATE_BUF_SIZE - *off; + args_buf->status = VALIDATE_READY; + } else { + args_buf->status = VALIDATE_INCOMPLETE; + } + + if (!access_ok(VERIFY_READ, buf, count)) { + rc = -EFAULT; + goto done; + } + if (copy_from_user(args_buf->buf + *off, buf, count)) { + rc = -EFAULT; + goto done; + } + + *off += count; + rc = count; +done: + if (rc < 0) { + kfree(dp->data); + dp->data = NULL; + } + return rc; +} + +static int validate_flash_release(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode); + struct rtas_validate_flash_t *args_buf; + + args_buf = (struct rtas_validate_flash_t *) dp->data; + + if (args_buf->status == VALIDATE_READY) { + args_buf->buf_size = VALIDATE_BUF_SIZE; + validate_flash(args_buf); + } + + /* The matching atomic_inc was in rtas_excl_open() */ + atomic_dec(&dp->count); + + return 0; +} + +static void rtas_flash_firmware(int reboot_type) +{ + unsigned long image_size; + struct flash_block_list *f, *next, *flist; + unsigned long rtas_block_list; + int i, status, update_token; + + if (rtas_firmware_flash_list.next == NULL) + return; /* nothing to do */ + + if (reboot_type != SYS_RESTART) { + printk(KERN_ALERT "FLASH: firmware flash requires a reboot\n"); + printk(KERN_ALERT "FLASH: the firmware image will NOT be flashed\n"); + return; + } + + update_token = rtas_token("ibm,update-flash-64-and-reboot"); + if (update_token == RTAS_UNKNOWN_SERVICE) { + printk(KERN_ALERT "FLASH: ibm,update-flash-64-and-reboot " + "is not available -- not a service partition?\n"); + printk(KERN_ALERT "FLASH: firmware will not be flashed\n"); + return; + } + + /* NOTE: the "first" block list is a global var with no data + * blocks in the kernel data segment. We do this because + * we want to ensure this block_list addr is under 4GB. + */ + rtas_firmware_flash_list.num_blocks = 0; + flist = (struct flash_block_list *)&rtas_firmware_flash_list; + rtas_block_list = virt_to_abs(flist); + if (rtas_block_list >= 4UL*1024*1024*1024) { + printk(KERN_ALERT "FLASH: kernel bug...flash list header addr above 4GB\n"); + return; + } + + printk(KERN_ALERT "FLASH: preparing saved firmware image for flash\n"); + /* Update the block_list in place. */ + image_size = 0; + for (f = flist; f; f = next) { + /* Translate data addrs to absolute */ + for (i = 0; i < f->num_blocks; i++) { + f->blocks[i].data = (char *)virt_to_abs(f->blocks[i].data); + image_size += f->blocks[i].length; + } + next = f->next; + /* Don't translate NULL pointer for last entry */ + if (f->next) + f->next = (struct flash_block_list *)virt_to_abs(f->next); + else + f->next = NULL; + /* make num_blocks into the version/length field */ + f->num_blocks = (FLASH_BLOCK_LIST_VERSION << 56) | ((f->num_blocks+1)*16); + } + + printk(KERN_ALERT "FLASH: flash image is %ld bytes\n", image_size); + printk(KERN_ALERT "FLASH: performing flash and reboot\n"); + rtas_progress("Flashing \n", 0x0); + rtas_progress("Please Wait... ", 0x0); + printk(KERN_ALERT "FLASH: this will take several minutes. 
Do not power off!\n"); + status = rtas_call(update_token, 1, 1, NULL, rtas_block_list); + switch (status) { /* should only get "bad" status */ + case 0: + printk(KERN_ALERT "FLASH: success\n"); + break; + case -1: + printk(KERN_ALERT "FLASH: hardware error. Firmware may not be not flashed\n"); + break; + case -3: + printk(KERN_ALERT "FLASH: image is corrupt or not correct for this platform. Firmware not flashed\n"); + break; + case -4: + printk(KERN_ALERT "FLASH: flash failed when partially complete. System may not reboot\n"); + break; + default: + printk(KERN_ALERT "FLASH: unknown flash return code %d\n", status); + break; + } +} + +static void remove_flash_pde(struct proc_dir_entry *dp) +{ + if (dp) { + if (dp->data != NULL) + kfree(dp->data); + dp->owner = NULL; + remove_proc_entry(dp->name, dp->parent); + } +} + +static int initialize_flash_pde_data(const char *rtas_call_name, + size_t buf_size, + struct proc_dir_entry *dp) +{ + int *status; + int token; + + dp->data = kmalloc(buf_size, GFP_KERNEL); + if (dp->data == NULL) { + remove_flash_pde(dp); + return -ENOMEM; + } + + memset(dp->data, 0, buf_size); + + /* + * This code assumes that the status int is the first member of the + * struct + */ + status = (int *) dp->data; + token = rtas_token(rtas_call_name); + if (token == RTAS_UNKNOWN_SERVICE) + *status = FLASH_AUTH; + else + *status = FLASH_NO_OP; + + return 0; +} + +static struct proc_dir_entry *create_flash_pde(const char *filename, + struct file_operations *fops) +{ + struct proc_dir_entry *ent = NULL; + + ent = create_proc_entry(filename, S_IRUSR | S_IWUSR, NULL); + if (ent != NULL) { + ent->nlink = 1; + ent->proc_fops = fops; + ent->owner = THIS_MODULE; + } + + return ent; +} + +static struct file_operations rtas_flash_operations = { + .read = rtas_flash_read, + .write = rtas_flash_write, + .open = rtas_excl_open, + .release = rtas_flash_release, +}; + +static struct file_operations manage_flash_operations = { + .read = manage_flash_read, + .write = manage_flash_write, + .open = rtas_excl_open, + .release = rtas_excl_release, +}; + +static struct file_operations validate_flash_operations = { + .read = validate_flash_read, + .write = validate_flash_write, + .open = rtas_excl_open, + .release = validate_flash_release, +}; + +int __init rtas_flash_init(void) +{ + int rc; + + if (rtas_token("ibm,update-flash-64-and-reboot") == + RTAS_UNKNOWN_SERVICE) { + printk(KERN_ERR "rtas_flash: no firmware flash support\n"); + return 1; + } + + firmware_flash_pde = create_flash_pde("ppc64/rtas/" + FIRMWARE_FLASH_NAME, + &rtas_flash_operations); + if (firmware_flash_pde == NULL) { + rc = -ENOMEM; + goto cleanup; + } + + rc = initialize_flash_pde_data("ibm,update-flash-64-and-reboot", + sizeof(struct rtas_update_flash_t), + firmware_flash_pde); + if (rc != 0) + goto cleanup; + + firmware_update_pde = create_flash_pde("ppc64/rtas/" + FIRMWARE_UPDATE_NAME, + &rtas_flash_operations); + if (firmware_update_pde == NULL) { + rc = -ENOMEM; + goto cleanup; + } + + rc = initialize_flash_pde_data("ibm,update-flash-64-and-reboot", + sizeof(struct rtas_update_flash_t), + firmware_update_pde); + if (rc != 0) + goto cleanup; + + validate_pde = create_flash_pde("ppc64/rtas/" VALIDATE_FLASH_NAME, + &validate_flash_operations); + if (validate_pde == NULL) { + rc = -ENOMEM; + goto cleanup; + } + + rc = initialize_flash_pde_data("ibm,validate-flash-image", + sizeof(struct rtas_validate_flash_t), + validate_pde); + if (rc != 0) + goto cleanup; + + manage_pde = create_flash_pde("ppc64/rtas/" MANAGE_FLASH_NAME, + 
&manage_flash_operations); + if (manage_pde == NULL) { + rc = -ENOMEM; + goto cleanup; + } + + rc = initialize_flash_pde_data("ibm,manage-flash-image", + sizeof(struct rtas_manage_flash_t), + manage_pde); + if (rc != 0) + goto cleanup; + + rtas_flash_term_hook = rtas_flash_firmware; + return 0; + +cleanup: + remove_flash_pde(firmware_flash_pde); + remove_flash_pde(firmware_update_pde); + remove_flash_pde(validate_pde); + remove_flash_pde(manage_pde); + + return rc; +} + +void __exit rtas_flash_cleanup(void) +{ + rtas_flash_term_hook = NULL; + remove_flash_pde(firmware_flash_pde); + remove_flash_pde(firmware_update_pde); + remove_flash_pde(validate_pde); + remove_flash_pde(manage_pde); +} + +module_init(rtas_flash_init); +module_exit(rtas_flash_cleanup); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 1292460fcde..e22856ecb5a 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -170,12 +170,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) } #ifdef CONFIG_SMP -#ifdef CONFIG_PPC64 /* XXX for now */ pvr = per_cpu(pvr, cpu_id); #else - pvr = cpu_data[cpu_id].pvr; -#endif -#else pvr = mfspr(SPRN_PVR); #endif maj = (pvr >> 8) & 0xFF; @@ -201,11 +197,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_TAU_AVERAGE /* more straightforward, but potentially misleading */ seq_printf(m, "temperature \t: %u C (uncalibrated)\n", - cpu_temp(i)); + cpu_temp(cpu_id)); #else /* show the actual temp sensor range */ u32 temp; - temp = cpu_temp_both(i); + temp = cpu_temp_both(cpu_id); seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n", temp & 0xff, temp >> 16); #endif @@ -408,3 +404,158 @@ static int __init set_preferred_console(void) } console_initcall(set_preferred_console); #endif /* CONFIG_PPC_MULTIPLATFORM */ + +void __init check_for_initrd(void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + unsigned long *prop; + + DBG(" -> check_for_initrd()\n"); + + if (of_chosen) { + prop = (unsigned long *)get_property(of_chosen, + "linux,initrd-start", NULL); + if (prop != NULL) { + initrd_start = (unsigned long)__va(*prop); + prop = (unsigned long *)get_property(of_chosen, + "linux,initrd-end", NULL); + if (prop != NULL) { + initrd_end = (unsigned long)__va(*prop); + initrd_below_start_ok = 1; + } else + initrd_start = 0; + } + } + + /* If we were passed an initrd, set the ROOT_DEV properly if the values + * look sensible. If not, clear initrd reference. + */ + if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE && + initrd_end > initrd_start) + ROOT_DEV = Root_RAM0; + else { + printk("Bogus initrd %08lx %08lx\n", initrd_start, initrd_end); + initrd_start = initrd_end = 0; + } + + if (initrd_start) + printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end); + + DBG(" <- check_for_initrd()\n"); +#endif /* CONFIG_BLK_DEV_INITRD */ +} + +#ifdef CONFIG_SMP + +/** + * setup_cpu_maps - initialize the following cpu maps: + * cpu_possible_map + * cpu_present_map + * cpu_sibling_map + * + * Having the possible map set up early allows us to restrict allocations + * of things like irqstacks to num_possible_cpus() rather than NR_CPUS. + * + * We do not initialize the online map here; cpus set their own bits in + * cpu_online_map as they come up. + * + * This function is valid only for Open Firmware systems. finish_device_tree + * must be called before using this. + * + * While we're here, we may as well set the "physical" cpu ids in the paca. 
+ */ +void __init smp_setup_cpu_maps(void) +{ + struct device_node *dn = NULL; + int cpu = 0; + int swap_cpuid = 0; + + while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) { + int *intserv; + int j, len = sizeof(u32), nthreads = 1; + + intserv = (int *)get_property(dn, "ibm,ppc-interrupt-server#s", + &len); + if (intserv) + nthreads = len / sizeof(int); + else { + intserv = (int *) get_property(dn, "reg", NULL); + if (!intserv) + intserv = &cpu; /* assume logical == phys */ + } + + for (j = 0; j < nthreads && cpu < NR_CPUS; j++) { + cpu_set(cpu, cpu_present_map); + set_hard_smp_processor_id(cpu, intserv[j]); + + if (intserv[j] == boot_cpuid_phys) + swap_cpuid = cpu; + cpu_set(cpu, cpu_possible_map); + cpu++; + } + } + + /* Swap CPU id 0 with boot_cpuid_phys, so we can always assume that + * boot cpu is logical 0. + */ + if (boot_cpuid_phys != get_hard_smp_processor_id(0)) { + u32 tmp; + tmp = get_hard_smp_processor_id(0); + set_hard_smp_processor_id(0, boot_cpuid_phys); + set_hard_smp_processor_id(swap_cpuid, tmp); + } + +#ifdef CONFIG_PPC64 + /* + * On pSeries LPAR, we need to know how many cpus + * could possibly be added to this partition. + */ + if (systemcfg->platform == PLATFORM_PSERIES_LPAR && + (dn = of_find_node_by_path("/rtas"))) { + int num_addr_cell, num_size_cell, maxcpus; + unsigned int *ireg; + + num_addr_cell = prom_n_addr_cells(dn); + num_size_cell = prom_n_size_cells(dn); + + ireg = (unsigned int *) + get_property(dn, "ibm,lrdr-capacity", NULL); + + if (!ireg) + goto out; + + maxcpus = ireg[num_addr_cell + num_size_cell]; + + /* Double maxcpus for processors which have SMT capability */ + if (cpu_has_feature(CPU_FTR_SMT)) + maxcpus *= 2; + + if (maxcpus > NR_CPUS) { + printk(KERN_WARNING + "Partition configured for %d cpus, " + "operating system maximum is %d.\n", + maxcpus, NR_CPUS); + maxcpus = NR_CPUS; + } else + printk(KERN_INFO "Partition configured for %d cpus.\n", + maxcpus); + + for (cpu = 0; cpu < maxcpus; cpu++) + cpu_set(cpu, cpu_possible_map); + out: + of_node_put(dn); + } + + /* + * Do the sibling map; assume only two threads per processor. + */ + for_each_cpu(cpu) { + cpu_set(cpu, cpu_sibling_map[cpu]); + if (cpu_has_feature(CPU_FTR_SMT)) + cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]); + } + + systemcfg->processorCount = num_present_cpus(); +#endif /* CONFIG_PPC64 */ +} +#endif /* CONFIG_SMP */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 9680ae99b08..3af2631e3fa 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -286,8 +286,11 @@ void __init setup_arch(char **cmdline_p) loops_per_jiffy = 500000000 / HZ; unflatten_device_tree(); + check_for_initrd(); finish_device_tree(); + smp_setup_cpu_maps(); + #ifdef CONFIG_BOOTX_TEXT init_boot_display(); #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 40c48100bf1..0471e843b6c 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -41,7 +41,6 @@ #include <asm/elf.h> #include <asm/machdep.h> #include <asm/paca.h> -#include <asm/ppcdebug.h> #include <asm/time.h> #include <asm/cputable.h> #include <asm/sections.h> @@ -56,10 +55,11 @@ #include <asm/page.h> #include <asm/mmu.h> #include <asm/lmb.h> -#include <asm/iSeries/ItLpNaca.h> +#include <asm/iseries/it_lp_naca.h> #include <asm/firmware.h> #include <asm/systemcfg.h> #include <asm/xmon.h> +#include <asm/udbg.h> #ifdef DEBUG #define DBG(fmt...) 
udbg_printf(fmt) @@ -103,8 +103,6 @@ extern void htab_initialize(void); extern void early_init_devtree(void *flat_dt); extern void unflatten_device_tree(void); -extern void smp_release_cpus(void); - int have_of = 1; int boot_cpuid = 0; int boot_cpuid_phys = 0; @@ -183,120 +181,14 @@ static int __init early_smt_enabled(char *p) } early_param("smt-enabled", early_smt_enabled); -/** - * setup_cpu_maps - initialize the following cpu maps: - * cpu_possible_map - * cpu_present_map - * cpu_sibling_map - * - * Having the possible map set up early allows us to restrict allocations - * of things like irqstacks to num_possible_cpus() rather than NR_CPUS. - * - * We do not initialize the online map here; cpus set their own bits in - * cpu_online_map as they come up. - * - * This function is valid only for Open Firmware systems. finish_device_tree - * must be called before using this. - * - * While we're here, we may as well set the "physical" cpu ids in the paca. - */ -static void __init setup_cpu_maps(void) -{ - struct device_node *dn = NULL; - int cpu = 0; - int swap_cpuid = 0; - - check_smt_enabled(); - - while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) { - u32 *intserv; - int j, len = sizeof(u32), nthreads; - - intserv = (u32 *)get_property(dn, "ibm,ppc-interrupt-server#s", - &len); - if (!intserv) - intserv = (u32 *)get_property(dn, "reg", NULL); - - nthreads = len / sizeof(u32); - - for (j = 0; j < nthreads && cpu < NR_CPUS; j++) { - cpu_set(cpu, cpu_present_map); - set_hard_smp_processor_id(cpu, intserv[j]); - - if (intserv[j] == boot_cpuid_phys) - swap_cpuid = cpu; - cpu_set(cpu, cpu_possible_map); - cpu++; - } - } - - /* Swap CPU id 0 with boot_cpuid_phys, so we can always assume that - * boot cpu is logical 0. - */ - if (boot_cpuid_phys != get_hard_smp_processor_id(0)) { - u32 tmp; - tmp = get_hard_smp_processor_id(0); - set_hard_smp_processor_id(0, boot_cpuid_phys); - set_hard_smp_processor_id(swap_cpuid, tmp); - } - - /* - * On pSeries LPAR, we need to know how many cpus - * could possibly be added to this partition. - */ - if (systemcfg->platform == PLATFORM_PSERIES_LPAR && - (dn = of_find_node_by_path("/rtas"))) { - int num_addr_cell, num_size_cell, maxcpus; - unsigned int *ireg; - - num_addr_cell = prom_n_addr_cells(dn); - num_size_cell = prom_n_size_cells(dn); - - ireg = (unsigned int *) - get_property(dn, "ibm,lrdr-capacity", NULL); - - if (!ireg) - goto out; - - maxcpus = ireg[num_addr_cell + num_size_cell]; - - /* Double maxcpus for processors which have SMT capability */ - if (cpu_has_feature(CPU_FTR_SMT)) - maxcpus *= 2; - - if (maxcpus > NR_CPUS) { - printk(KERN_WARNING - "Partition configured for %d cpus, " - "operating system maximum is %d.\n", - maxcpus, NR_CPUS); - maxcpus = NR_CPUS; - } else - printk(KERN_INFO "Partition configured for %d cpus.\n", - maxcpus); - - for (cpu = 0; cpu < maxcpus; cpu++) - cpu_set(cpu, cpu_possible_map); - out: - of_node_put(dn); - } - - /* - * Do the sibling map; assume only two threads per processor. 
- */ - for_each_cpu(cpu) { - cpu_set(cpu, cpu_sibling_map[cpu]); - if (cpu_has_feature(CPU_FTR_SMT)) - cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]); - } - - systemcfg->processorCount = num_present_cpus(); -} +#else +#define check_smt_enabled() #endif /* CONFIG_SMP */ extern struct machdep_calls pSeries_md; extern struct machdep_calls pmac_md; extern struct machdep_calls maple_md; -extern struct machdep_calls bpa_md; +extern struct machdep_calls cell_md; extern struct machdep_calls iseries_md; /* Ultimately, stuff them in an elf section like initcalls... */ @@ -310,8 +202,8 @@ static struct machdep_calls __initdata *machines[] = { #ifdef CONFIG_PPC_MAPLE &maple_md, #endif /* CONFIG_PPC_MAPLE */ -#ifdef CONFIG_PPC_BPA - &bpa_md, +#ifdef CONFIG_PPC_CELL + &cell_md, #endif #ifdef CONFIG_PPC_ISERIES &iseries_md, @@ -352,12 +244,6 @@ void __init early_setup(unsigned long dt_ptr) DBG(" -> early_setup()\n"); /* - * Fill the default DBG level (do we want to keep - * that old mecanism around forever ?) - */ - ppcdbg_initialize(); - - /* * Do early initializations using the flattened device * tree, like retreiving the physical memory map or * calculating/retreiving the hash table size @@ -385,21 +271,49 @@ void __init early_setup(unsigned long dt_ptr) DBG("Found, Initializing memory management...\n"); /* - * Initialize stab / SLB management + * Initialize the MMU Hash table and create the linear mapping + * of memory. Has to be done before stab/slb initialization as + * this is currently where the page size encoding is obtained */ - if (!firmware_has_feature(FW_FEATURE_ISERIES)) - stab_initialize(lpaca->stab_real); + htab_initialize(); /* - * Initialize the MMU Hash table and create the linear mapping - * of memory + * Initialize stab / SLB management except on iSeries */ - htab_initialize(); + if (!firmware_has_feature(FW_FEATURE_ISERIES)) { + if (cpu_has_feature(CPU_FTR_SLB)) + slb_initialize(); + else + stab_initialize(lpaca->stab_real); + } DBG(" <- early_setup()\n"); } +#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC) +void smp_release_cpus(void) +{ + extern unsigned long __secondary_hold_spinloop; + + DBG(" -> smp_release_cpus()\n"); + + /* All secondary cpus are spinning on a common spinloop, release them + * all now so they can start to spin on their individual paca + * spinloops. For non SMP kernels, the secondary cpus never get out + * of the common spinloop. + * This is useless but harmless on iSeries, secondaries are already + * waiting on their paca spinloops. */ + + __secondary_hold_spinloop = 1; + mb(); + + DBG(" <- smp_release_cpus()\n"); +} +#else +#define smp_release_cpus() +#endif /* CONFIG_SMP || CONFIG_KEXEC */ + /* * Initialize some remaining members of the ppc64_caches and systemcfg structures * (at least until we get rid of them completely). This is mostly some @@ -481,43 +395,6 @@ static void __init initialize_cache_info(void) DBG(" <- initialize_cache_info()\n"); } -static void __init check_for_initrd(void) -{ -#ifdef CONFIG_BLK_DEV_INITRD - u64 *prop; - - DBG(" -> check_for_initrd()\n"); - - if (of_chosen) { - prop = (u64 *)get_property(of_chosen, - "linux,initrd-start", NULL); - if (prop != NULL) { - initrd_start = (unsigned long)__va(*prop); - prop = (u64 *)get_property(of_chosen, - "linux,initrd-end", NULL); - if (prop != NULL) { - initrd_end = (unsigned long)__va(*prop); - initrd_below_start_ok = 1; - } else - initrd_start = 0; - } - } - - /* If we were passed an initrd, set the ROOT_DEV properly if the values - * look sensible. If not, clear initrd reference. 
- */ - if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE && - initrd_end > initrd_start) - ROOT_DEV = Root_RAM0; - else - initrd_start = initrd_end = 0; - - if (initrd_start) - printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end); - - DBG(" <- check_for_initrd()\n"); -#endif /* CONFIG_BLK_DEV_INITRD */ -} /* * Do some initial setup of the system. The parameters are those which @@ -589,23 +466,18 @@ void __init setup_system(void) parse_early_param(); -#ifdef CONFIG_SMP - /* - * iSeries has already initialized the cpu maps at this point. - */ - setup_cpu_maps(); + check_smt_enabled(); + smp_setup_cpu_maps(); /* Release secondary cpus out of their spinloops at 0x60 now that * we can map physical -> logical CPU ids */ smp_release_cpus(); -#endif printk("Starting Linux PPC64 %s\n", system_utsname.version); printk("-----------------------------------------------------\n"); printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size); - printk("ppc64_debug_switch = 0x%lx\n", ppc64_debug_switch); printk("ppc64_interrupt_controller = 0x%ld\n", ppc64_interrupt_controller); printk("systemcfg = 0x%p\n", systemcfg); printk("systemcfg->platform = 0x%x\n", systemcfg->platform); @@ -631,23 +503,6 @@ static int ppc64_panic_event(struct notifier_block *this, return NOTIFY_DONE; } -#ifdef CONFIG_PPC_ISERIES -/* - * On iSeries we just parse the mem=X option from the command line. - * On pSeries it's a bit more complicated, see prom_init_mem() - */ -static int __init early_parsemem(char *p) -{ - if (!p) - return 0; - - memory_limit = ALIGN(memparse(p, &p), PAGE_SIZE); - - return 0; -} -early_param("mem", early_parsemem); -#endif /* CONFIG_PPC_ISERIES */ - #ifdef CONFIG_IRQSTACKS static void __init irqstack_early_init(void) { @@ -658,10 +513,12 @@ static void __init irqstack_early_init(void) * SLB misses on them. 
*/ for_each_cpu(i) { - softirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE, - THREAD_SIZE, 0x10000000)); - hardirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE, - THREAD_SIZE, 0x10000000)); + softirq_ctx[i] = (struct thread_info *) + __va(lmb_alloc_base(THREAD_SIZE, + THREAD_SIZE, 0x10000000)); + hardirq_ctx[i] = (struct thread_info *) + __va(lmb_alloc_base(THREAD_SIZE, + THREAD_SIZE, 0x10000000)); } } #else @@ -689,8 +546,8 @@ static void __init emergency_stack_init(void) limit = min(0x10000000UL, lmb.rmo_size); for_each_cpu(i) - paca[i].emergency_sp = __va(lmb_alloc_base(PAGE_SIZE, 128, - limit)) + PAGE_SIZE; + paca[i].emergency_sp = + __va(lmb_alloc_base(HW_PAGE_SIZE, 128, limit)) + HW_PAGE_SIZE; } /* diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 444c3e81884..081d931eae4 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -43,8 +43,7 @@ #include <asm/uaccess.h> #include <asm/cacheflush.h> #ifdef CONFIG_PPC64 -#include <asm/ppc32.h> -#include <asm/ppcdebug.h> +#include "ppc32.h" #include <asm/unistd.h> #include <asm/vdso.h> #else diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c new file mode 100644 index 00000000000..58194e15071 --- /dev/null +++ b/arch/powerpc/kernel/signal_64.c @@ -0,0 +1,580 @@ +/* + * linux/arch/ppc64/kernel/signal.c + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/i386/kernel/signal.c" + * Copyright (C) 1991, 1992 Linus Torvalds + * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/unistd.h> +#include <linux/stddef.h> +#include <linux/elf.h> +#include <linux/ptrace.h> +#include <linux/module.h> + +#include <asm/sigcontext.h> +#include <asm/ucontext.h> +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/unistd.h> +#include <asm/cacheflush.h> +#include <asm/vdso.h> + +#define DEBUG_SIG 0 + +#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + +#define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) +#define FP_REGS_SIZE sizeof(elf_fpregset_t) + +#define TRAMP_TRACEBACK 3 +#define TRAMP_SIZE 6 + +/* + * When we have signals to deliver, we set up on the user stack, + * going down from the original stack pointer: + * 1) a rt_sigframe struct which contains the ucontext + * 2) a gap of __SIGNAL_FRAMESIZE bytes which acts as a dummy caller + * frame for the signal handler. + */ + +struct rt_sigframe { + /* sys_rt_sigreturn requires the ucontext be the first field */ + struct ucontext uc; + unsigned long _unused[2]; + unsigned int tramp[TRAMP_SIZE]; + struct siginfo *pinfo; + void *puc; + struct siginfo info; + /* 64 bit ABI allows for 288 bytes below sp before decrementing it. */ + char abigap[288]; +} __attribute__ ((aligned (16))); + + +/* + * Atomically swap in the new signal mask, and wait for a signal. 
+ */
+long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, int p3, int p4,
+ int p6, int p7, struct pt_regs *regs)
+{
+ sigset_t saveset, newset;
+
+ /* XXX: Don't preclude handling different sized sigset_t's. */
+ if (sigsetsize != sizeof(sigset_t))
+ return -EINVAL;
+
+ if (copy_from_user(&newset, unewset, sizeof(newset)))
+ return -EFAULT;
+ sigdelsetmask(&newset, ~_BLOCKABLE);
+
+ spin_lock_irq(&current->sighand->siglock);
+ saveset = current->blocked;
+ current->blocked = newset;
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+
+ regs->result = -EINTR;
+ regs->gpr[3] = EINTR;
+ regs->ccr |= 0x10000000;
+ while (1) {
+ current->state = TASK_INTERRUPTIBLE;
+ schedule();
+ if (do_signal(&saveset, regs))
+ return 0;
+ }
+}
+
+long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, unsigned long r5,
+ unsigned long r6, unsigned long r7, unsigned long r8,
+ struct pt_regs *regs)
+{
+ return do_sigaltstack(uss, uoss, regs->gpr[1]);
+}
+
+
+/*
+ * Set up the sigcontext for the signal frame.
+ */
+
+static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
+ int signr, sigset_t *set, unsigned long handler)
+{
+ /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
+ * process never used altivec yet (MSR_VEC is zero in pt_regs of
+ * the context). This is very important because we must ensure we
+ * don't lose the VRSAVE content that may have been set prior to
+ * the process doing its first vector operation
+ * Userland shall check AT_HWCAP to know wether it can rely on the
+ * v_regs pointer or not
+ */
+#ifdef CONFIG_ALTIVEC
+ elf_vrreg_t __user *v_regs = (elf_vrreg_t __user *)(((unsigned long)sc->vmx_reserve + 15) & ~0xful);
+#endif
+ long err = 0;
+
+ flush_fp_to_thread(current);
+
+ /* Make sure signal doesn't get spurrious FP exceptions */
+ current->thread.fpscr.val = 0;
+
+#ifdef CONFIG_ALTIVEC
+ err |= __put_user(v_regs, &sc->v_regs);
+
+ /* save altivec registers */
+ if (current->thread.used_vr) {
+ flush_altivec_to_thread(current);
+ /* Copy 33 vec registers (vr0..31 and vscr) to the stack */
+ err |= __copy_to_user(v_regs, current->thread.vr, 33 * sizeof(vector128));
+ /* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg)
+ * contains valid data.
+ */
+ regs->msr |= MSR_VEC;
+ }
+ /* We always copy to/from vrsave, it's 0 if we don't have or don't
+ * use altivec.
+ */
+ err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]);
+#else /* CONFIG_ALTIVEC */
+ err |= __put_user(0, &sc->v_regs);
+#endif /* CONFIG_ALTIVEC */
+ err |= __put_user(&sc->gp_regs, &sc->regs);
+ err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
+ err |= __copy_to_user(&sc->fp_regs, &current->thread.fpr, FP_REGS_SIZE);
+ err |= __put_user(signr, &sc->signal);
+ err |= __put_user(handler, &sc->handler);
+ if (set != NULL)
+ err |= __put_user(set->sig[0], &sc->oldmask);
+
+ return err;
+}
+
+/*
+ * Restore the sigcontext from the signal frame.
+ */
+
+static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
+ struct sigcontext __user *sc)
+{
+#ifdef CONFIG_ALTIVEC
+ elf_vrreg_t __user *v_regs;
+#endif
+ unsigned long err = 0;
+ unsigned long save_r13 = 0;
+ elf_greg_t *gregs = (elf_greg_t *)regs;
+#ifdef CONFIG_ALTIVEC
+ unsigned long msr;
+#endif
+ int i;
+
+ /* If this is not a signal return, we preserve the TLS in r13 */
+ if (!sig)
+ save_r13 = regs->gpr[13];
+
+ /* copy everything before MSR */
+ err |= __copy_from_user(regs, &sc->gp_regs,
+ PT_MSR*sizeof(unsigned long));
+
+ /* skip MSR and SOFTE */
+ for (i = PT_MSR+1; i <= PT_RESULT; i++) {
+ if (i == PT_SOFTE)
+ continue;
+ err |= __get_user(gregs[i], &sc->gp_regs[i]);
+ }
+
+ if (!sig)
+ regs->gpr[13] = save_r13;
+ err |= __copy_from_user(&current->thread.fpr, &sc->fp_regs, FP_REGS_SIZE);
+ if (set != NULL)
+ err |= __get_user(set->sig[0], &sc->oldmask);
+
+#ifdef CONFIG_ALTIVEC
+ err |= __get_user(v_regs, &sc->v_regs);
+ err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
+ if (err)
+ return err;
+ /* Copy 33 vec registers (vr0..31 and vscr) from the stack */
+ if (v_regs != 0 && (msr & MSR_VEC) != 0)
+ err |= __copy_from_user(current->thread.vr, v_regs,
+ 33 * sizeof(vector128));
+ else if (current->thread.used_vr)
+ memset(current->thread.vr, 0, 33 * sizeof(vector128));
+ /* Always get VRSAVE back */
+ if (v_regs != 0)
+ err |= __get_user(current->thread.vrsave, (u32 __user *)&v_regs[33]);
+ else
+ current->thread.vrsave = 0;
+#endif /* CONFIG_ALTIVEC */
+
+#ifndef CONFIG_SMP
+ preempt_disable();
+ if (last_task_used_math == current)
+ last_task_used_math = NULL;
+ if (last_task_used_altivec == current)
+ last_task_used_altivec = NULL;
+ preempt_enable();
+#endif
+ /* Force reload of FP/VEC */
+ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC);
+
+ return err;
+}
+
+/*
+ * Allocate space for the signal frame
+ */
+static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
+ size_t frame_size)
+{
+ unsigned long newsp;
+
+ /* Default to using normal stack */
+ newsp = regs->gpr[1];
+
+ if (ka->sa.sa_flags & SA_ONSTACK) {
+ if (!
on_sig_stack(regs->gpr[1]))
+ newsp = (current->sas_ss_sp + current->sas_ss_size);
+ }
+
+ return (void __user *)((newsp - frame_size) & -16ul);
+}
+
+/*
+ * Setup the trampoline code on the stack
+ */
+static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
+{
+ int i;
+ long err = 0;
+
+ /* addi r1, r1, __SIGNAL_FRAMESIZE # Pop the dummy stackframe */
+ err |= __put_user(0x38210000UL | (__SIGNAL_FRAMESIZE & 0xffff), &tramp[0]);
+ /* li r0, __NR_[rt_]sigreturn| */
+ err |= __put_user(0x38000000UL | (syscall & 0xffff), &tramp[1]);
+ /* sc */
+ err |= __put_user(0x44000002UL, &tramp[2]);
+
+ /* Minimal traceback info */
+ for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++)
+ err |= __put_user(0, &tramp[i]);
+
+ if (!err)
+ flush_icache_range((unsigned long) &tramp[0],
+ (unsigned long) &tramp[TRAMP_SIZE]);
+
+ return err;
+}
+
+/*
+ * Restore the user process's signal mask (also used by signal32.c)
+ */
+void restore_sigmask(sigset_t *set)
+{
+ sigdelsetmask(set, ~_BLOCKABLE);
+ spin_lock_irq(&current->sighand->siglock);
+ current->blocked = *set;
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+}
+
+
+/*
+ * Handle {get,set,swap}_context operations
+ */
+int sys_swapcontext(struct ucontext __user *old_ctx,
+ struct ucontext __user *new_ctx,
+ long ctx_size, long r6, long r7, long r8, struct pt_regs *regs)
+{
+ unsigned char tmp;
+ sigset_t set;
+
+ /* Context size is for future use. Right now, we only make sure
+ * we are passed something we understand
+ */
+ if (ctx_size < sizeof(struct ucontext))
+ return -EINVAL;
+
+ if (old_ctx != NULL) {
+ if (!access_ok(VERIFY_WRITE, old_ctx, sizeof(*old_ctx))
+ || setup_sigcontext(&old_ctx->uc_mcontext, regs, 0, NULL, 0)
+ || __copy_to_user(&old_ctx->uc_sigmask,
+ &current->blocked, sizeof(sigset_t)))
+ return -EFAULT;
+ }
+ if (new_ctx == NULL)
+ return 0;
+ if (!access_ok(VERIFY_READ, new_ctx, sizeof(*new_ctx))
+ || __get_user(tmp, (u8 __user *) new_ctx)
+ || __get_user(tmp, (u8 __user *) (new_ctx + 1) - 1))
+ return -EFAULT;
+
+ /*
+ * If we get a fault copying the context into the kernel's
+ * image of the user's registers, we can't just return -EFAULT
+ * because the user's registers will be corrupted. For instance
+ * the NIP value may have been updated but not some of the
+ * other registers. Given that we have done the access_ok
+ * and successfully read the first and last bytes of the region
+ * above, this should only happen in an out-of-memory situation
+ * or if another thread unmaps the region containing the context.
+ * We kill the task with a SIGSEGV in this situation.
+ */
+
+ if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
+ do_exit(SIGSEGV);
+ restore_sigmask(&set);
+ if (restore_sigcontext(regs, NULL, 0, &new_ctx->uc_mcontext))
+ do_exit(SIGSEGV);
+
+ /* This returns like rt_sigreturn */
+ return 0;
+}
+
+
+/*
+ * Do a signal return; undo the signal stack.
+ */ + +int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7, unsigned long r8, + struct pt_regs *regs) +{ + struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1]; + sigset_t set; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + + if (!access_ok(VERIFY_READ, uc, sizeof(*uc))) + goto badframe; + + if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set))) + goto badframe; + restore_sigmask(&set); + if (restore_sigcontext(regs, NULL, 1, &uc->uc_mcontext)) + goto badframe; + + /* do_sigaltstack expects a __user pointer and won't modify + * what's in there anyway + */ + do_sigaltstack(&uc->uc_stack, NULL, regs->gpr[1]); + + return regs->result; + +badframe: +#if DEBUG_SIG + printk("badframe in sys_rt_sigreturn, regs=%p uc=%p &uc->uc_mcontext=%p\n", + regs, uc, &uc->uc_mcontext); +#endif + force_sig(SIGSEGV, current); + return 0; +} + +static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) +{ + /* Handler is *really* a pointer to the function descriptor for + * the signal routine. The first entry in the function + * descriptor is the entry address of signal and the second + * entry is the TOC value we need to use. + */ + func_descr_t __user *funct_desc_ptr; + struct rt_sigframe __user *frame; + unsigned long newsp = 0; + long err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + goto badframe; + + err |= __put_user(&frame->info, &frame->pinfo); + err |= __put_user(&frame->uc, &frame->puc); + err |= copy_siginfo_to_user(&frame->info, info); + if (err) + goto badframe; + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(0, &frame->uc.uc_link); + err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + err |= __put_user(sas_ss_flags(regs->gpr[1]), + &frame->uc.uc_stack.ss_flags); + err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, signr, NULL, + (unsigned long)ka->sa.sa_handler); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto badframe; + + /* Set up to return from userspace. */ + if (vdso64_rt_sigtramp && current->thread.vdso_base) { + regs->link = current->thread.vdso_base + vdso64_rt_sigtramp; + } else { + err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); + if (err) + goto badframe; + regs->link = (unsigned long) &frame->tramp[0]; + } + funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler; + + /* Allocate a dummy caller frame for the signal handler. */ + newsp = (unsigned long)frame - __SIGNAL_FRAMESIZE; + err |= put_user(regs->gpr[1], (unsigned long __user *)newsp); + + /* Set up "regs" so we "return" to the signal handler. 
*/
+ err |= get_user(regs->nip, &funct_desc_ptr->entry);
+ regs->gpr[1] = newsp;
+ err |= get_user(regs->gpr[2], &funct_desc_ptr->toc);
+ regs->gpr[3] = signr;
+ regs->result = 0;
+ if (ka->sa.sa_flags & SA_SIGINFO) {
+ err |= get_user(regs->gpr[4], (unsigned long __user *)&frame->pinfo);
+ err |= get_user(regs->gpr[5], (unsigned long __user *)&frame->puc);
+ regs->gpr[6] = (unsigned long) frame;
+ } else {
+ regs->gpr[4] = (unsigned long)&frame->uc.uc_mcontext;
+ }
+ if (err)
+ goto badframe;
+
+ if (test_thread_flag(TIF_SINGLESTEP))
+ ptrace_notify(SIGTRAP);
+
+ return 1;
+
+badframe:
+#if DEBUG_SIG
+ printk("badframe in setup_rt_frame, regs=%p frame=%p newsp=%lx\n",
+ regs, frame, newsp);
+#endif
+ force_sigsegv(signr, current);
+ return 0;
+}
+
+
+/*
+ * OK, we're invoking a handler
+ */
+static int handle_signal(unsigned long sig, struct k_sigaction *ka,
+ siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
+{
+ int ret;
+
+ /* Set up Signal Frame */
+ ret = setup_rt_frame(sig, ka, info, oldset, regs);
+
+ if (ret) {
+ spin_lock_irq(&current->sighand->siglock);
+ sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+ if (!(ka->sa.sa_flags & SA_NODEFER))
+ sigaddset(&current->blocked,sig);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+ }
+
+ return ret;
+}
+
+static inline void syscall_restart(struct pt_regs *regs, struct k_sigaction *ka)
+{
+ switch ((int)regs->result) {
+ case -ERESTART_RESTARTBLOCK:
+ case -ERESTARTNOHAND:
+ /* ERESTARTNOHAND means that the syscall should only be
+ * restarted if there was no handler for the signal, and since
+ * we only get here if there is a handler, we dont restart.
+ */
+ regs->result = -EINTR;
+ break;
+ case -ERESTARTSYS:
+ /* ERESTARTSYS means to restart the syscall if there is no
+ * handler or the handler was registered with SA_RESTART
+ */
+ if (!(ka->sa.sa_flags & SA_RESTART)) {
+ regs->result = -EINTR;
+ break;
+ }
+ /* fallthrough */
+ case -ERESTARTNOINTR:
+ /* ERESTARTNOINTR means that the syscall should be
+ * called again after the signal handler returns.
+ */
+ regs->gpr[3] = regs->orig_gpr3;
+ regs->nip -= 4;
+ regs->result = 0;
+ break;
+ }
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ */
+int do_signal(sigset_t *oldset, struct pt_regs *regs)
+{
+ siginfo_t info;
+ int signr;
+ struct k_sigaction ka;
+
+ /*
+ * If the current thread is 32 bit - invoke the
+ * 32 bit signal handling code
+ */
+ if (test_thread_flag(TIF_32BIT))
+ return do_signal32(oldset, regs);
+
+ if (!oldset)
+ oldset = &current->blocked;
+
+ signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+ if (signr > 0) {
+ /* Whee! Actually deliver the signal. */
+ if (TRAP(regs) == 0x0C00)
+ syscall_restart(regs, &ka);
+
+ /*
+ * Reenable the DABR before delivering the signal to
+ * user space. The DABR will have been cleared if it
+ * triggered inside the kernel.
+ */
+ if (current->thread.dabr)
+ set_dabr(current->thread.dabr);
+
+ return handle_signal(signr, &ka, &info, oldset, regs);
+ }
+
+ if (TRAP(regs) == 0x0C00) { /* System Call!
*/ + if ((int)regs->result == -ERESTARTNOHAND || + (int)regs->result == -ERESTARTSYS || + (int)regs->result == -ERESTARTNOINTR) { + regs->gpr[3] = regs->orig_gpr3; + regs->nip -= 4; /* Back up & retry system call */ + regs->result = 0; + } else if ((int)regs->result == -ERESTART_RESTARTBLOCK) { + regs->gpr[0] = __NR_restart_syscall; + regs->nip -= 4; + regs->result = 0; + } + } + + return 0; +} +EXPORT_SYMBOL(do_signal); diff --git a/arch/powerpc/kernel/smp-tbsync.c b/arch/powerpc/kernel/smp-tbsync.c new file mode 100644 index 00000000000..9adef3bddad --- /dev/null +++ b/arch/powerpc/kernel/smp-tbsync.c @@ -0,0 +1,171 @@ +/* + * Smp timebase synchronization for ppc. + * + * Copyright (C) 2003 Samuel Rydh (samuel@ibrium.se) + * + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/unistd.h> +#include <linux/init.h> +#include <asm/atomic.h> +#include <asm/smp.h> +#include <asm/time.h> + +#define NUM_ITER 300 + +enum { + kExit=0, kSetAndTest, kTest +}; + +static struct { + volatile u64 tb; + volatile u64 mark; + volatile int cmd; + volatile int handshake; + int filler[2]; + + volatile int ack; + int filler2[7]; + + volatile int race_result; +} *tbsync; + +static volatile int running; + +static void __devinit enter_contest(u64 mark, long add) +{ + while (get_tb() < mark) + tbsync->race_result = add; +} + +void __devinit smp_generic_take_timebase(void) +{ + int cmd; + u64 tb; + + local_irq_disable(); + while (!running) + barrier(); + rmb(); + + for (;;) { + tbsync->ack = 1; + while (!tbsync->handshake) + barrier(); + rmb(); + + cmd = tbsync->cmd; + tb = tbsync->tb; + mb(); + tbsync->ack = 0; + if (cmd == kExit) + break; + + while (tbsync->handshake) + barrier(); + if (cmd == kSetAndTest) + set_tb(tb >> 32, tb & 0xfffffffful); + enter_contest(tbsync->mark, -1); + } + local_irq_enable(); +} + +static int __devinit start_contest(int cmd, long offset, int num) +{ + int i, score=0; + u64 tb; + long mark; + + tbsync->cmd = cmd; + + local_irq_disable(); + for (i = -3; i < num; ) { + tb = get_tb() + 400; + tbsync->tb = tb + offset; + tbsync->mark = mark = tb + 400; + + wmb(); + + tbsync->handshake = 1; + while (tbsync->ack) + barrier(); + + while (get_tb() <= tb) + barrier(); + tbsync->handshake = 0; + enter_contest(mark, 1); + + while (!tbsync->ack) + barrier(); + + if (i++ > 0) + score += tbsync->race_result; + } + local_irq_enable(); + return score; +} + +void __devinit smp_generic_give_timebase(void) +{ + int i, score, score2, old, min=0, max=5000, offset=1000; + + printk("Synchronizing timebase\n"); + + /* if this fails then this kernel won't work anyway... */ + tbsync = kmalloc( sizeof(*tbsync), GFP_KERNEL ); + memset( tbsync, 0, sizeof(*tbsync) ); + mb(); + running = 1; + + while (!tbsync->ack) + barrier(); + + printk("Got ack\n"); + + /* binary search */ + for (old = -1; old != offset ; offset = (min+max) / 2) { + score = start_contest(kSetAndTest, offset, NUM_ITER); + + printk("score %d, offset %d\n", score, offset ); + + if( score > 0 ) + max = offset; + else + min = offset; + old = offset; + } + score = start_contest(kSetAndTest, min, NUM_ITER); + score2 = start_contest(kSetAndTest, max, NUM_ITER); + + printk("Min %d (score %d), Max %d (score %d)\n", + min, score, max, score2); + score = abs(score); + score2 = abs(score2); + offset = (score < score2) ? 
min : max; + + /* guard against inaccurate mttb */ + for (i = 0; i < 10; i++) { + start_contest(kSetAndTest, offset, NUM_ITER/10); + + if ((score2 = start_contest(kTest, offset, NUM_ITER)) < 0) + score2 = -score2; + if (score2 <= score || score2 < 20) + break; + } + printk("Final offset: %d (%d/%d)\n", offset, score2, NUM_ITER ); + + /* exiting */ + tbsync->cmd = kExit; + wmb(); + tbsync->handshake = 1; + while (tbsync->ack) + barrier(); + tbsync->handshake = 0; + kfree(tbsync); + tbsync = NULL; + running = 0; +} diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c new file mode 100644 index 00000000000..5c330c3366e --- /dev/null +++ b/arch/powerpc/kernel/smp.c @@ -0,0 +1,564 @@ +/* + * SMP support for ppc. + * + * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great + * deal of code from the sparc and intel versions. + * + * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu> + * + * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/cache.h> +#include <linux/err.h> +#include <linux/sysdev.h> +#include <linux/cpu.h> +#include <linux/notifier.h> + +#include <asm/ptrace.h> +#include <asm/atomic.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/smp.h> +#include <asm/time.h> +#include <asm/machdep.h> +#include <asm/cputable.h> +#include <asm/system.h> +#include <asm/mpic.h> +#ifdef CONFIG_PPC64 +#include <asm/paca.h> +#endif + +int smp_hw_index[NR_CPUS]; +struct thread_info *secondary_ti; + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +cpumask_t cpu_possible_map = CPU_MASK_NONE; +cpumask_t cpu_online_map = CPU_MASK_NONE; +cpumask_t cpu_sibling_map[NR_CPUS] = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; + +EXPORT_SYMBOL(cpu_online_map); +EXPORT_SYMBOL(cpu_possible_map); + +/* SMP operations for this machine */ +struct smp_ops_t *smp_ops; + +static volatile unsigned int cpu_callin_map[NR_CPUS]; + +void smp_call_function_interrupt(void); + +int smt_enabled_at_boot = 1; + +#ifdef CONFIG_MPIC +int __init smp_mpic_probe(void) +{ + int nr_cpus; + + DBG("smp_mpic_probe()...\n"); + + nr_cpus = cpus_weight(cpu_possible_map); + + DBG("nr_cpus: %d\n", nr_cpus); + + if (nr_cpus > 1) + mpic_request_ipis(); + + return nr_cpus; +} + +void __devinit smp_mpic_setup_cpu(int cpu) +{ + mpic_setup_this_cpu(); +} +#endif /* CONFIG_MPIC */ + +#ifdef CONFIG_PPC64 +void __devinit smp_generic_kick_cpu(int nr) +{ + BUG_ON(nr < 0 || nr >= NR_CPUS); + + /* + * The processor is currently spinning, waiting for the + * cpu_start field to become non-zero After we set cpu_start, + * the processor will continue on to secondary_start + */ + paca[nr].cpu_start = 1; + smp_mb(); +} +#endif + +void smp_message_recv(int msg, struct pt_regs *regs) +{ + switch(msg) { + case PPC_MSG_CALL_FUNCTION: + smp_call_function_interrupt(); + break; + case PPC_MSG_RESCHEDULE: + /* XXX Do we have to do this? 
*/ + set_need_resched(); + break; +#ifdef CONFIG_DEBUGGER + case PPC_MSG_DEBUGGER_BREAK: + debugger_ipi(regs); + break; +#endif + default: + printk("SMP %d: smp_message_recv(): unknown msg %d\n", + smp_processor_id(), msg); + break; + } +} + +void smp_send_reschedule(int cpu) +{ + smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE); +} + +#ifdef CONFIG_DEBUGGER +void smp_send_debugger_break(int cpu) +{ + smp_ops->message_pass(cpu, PPC_MSG_DEBUGGER_BREAK); +} +#endif + +static void stop_this_cpu(void *dummy) +{ + local_irq_disable(); + while (1) + ; +} + +void smp_send_stop(void) +{ + smp_call_function(stop_this_cpu, NULL, 1, 0); +} + +/* + * Structure and data for smp_call_function(). This is designed to minimise + * static memory requirements. It also looks cleaner. + * Stolen from the i386 version. + */ +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock); + +static struct call_data_struct { + void (*func) (void *info); + void *info; + atomic_t started; + atomic_t finished; + int wait; +} *call_data; + +/* delay of at least 8 seconds */ +#define SMP_CALL_TIMEOUT 8 + +/* + * This function sends a 'generic call function' IPI to all other CPUs + * in the system. + * + * [SUMMARY] Run a function on all other CPUs. + * <func> The function to run. This must be fast and non-blocking. + * <info> An arbitrary pointer to pass to the function. + * <nonatomic> currently unused. + * <wait> If true, wait (atomically) until function has completed on other CPUs. + * [RETURNS] 0 on success, else a negative status code. Does not return until + * remote CPUs are nearly ready to execute <<func>> or are or have executed. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. + */ +int smp_call_function (void (*func) (void *info), void *info, int nonatomic, + int wait) +{ + struct call_data_struct data; + int ret = -1, cpus; + u64 timeout; + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + spin_lock(&call_lock); + /* Must grab online cpu count with preempt disabled, otherwise + * it can change. */ + cpus = num_online_cpus() - 1; + if (!cpus) { + ret = 0; + goto out; + } + + call_data = &data; + smp_wmb(); + /* Send a message to all other CPUs and wait for them to respond */ + smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_CALL_FUNCTION); + + timeout = get_tb() + (u64) SMP_CALL_TIMEOUT * tb_ticks_per_sec; + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) { + HMT_low(); + if (get_tb() >= timeout) { + printk("smp_call_function on cpu %d: other cpus not " + "responding (%d)\n", smp_processor_id(), + atomic_read(&data.started)); + debugger(NULL); + goto out; + } + } + + if (wait) { + while (atomic_read(&data.finished) != cpus) { + HMT_low(); + if (get_tb() >= timeout) { + printk("smp_call_function on cpu %d: other " + "cpus not finishing (%d/%d)\n", + smp_processor_id(), + atomic_read(&data.finished), + atomic_read(&data.started)); + debugger(NULL); + goto out; + } + } + } + + ret = 0; + + out: + call_data = NULL; + HMT_medium(); + spin_unlock(&call_lock); + return ret; +} + +EXPORT_SYMBOL(smp_call_function); + +void smp_call_function_interrupt(void) +{ + void (*func) (void *info); + void *info; + int wait; + + /* call_data will be NULL if the sender timed out while + * waiting on us to receive the call. 
+ */ + if (!call_data) + return; + + func = call_data->func; + info = call_data->info; + wait = call_data->wait; + + if (!wait) + smp_mb__before_atomic_inc(); + + /* + * Notify initiating CPU that I've grabbed the data and am + * about to execute the function + */ + atomic_inc(&call_data->started); + /* + * At this point the info structure may be out of scope unless wait==1 + */ + (*func)(info); + if (wait) { + smp_mb__before_atomic_inc(); + atomic_inc(&call_data->finished); + } +} + +extern struct gettimeofday_struct do_gtod; + +struct thread_info *current_set[NR_CPUS]; + +DECLARE_PER_CPU(unsigned int, pvr); + +static void __devinit smp_store_cpu_info(int id) +{ + per_cpu(pvr, id) = mfspr(SPRN_PVR); +} + +static void __init smp_create_idle(unsigned int cpu) +{ + struct task_struct *p; + + /* create a process for the processor */ + p = fork_idle(cpu); + if (IS_ERR(p)) + panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p)); +#ifdef CONFIG_PPC64 + paca[cpu].__current = p; +#endif + current_set[cpu] = p->thread_info; + p->thread_info->cpu = cpu; +} + +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + unsigned int cpu; + + DBG("smp_prepare_cpus\n"); + + /* + * setup_cpu may need to be called on the boot cpu. We havent + * spun any cpus up but lets be paranoid. + */ + BUG_ON(boot_cpuid != smp_processor_id()); + + /* Fixup boot cpu */ + smp_store_cpu_info(boot_cpuid); + cpu_callin_map[boot_cpuid] = 1; + + max_cpus = smp_ops->probe(); + + smp_space_timers(max_cpus); + + for_each_cpu(cpu) + if (cpu != boot_cpuid) + smp_create_idle(cpu); +} + +void __devinit smp_prepare_boot_cpu(void) +{ + BUG_ON(smp_processor_id() != boot_cpuid); + + cpu_set(boot_cpuid, cpu_online_map); +#ifdef CONFIG_PPC64 + paca[boot_cpuid].__current = current; +#endif + current_set[boot_cpuid] = current->thread_info; +} + +#ifdef CONFIG_HOTPLUG_CPU +/* State of each CPU during hotplug phases */ +DEFINE_PER_CPU(int, cpu_state) = { 0 }; + +int generic_cpu_disable(void) +{ + unsigned int cpu = smp_processor_id(); + + if (cpu == boot_cpuid) + return -EBUSY; + + systemcfg->processorCount--; + cpu_clear(cpu, cpu_online_map); + fixup_irqs(cpu_online_map); + return 0; +} + +int generic_cpu_enable(unsigned int cpu) +{ + /* Do the normal bootup if we haven't + * already bootstrapped. 
*/ + if (system_state != SYSTEM_RUNNING) + return -ENOSYS; + + /* get the target out of it's holding state */ + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + smp_wmb(); + + while (!cpu_online(cpu)) + cpu_relax(); + + fixup_irqs(cpu_online_map); + /* counter the irq disable in fixup_irqs */ + local_irq_enable(); + return 0; +} + +void generic_cpu_die(unsigned int cpu) +{ + int i; + + for (i = 0; i < 100; i++) { + smp_rmb(); + if (per_cpu(cpu_state, cpu) == CPU_DEAD) + return; + msleep(100); + } + printk(KERN_ERR "CPU%d didn't die...\n", cpu); +} + +void generic_mach_cpu_die(void) +{ + unsigned int cpu; + + local_irq_disable(); + cpu = smp_processor_id(); + printk(KERN_DEBUG "CPU%d offline\n", cpu); + __get_cpu_var(cpu_state) = CPU_DEAD; + smp_wmb(); + while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) + cpu_relax(); + + flush_tlb_pending(); + cpu_set(cpu, cpu_online_map); + local_irq_enable(); +} +#endif + +static int __devinit cpu_enable(unsigned int cpu) +{ + if (smp_ops->cpu_enable) + return smp_ops->cpu_enable(cpu); + + return -ENOSYS; +} + +int __devinit __cpu_up(unsigned int cpu) +{ + int c; + + secondary_ti = current_set[cpu]; + if (!cpu_enable(cpu)) + return 0; + + if (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)) + return -EINVAL; + +#ifdef CONFIG_PPC64 + paca[cpu].default_decr = tb_ticks_per_jiffy; +#endif + + /* Make sure callin-map entry is 0 (can be leftover a CPU + * hotplug + */ + cpu_callin_map[cpu] = 0; + + /* The information for processor bringup must + * be written out to main store before we release + * the processor. + */ + smp_mb(); + + /* wake up cpus */ + DBG("smp: kicking cpu %d\n", cpu); + smp_ops->kick_cpu(cpu); + + /* + * wait to see if the cpu made a callin (is actually up). + * use this value that I found through experimentation. + * -- Cort + */ + if (system_state < SYSTEM_RUNNING) + for (c = 5000; c && !cpu_callin_map[cpu]; c--) + udelay(100); +#ifdef CONFIG_HOTPLUG_CPU + else + /* + * CPUs can take much longer to come up in the + * hotplug case. Wait five seconds. + */ + for (c = 25; c && !cpu_callin_map[cpu]; c--) { + msleep(200); + } +#endif + + if (!cpu_callin_map[cpu]) { + printk("Processor %u is stuck.\n", cpu); + return -ENOENT; + } + + printk("Processor %u found.\n", cpu); + + if (smp_ops->give_timebase) + smp_ops->give_timebase(); + + /* Wait until cpu puts itself in the online map */ + while (!cpu_online(cpu)) + cpu_relax(); + + return 0; +} + + +/* Activate a secondary processor. 
*/ +int __devinit start_secondary(void *unused) +{ + unsigned int cpu = smp_processor_id(); + + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + + smp_store_cpu_info(cpu); + set_dec(tb_ticks_per_jiffy); + cpu_callin_map[cpu] = 1; + + smp_ops->setup_cpu(cpu); + if (smp_ops->take_timebase) + smp_ops->take_timebase(); + + spin_lock(&call_lock); + cpu_set(cpu, cpu_online_map); + spin_unlock(&call_lock); + + local_irq_enable(); + + cpu_idle(); + return 0; +} + +int setup_profiling_timer(unsigned int multiplier) +{ + return 0; +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ + cpumask_t old_mask; + + /* We want the setup_cpu() here to be called from CPU 0, but our + * init thread may have been "borrowed" by another CPU in the meantime + * se we pin us down to CPU 0 for a short while + */ + old_mask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(boot_cpuid)); + + smp_ops->setup_cpu(boot_cpuid); + + set_cpus_allowed(current, old_mask); +} + +#ifdef CONFIG_HOTPLUG_CPU +int __cpu_disable(void) +{ + if (smp_ops->cpu_disable) + return smp_ops->cpu_disable(); + + return -ENOSYS; +} + +void __cpu_die(unsigned int cpu) +{ + if (smp_ops->cpu_die) + smp_ops->cpu_die(cpu); +} +#endif diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 23436b6c188..a6282b625b4 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -61,14 +61,16 @@ #include <asm/prom.h> #include <asm/irq.h> #include <asm/div64.h> +#include <asm/smp.h> #ifdef CONFIG_PPC64 #include <asm/systemcfg.h> #include <asm/firmware.h> #endif #ifdef CONFIG_PPC_ISERIES -#include <asm/iSeries/ItLpQueue.h> -#include <asm/iSeries/HvCallXm.h> +#include <asm/iseries/it_lp_queue.h> +#include <asm/iseries/hv_call_xm.h> #endif +#include <asm/smp.h> /* keep track of when we need to update the rtc */ time_t last_rtc_update; @@ -118,10 +120,6 @@ static unsigned adjusting_time = 0; unsigned long ppc_proc_freq; unsigned long ppc_tb_freq; -#ifdef CONFIG_PPC32 /* XXX for now */ -#define boot_cpuid 0 -#endif - u64 tb_last_jiffy __cacheline_aligned_in_smp; unsigned long tb_last_stamp; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 5d638ecddbd..0578f838760 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -39,7 +39,6 @@ #include <asm/io.h> #include <asm/machdep.h> #include <asm/rtas.h> -#include <asm/xmon.h> #include <asm/pmc.h> #ifdef CONFIG_PPC32 #include <asm/reg.h> @@ -147,8 +146,8 @@ int die(const char *str, struct pt_regs *regs, long err) printk("POWERMAC "); nl = 1; break; - case PLATFORM_BPA: - printk("BPA "); + case PLATFORM_CELL: + printk("CELL "); nl = 1; break; } @@ -748,23 +747,13 @@ static int check_bug_trap(struct pt_regs *regs) return 0; if (bug->line & BUG_WARNING_TRAP) { /* this is a WARN_ON rather than BUG/BUG_ON */ -#ifdef CONFIG_XMON - xmon_printf(KERN_ERR "Badness in %s at %s:%d\n", - bug->function, bug->file, - bug->line & ~BUG_WARNING_TRAP); -#endif /* CONFIG_XMON */ - printk(KERN_ERR "Badness in %s at %s:%d\n", + printk(KERN_ERR "Badness in %s at %s:%ld\n", bug->function, bug->file, bug->line & ~BUG_WARNING_TRAP); dump_stack(); return 1; } -#ifdef CONFIG_XMON - xmon_printf(KERN_CRIT "kernel BUG in %s at %s:%d!\n", - bug->function, bug->file, bug->line); - xmon(regs); -#endif /* CONFIG_XMON */ - printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n", + printk(KERN_CRIT "kernel BUG in %s at %s:%ld!\n", bug->function, bug->file, bug->line); return 0; @@ -898,10 +887,6 @@ void altivec_unavailable_exception(struct 
pt_regs *regs) die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); } -#ifdef CONFIG_PPC64 -extern perf_irq_t perf_irq; -#endif - #if defined(CONFIG_PPC64) || defined(CONFIG_E500) void performance_monitor_exception(struct pt_regs *regs) { diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 97082a4203a..71a6addf9f7 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -21,6 +21,7 @@ #include <asm/iommu.h> #include <asm/dma.h> #include <asm/vio.h> +#include <asm/prom.h> static const struct vio_device_id *vio_match_device( const struct vio_device_id *, const struct vio_dev *); @@ -265,7 +266,33 @@ static int vio_bus_match(struct device *dev, struct device_driver *drv) return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL); } +static int vio_hotplug(struct device *dev, char **envp, int num_envp, + char *buffer, int buffer_size) +{ + const struct vio_dev *vio_dev = to_vio_dev(dev); + char *cp; + int length; + + if (!num_envp) + return -ENOMEM; + + if (!vio_dev->dev.platform_data) + return -ENODEV; + cp = (char *)get_property(vio_dev->dev.platform_data, "compatible", &length); + if (!cp) + return -ENODEV; + + envp[0] = buffer; + length = scnprintf(buffer, buffer_size, "MODALIAS=vio:T%sS%s", + vio_dev->type, cp); + if (buffer_size - length <= 0) + return -ENOMEM; + envp[1] = NULL; + return 0; +} + struct bus_type vio_bus_type = { .name = "vio", + .hotplug = vio_hotplug, .match = vio_bus_match, }; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index d4dfcfbce27..7fa7b15fd8e 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -3,9 +3,12 @@ #include <asm/page.h> #else #define PAGE_SIZE 4096 +#define KERNELBASE CONFIG_KERNEL_START #endif #include <asm-generic/vmlinux.lds.h> +ENTRY(_stext) + #ifdef CONFIG_PPC64 OUTPUT_ARCH(powerpc:common64) jiffies = jiffies_64; @@ -21,33 +24,9 @@ SECTIONS *(.exit.data) } + . = KERNELBASE; /* Read-only sections, merged into text segment: */ -#ifdef CONFIG_PPC32 - . = + SIZEOF_HEADERS; - .interp : { *(.interp) } - .hash : { *(.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .rel.text : { *(.rel.text) } - .rela.text : { *(.rela.text) } - .rel.data : { *(.rel.data) } - .rela.data : { *(.rela.data) } - .rel.rodata : { *(.rel.rodata) } - .rela.rodata : { *(.rela.rodata) } - .rel.got : { *(.rel.got) } - .rela.got : { *(.rela.got) } - .rel.ctors : { *(.rel.ctors) } - .rela.ctors : { *(.rela.ctors) } - .rel.dtors : { *(.rel.dtors) } - .rela.dtors : { *(.rela.dtors) } - .rel.bss : { *(.rel.bss) } - .rela.bss : { *(.rela.bss) } - .rel.plt : { *(.rel.plt) } - .rela.plt : { *(.rela.plt) } -/* .init : { *(.init) } =0*/ - .plt : { *(.plt) } -#endif .text : { *(.text .text.*) SCHED_TEXT |