Diffstat (limited to 'arch/powerpc/kernel')
50 files changed, 1648 insertions, 1132 deletions
diff --git a/arch/powerpc/kernel/.gitignore b/arch/powerpc/kernel/.gitignore new file mode 100644 index 00000000000..c5f676c3c22 --- /dev/null +++ b/arch/powerpc/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 946daea780f..92673b43858 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -12,7 +12,7 @@ CFLAGS_prom_init.o      += -fPIC  CFLAGS_btext.o		+= -fPIC  endif -ifdef CONFIG_FTRACE +ifdef CONFIG_FUNCTION_TRACER  # Do not trace early boot code  CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog  CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog @@ -28,13 +28,14 @@ endif  obj-y				:= cputable.o ptrace.o syscalls.o \  				   irq.o align.o signal_32.o pmc.o vdso.o \  				   init_task.o process.o systbl.o idle.o \ -				   signal.o +				   signal.o sysfs.o  obj-y				+= vdso32/  obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \  				   signal_64.o ptrace32.o \  				   paca.o cpu_setup_ppc970.o \  				   cpu_setup_pa6t.o \ -				   firmware.o sysfs.o nvram_64.o +				   firmware.o nvram_64.o +obj64-$(CONFIG_RELOCATABLE)	+= reloc_64.o  obj-$(CONFIG_PPC64)		+= vdso64/  obj-$(CONFIG_ALTIVEC)		+= vecemu.o vector.o  obj-$(CONFIG_PPC_970_NAP)	+= idle_power4.o @@ -69,10 +70,10 @@ extra-$(CONFIG_8xx)		:= head_8xx.o  extra-y				+= vmlinux.lds  obj-y				+= time.o prom.o traps.o setup-common.o \ -				   udbg.o misc.o io.o \ +				   udbg.o misc.o io.o dma.o \  				   misc_$(CONFIG_WORD_SIZE).o  obj-$(CONFIG_PPC32)		+= entry_32.o setup_32.o -obj-$(CONFIG_PPC64)		+= dma_64.o iommu.o +obj-$(CONFIG_PPC64)		+= dma-iommu.o iommu.o  obj-$(CONFIG_KGDB)		+= kgdb.o  obj-$(CONFIG_PPC_MULTIPLATFORM)	+= prom_init.o  obj-$(CONFIG_MODULES)		+= ppc_ksyms.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 92768d3006f..75c5dd0138f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -122,6 +122,8 @@ int main(void)  	DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr));  	DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1));  	DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc)); +	DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase)); +	DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));  	DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));  	DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));  	DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); @@ -350,14 +352,15 @@ int main(void)  #endif  	DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE); +	DEFINE(PTE_SIZE, sizeof(pte_t));  #ifdef CONFIG_KVM  	DEFINE(TLBE_BYTES, sizeof(struct tlbe));  	DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));  	DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); -	DEFINE(VCPU_HOST_TLB, offsetof(struct kvm_vcpu, arch.host_tlb));  	DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb)); +	DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod));  	DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));  	DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));  	DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); @@ -369,7 +372,7 @@ int main(void)  	DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));  	DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));  	DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); -	DEFINE(VCPU_PID, offsetof(struct kvm_vcpu, arch.pid)); +	DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid));  	
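
/*
 * Background on the DEFINE() lines above and below: asm-offsets.c is never
 * linked into the kernel; it is only compiled to assembly, and each DEFINE()
 * plants a "->SYMBOL value" marker in that output which the build machinery
 * turns into #define lines in asm-offsets.h, so assembly code (e.g. the new
 * PACAKBASE/PACAKMSR users in head_64.S) can reference structure offsets.
 * A sketch of the macro as found in the kernel's kbuild helper header of
 * this era (quoted from memory, treat as illustrative):
 */
#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
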
DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));  	DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index d8f0329b134..26e58630ed7 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -442,28 +442,26 @@ void btext_drawtext(const char *c, unsigned int len)  void btext_drawhex(unsigned long v)  { -	char *hex_table = "0123456789abcdef"; -  	if (!boot_text_mapped)  		return;  #ifdef CONFIG_PPC64 -	btext_drawchar(hex_table[(v >> 60) & 0x0000000FUL]); -	btext_drawchar(hex_table[(v >> 56) & 0x0000000FUL]); -	btext_drawchar(hex_table[(v >> 52) & 0x0000000FUL]); -	btext_drawchar(hex_table[(v >> 48) & 0x0000000FUL]); -	btext_drawchar(hex_table[(v >> 44) & 0x0000000FUL]); -	btext_drawchar(hex_table[(v >> 40) & 0x0000000FUL]); -	btext_drawchar(hex_table[(v >> 36) & 0x0000000FUL]); -	btext_drawchar(hex_table[(v >> 32) & 0x0000000FUL]); +	btext_drawchar(hex_asc_hi(v >> 56)); +	btext_drawchar(hex_asc_lo(v >> 56)); +	btext_drawchar(hex_asc_hi(v >> 48)); +	btext_drawchar(hex_asc_lo(v >> 48)); +	btext_drawchar(hex_asc_hi(v >> 40)); +	btext_drawchar(hex_asc_lo(v >> 40)); +	btext_drawchar(hex_asc_hi(v >> 32)); +	btext_drawchar(hex_asc_lo(v >> 32));  #endif -	btext_drawchar(hex_table[(v >> 28) & 0x0000000FUL]); -	btext_drawchar(hex_table[(v >> 24) & 0x0000000FUL]); -	btext_drawchar(hex_table[(v >> 20) & 0x0000000FUL]); -	btext_drawchar(hex_table[(v >> 16) & 0x0000000FUL]); -	btext_drawchar(hex_table[(v >> 12) & 0x0000000FUL]); -	btext_drawchar(hex_table[(v >>  8) & 0x0000000FUL]); -	btext_drawchar(hex_table[(v >>  4) & 0x0000000FUL]); -	btext_drawchar(hex_table[(v >>  0) & 0x0000000FUL]); +	btext_drawchar(hex_asc_hi(v >> 24)); +	btext_drawchar(hex_asc_lo(v >> 24)); +	btext_drawchar(hex_asc_hi(v >> 16)); +	btext_drawchar(hex_asc_lo(v >> 16)); +	btext_drawchar(hex_asc_hi(v >> 8)); +	btext_drawchar(hex_asc_lo(v >> 8)); +	btext_drawchar(hex_asc_hi(v)); +	btext_drawchar(hex_asc_lo(v));  	btext_drawchar(' ');  } diff --git a/arch/powerpc/kernel/cpu_setup_ppc970.S b/arch/powerpc/kernel/cpu_setup_ppc970.S index bf118c38575..27f2507279d 100644 --- a/arch/powerpc/kernel/cpu_setup_ppc970.S +++ b/arch/powerpc/kernel/cpu_setup_ppc970.S @@ -110,7 +110,7 @@ load_hids:  	isync  	/* Save away cpu state */ -	LOAD_REG_IMMEDIATE(r5,cpu_state_storage) +	LOAD_REG_ADDR(r5,cpu_state_storage)  	/* Save HID0,1,4 and 5 */  	mfspr	r3,SPRN_HID0 @@ -134,7 +134,7 @@ _GLOBAL(__restore_cpu_ppc970)  	rldicl.	
r0,r0,4,63  	beqlr -	LOAD_REG_IMMEDIATE(r5,cpu_state_storage) +	LOAD_REG_ADDR(r5,cpu_state_storage)  	/* Before accessing memory, we make sure rm_ci is clear */  	li	r0,0  	mfspr	r3,SPRN_HID4 diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 25c273c761d..b1eb834bc0f 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -610,6 +610,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 4, +		.pmc_type		= PPC_PMC_IBM,  		.cpu_setup		= __setup_cpu_750cx,  		.machine_check		= machine_check_generic,  		.platform		= "ppc750", @@ -623,6 +624,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 4, +		.pmc_type		= PPC_PMC_IBM,  		.cpu_setup		= __setup_cpu_750cx,  		.machine_check		= machine_check_generic,  		.platform		= "ppc750", @@ -636,6 +638,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 4, +		.pmc_type		= PPC_PMC_IBM,  		.cpu_setup		= __setup_cpu_750cx,  		.machine_check		= machine_check_generic,  		.platform		= "ppc750", @@ -649,6 +652,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 4, +		.pmc_type		= PPC_PMC_IBM,  		.cpu_setup		= __setup_cpu_750,  		.machine_check		= machine_check_generic,  		.platform		= "ppc750", @@ -662,6 +666,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 4, +		.pmc_type		= PPC_PMC_IBM,  		.cpu_setup		= __setup_cpu_750,  		.machine_check		= machine_check_generic,  		.platform		= "ppc750", @@ -675,6 +680,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 4, +		.pmc_type		= PPC_PMC_IBM,  		.cpu_setup		= __setup_cpu_750,  		.machine_check		= machine_check_generic,  		.platform		= "ppc750", @@ -688,6 +694,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 4, +		.pmc_type		= PPC_PMC_IBM,  		.cpu_setup		= __setup_cpu_750,  		.machine_check		= machine_check_generic,  		.platform		= "ppc750", @@ -701,6 +708,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 4, +		.pmc_type		= PPC_PMC_IBM,  		.cpu_setup		= __setup_cpu_750fx,  		.machine_check		= machine_check_generic,  		.platform		= "ppc750", @@ -714,6 +722,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 4, +		.pmc_type		= PPC_PMC_IBM,  		.cpu_setup		= __setup_cpu_750fx,  		.machine_check		= machine_check_generic,  		.platform		= "ppc750", @@ -727,6 +736,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 4, +		.pmc_type		= PPC_PMC_IBM,  		.cpu_setup		= __setup_cpu_750,  		.machine_check		= machine_check_generic,  		.platform		= "ppc750", @@ -741,6 +751,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 4, +		.pmc_type		= PPC_PMC_G4,  		.cpu_setup		= __setup_cpu_7400,  		.machine_check		= machine_check_generic,  		.platform		= "ppc7400", @@ -755,6 +766,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 4, +		.pmc_type		= PPC_PMC_G4,  		.cpu_setup		= 
__setup_cpu_7400,  		.machine_check		= machine_check_generic,  		.platform		= "ppc7400", @@ -769,6 +781,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 4, +		.pmc_type		= PPC_PMC_G4,  		.cpu_setup		= __setup_cpu_7410,  		.machine_check		= machine_check_generic,  		.platform		= "ppc7400", @@ -783,6 +796,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 6, +		.pmc_type		= PPC_PMC_G4,  		.cpu_setup		= __setup_cpu_745x,  		.oprofile_cpu_type      = "ppc/7450",  		.oprofile_type		= PPC_OPROFILE_G4, @@ -799,6 +813,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 6, +		.pmc_type		= PPC_PMC_G4,  		.cpu_setup		= __setup_cpu_745x,  		.oprofile_cpu_type      = "ppc/7450",  		.oprofile_type		= PPC_OPROFILE_G4, @@ -815,6 +830,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 6, +		.pmc_type		= PPC_PMC_G4,  		.cpu_setup		= __setup_cpu_745x,  		.oprofile_cpu_type      = "ppc/7450",  		.oprofile_type		= PPC_OPROFILE_G4, @@ -831,6 +847,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 6, +		.pmc_type		= PPC_PMC_G4,  		.cpu_setup		= __setup_cpu_745x,  		.oprofile_cpu_type      = "ppc/7450",  		.oprofile_type		= PPC_OPROFILE_G4, @@ -847,6 +864,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 6, +		.pmc_type		= PPC_PMC_G4,  		.cpu_setup		= __setup_cpu_745x,  		.oprofile_cpu_type      = "ppc/7450",  		.oprofile_type		= PPC_OPROFILE_G4, @@ -863,6 +881,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 6, +		.pmc_type		= PPC_PMC_G4,  		.cpu_setup		= __setup_cpu_745x,  		.oprofile_cpu_type      = "ppc/7450",  		.oprofile_type		= PPC_OPROFILE_G4, @@ -879,6 +898,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 6, +		.pmc_type		= PPC_PMC_G4,  		.cpu_setup		= __setup_cpu_745x,  		.oprofile_cpu_type      = "ppc/7450",  		.oprofile_type		= PPC_OPROFILE_G4, @@ -895,6 +915,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 6, +		.pmc_type		= PPC_PMC_G4,  		.cpu_setup		= __setup_cpu_745x,  		.oprofile_cpu_type      = "ppc/7450",  		.oprofile_type		= PPC_OPROFILE_G4, @@ -910,6 +931,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 6, +		.pmc_type		= PPC_PMC_G4,  		.cpu_setup		= __setup_cpu_745x,  		.oprofile_cpu_type      = "ppc/7450",  		.oprofile_type		= PPC_OPROFILE_G4, @@ -926,6 +948,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 6, +		.pmc_type		= PPC_PMC_G4,  		.cpu_setup		= __setup_cpu_745x,  		.oprofile_cpu_type      = "ppc/7450",  		.oprofile_type		= PPC_OPROFILE_G4, @@ -942,6 +965,7 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.icache_bsize		= 32,  		.dcache_bsize		= 32,  		.num_pmcs		= 6, +		.pmc_type		= PPC_PMC_G4,  		.cpu_setup		= __setup_cpu_745x,  		.oprofile_cpu_type      = "ppc/7450",  		.oprofile_type		= PPC_OPROFILE_G4, @@ -1253,6 +1277,19 @@ static struct cpu_spec __initdata cpu_specs[] = {  		.machine_check		= machine_check_4xx,  		.platform	
	= "ppc405",  	}, +	{ +		/* 405EZ */ +		.pvr_mask		= 0xffff0000, +		.pvr_value		= 0x41510000, +		.cpu_name		= "405EZ", +		.cpu_features		= CPU_FTRS_40X, +		.cpu_user_features	= PPC_FEATURE_32 | +			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC, +		.icache_bsize		= 32, +		.dcache_bsize		= 32, +		.machine_check		= machine_check_4xx, +		.platform		= "ppc405", +	},  	{	/* default match */  		.pvr_mask		= 0x00000000,  		.pvr_value		= 0x00000000, diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index a323c9b32ee..19671aca659 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -27,6 +27,10 @@  #define DBG(fmt...)  #endif +/* Stores the physical address of elf header of crash image. */ +unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; + +#ifndef CONFIG_RELOCATABLE  void __init reserve_kdump_trampoline(void)  {  	lmb_reserve(0, KDUMP_RESERVE_LIMIT); @@ -65,8 +69,13 @@ void __init setup_kdump_trampoline(void)  	DBG(" <- setup_kdump_trampoline()\n");  } +#endif /* CONFIG_RELOCATABLE */ -#ifdef CONFIG_PROC_VMCORE +/* + * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by + * is_kdump_kernel() to determine if we are booting after a panic. Hence + * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE. + */  static int __init parse_elfcorehdr(char *p)  {  	if (p) @@ -75,7 +84,6 @@ static int __init parse_elfcorehdr(char *p)  	return 1;  }  __setup("elfcorehdr=", parse_elfcorehdr); -#endif  static int __init parse_savemaxmem(char *p)  { diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c new file mode 100644 index 00000000000..14183af1b3f --- /dev/null +++ b/arch/powerpc/kernel/dma-iommu.c @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation + * + * Provide default implementations of the DMA mapping callbacks for + * busses using the iommu infrastructure + */ + +#include <asm/iommu.h> + +/* + * Generic iommu implementation + */ + +/* Allocates a contiguous real buffer and creates mappings over it. + * Returns the virtual address of the buffer and sets dma_handle + * to the dma address (mapping) of the first page. + */ +static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, +				      dma_addr_t *dma_handle, gfp_t flag) +{ +	return iommu_alloc_coherent(dev, dev->archdata.dma_data, size, +				    dma_handle, device_to_mask(dev), flag, +				    dev_to_node(dev)); +} + +static void dma_iommu_free_coherent(struct device *dev, size_t size, +				    void *vaddr, dma_addr_t dma_handle) +{ +	iommu_free_coherent(dev->archdata.dma_data, size, vaddr, dma_handle); +} + +/* Creates TCEs for a user provided buffer.  The user buffer must be + * contiguous real kernel storage (not vmalloc).  The address passed here + * comprises a page address and offset into that page. The dma_addr_t + * returned will point to the same byte within the page as was passed in. 
+ */ +static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page, +				     unsigned long offset, size_t size, +				     enum dma_data_direction direction, +				     struct dma_attrs *attrs) +{ +	return iommu_map_page(dev, dev->archdata.dma_data, page, offset, size, +			      device_to_mask(dev), direction, attrs); +} + + +static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, +				 size_t size, enum dma_data_direction direction, +				 struct dma_attrs *attrs) +{ +	iommu_unmap_page(dev->archdata.dma_data, dma_handle, size, direction, +			 attrs); +} + + +static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, +			    int nelems, enum dma_data_direction direction, +			    struct dma_attrs *attrs) +{ +	return iommu_map_sg(dev, dev->archdata.dma_data, sglist, nelems, +			    device_to_mask(dev), direction, attrs); +} + +static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist, +		int nelems, enum dma_data_direction direction, +		struct dma_attrs *attrs) +{ +	iommu_unmap_sg(dev->archdata.dma_data, sglist, nelems, direction, +		       attrs); +} + +/* We support DMA to/from any memory page via the iommu */ +static int dma_iommu_dma_supported(struct device *dev, u64 mask) +{ +	struct iommu_table *tbl = dev->archdata.dma_data; + +	if (!tbl || tbl->it_offset > mask) { +		printk(KERN_INFO +		       "Warning: IOMMU offset too big for device mask\n"); +		if (tbl) +			printk(KERN_INFO +			       "mask: 0x%08lx, table offset: 0x%08lx\n", +				mask, tbl->it_offset); +		else +			printk(KERN_INFO "mask: 0x%08lx, table unavailable\n", +				mask); +		return 0; +	} else +		return 1; +} + +struct dma_mapping_ops dma_iommu_ops = { +	.alloc_coherent	= dma_iommu_alloc_coherent, +	.free_coherent	= dma_iommu_free_coherent, +	.map_sg		= dma_iommu_map_sg, +	.unmap_sg	= dma_iommu_unmap_sg, +	.dma_supported	= dma_iommu_dma_supported, +	.map_page	= dma_iommu_map_page, +	.unmap_page	= dma_iommu_unmap_page, +}; +EXPORT_SYMBOL(dma_iommu_ops); diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c new file mode 100644 index 00000000000..1562daf8839 --- /dev/null +++ b/arch/powerpc/kernel/dma.c @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation + * + * Provide default implementations of the DMA mapping callbacks for + * directly mapped busses. + */ + +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <asm/bug.h> +#include <asm/abs_addr.h> + +/* + * Generic direct DMA implementation + * + * This implementation supports a per-device offset that can be applied if + * the address at which memory is visible to devices is not 0. Platform code + * can set archdata.dma_data to an unsigned long holding the offset. By + * default the offset is PCI_DRAM_OFFSET. 
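
/*
 * A note on how dma_iommu_ops (exported above) gets used: bus/platform code
 * points a device at its iommu table via archdata.dma_data and installs the
 * ops structure. A minimal sketch; the archdata.dma_ops field name is
 * assumed from the powerpc <asm/device.h> of this era, not shown in this
 * diff, so treat the hookup as illustrative rather than authoritative:
 */
static void example_attach_iommu(struct device *dev, struct iommu_table *tbl)
{
	dev->archdata.dma_data = tbl;		/* table consumed by the ops above */
	dev->archdata.dma_ops = &dma_iommu_ops;	/* field name assumed */
}
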
+ */ + +static unsigned long get_dma_direct_offset(struct device *dev) +{ +	if (dev) +		return (unsigned long)dev->archdata.dma_data; + +	return PCI_DRAM_OFFSET; +} + +void *dma_direct_alloc_coherent(struct device *dev, size_t size, +				dma_addr_t *dma_handle, gfp_t flag) +{ +	void *ret; +#ifdef CONFIG_NOT_COHERENT_CACHE +	ret = __dma_alloc_coherent(size, dma_handle, flag); +	if (ret == NULL) +		return NULL; +	*dma_handle += get_dma_direct_offset(dev); +	return ret; +#else +	struct page *page; +	int node = dev_to_node(dev); + +	/* ignore region specifiers */ +	flag  &= ~(__GFP_HIGHMEM); + +	page = alloc_pages_node(node, flag, get_order(size)); +	if (page == NULL) +		return NULL; +	ret = page_address(page); +	memset(ret, 0, size); +	*dma_handle = virt_to_abs(ret) + get_dma_direct_offset(dev); + +	return ret; +#endif +} + +void dma_direct_free_coherent(struct device *dev, size_t size, +			      void *vaddr, dma_addr_t dma_handle) +{ +#ifdef CONFIG_NOT_COHERENT_CACHE +	__dma_free_coherent(size, vaddr); +#else +	free_pages((unsigned long)vaddr, get_order(size)); +#endif +} + +static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, +			     int nents, enum dma_data_direction direction, +			     struct dma_attrs *attrs) +{ +	struct scatterlist *sg; +	int i; + +	for_each_sg(sgl, sg, nents, i) { +		sg->dma_address = sg_phys(sg) + get_dma_direct_offset(dev); +		sg->dma_length = sg->length; +	} + +	return nents; +} + +static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg, +				int nents, enum dma_data_direction direction, +				struct dma_attrs *attrs) +{ +} + +static int dma_direct_dma_supported(struct device *dev, u64 mask) +{ +#ifdef CONFIG_PPC64 +	/* Could be improved to check for memory though it better be +	 * done via some global so platforms can set the limit in case +	 * they have limited DMA windows +	 */ +	return mask >= DMA_32BIT_MASK; +#else +	return 1; +#endif +} + +static inline dma_addr_t dma_direct_map_page(struct device *dev, +					     struct page *page, +					     unsigned long offset, +					     size_t size, +					     enum dma_data_direction dir, +					     struct dma_attrs *attrs) +{ +	BUG_ON(dir == DMA_NONE); +	__dma_sync_page(page, offset, size, dir); +	return page_to_phys(page) + offset + get_dma_direct_offset(dev); +} + +static inline void dma_direct_unmap_page(struct device *dev, +					 dma_addr_t dma_address, +					 size_t size, +					 enum dma_data_direction direction, +					 struct dma_attrs *attrs) +{ +} + +struct dma_mapping_ops dma_direct_ops = { +	.alloc_coherent	= dma_direct_alloc_coherent, +	.free_coherent	= dma_direct_free_coherent, +	.map_sg		= dma_direct_map_sg, +	.unmap_sg	= dma_direct_unmap_sg, +	.dma_supported	= dma_direct_dma_supported, +	.map_page	= dma_direct_map_page, +	.unmap_page	= dma_direct_unmap_page, +}; +EXPORT_SYMBOL(dma_direct_ops); diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c deleted file mode 100644 index ae5708e3a31..00000000000 --- a/arch/powerpc/kernel/dma_64.c +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation - * - * Provide default implementations of the DMA mapping callbacks for - * directly mapped busses and busses using the iommu infrastructure - */ - -#include <linux/device.h> -#include <linux/dma-mapping.h> -#include <asm/bug.h> -#include <asm/iommu.h> -#include <asm/abs_addr.h> - -/* - * Generic iommu implementation - */ - -/* Allocates a contiguous real buffer and creates mappings over it. 
- * Returns the virtual address of the buffer and sets dma_handle - * to the dma address (mapping) of the first page. - */ -static void *dma_iommu_alloc_coherent(struct device *dev, size_t size, -				      dma_addr_t *dma_handle, gfp_t flag) -{ -	return iommu_alloc_coherent(dev, dev->archdata.dma_data, size, -				    dma_handle, device_to_mask(dev), flag, -				    dev->archdata.numa_node); -} - -static void dma_iommu_free_coherent(struct device *dev, size_t size, -				    void *vaddr, dma_addr_t dma_handle) -{ -	iommu_free_coherent(dev->archdata.dma_data, size, vaddr, dma_handle); -} - -/* Creates TCEs for a user provided buffer.  The user buffer must be - * contiguous real kernel storage (not vmalloc).  The address of the buffer - * passed here is the kernel (virtual) address of the buffer.  The buffer - * need not be page aligned, the dma_addr_t returned will point to the same - * byte within the page as vaddr. - */ -static dma_addr_t dma_iommu_map_single(struct device *dev, void *vaddr, -				       size_t size, -				       enum dma_data_direction direction, -				       struct dma_attrs *attrs) -{ -	return iommu_map_single(dev, dev->archdata.dma_data, vaddr, size, -				device_to_mask(dev), direction, attrs); -} - - -static void dma_iommu_unmap_single(struct device *dev, dma_addr_t dma_handle, -				   size_t size, -				   enum dma_data_direction direction, -				   struct dma_attrs *attrs) -{ -	iommu_unmap_single(dev->archdata.dma_data, dma_handle, size, direction, -			   attrs); -} - - -static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, -			    int nelems, enum dma_data_direction direction, -			    struct dma_attrs *attrs) -{ -	return iommu_map_sg(dev, dev->archdata.dma_data, sglist, nelems, -			    device_to_mask(dev), direction, attrs); -} - -static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist, -		int nelems, enum dma_data_direction direction, -		struct dma_attrs *attrs) -{ -	iommu_unmap_sg(dev->archdata.dma_data, sglist, nelems, direction, -		       attrs); -} - -/* We support DMA to/from any memory page via the iommu */ -static int dma_iommu_dma_supported(struct device *dev, u64 mask) -{ -	struct iommu_table *tbl = dev->archdata.dma_data; - -	if (!tbl || tbl->it_offset > mask) { -		printk(KERN_INFO -		       "Warning: IOMMU offset too big for device mask\n"); -		if (tbl) -			printk(KERN_INFO -			       "mask: 0x%08lx, table offset: 0x%08lx\n", -				mask, tbl->it_offset); -		else -			printk(KERN_INFO "mask: 0x%08lx, table unavailable\n", -				mask); -		return 0; -	} else -		return 1; -} - -struct dma_mapping_ops dma_iommu_ops = { -	.alloc_coherent	= dma_iommu_alloc_coherent, -	.free_coherent	= dma_iommu_free_coherent, -	.map_single	= dma_iommu_map_single, -	.unmap_single	= dma_iommu_unmap_single, -	.map_sg		= dma_iommu_map_sg, -	.unmap_sg	= dma_iommu_unmap_sg, -	.dma_supported	= dma_iommu_dma_supported, -}; -EXPORT_SYMBOL(dma_iommu_ops); - -/* - * Generic direct DMA implementation - * - * This implementation supports a per-device offset that can be applied if - * the address at which memory is visible to devices is not 0. Platform code - * can set archdata.dma_data to an unsigned long holding the offset. By - * default the offset is zero. 
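
/*
 * Hedged aside: this series drops the map_single callbacks (removed above)
 * in favour of map_page. The two are equivalent because a kernel virtual
 * address decomposes into a page plus an offset; roughly, using the standard
 * virt_to_page()/offset_in_page() helpers (the wrapper name here is
 * illustrative, a sketch rather than the actual dma-mapping.h code):
 */
static inline dma_addr_t sketch_map_single(struct device *dev,
		struct dma_mapping_ops *ops, void *ptr, size_t size,
		enum dma_data_direction dir, struct dma_attrs *attrs)
{
	/* the returned dma address points at the same byte as ptr */
	return ops->map_page(dev, virt_to_page(ptr), offset_in_page(ptr),
			     size, dir, attrs);
}
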
- */ - -static unsigned long get_dma_direct_offset(struct device *dev) -{ -	return (unsigned long)dev->archdata.dma_data; -} - -static void *dma_direct_alloc_coherent(struct device *dev, size_t size, -				       dma_addr_t *dma_handle, gfp_t flag) -{ -	struct page *page; -	void *ret; -	int node = dev->archdata.numa_node; - -	page = alloc_pages_node(node, flag, get_order(size)); -	if (page == NULL) -		return NULL; -	ret = page_address(page); -	memset(ret, 0, size); -	*dma_handle = virt_to_abs(ret) + get_dma_direct_offset(dev); - -	return ret; -} - -static void dma_direct_free_coherent(struct device *dev, size_t size, -				     void *vaddr, dma_addr_t dma_handle) -{ -	free_pages((unsigned long)vaddr, get_order(size)); -} - -static dma_addr_t dma_direct_map_single(struct device *dev, void *ptr, -					size_t size, -					enum dma_data_direction direction, -					struct dma_attrs *attrs) -{ -	return virt_to_abs(ptr) + get_dma_direct_offset(dev); -} - -static void dma_direct_unmap_single(struct device *dev, dma_addr_t dma_addr, -				    size_t size, -				    enum dma_data_direction direction, -				    struct dma_attrs *attrs) -{ -} - -static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, -			     int nents, enum dma_data_direction direction, -			     struct dma_attrs *attrs) -{ -	struct scatterlist *sg; -	int i; - -	for_each_sg(sgl, sg, nents, i) { -		sg->dma_address = sg_phys(sg) + get_dma_direct_offset(dev); -		sg->dma_length = sg->length; -	} - -	return nents; -} - -static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg, -				int nents, enum dma_data_direction direction, -				struct dma_attrs *attrs) -{ -} - -static int dma_direct_dma_supported(struct device *dev, u64 mask) -{ -	/* Could be improved to check for memory though it better be -	 * done via some global so platforms can set the limit in case -	 * they have limited DMA windows -	 */ -	return mask >= DMA_32BIT_MASK; -} - -struct dma_mapping_ops dma_direct_ops = { -	.alloc_coherent	= dma_direct_alloc_coherent, -	.free_coherent	= dma_direct_free_coherent, -	.map_single	= dma_direct_map_single, -	.unmap_single	= dma_direct_unmap_single, -	.map_sg		= dma_direct_map_sg, -	.unmap_sg	= dma_direct_unmap_sg, -	.dma_supported	= dma_direct_dma_supported, -}; -EXPORT_SYMBOL(dma_direct_ops); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 1cbbf703364..7ecc0d1855c 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1158,7 +1158,7 @@ machine_check_in_rtas:  #endif /* CONFIG_PPC_RTAS */ -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER  #ifdef CONFIG_DYNAMIC_FTRACE  _GLOBAL(mcount)  _GLOBAL(_mcount) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 2d802e97097..e6d52845854 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -512,31 +512,12 @@ _GLOBAL(ret_from_except_lite)  #endif  restore: -	ld	r5,SOFTE(r1) -#ifdef CONFIG_PPC_ISERIES  BEGIN_FW_FTR_SECTION -	cmpdi	0,r5,0 -	beq	4f -	/* Check for pending interrupts (iSeries) */ -	ld	r3,PACALPPACAPTR(r13) -	ld	r3,LPPACAANYINT(r3) -	cmpdi	r3,0 -	beq+	4f			/* skip do_IRQ if no interrupts */ - -	li	r3,0 -	stb	r3,PACASOFTIRQEN(r13)	/* ensure we are soft-disabled */ -#ifdef CONFIG_TRACE_IRQFLAGS -	bl	.trace_hardirqs_off -	mfmsr	r10 -#endif -	ori	r10,r10,MSR_EE -	mtmsrd	r10			/* hard-enable again */ -	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.do_IRQ -	b	.ret_from_except_lite		/* loop back and handle more */ -4: 
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES) -#endif +	ld	r5,SOFTE(r1) +FW_FTR_SECTION_ELSE +	b	iseries_check_pending_irqs +ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) +2:  	TRACE_AND_RESTORE_IRQ(r5);  	/* extract EE bit and use it to restore paca->hard_enabled */ @@ -592,6 +573,30 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)  	rfid  	b	.	/* prevent speculative execution */ +iseries_check_pending_irqs: +#ifdef CONFIG_PPC_ISERIES +	ld	r5,SOFTE(r1) +	cmpdi	0,r5,0 +	beq	2b +	/* Check for pending interrupts (iSeries) */ +	ld	r3,PACALPPACAPTR(r13) +	ld	r3,LPPACAANYINT(r3) +	cmpdi	r3,0 +	beq+	2b			/* skip do_IRQ if no interrupts */ + +	li	r3,0 +	stb	r3,PACASOFTIRQEN(r13)	/* ensure we are soft-disabled */ +#ifdef CONFIG_TRACE_IRQFLAGS +	bl	.trace_hardirqs_off +	mfmsr	r10 +#endif +	ori	r10,r10,MSR_EE +	mtmsrd	r10			/* hard-enable again */ +	addi	r3,r1,STACK_FRAME_OVERHEAD +	bl	.do_IRQ +	b	.ret_from_except_lite		/* loop back and handle more */ +#endif +  do_work:  #ifdef CONFIG_PREEMPT  	andi.	r0,r3,MSR_PR	/* Returning to user mode? */ @@ -685,10 +690,6 @@ _GLOBAL(enter_rtas)  	std	r7,_DAR(r1)  	mfdsisr	r8  	std	r8,_DSISR(r1) -	mfsrr0	r9 -	std	r9,_SRR0(r1) -	mfsrr1	r10 -	std	r10,_SRR1(r1)  	/* Temporary workaround to clear CR until RTAS can be modified to  	 * ignore all bits. @@ -749,6 +750,10 @@ _STATIC(rtas_return_loc)  	mfspr	r4,SPRN_SPRG3	        /* Get PACA */  	clrldi	r4,r4,2			/* convert to realmode address */ +	bcl	20,31,$+4 +0:	mflr	r3 +	ld	r3,(1f-0b)(r3)		/* get &.rtas_restore_regs */ +  	mfmsr   r6  	li	r0,MSR_RI  	andc	r6,r6,r0 @@ -756,7 +761,6 @@ _STATIC(rtas_return_loc)  	mtmsrd  r6          ld	r1,PACAR1(r4)           /* Restore our SP */ -	LOAD_REG_IMMEDIATE(r3,.rtas_restore_regs)          ld	r4,PACASAVEDMSR(r4)     /* Restore our MSR */  	mtspr	SPRN_SRR0,r3 @@ -764,6 +768,9 @@ _STATIC(rtas_return_loc)  	rfid  	b	.	
/* prevent speculative execution */ +	.align	3 +1:	.llong	.rtas_restore_regs +  _STATIC(rtas_restore_regs)  	/* relocation is on at this point */  	REST_GPR(2, r1)			/* Restore the TOC */ @@ -783,10 +790,6 @@ _STATIC(rtas_restore_regs)  	mtdar	r7  	ld	r8,_DSISR(r1)  	mtdsisr	r8 -	ld	r9,_SRR0(r1) -	mtsrr0	r9 -	ld	r10,_SRR1(r1) -	mtsrr1	r10          addi	r1,r1,RTAS_FRAME_SIZE	/* Unstack our frame */  	ld	r0,16(r1)		/* get return address */ @@ -881,7 +884,7 @@ _GLOBAL(enter_prom)  	mtlr    r0          blr -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER  #ifdef CONFIG_DYNAMIC_FTRACE  _GLOBAL(mcount)  _GLOBAL(_mcount) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 3855ceb937b..f4b006ed0ab 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -28,17 +28,17 @@ static unsigned int ftrace_nop = 0x60000000;  #endif -static unsigned int notrace ftrace_calc_offset(long ip, long addr) +static unsigned int ftrace_calc_offset(long ip, long addr)  {  	return (int)(addr - ip);  } -notrace unsigned char *ftrace_nop_replace(void) +unsigned char *ftrace_nop_replace(void)  {  	return (char *)&ftrace_nop;  } -notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)  {  	static unsigned int op; @@ -68,7 +68,7 @@ notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)  # define _ASM_PTR	" .long "  #endif -notrace int +int  ftrace_modify_code(unsigned long ip, unsigned char *old_code,  		   unsigned char *new_code)  { @@ -113,7 +113,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,  	return faulted;  } -notrace int ftrace_update_ftrace_func(ftrace_func_t func) +int ftrace_update_ftrace_func(ftrace_func_t func)  {  	unsigned long ip = (unsigned long)(&ftrace_call);  	unsigned char old[MCOUNT_INSN_SIZE], *new; @@ -126,23 +126,6 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func)  	return ret;  } -notrace int ftrace_mcount_set(unsigned long *data) -{ -	unsigned long ip = (long)(&mcount_call); -	unsigned long *addr = data; -	unsigned char old[MCOUNT_INSN_SIZE], *new; - -	/* -	 * Replace the mcount stub with a pointer to the -	 * ip recorder function. -	 */ -	memcpy(old, &mcount_call, MCOUNT_INSN_SIZE); -	new = ftrace_call_replace(ip, *addr); -	*addr = ftrace_modify_code(ip, old, new); - -	return 0; -} -  int __init ftrace_dyn_arch_init(void *data)  {  	/* This is running in kstop_machine */ diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 8bb65751929..0c326823c6d 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -110,6 +110,12 @@ __start:  #ifdef CONFIG_PPC_MULTIPLATFORM  	cmpwi	0,r5,0  	beq	1f + +	/* find out where we are now */ +	bcl	20,31,$+4 +0:	mflr	r8			/* r8 = runtime addr here */ +	addis	r8,r8,(_stext - 0b)@ha +	addi	r8,r8,(_stext - 0b)@l	/* current runtime base addr */  	bl	prom_init  	trap  #endif @@ -369,13 +375,13 @@ i##n:								\  DataAccess:  	EXCEPTION_PROLOG  	mfspr	r10,SPRN_DSISR +	stw	r10,_DSISR(r11)  	andis.	r0,r10,0xa470		/* weird error? 
*/  	bne	1f			/* if not, try to put a PTE */  	mfspr	r4,SPRN_DAR		/* into the hash table */  	rlwinm	r3,r10,32-15,21,21	/* DSISR_STORE -> _PAGE_RW */  	bl	hash_page -1:	stw	r10,_DSISR(r11) -	mr	r5,r10 +1:	lwz	r5,_DSISR(r11)		/* get DSISR value */  	mfspr	r4,SPRN_DAR  	EXC_XFER_EE_LITE(0x300, handle_page_fault) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index cc8fb474d52..b4bcf5a930f 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -82,7 +82,11 @@ END_FTR_SECTION(0, 1)  	/* Catch branch to 0 in real mode */  	trap -	/* Secondary processors spin on this value until it goes to 1. */ +	/* Secondary processors spin on this value until it becomes nonzero. +	 * When it does it contains the real address of the descriptor +	 * of the function that the cpu should jump to to continue +	 * initialization. +	 */  	.globl  __secondary_hold_spinloop  __secondary_hold_spinloop:  	.llong	0x0 @@ -102,6 +106,20 @@ __secondary_hold_acknowledge:  	.llong hvReleaseData-KERNELBASE  #endif /* CONFIG_PPC_ISERIES */ +#ifdef CONFIG_CRASH_DUMP +	/* This flag is set to 1 by a loader if the kernel should run +	 * at the loaded address instead of the linked address.  This +	 * is used by kexec-tools to keep the the kdump kernel in the +	 * crash_kernel region.  The loader is responsible for +	 * observing the alignment requirement. +	 */ +	/* Do not move this variable as kexec-tools knows about it. */ +	. = 0x5c +	.globl	__run_at_load +__run_at_load: +	.long	0x72756e30	/* "run0" -- relocate to 0 by default */ +#endif +  	. = 0x60  /*   * The following code is used to hold secondary processors @@ -109,8 +127,11 @@ __secondary_hold_acknowledge:   * before the bulk of the kernel has been relocated.  This code   * is relocated to physical address 0x60 before prom_init is run.   * All of it must fit below the first exception vector at 0x100. + * Use .globl here not _GLOBAL because we want __secondary_hold + * to be the actual text address, not a descriptor.   */ -_GLOBAL(__secondary_hold) +	.globl	__secondary_hold +__secondary_hold:  	mfmsr	r24  	ori	r24,r24,MSR_RI  	mtmsrd	r24			/* RI on */ @@ -121,16 +142,16 @@ _GLOBAL(__secondary_hold)  	/* Tell the master cpu we're here */  	/* Relocation is off & we are located at an address less */  	/* than 0x100, so only need to grab low order offset.    */ -	std	r24,__secondary_hold_acknowledge@l(0) +	std	r24,__secondary_hold_acknowledge-_stext(0)  	sync  	/* All secondary cpus wait here until told to start. */ -100:	ld	r4,__secondary_hold_spinloop@l(0) -	cmpdi	0,r4,1 -	bne	100b +100:	ld	r4,__secondary_hold_spinloop-_stext(0) +	cmpdi	0,r4,0 +	beq	100b  #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC) -	LOAD_REG_IMMEDIATE(r4, .generic_secondary_smp_init) +	ld	r4,0(r4)		/* deref function descriptor */  	mtctr	r4  	mr	r3,r24  	bctr @@ -147,6 +168,10 @@ exception_marker:  /*   * This is the start of the interrupt handlers for pSeries   * This code runs with relocation off. + * Code from here to __end_interrupts gets copied down to real + * address 0x100 when we are running a relocatable kernel. + * Therefore any relative branches in this section must only + * branch to labels in this section.   */  	. = 0x100  	.globl __start_interrupts @@ -200,7 +225,20 @@ data_access_slb_pSeries:  	mfspr	r10,SPRN_SPRG1  	std	r10,PACA_EXSLB+EX_R13(r13)  	mfspr	r12,SPRN_SRR1		/* and SRR1 */ -	b	.slb_miss_realmode	/* Rel. 
branch works in real mode */ +#ifndef CONFIG_RELOCATABLE +	b	.slb_miss_realmode +#else +	/* +	 * We can't just use a direct branch to .slb_miss_realmode +	 * because the distance from here to there depends on where +	 * the kernel ends up being put. +	 */ +	mfctr	r11 +	ld	r10,PACAKBASE(r13) +	LOAD_HANDLER(r10, .slb_miss_realmode) +	mtctr	r10 +	bctr +#endif  	STD_EXCEPTION_PSERIES(0x400, instruction_access) @@ -225,7 +263,15 @@ instruction_access_slb_pSeries:  	mfspr	r10,SPRN_SPRG1  	std	r10,PACA_EXSLB+EX_R13(r13)  	mfspr	r12,SPRN_SRR1		/* and SRR1 */ -	b	.slb_miss_realmode	/* Rel. branch works in real mode */ +#ifndef CONFIG_RELOCATABLE +	b	.slb_miss_realmode +#else +	mfctr	r11 +	ld	r10,PACAKBASE(r13) +	LOAD_HANDLER(r10, .slb_miss_realmode) +	mtctr	r10 +	bctr +#endif  	MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt)  	STD_EXCEPTION_PSERIES(0x600, alignment) @@ -244,14 +290,12 @@ BEGIN_FTR_SECTION  	beq-	1f  END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)  	mr	r9,r13 -	mfmsr	r10  	mfspr	r13,SPRN_SPRG3  	mfspr	r11,SPRN_SRR0 -	clrrdi	r12,r13,32 -	oris	r12,r12,system_call_common@h -	ori	r12,r12,system_call_common@l +	ld	r12,PACAKBASE(r13) +	ld	r10,PACAKMSR(r13) +	LOAD_HANDLER(r12, system_call_entry)  	mtspr	SPRN_SRR0,r12 -	ori	r10,r10,MSR_IR|MSR_DR|MSR_RI  	mfspr	r12,SPRN_SRR1  	mtspr	SPRN_SRR1,r10  	rfid @@ -325,16 +369,32 @@ do_stab_bolted_pSeries:  	mfspr	r12,SPRN_SPRG2  	EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) +#ifdef CONFIG_PPC_PSERIES +/* + * Vectors for the FWNMI option.  Share common code. + */ +	.globl system_reset_fwnmi +      .align 7 +system_reset_fwnmi: +	HMT_MEDIUM +	mtspr	SPRN_SPRG1,r13		/* save r13 */ +	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) + +	.globl machine_check_fwnmi +      .align 7 +machine_check_fwnmi: +	HMT_MEDIUM +	mtspr	SPRN_SPRG1,r13		/* save r13 */ +	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) + +#endif /* CONFIG_PPC_PSERIES */ + +#ifdef __DISABLED__  /* - * We have some room here  we use that to put - * the peries slb miss user trampoline code so it's reasonably - * away from slb_miss_user_common to avoid problems with rfid - *   * This is used for when the SLB miss handler has to go virtual,   * which doesn't happen for now anymore but will once we re-implement   * dynamic VSIDs for shared page tables   */ -#ifdef __DISABLED__  slb_miss_user_pseries:  	std	r10,PACA_EXGEN+EX_R10(r13)  	std	r11,PACA_EXGEN+EX_R11(r13) @@ -357,25 +417,17 @@ slb_miss_user_pseries:  	b	.				/* prevent spec. execution */  #endif /* __DISABLED__ */ -#ifdef CONFIG_PPC_PSERIES +	.align	7 +	.globl	__end_interrupts +__end_interrupts: +  /* - * Vectors for the FWNMI option.  Share common code. + * Code from here down to __end_handlers is invoked from the + * exception prologs above.  Because the prologs assemble the + * addresses of these handlers using the LOAD_HANDLER macro, + * which uses an addi instruction, these handlers must be in + * the first 32k of the kernel image.   
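
/*
 * For reference, a sketch of the LOAD_HANDLER() macro used in the
 * CONFIG_RELOCATABLE paths above (the real definition lives in the powerpc
 * exception header; quoted from memory, treat as illustrative). reg holds
 * PACAKBASE, the kernel's base address, and addi takes a signed 16-bit
 * immediate -- hence the requirement stated below that these handlers sit
 * within the first 32k after _stext:
 */
#define LOAD_HANDLER(reg, label) \
	addi	reg,reg,(label)-_stext;	/* form absolute handler address */
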
*/ -	.globl system_reset_fwnmi -      .align 7 -system_reset_fwnmi: -	HMT_MEDIUM -	mtspr	SPRN_SPRG1,r13		/* save r13 */ -	EXCEPTION_PROLOG_PSERIES_FORCE_64BIT(PACA_EXGEN, system_reset_common) - -	.globl machine_check_fwnmi -      .align 7 -machine_check_fwnmi: -	HMT_MEDIUM -	mtspr	SPRN_SPRG1,r13		/* save r13 */ -	EXCEPTION_PROLOG_PSERIES_FORCE_64BIT(PACA_EXMC, machine_check_common) - -#endif /* CONFIG_PPC_PSERIES */  /*** Common interrupt handlers ***/ @@ -414,6 +466,10 @@ machine_check_common:  	STD_EXCEPTION_COMMON(0x1800, cbe_thermal, .cbe_thermal_exception)  #endif /* CONFIG_CBE_RAS */ +	.align	7 +system_call_entry: +	b	system_call_common +  /*   * Here we have detected that the kernel stack pointer is bad.   * R9 contains the saved CR, r13 points to the paca, @@ -457,65 +513,6 @@ bad_stack:  	b	1b  /* - * Return from an exception with minimal checks. - * The caller is assumed to have done EXCEPTION_PROLOG_COMMON. - * If interrupts have been enabled, or anything has been - * done that might have changed the scheduling status of - * any task or sent any task a signal, you should use - * ret_from_except or ret_from_except_lite instead of this. - */ -fast_exc_return_irq:			/* restores irq state too */ -	ld	r3,SOFTE(r1) -	TRACE_AND_RESTORE_IRQ(r3); -	ld	r12,_MSR(r1) -	rldicl	r4,r12,49,63		/* get MSR_EE to LSB */ -	stb	r4,PACAHARDIRQEN(r13)	/* restore paca->hard_enabled */ -	b	1f - -	.globl	fast_exception_return -fast_exception_return: -	ld	r12,_MSR(r1) -1:	ld	r11,_NIP(r1) -	andi.	r3,r12,MSR_RI		/* check if RI is set */ -	beq-	unrecov_fer - -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -	andi.	r3,r12,MSR_PR -	beq	2f -	ACCOUNT_CPU_USER_EXIT(r3, r4) -2: -#endif - -	ld	r3,_CCR(r1) -	ld	r4,_LINK(r1) -	ld	r5,_CTR(r1) -	ld	r6,_XER(r1) -	mtcr	r3 -	mtlr	r4 -	mtctr	r5 -	mtxer	r6 -	REST_GPR(0, r1) -	REST_8GPRS(2, r1) - -	mfmsr	r10 -	rldicl	r10,r10,48,1		/* clear EE */ -	rldicr	r10,r10,16,61		/* clear RI (LE is 0 already) */ -	mtmsrd	r10,1 - -	mtspr	SPRN_SRR1,r12 -	mtspr	SPRN_SRR0,r11 -	REST_4GPRS(10, r1) -	ld	r1,GPR1(r1) -	rfid -	b	.	/* prevent speculative execution */ - -unrecov_fer: -	bl	.save_nvgprs -1:	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	.unrecoverable_exception -	b	1b - -/*   * Here r13 points to the paca, r9 contains the saved CR,   * SRR0 and SRR1 are saved in r11 and r12,   * r9 - r13 are saved in paca->exgen. @@ -616,6 +613,9 @@ unrecov_user_slb:   */  _GLOBAL(slb_miss_realmode)  	mflr	r10 +#ifdef CONFIG_RELOCATABLE +	mtctr	r11 +#endif  	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */  	std	r10,PACA_EXSLB+EX_LR(r13)	/* save LR */ @@ -666,11 +666,10 @@ BEGIN_FW_FTR_SECTION  END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)  #endif /* CONFIG_PPC_ISERIES */  	mfspr	r11,SPRN_SRR0 -	clrrdi	r10,r13,32 +	ld	r10,PACAKBASE(r13)  	LOAD_HANDLER(r10,unrecov_slb)  	mtspr	SPRN_SRR0,r10 -	mfmsr	r10 -	ori	r10,r10,MSR_IR|MSR_DR|MSR_RI +	ld	r10,PACAKMSR(r13)  	mtspr	SPRN_SRR1,r10  	rfid  	b	. @@ -766,6 +765,85 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)  	bl	.altivec_unavailable_exception  	b	.ret_from_except +	.align	7 +	.globl vsx_unavailable_common +vsx_unavailable_common: +	EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN) +#ifdef CONFIG_VSX +BEGIN_FTR_SECTION +	bne	.load_up_vsx +1: +END_FTR_SECTION_IFSET(CPU_FTR_VSX) +#endif +	bl	.save_nvgprs +	addi	r3,r1,STACK_FRAME_OVERHEAD +	ENABLE_INTS +	bl	.vsx_unavailable_exception +	b	.ret_from_except + +	.align	7 +	.globl	__end_handlers +__end_handlers: + +/* + * Return from an exception with minimal checks. + * The caller is assumed to have done EXCEPTION_PROLOG_COMMON. 
+ * If interrupts have been enabled, or anything has been + * done that might have changed the scheduling status of + * any task or sent any task a signal, you should use + * ret_from_except or ret_from_except_lite instead of this. + */ +fast_exc_return_irq:			/* restores irq state too */ +	ld	r3,SOFTE(r1) +	TRACE_AND_RESTORE_IRQ(r3); +	ld	r12,_MSR(r1) +	rldicl	r4,r12,49,63		/* get MSR_EE to LSB */ +	stb	r4,PACAHARDIRQEN(r13)	/* restore paca->hard_enabled */ +	b	1f + +	.globl	fast_exception_return +fast_exception_return: +	ld	r12,_MSR(r1) +1:	ld	r11,_NIP(r1) +	andi.	r3,r12,MSR_RI		/* check if RI is set */ +	beq-	unrecov_fer + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +	andi.	r3,r12,MSR_PR +	beq	2f +	ACCOUNT_CPU_USER_EXIT(r3, r4) +2: +#endif + +	ld	r3,_CCR(r1) +	ld	r4,_LINK(r1) +	ld	r5,_CTR(r1) +	ld	r6,_XER(r1) +	mtcr	r3 +	mtlr	r4 +	mtctr	r5 +	mtxer	r6 +	REST_GPR(0, r1) +	REST_8GPRS(2, r1) + +	mfmsr	r10 +	rldicl	r10,r10,48,1		/* clear EE */ +	rldicr	r10,r10,16,61		/* clear RI (LE is 0 already) */ +	mtmsrd	r10,1 + +	mtspr	SPRN_SRR1,r12 +	mtspr	SPRN_SRR0,r11 +	REST_4GPRS(10, r1) +	ld	r1,GPR1(r1) +	rfid +	b	.	/* prevent speculative execution */ + +unrecov_fer: +	bl	.save_nvgprs +1:	addi	r3,r1,STACK_FRAME_OVERHEAD +	bl	.unrecoverable_exception +	b	1b +  #ifdef CONFIG_ALTIVEC  /*   * load_up_altivec(unused, unused, tsk) @@ -840,22 +918,6 @@ _STATIC(load_up_altivec)  	blr  #endif /* CONFIG_ALTIVEC */ -	.align	7 -	.globl vsx_unavailable_common -vsx_unavailable_common: -	EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN) -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION -	bne	.load_up_vsx -1: -END_FTR_SECTION_IFSET(CPU_FTR_VSX) -#endif -	bl	.save_nvgprs -	addi	r3,r1,STACK_FRAME_OVERHEAD -	ENABLE_INTS -	bl	.vsx_unavailable_exception -	b	.ret_from_except -  #ifdef CONFIG_VSX  /*   * load_up_vsx(unused, unused, tsk) @@ -1175,11 +1237,14 @@ _GLOBAL(generic_secondary_smp_init)  	/* turn on 64-bit mode */  	bl	.enable_64b_mode +	/* get the TOC pointer (real address) */ +	bl	.relative_toc +  	/* Set up a paca value for this processor. Since we have the  	 * physical cpu id in r24, we need to search the pacas to find  	 * which logical id maps to our physical one.  	 */ -	LOAD_REG_IMMEDIATE(r13, paca)	/* Get base vaddr of paca array	 */ +	LOAD_REG_ADDR(r13, paca)	/* Get base vaddr of paca array	 */  	li	r5,0			/* logical cpu id                */  1:	lhz	r6,PACAHWCPUID(r13)	/* Load HW procid from paca      */  	cmpw	r6,r24			/* Compare to our id             */ @@ -1208,7 +1273,7 @@ _GLOBAL(generic_secondary_smp_init)  	sync				/* order paca.run and cur_cpu_spec */  	/* See if we need to call a cpu state restore handler */ -	LOAD_REG_IMMEDIATE(r23, cur_cpu_spec) +	LOAD_REG_ADDR(r23, cur_cpu_spec)  	ld	r23,0(r23)  	ld	r23,CPU_SPEC_RESTORE(r23)  	cmpdi	0,r23,0 @@ -1224,10 +1289,15 @@ _GLOBAL(generic_secondary_smp_init)  	b	__secondary_start  #endif +/* + * Turn the MMU off. + * Assumes we're mapped EA == RA if the MMU is on. + */  _STATIC(__mmu_off)  	mfmsr	r3  	andi.	
r0,r3,MSR_IR|MSR_DR  	beqlr +	mflr	r4  	andc	r3,r3,r0  	mtspr	SPRN_SRR0,r4  	mtspr	SPRN_SRR1,r3 @@ -1248,6 +1318,18 @@ _STATIC(__mmu_off)   *   */  _GLOBAL(__start_initialization_multiplatform) +	/* Make sure we are running in 64 bits mode */ +	bl	.enable_64b_mode + +	/* Get TOC pointer (current runtime address) */ +	bl	.relative_toc + +	/* find out where we are now */ +	bcl	20,31,$+4 +0:	mflr	r26			/* r26 = runtime addr here */ +	addis	r26,r26,(_stext - 0b)@ha +	addi	r26,r26,(_stext - 0b)@l	/* current runtime base addr */ +  	/*  	 * Are we booted from a PROM Of-type client-interface ?  	 */ @@ -1259,9 +1341,6 @@ _GLOBAL(__start_initialization_multiplatform)  	mr	r31,r3  	mr	r30,r4 -	/* Make sure we are running in 64 bits mode */ -	bl	.enable_64b_mode -  	/* Setup some critical 970 SPRs before switching MMU off */  	mfspr	r0,SPRN_PVR  	srwi	r0,r0,16 @@ -1276,9 +1355,7 @@ _GLOBAL(__start_initialization_multiplatform)  1:	bl	.__cpu_preinit_ppc970  2: -	/* Switch off MMU if not already */ -	LOAD_REG_IMMEDIATE(r4, .__after_prom_start - KERNELBASE) -	add	r4,r4,r30 +	/* Switch off MMU if not already off */  	bl	.__mmu_off  	b	.__after_prom_start @@ -1293,22 +1370,15 @@ _INIT_STATIC(__boot_from_prom)  	/*  	 * Align the stack to 16-byte boundary  	 * Depending on the size and layout of the ELF sections in the initial -	 * boot binary, the stack pointer will be unalignet on PowerMac +	 * boot binary, the stack pointer may be unaligned on PowerMac  	 */  	rldicr	r1,r1,0,59 -	/* Make sure we are running in 64 bits mode */ -	bl	.enable_64b_mode - -	/* put a relocation offset into r3 */ -	bl	.reloc_offset - -	LOAD_REG_IMMEDIATE(r2,__toc_start) -	addi	r2,r2,0x4000 -	addi	r2,r2,0x4000 - -	/* Relocate the TOC from a virt addr to a real addr */ -	add	r2,r2,r3 +#ifdef CONFIG_RELOCATABLE +	/* Relocate code for where we are now */ +	mr	r3,r26 +	bl	.relocate +#endif  	/* Restore parameters */  	mr	r3,r31 @@ -1318,60 +1388,72 @@ _INIT_STATIC(__boot_from_prom)  	mr	r7,r27  	/* Do all of the interaction with OF client interface */ +	mr	r8,r26  	bl	.prom_init  	/* We never return */  	trap  _STATIC(__after_prom_start) +#ifdef CONFIG_RELOCATABLE +	/* process relocations for the final address of the kernel */ +	lis	r25,PAGE_OFFSET@highest	/* compute virtual base of kernel */ +	sldi	r25,r25,32 +#ifdef CONFIG_CRASH_DUMP +	lwz	r7,__run_at_load-_stext(r26) +	cmplwi	cr0,r7,1	/* kdump kernel ? - stay where we are */ +	bne	1f +	add	r25,r25,r26 +#endif +1:	mr	r3,r25 +	bl	.relocate +#endif  /* - * We need to run with __start at physical address PHYSICAL_START. + * We need to run with _stext at physical address PHYSICAL_START.   * This will leave some code in the first 256B of   * real memory, which are reserved for software use. - * The remainder of the first page is loaded with the fixed - * interrupt vectors.  The next two pages are filled with - * unknown exception placeholders.   *   * Note: This process overwrites the OF exception vectors. - *	r26 == relocation offset - *	r27 == KERNELBASE   */ -	bl	.reloc_offset -	mr	r26,r3 -	LOAD_REG_IMMEDIATE(r27, KERNELBASE) - -	LOAD_REG_IMMEDIATE(r3, PHYSICAL_START)	/* target addr */ - -	// XXX FIXME: Use phys returned by OF (r30) -	add	r4,r27,r26 		/* source addr			 */ -					/* current address of _start	 */ -					/*   i.e. where we are running	 */ -					/*	the source addr		 */ - -	cmpdi	r4,0			/* In some cases the loader may  */ -	bne	1f -	b	.start_here_multiplatform /* have already put us at zero */ -					/* so we can skip the copy.      
*/ -1:	LOAD_REG_IMMEDIATE(r5,copy_to_here) /* # bytes of memory to copy */ -	sub	r5,r5,r27 - +	li	r3,0			/* target addr */ +	mr.	r4,r26			/* In some cases the loader may  */ +	beq	9f			/* have already put us at zero */  	li	r6,0x100		/* Start offset, the first 0x100 */  					/* bytes were copied earlier.	 */ +#ifdef CONFIG_CRASH_DUMP +/* + * Check if the kernel has to be running as relocatable kernel based on the + * variable __run_at_load, if it is set the kernel is treated as relocatable + * kernel, otherwise it will be moved to PHYSICAL_START + */ +	lwz	r7,__run_at_load-_stext(r26) +	cmplwi	cr0,r7,1 +	bne	3f + +	li	r5,__end_interrupts - _stext	/* just copy interrupts */ +	b	5f +3: +#endif +	lis	r5,(copy_to_here - _stext)@ha +	addi	r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */ +  	bl	.copy_and_flush		/* copy the first n bytes	 */  					/* this includes the code being	 */  					/* executed here.		 */ - -	LOAD_REG_IMMEDIATE(r0, 4f)	/* Jump to the copy of this code */ -	mtctr	r0			/* that we just made/relocated	 */ +	addis	r8,r3,(4f - _stext)@ha	/* Jump to the copy of this code */ +	addi	r8,r8,(4f - _stext)@l	/* that we just made */ +	mtctr	r8  	bctr -4:	LOAD_REG_IMMEDIATE(r5,klimit) -	add	r5,r5,r26 -	ld	r5,0(r5)		/* get the value of klimit */ -	sub	r5,r5,r27 -	bl	.copy_and_flush		/* copy the rest */ -	b	.start_here_multiplatform +p_end:	.llong	_end - _stext + +4:	/* Now copy the rest of the kernel up to _end */ +	addis	r5,r26,(p_end - _stext)@ha +	ld	r5,(p_end - _stext)@l(r5)	/* get _end */ +5:	bl	.copy_and_flush		/* copy the rest */ + +9:	b	.start_here_multiplatform  /*   * Copy routine used to copy the kernel to start at physical address 0 @@ -1436,6 +1518,9 @@ _GLOBAL(pmac_secondary_start)  	/* turn on 64-bit mode */  	bl	.enable_64b_mode +	/* get TOC pointer (real address) */ +	bl	.relative_toc +  	/* Copy some CPU settings from CPU 0 */  	bl	.__restore_cpu_ppc970 @@ -1445,10 +1530,10 @@ _GLOBAL(pmac_secondary_start)  	mtmsrd	r3			/* RI on */  	/* Set up a paca value for this processor. */ -	LOAD_REG_IMMEDIATE(r4, paca)	/* Get base vaddr of paca array	*/ -	mulli	r13,r24,PACA_SIZE	 /* Calculate vaddr of right paca */ +	LOAD_REG_ADDR(r4,paca)		/* Get base vaddr of paca array	*/ +	mulli	r13,r24,PACA_SIZE	/* Calculate vaddr of right paca */  	add	r13,r13,r4		/* for this processor.		*/ -	mtspr	SPRN_SPRG3,r13		 /* Save vaddr of paca in SPRG3	*/ +	mtspr	SPRN_SPRG3,r13		/* Save vaddr of paca in SPRG3	*/  	/* Create a temp kernel stack for use before relocation is on.	*/  	ld	r1,PACAEMERGSP(r13) @@ -1476,9 +1561,6 @@ __secondary_start:  	/* Set thread priority to MEDIUM */  	HMT_MEDIUM -	/* Load TOC */ -	ld	r2,PACATOC(r13) -  	/* Do early setup for that CPU (stab, slb, hash table pointer) */  	bl	.early_setup_secondary @@ -1515,9 +1597,11 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES)  /*    * Running with relocation on at this point.  All we want to do is - * zero the stack back-chain pointer before going into C code. + * zero the stack back-chain pointer and get the TOC virtual address + * before going into C code.   
*/  _GLOBAL(start_secondary_prolog) +	ld	r2,PACATOC(r13)  	li	r3,0  	std	r3,0(r1)		/* Zero the stack frame pointer	*/  	bl	.start_secondary @@ -1529,34 +1613,46 @@ _GLOBAL(start_secondary_prolog)   */  _GLOBAL(enable_64b_mode)  	mfmsr	r11			/* grab the current MSR */ -	li	r12,1 -	rldicr	r12,r12,MSR_SF_LG,(63-MSR_SF_LG) -	or	r11,r11,r12 -	li	r12,1 -	rldicr	r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG) +	li	r12,(MSR_SF | MSR_ISF)@highest +	sldi	r12,r12,48  	or	r11,r11,r12  	mtmsrd	r11  	isync  	blr  /* + * This puts the TOC pointer into r2, offset by 0x8000 (as expected + * by the toolchain).  It computes the correct value for wherever we + * are running at the moment, using position-independent code. + */ +_GLOBAL(relative_toc) +	mflr	r0 +	bcl	20,31,$+4 +0:	mflr	r9 +	ld	r2,(p_toc - 0b)(r9) +	add	r2,r2,r9 +	mtlr	r0 +	blr + +p_toc:	.llong	__toc_start + 0x8000 - 0b + +/*   * This is where the main kernel code starts.   */  _INIT_STATIC(start_here_multiplatform) -	/* get a new offset, now that the kernel has moved. */ -	bl	.reloc_offset -	mr	r26,r3 +	/* set up the TOC (real address) */ +	bl	.relative_toc  	/* Clear out the BSS. It may have been done in prom_init,  	 * already but that's irrelevant since prom_init will soon  	 * be detached from the kernel completely. Besides, we need  	 * to clear it now for kexec-style entry.  	 */ -	LOAD_REG_IMMEDIATE(r11,__bss_stop) -	LOAD_REG_IMMEDIATE(r8,__bss_start) +	LOAD_REG_ADDR(r11,__bss_stop) +	LOAD_REG_ADDR(r8,__bss_start)  	sub	r11,r11,r8		/* bss size			*/  	addi	r11,r11,7		/* round up to an even double word */ -	rldicl. r11,r11,61,3		/* shift right by 3		*/ +	srdi.	r11,r11,3		/* shift right by 3		*/  	beq	4f  	addi	r8,r8,-8  	li	r0,0 @@ -1569,35 +1665,35 @@ _INIT_STATIC(start_here_multiplatform)  	ori	r6,r6,MSR_RI  	mtmsrd	r6			/* RI on */ -	/* The following gets the stack and TOC set up with the regs */ +#ifdef CONFIG_RELOCATABLE +	/* Save the physical address we're running at in kernstart_addr */ +	LOAD_REG_ADDR(r4, kernstart_addr) +	clrldi	r0,r25,2 +	std	r0,0(r4) +#endif + +	/* The following gets the stack set up with the regs */  	/* pointing to the real addr of the kernel stack.  This is   */  	/* all done to support the C function call below which sets  */  	/* up the htab.  This is done because we have relocated the  */  	/* kernel but are still running in real mode. */ -	LOAD_REG_IMMEDIATE(r3,init_thread_union) -	add	r3,r3,r26 +	LOAD_REG_ADDR(r3,init_thread_union) -	/* set up a stack pointer (physical address) */ +	/* set up a stack pointer */  	addi	r1,r3,THREAD_SIZE  	li	r0,0  	stdu	r0,-STACK_FRAME_OVERHEAD(r1) -	/* set up the TOC (physical address) */ -	LOAD_REG_IMMEDIATE(r2,__toc_start) -	addi	r2,r2,0x4000 -	addi	r2,r2,0x4000 -	add	r2,r2,r26 -  	/* Do very early kernel initializations, including initial hash table,  	 * stab and slb setup before we turn on relocation.	*/  	/* Restore parameters passed from prom_init/kexec */  	mr	r3,r31 - 	bl	.early_setup +	bl	.early_setup		/* also sets r13 and SPRG3 */ -	LOAD_REG_IMMEDIATE(r3, .start_here_common) -	LOAD_REG_IMMEDIATE(r4, MSR_KERNEL) +	LOAD_REG_ADDR(r3, .start_here_common) +	ld	r4,PACAKMSR(r13)  	mtspr	SPRN_SRR0,r3  	mtspr	SPRN_SRR1,r4  	rfid @@ -1606,20 +1702,10 @@ _INIT_STATIC(start_here_multiplatform)  	/* This is where all platforms converge execution */  _INIT_GLOBAL(start_here_common)  	/* relocation is on at this point */ +	std	r1,PACAKSAVE(r13) -	/* The following code sets up the SP and TOC now that we are */ -	/* running with translation enabled. 
*/
-
-	LOAD_REG_IMMEDIATE(r3,init_thread_union)
-
-	/* set up the stack */
-	addi	r1,r3,THREAD_SIZE
-	li	r0,0
-	stdu	r0,-STACK_FRAME_OVERHEAD(r1)
-
-	/* Load the TOC */
+	/* Load the TOC (virtual address) */
 	ld	r2,PACATOC(r13)
-	std	r1,PACAKSAVE(r13)
 
 	bl	.setup_system
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 3cb52fa0eda..590304c24da 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -422,7 +422,6 @@ skpinv:	addi	r6,r6,1				/* Increment */
  *   r12 is pointer to the pte
  */
 #ifdef CONFIG_PTE_64BIT
-#define PTE_FLAGS_OFFSET	4
 #define FIND_PTE	\
 	rlwinm	r12, r10, 13, 19, 29;	/* Compute pgdir/pmd offset */	\
 	lwzx	r11, r12, r11;		/* Get pgd/pmd entry */		\
@@ -431,7 +430,6 @@ skpinv:	addi	r6,r6,1				/* Increment */
 	rlwimi	r12, r10, 23, 20, 28;	/* Compute pte address */	\
 	lwz	r11, 4(r12);		/* Get pte entry */
 #else
-#define PTE_FLAGS_OFFSET	0
 #define FIND_PTE	\
 	rlwimi	r11, r10, 12, 20, 29;	/* Create L1 (pgdir/pmd) address */	\
 	lwz	r11, 0(r11);		/* Get L1 entry */			\
@@ -579,13 +577,19 @@ interrupt_base:
 	FIND_PTE
 	andc.	r13,r13,r11		/* Check permission */
-	bne	2f			/* Bail if permission mismach */
 
 #ifdef CONFIG_PTE_64BIT
-	lwz	r13, 0(r12)
+#ifdef CONFIG_SMP
+	subf	r10,r11,r12		/* create false data dep */
+	lwzx	r13,r11,r10		/* Get upper pte bits */
+#else
+	lwz	r13,0(r12)		/* Get upper pte bits */
+#endif
 #endif
 
-	 /* Jump to common tlb load */
+	bne	2f			/* Bail if permission/valid mismatch */
+
+	/* Jump to common tlb load */
 	b	finish_tlb_load
 2:
 	/* The bailout.  Restore registers to pre-exception conditions
@@ -640,12 +644,18 @@ interrupt_base:
 	FIND_PTE
 	andc.	r13,r13,r11		/* Check permission */
-	bne	2f			/* Bail if permission mismach */
 
 #ifdef CONFIG_PTE_64BIT
-	lwz	r13, 0(r12)
+#ifdef CONFIG_SMP
+	subf	r10,r11,r12		/* create false data dep */
+	lwzx	r13,r11,r10		/* Get upper pte bits */
+#else
+	lwz	r13,0(r12)		/* Get upper pte bits */
+#endif
 #endif
+	bne	2f			/* Bail if permission mismatch */
+
 	/* Jump to common TLB load point */
 	b	finish_tlb_load
 
@@ -702,7 +712,7 @@ interrupt_base:
 /*
  * Both the instruction and data TLB miss get to this
  * point to load the TLB.
- *	r10 - EA of fault + *	r10 - available to use   *	r11 - TLB (info from Linux PTE)   *	r12 - available to use   *	r13 - upper bits of PTE (if PTE_64BIT) or available to use diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index a06362223f8..64299d28f36 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -79,20 +79,21 @@ static void ibmebus_free_coherent(struct device *dev,  	kfree(vaddr);  } -static dma_addr_t ibmebus_map_single(struct device *dev, -				     void *ptr, -				     size_t size, -				     enum dma_data_direction direction, -				     struct dma_attrs *attrs) +static dma_addr_t ibmebus_map_page(struct device *dev, +				   struct page *page, +				   unsigned long offset, +				   size_t size, +				   enum dma_data_direction direction, +				   struct dma_attrs *attrs)  { -	return (dma_addr_t)(ptr); +	return (dma_addr_t)(page_address(page) + offset);  } -static void ibmebus_unmap_single(struct device *dev, -				 dma_addr_t dma_addr, -				 size_t size, -				 enum dma_data_direction direction, -				 struct dma_attrs *attrs) +static void ibmebus_unmap_page(struct device *dev, +			       dma_addr_t dma_addr, +			       size_t size, +			       enum dma_data_direction direction, +			       struct dma_attrs *attrs)  {  	return;  } @@ -129,11 +130,11 @@ static int ibmebus_dma_supported(struct device *dev, u64 mask)  static struct dma_mapping_ops ibmebus_dma_ops = {  	.alloc_coherent = ibmebus_alloc_coherent,  	.free_coherent  = ibmebus_free_coherent, -	.map_single     = ibmebus_map_single, -	.unmap_single   = ibmebus_unmap_single,  	.map_sg         = ibmebus_map_sg,  	.unmap_sg       = ibmebus_unmap_sg,  	.dma_supported  = ibmebus_dma_supported, +	.map_page       = ibmebus_map_page, +	.unmap_page     = ibmebus_unmap_page,  };  static int ibmebus_match_path(struct device *dev, void *data) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 550a19399bf..1bfa706b96e 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -32,6 +32,7 @@  #include <linux/dma-mapping.h>  #include <linux/bitops.h>  #include <linux/iommu-helper.h> +#include <linux/crash_dump.h>  #include <asm/io.h>  #include <asm/prom.h>  #include <asm/iommu.h> @@ -51,17 +52,6 @@ static int protect4gb = 1;  static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int); -static inline unsigned long iommu_num_pages(unsigned long vaddr, -					    unsigned long slen) -{ -	unsigned long npages; - -	npages = IOMMU_PAGE_ALIGN(vaddr + slen) - (vaddr & IOMMU_PAGE_MASK); -	npages >>= IOMMU_PAGE_SHIFT; - -	return npages; -} -  static int __init setup_protect4gb(char *str)  {  	if (strcmp(str, "on") == 0) @@ -325,7 +315,7 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,  		}  		/* Allocate iommu entries for that segment */  		vaddr = (unsigned long) sg_virt(s); -		npages = iommu_num_pages(vaddr, slen); +		npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE);  		align = 0;  		if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && slen >= PAGE_SIZE &&  		    (vaddr & ~PAGE_MASK) == 0) @@ -418,7 +408,8 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,  			unsigned long vaddr, npages;  			vaddr = s->dma_address & IOMMU_PAGE_MASK; -			npages = iommu_num_pages(s->dma_address, s->dma_length); +			npages = iommu_num_pages(s->dma_address, s->dma_length, +						 IOMMU_PAGE_SIZE);  			__iommu_free(tbl, vaddr, npages);  			s->dma_address = DMA_ERROR_CODE;  			s->dma_length = 0; @@ -452,7 +443,8 @@ void iommu_unmap_sg(struct 
iommu_table *tbl, struct scatterlist *sglist,  		if (sg->dma_length == 0)  			break; -		npages = iommu_num_pages(dma_handle, sg->dma_length); +		npages = iommu_num_pages(dma_handle, sg->dma_length, +					 IOMMU_PAGE_SIZE);  		__iommu_free(tbl, dma_handle, npages);  		sg = sg_next(sg);  	} @@ -467,6 +459,42 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,  	spin_unlock_irqrestore(&(tbl->it_lock), flags);  } +static void iommu_table_clear(struct iommu_table *tbl) +{ +	if (!is_kdump_kernel()) { +		/* Clear the table in case firmware left allocations in it */ +		ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size); +		return; +	} + +#ifdef CONFIG_CRASH_DUMP +	if (ppc_md.tce_get) { +		unsigned long index, tceval, tcecount = 0; + +		/* Reserve the existing mappings left by the first kernel. */ +		for (index = 0; index < tbl->it_size; index++) { +			tceval = ppc_md.tce_get(tbl, index + tbl->it_offset); +			/* +			 * Freed TCE entry contains 0x7fffffffffffffff on JS20 +			 */ +			if (tceval && (tceval != 0x7fffffffffffffffUL)) { +				__set_bit(index, tbl->it_map); +				tcecount++; +			} +		} + +		if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) { +			printk(KERN_WARNING "TCE table is full; freeing "); +			printk(KERN_WARNING "%d entries for the kdump boot\n", +				KDUMP_MIN_TCE_ENTRIES); +			for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES; +				index < tbl->it_size; index++) +				__clear_bit(index, tbl->it_map); +		} +	} +#endif +} +  /*   * Build a iommu_table structure.  This contains a bit map which   * is used to manage allocation of the tce space. @@ -493,38 +521,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)  	tbl->it_largehint = tbl->it_halfpoint;  	spin_lock_init(&tbl->it_lock); -#ifdef CONFIG_CRASH_DUMP -	if (ppc_md.tce_get) { -		unsigned long index; -		unsigned long tceval; -		unsigned long tcecount = 0; - -		/* -		 * Reserve the existing mappings left by the first kernel. -		 */ -		for (index = 0; index < tbl->it_size; index++) { -			tceval = ppc_md.tce_get(tbl, index + tbl->it_offset); -			/* -			 * Freed TCE entry contains 0x7fffffffffffffff on JS20 -			 */ -			if (tceval && (tceval != 0x7fffffffffffffffUL)) { -				__set_bit(index, tbl->it_map); -				tcecount++; -			} -		} -		if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) { -			printk(KERN_WARNING "TCE table is full; "); -			printk(KERN_WARNING "freeing %d entries for the kdump boot\n", -				KDUMP_MIN_TCE_ENTRIES); -			for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES; -				index < tbl->it_size; index++) -				__clear_bit(index, tbl->it_map); -		} -	} -#else -	/* Clear the hardware table in case firmware left allocations in it */ -	ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size); -#endif +	iommu_table_clear(tbl);  	if (!welcomed) {  		printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n", @@ -568,23 +565,25 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)  }  /* Creates TCEs for a user provided buffer.  The user buffer must be - * contiguous real kernel storage (not vmalloc).  The address of the buffer - * passed here is the kernel (virtual) address of the buffer.  The buffer - * need not be page aligned, the dma_addr_t returned will point to the same - * byte within the page as vaddr. + * contiguous real kernel storage (not vmalloc).  The address passed here + * comprises a page address and offset into that page. The dma_addr_t + * returned will point to the same byte within the page as was passed in.   
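
[Editor's note] The iommu_num_pages() calls above now pass the IOMMU page
size explicitly and use the generic helper (the local static inline is
removed). For reference, a sketch of the equivalent computation -- names
aside, this is what the generic lib/iommu-helper.c version works out:

	static unsigned long iommu_num_pages_sketch(unsigned long addr,
						    unsigned long len,
						    unsigned long io_page_size)
	{
		/* bytes from the start of the first IOMMU page covering
		 * addr to the end of the buffer, in whole IOMMU pages */
		unsigned long size = (addr & (io_page_size - 1)) + len;

		return DIV_ROUND_UP(size, io_page_size);
	}
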
*/ -dma_addr_t iommu_map_single(struct device *dev, struct iommu_table *tbl, -			    void *vaddr, size_t size, unsigned long mask, -		enum dma_data_direction direction, struct dma_attrs *attrs) +dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, +			  struct page *page, unsigned long offset, size_t size, +			  unsigned long mask, enum dma_data_direction direction, +			  struct dma_attrs *attrs)  {  	dma_addr_t dma_handle = DMA_ERROR_CODE; +	void *vaddr;  	unsigned long uaddr;  	unsigned int npages, align;  	BUG_ON(direction == DMA_NONE); +	vaddr = page_address(page) + offset;  	uaddr = (unsigned long)vaddr; -	npages = iommu_num_pages(uaddr, size); +	npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE);  	if (tbl) {  		align = 0; @@ -608,16 +607,16 @@ dma_addr_t iommu_map_single(struct device *dev, struct iommu_table *tbl,  	return dma_handle;  } -void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle, -		size_t size, enum dma_data_direction direction, -		struct dma_attrs *attrs) +void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle, +		      size_t size, enum dma_data_direction direction, +		      struct dma_attrs *attrs)  {  	unsigned int npages;  	BUG_ON(direction == DMA_NONE);  	if (tbl) { -		npages = iommu_num_pages(dma_handle, size); +		npages = iommu_num_pages(dma_handle, size, IOMMU_PAGE_SIZE);  		iommu_free(tbl, dma_handle, npages);  	}  } diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index d972decf032..ac222d0ab12 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -439,8 +439,8 @@ void do_softirq(void)  static LIST_HEAD(irq_hosts);  static DEFINE_SPINLOCK(irq_big_lock); -static DEFINE_PER_CPU(unsigned int, irq_radix_reader); -static unsigned int irq_radix_writer; +static unsigned int revmap_trees_allocated; +static DEFINE_MUTEX(revmap_trees_mutex);  struct irq_map_entry irq_map[NR_IRQS];  static unsigned int irq_virq_count = NR_IRQS;  static struct irq_host *irq_default_host; @@ -583,57 +583,6 @@ void irq_set_virq_count(unsigned int count)  		irq_virq_count = count;  } -/* radix tree not lockless safe ! 
we use a brlock-type mecanism
- * for now, until we can use a lockless radix tree
- */
-static void irq_radix_wrlock(unsigned long *flags)
-{
-	unsigned int cpu, ok;
-
-	spin_lock_irqsave(&irq_big_lock, *flags);
-	irq_radix_writer = 1;
-	smp_mb();
-	do {
-		barrier();
-		ok = 1;
-		for_each_possible_cpu(cpu) {
-			if (per_cpu(irq_radix_reader, cpu)) {
-				ok = 0;
-				break;
-			}
-		}
-		if (!ok)
-			cpu_relax();
-	} while(!ok);
-}
-
-static void irq_radix_wrunlock(unsigned long flags)
-{
-	smp_wmb();
-	irq_radix_writer = 0;
-	spin_unlock_irqrestore(&irq_big_lock, flags);
-}
-
-static void irq_radix_rdlock(unsigned long *flags)
-{
-	local_irq_save(*flags);
-	__get_cpu_var(irq_radix_reader) = 1;
-	smp_mb();
-	if (likely(irq_radix_writer == 0))
-		return;
-	__get_cpu_var(irq_radix_reader) = 0;
-	smp_wmb();
-	spin_lock(&irq_big_lock);
-	__get_cpu_var(irq_radix_reader) = 1;
-	spin_unlock(&irq_big_lock);
-}
-
-static void irq_radix_rdunlock(unsigned long flags)
-{
-	__get_cpu_var(irq_radix_reader) = 0;
-	local_irq_restore(flags);
-}
-
 static int irq_setup_virq(struct irq_host *host, unsigned int virq,
 			    irq_hw_number_t hwirq)
 {
@@ -788,7 +737,6 @@ void irq_dispose_mapping(unsigned int virq)
 {
 	struct irq_host *host;
 	irq_hw_number_t hwirq;
-	unsigned long flags;
 
 	if (virq == NO_IRQ)
 		return;
@@ -821,12 +769,16 @@ void irq_dispose_mapping(unsigned int virq)
 			host->revmap_data.linear.revmap[hwirq] = NO_IRQ;
 		break;
 	case IRQ_HOST_MAP_TREE:
-		/* Check if radix tree allocated yet */
-		if (host->revmap_data.tree.gfp_mask == 0)
+		/*
+		 * Check if the radix tree has been allocated yet; if not,
+		 * then there is nothing to remove.
+		 */
+		smp_rmb();
+		if (revmap_trees_allocated < 1)
 			break;
-		irq_radix_wrlock(&flags);
+		mutex_lock(&revmap_trees_mutex);
 		radix_tree_delete(&host->revmap_data.tree, hwirq);
-		irq_radix_wrunlock(flags);
+		mutex_unlock(&revmap_trees_mutex);
 		break;
 	}
 
@@ -875,43 +827,62 @@ unsigned int irq_find_mapping(struct irq_host *host,
 EXPORT_SYMBOL_GPL(irq_find_mapping);
 
-unsigned int irq_radix_revmap(struct irq_host *host,
-			      irq_hw_number_t hwirq)
+unsigned int irq_radix_revmap_lookup(struct irq_host *host,
+				     irq_hw_number_t hwirq)
 {
-	struct radix_tree_root *tree;
 	struct irq_map_entry *ptr;
 	unsigned int virq;
-	unsigned long flags;
 
 	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
 
-	/* Check if the radix tree exist yet. We test the value of
-	 * the gfp_mask for that. Sneaky but saves another int in the
-	 * structure. If not, we fallback to slow mode
+	/*
+	 * Check if the radix tree exists and has been initialized.
+	 * If not, we fall back to slow mode
 	 */
-	tree = &host->revmap_data.tree;
-	if (tree->gfp_mask == 0)
+	if (revmap_trees_allocated < 2)
 		return irq_find_mapping(host, hwirq);
 
 	/* Now try to resolve */
-	irq_radix_rdlock(&flags);
-	ptr = radix_tree_lookup(tree, hwirq);
-	irq_radix_rdunlock(flags);
+	/*
+	 * No rcu_read_lock(ing) needed, the ptr returned can't go under us
+	 * as it's referencing an entry in the static irq_map table.
+	 */
+	ptr = radix_tree_lookup(&host->revmap_data.tree, hwirq);
 
-	/* Found it, return */
-	if (ptr) {
+	/*
+	 * If found in radix tree, then fine.
+	 * Else fall back to linear lookup - this should not happen in practice
+	 * as it means that we failed to insert the node in the radix tree.
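
[Editor's note] A minimal sketch of the publish/consume discipline that the
new revmap_trees_allocated counter implements (simplified here to a single
flag; init_tree() and use_tree() are placeholders, not kernel functions):

	static unsigned int tree_ready;

	static void publisher(void)	/* cf. irq_late_init() */
	{
		init_tree();		/* set up the radix tree */
		smp_wmb();		/* make the init visible first... */
		tree_ready = 1;		/* ...then publish the flag */
	}

	static int consumer(void)	/* cf. the lookup/insert paths */
	{
		if (!tree_ready)
			return -EAGAIN;	/* not published: take the slow path */
		smp_rmb();		/* pairs with the smp_wmb() above */
		return use_tree();	/* tree init is guaranteed visible */
	}
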
+	 */ +	if (ptr)  		virq = ptr - irq_map; -		return virq; -	} +	else +		virq = irq_find_mapping(host, hwirq); + +	return virq; +} + +void irq_radix_revmap_insert(struct irq_host *host, unsigned int virq, +			     irq_hw_number_t hwirq) +{ + +	WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE); + +	/* +	 * Check if the radix tree exists yet. +	 * If not, then the irq will be inserted into the tree when it gets +	 * initialized. +	 */ +	smp_rmb(); +	if (revmap_trees_allocated < 1) +		return; -	/* If not there, try to insert it */ -	virq = irq_find_mapping(host, hwirq);  	if (virq != NO_IRQ) { -		irq_radix_wrlock(&flags); -		radix_tree_insert(tree, hwirq, &irq_map[virq]); -		irq_radix_wrunlock(flags); +		mutex_lock(&revmap_trees_mutex); +		radix_tree_insert(&host->revmap_data.tree, hwirq, +				  &irq_map[virq]); +		mutex_unlock(&revmap_trees_mutex);  	} -	return virq;  }  unsigned int irq_linear_revmap(struct irq_host *host, @@ -1020,14 +991,44 @@ void irq_early_init(void)  static int irq_late_init(void)  {  	struct irq_host *h; -	unsigned long flags; +	unsigned int i; -	irq_radix_wrlock(&flags); +	/* +	 * No mutual exclusion with respect to accessors of the tree is needed +	 * here as the synchronization is done via the state variable +	 * revmap_trees_allocated. +	 */  	list_for_each_entry(h, &irq_hosts, link) {  		if (h->revmap_type == IRQ_HOST_MAP_TREE) -			INIT_RADIX_TREE(&h->revmap_data.tree, GFP_ATOMIC); +			INIT_RADIX_TREE(&h->revmap_data.tree, GFP_KERNEL); +	} + +	/* +	 * Make sure the radix trees inits are visible before setting +	 * the flag +	 */ +	smp_wmb(); +	revmap_trees_allocated = 1; + +	/* +	 * Insert the reverse mapping for those interrupts already present +	 * in irq_map[]. +	 */ +	mutex_lock(&revmap_trees_mutex); +	for (i = 0; i < irq_virq_count; i++) { +		if (irq_map[i].host && +		    (irq_map[i].host->revmap_type == IRQ_HOST_MAP_TREE)) +			radix_tree_insert(&irq_map[i].host->revmap_data.tree, +					  irq_map[i].hwirq, &irq_map[i]);  	} -	irq_radix_wrunlock(flags); +	mutex_unlock(&revmap_trees_mutex); + +	/* +	 * Make sure the radix trees insertions are visible before setting +	 * the flag +	 */ +	smp_wmb(); +	revmap_trees_allocated = 2;  	return 0;  } diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index b3eef30b513..d051e8cbcd0 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -510,10 +510,10 @@ static ssize_t update_ppp(u64 *entitlement, u8 *weight)  		return -EINVAL;  	pr_debug("%s: current_entitled = %lu, current_weight = %u\n", -	         __FUNCTION__, ppp_data.entitlement, ppp_data.weight); +		 __func__, ppp_data.entitlement, ppp_data.weight);  	pr_debug("%s: new_entitled = %lu, new_weight = %u\n", -		 __FUNCTION__, new_entitled, new_weight); +		 __func__, new_entitled, new_weight);  	retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight);  	return retval; @@ -556,10 +556,10 @@ static ssize_t update_mpp(u64 *entitlement, u8 *weight)  		return -EINVAL;  	pr_debug("%s: current_entitled = %lu, current_weight = %u\n", -	         __FUNCTION__, mpp_data.entitled_mem, mpp_data.mem_weight); +	         __func__, mpp_data.entitled_mem, mpp_data.mem_weight);  	pr_debug("%s: new_entitled = %lu, new_weight = %u\n", -	         __FUNCTION__, new_entitled, new_weight); +		 __func__, new_entitled, new_weight);  	rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight);  	return rc; diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index aab76887a84..ac2a21f45c7 100644 --- 
a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -88,11 +88,13 @@ void __init reserve_crashkernel(void)
 
 	crash_size = crashk_res.end - crashk_res.start + 1;
 
+#ifndef CONFIG_RELOCATABLE
 	if (crashk_res.start != KDUMP_KERNELBASE)
 		printk("Crash kernel location must be 0x%x\n",
 				KDUMP_KERNELBASE);
 
 	crashk_res.start = KDUMP_KERNELBASE;
+#endif
 	crash_size = PAGE_ALIGN(crash_size);
 	crashk_res.end = crashk_res.start + crash_size - 1;
 
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index a168514d860..3c4ca046e85 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -270,8 +270,8 @@ void default_machine_kexec(struct kimage *image)
         * using debugger IPI.
         */
 
-       if (crashing_cpu == -1)
-               kexec_prepare_cpus();
+	if (crashing_cpu == -1)
+		kexec_prepare_cpus();
 
 	/* switch to a staticly allocated stack.  Based on irq stack code.
 	 * XXX: the task struct will likely be invalid once we do the copy!
@@ -312,11 +312,24 @@ static struct property kernel_end_prop = {
 static void __init export_htab_values(void)
 {
 	struct device_node *node;
+	struct property *prop;
 
 	node = of_find_node_by_path("/chosen");
 	if (!node)
 		return;
 
+	/* remove any stale properties so ours can be found */
+	prop = of_find_property(node, kernel_end_prop.name, NULL);
+	if (prop)
+		prom_remove_property(node, prop);
+	prop = of_find_property(node, htab_base_prop.name, NULL);
+	if (prop)
+		prom_remove_property(node, prop);
+	prop = of_find_property(node, htab_size_prop.name, NULL);
+	if (prop)
+		prom_remove_property(node, prop);
+
+	/* information needed by userspace when using default_machine_kexec */
 	kernel_end = __pa(_end);
 	prom_add_property(node, &kernel_end_prop);
 
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 85cb6f34084..2d29752cbe1 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -31,11 +31,14 @@ _GLOBAL(reloc_offset)
 	mflr	r0
 	bl	1f
1:	mflr	r3
-	LOAD_REG_IMMEDIATE(r4,1b)
+	PPC_LL	r4,(2f-1b)(r3)
 	subf	r3,r4,r3
 	mtlr	r0
 	blr
 
+	.align	3
+2:	PPC_LONG 1b
+
 /*
  * add_reloc_offset(x) returns x + reloc_offset().
  */
@@ -43,12 +46,15 @@ _GLOBAL(add_reloc_offset)
 	mflr	r0
 	bl	1f
1:	mflr	r5
-	LOAD_REG_IMMEDIATE(r4,1b)
+	PPC_LL	r4,(2f-1b)(r5)
 	subf	r5,r4,r5
 	add	r3,r3,r5
 	mtlr	r0
 	blr
 
+	.align	3
+2:	PPC_LONG 1b
+
 _GLOBAL(kernel_execve)
 	li	r0,__NR_execve
 	sc
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 7a6dfbca768..bdc8b0e860e 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -274,6 +274,10 @@ _GLOBAL(real_writeb)
 /*
  * Flush MMU TLB
  */
+#ifndef CONFIG_FSL_BOOKE
+_GLOBAL(_tlbil_all)
+_GLOBAL(_tlbil_pid)
+#endif
 _GLOBAL(_tlbia)
 #if defined(CONFIG_40x)
 	sync			/* Flush to memory before changing mapping */
@@ -344,6 +348,9 @@ _GLOBAL(_tlbia)
 /*
  * Flush MMU TLB for a particular address
  */
+#ifndef CONFIG_FSL_BOOKE
+_GLOBAL(_tlbil_va)
+#endif
 _GLOBAL(_tlbie)
 #if defined(CONFIG_40x)
 	/* We run the search with interrupts disabled because we have to change
@@ -436,6 +443,57 @@ _GLOBAL(_tlbie)
 #endif /* !
CONFIG_40x */  	blr +#if defined(CONFIG_FSL_BOOKE) +/* + * Flush MMU TLB, but only on the local processor (no broadcast) + */ +_GLOBAL(_tlbil_all) +#define MMUCSR0_TLBFI	(MMUCSR0_TLB0FI | MMUCSR0_TLB1FI | \ +			 MMUCSR0_TLB2FI | MMUCSR0_TLB3FI) +	li	r3,(MMUCSR0_TLBFI)@l +	mtspr	SPRN_MMUCSR0, r3 +1: +	mfspr	r3,SPRN_MMUCSR0 +	andi.	r3,r3,MMUCSR0_TLBFI@l +	bne	1b +	blr + +/* + * Flush MMU TLB for a particular process id, but only on the local processor + * (no broadcast) + */ +_GLOBAL(_tlbil_pid) +/* we currently do an invalidate all since we don't have per pid invalidate */ +	li	r3,(MMUCSR0_TLBFI)@l +	mtspr	SPRN_MMUCSR0, r3 +1: +	mfspr	r3,SPRN_MMUCSR0 +	andi.	r3,r3,MMUCSR0_TLBFI@l +	bne	1b +	msync +	isync +	blr + +/* + * Flush MMU TLB for a particular address, but only on the local processor + * (no broadcast) + */ +_GLOBAL(_tlbil_va) +	slwi	r4,r4,16 +	mtspr	SPRN_MAS6,r4		/* assume AS=0 for now */ +	tlbsx	0,r3 +	mfspr	r4,SPRN_MAS1		/* check valid */ +	andis.	r3,r4,MAS1_VALID@h +	beqlr +	rlwinm	r4,r4,0,1,31 +	mtspr	SPRN_MAS1,r4 +	tlbwe +	msync +	isync +	blr +#endif /* CONFIG_FSL_BOOKE */ + +  /*   * Flush instruction cache.   * This is a no-op on the 601. @@ -846,8 +904,10 @@ _GLOBAL(kernel_thread)  	li	r4,0		/* new sp (unused) */  	li	r0,__NR_clone  	sc -	cmpwi	0,r3,0		/* parent or child? */ -	bne	1f		/* return if parent */ +	bns+	1f		/* did system call indicate error? */ +	neg	r3,r3		/* if so, make return code negative */ +1:	cmpwi	0,r3,0		/* parent or child? */ +	bne	2f		/* return if parent */  	li	r0,0		/* make top-level stack frame */  	stwu	r0,-16(r1)  	mtlr	r30		/* fn addr in lr */ @@ -857,7 +917,7 @@ _GLOBAL(kernel_thread)  	li	r0,__NR_exit	/* exit if function returns */  	li	r3,0  	sc -1:	lwz	r30,8(r1) +2:	lwz	r30,8(r1)  	lwz	r31,12(r1)  	addi	r1,r1,16  	blr diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 4dd70cf7bb4..3053fe5c62f 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -426,8 +426,10 @@ _GLOBAL(kernel_thread)  	li	r4,0		/* new sp (unused) */  	li	r0,__NR_clone  	sc -	cmpdi	0,r3,0		/* parent or child? */ -	bne	1f		/* return if parent */ +	bns+	1f		/* did system call indicate error? */ +	neg	r3,r3		/* if so, make return code negative */ +1:	cmpdi	0,r3,0		/* parent or child? 
*/ +	bne	2f		/* return if parent */  	li	r0,0  	stdu	r0,-STACK_FRAME_OVERHEAD(r1)  	ld	r2,8(r29) @@ -438,7 +440,7 @@ _GLOBAL(kernel_thread)  	li	r0,__NR_exit	/* exit after child exits */          li	r3,0  	sc -1:	addi	r1,r1,STACK_FRAME_OVERHEAD	 +2:	addi	r1,r1,STACK_FRAME_OVERHEAD  	ld	r29,-24(r1)  	ld	r30,-16(r1)  	blr diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c index e9be908f199..f3c9cae01dd 100644 --- a/arch/powerpc/kernel/of_device.c +++ b/arch/powerpc/kernel/of_device.c @@ -78,7 +78,6 @@ struct of_device *of_device_alloc(struct device_node *np,  	dev->dev.parent = parent;  	dev->dev.release = of_release_dev;  	dev->dev.archdata.of_node = np; -	dev->dev.archdata.numa_node = of_node_to_nid(np);  	if (bus_id)  		strlcpy(dev->dev.bus_id, bus_id, BUS_ID_SIZE); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index c9bf17eec31..48a347133f4 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -12,6 +12,7 @@  #include <asm/lppaca.h>  #include <asm/paca.h> +#include <asm/sections.h>  /* This symbol is provided by the linker - let it fill in the paca   * field correctly */ @@ -79,6 +80,8 @@ void __init initialise_pacas(void)  		new_paca->lock_token = 0x8000;  		new_paca->paca_index = cpu;  		new_paca->kernel_toc = kernel_toc; +		new_paca->kernelbase = (unsigned long) _stext; +		new_paca->kernel_msr = MSR_KERNEL;  		new_paca->hw_cpu_id = 0xffff;  		new_paca->slb_shadow_ptr = &slb_shadow[cpu];  		new_paca->__current = &init_task; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index ea0c61e09b7..f36936d9fda 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -56,6 +56,34 @@ resource_size_t isa_mem_base;  /* Default PCI flags is 0 */  unsigned int ppc_pci_flags; +static struct dma_mapping_ops *pci_dma_ops; + +void set_pci_dma_ops(struct dma_mapping_ops *dma_ops) +{ +	pci_dma_ops = dma_ops; +} + +struct dma_mapping_ops *get_pci_dma_ops(void) +{ +	return pci_dma_ops; +} +EXPORT_SYMBOL(get_pci_dma_ops); + +int pci_set_dma_mask(struct pci_dev *dev, u64 mask) +{ +	return dma_set_mask(&dev->dev, mask); +} + +int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) +{ +	int rc; + +	rc = dma_set_mask(&dev->dev, mask); +	dev->dev.coherent_dma_mask = dev->dma_mask; + +	return rc; +} +  struct pci_controller *pcibios_alloc_controller(struct device_node *dev)  {  	struct pci_controller *phb; @@ -180,6 +208,26 @@ char __devinit *pcibios_setup(char *str)  	return str;  } +void __devinit pcibios_setup_new_device(struct pci_dev *dev) +{ +	struct dev_archdata *sd = &dev->dev.archdata; + +	sd->of_node = pci_device_to_OF_node(dev); + +	DBG("PCI: device %s OF node: %s\n", pci_name(dev), +	    sd->of_node ? sd->of_node->full_name : "<none>"); + +	sd->dma_ops = pci_dma_ops; +#ifdef CONFIG_PPC32 +	sd->dma_data = (void *)PCI_DRAM_OFFSET; +#endif +	set_dev_node(&dev->dev, pcibus_to_node(dev->bus)); + +	if (ppc_md.pci_dma_dev_setup) +		ppc_md.pci_dma_dev_setup(dev); +} +EXPORT_SYMBOL(pcibios_setup_new_device); +  /*   * Reads the interrupt pin to determine if interrupt is use by card.   
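
[Editor's note] With set_pci_dma_ops() and pcibios_setup_new_device() moved
into the common pci-common.c above, a platform wires DMA ops up once and
every device probed later inherits them. A hypothetical early setup call
(assuming the arch's plain 1:1 dma_direct_ops is the right choice):

	void __init example_platform_pci_setup(void)
	{
		/* handed to each device by pcibios_setup_new_device() */
		set_pci_dma_ops(&dma_direct_ops);
	}
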
* If the interrupt is used, then gets the interrupt line from the @@ -371,7 +419,7 @@ pgprot_t pci_phys_mem_access_prot(struct file *file,  	struct pci_dev *pdev = NULL;  	struct resource *found = NULL;  	unsigned long prot = pgprot_val(protection); -	unsigned long offset = pfn << PAGE_SHIFT; +	resource_size_t offset = ((resource_size_t)pfn) << PAGE_SHIFT;  	int i;  	if (page_is_ram(pfn)) @@ -403,7 +451,8 @@ pgprot_t pci_phys_mem_access_prot(struct file *file,  		pci_dev_put(pdev);  	} -	DBG("non-PCI map for %lx, prot: %lx\n", offset, prot); +	DBG("non-PCI map for %llx, prot: %lx\n", +	    (unsigned long long)offset, prot);  	return __pgprot(prot);  } @@ -422,7 +471,8 @@ pgprot_t pci_phys_mem_access_prot(struct file *file,  int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,  			enum pci_mmap_state mmap_state, int write_combine)  { -	resource_size_t offset = vma->vm_pgoff << PAGE_SHIFT; +	resource_size_t offset = +		((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;  	struct resource *rp;  	int ret; @@ -441,6 +491,132 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,  	return ret;  } +/* This provides legacy IO read access on a bus */ +int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size) +{ +	unsigned long offset; +	struct pci_controller *hose = pci_bus_to_host(bus); +	struct resource *rp = &hose->io_resource; +	void __iomem *addr; + +	/* Check if port can be supported by that bus. We only check +	 * the ranges of the PHB though, not the bus itself as the rules +	 * for forwarding legacy cycles down bridges are not our problem +	 * here. So if the host bridge supports it, we do it. +	 */ +	offset = (unsigned long)hose->io_base_virt - _IO_BASE; +	offset += port; + +	if (!(rp->flags & IORESOURCE_IO)) +		return -ENXIO; +	if (offset < rp->start || (offset + size) > rp->end) +		return -ENXIO; +	addr = hose->io_base_virt + port; + +	switch(size) { +	case 1: +		*((u8 *)val) = in_8(addr); +		return 1; +	case 2: +		if (port & 1) +			return -EINVAL; +		*((u16 *)val) = in_le16(addr); +		return 2; +	case 4: +		if (port & 3) +			return -EINVAL; +		*((u32 *)val) = in_le32(addr); +		return 4; +	} +	return -EINVAL; +} + +/* This provides legacy IO write access on a bus */ +int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size) +{ +	unsigned long offset; +	struct pci_controller *hose = pci_bus_to_host(bus); +	struct resource *rp = &hose->io_resource; +	void __iomem *addr; + +	/* Check if port can be supported by that bus. We only check +	 * the ranges of the PHB though, not the bus itself as the rules +	 * for forwarding legacy cycles down bridges are not our problem +	 * here. So if the host bridge supports it, we do it. +	 */ +	offset = (unsigned long)hose->io_base_virt - _IO_BASE; +	offset += port; + +	if (!(rp->flags & IORESOURCE_IO)) +		return -ENXIO; +	if (offset < rp->start || (offset + size) > rp->end) +		return -ENXIO; +	addr = hose->io_base_virt + port; + +	/* WARNING: The generic code is idiotic. 
It gets passed a pointer +	 * to what can be a 1, 2 or 4 byte quantity and always reads that +	 * as a u32, which means that we have to correct the location of +	 * the data read within those 32 bits for size 1 and 2 +	 */ +	switch(size) { +	case 1: +		out_8(addr, val >> 24); +		return 1; +	case 2: +		if (port & 1) +			return -EINVAL; +		out_le16(addr, val >> 16); +		return 2; +	case 4: +		if (port & 3) +			return -EINVAL; +		out_le32(addr, val); +		return 4; +	} +	return -EINVAL; +} + +/* This provides legacy IO or memory mmap access on a bus */ +int pci_mmap_legacy_page_range(struct pci_bus *bus, +			       struct vm_area_struct *vma, +			       enum pci_mmap_state mmap_state) +{ +	struct pci_controller *hose = pci_bus_to_host(bus); +	resource_size_t offset = +		((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT; +	resource_size_t size = vma->vm_end - vma->vm_start; +	struct resource *rp; + +	pr_debug("pci_mmap_legacy_page_range(%04x:%02x, %s @%llx..%llx)\n", +		 pci_domain_nr(bus), bus->number, +		 mmap_state == pci_mmap_mem ? "MEM" : "IO", +		 (unsigned long long)offset, +		 (unsigned long long)(offset + size - 1)); + +	if (mmap_state == pci_mmap_mem) { +		if ((offset + size) > hose->isa_mem_size) +			return -ENXIO; +		offset += hose->isa_mem_phys; +	} else { +		unsigned long io_offset = (unsigned long)hose->io_base_virt - _IO_BASE; +		unsigned long roffset = offset + io_offset; +		rp = &hose->io_resource; +		if (!(rp->flags & IORESOURCE_IO)) +			return -ENXIO; +		if (roffset < rp->start || (roffset + size) > rp->end) +			return -ENXIO; +		offset += hose->io_base_phys; +	} +	pr_debug(" -> mapping phys %llx\n", (unsigned long long)offset); + +	vma->vm_pgoff = offset >> PAGE_SHIFT; +	vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot) +				     | _PAGE_NO_CACHE | _PAGE_GUARDED); +	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, +			       vma->vm_end - vma->vm_start, +			       vma->vm_page_prot); +} +  void pci_resource_to_user(const struct pci_dev *dev, int bar,  			  const struct resource *rsrc,  			  resource_size_t *start, resource_size_t *end) @@ -543,6 +719,12 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,  		cpu_addr = of_translate_address(dev, ranges + 3);  		size = of_read_number(ranges + pna + 3, 2);  		ranges += np; + +		/* If we failed translation or got a zero-sized region +		 * (some FW try to feed us with non sensical zero sized regions +		 * such as power3 which look like some kind of attempt at exposing +		 * the VGA memory hole) +		 */  		if (cpu_addr == OF_BAD_ADDR || size == 0)  			continue; @@ -616,6 +798,8 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,  				isa_hole = memno;  				if (primary || isa_mem_base == 0)  					isa_mem_base = cpu_addr; +				hose->isa_mem_phys = cpu_addr; +				hose->isa_mem_size = size;  			}  			/* We get the PCI/Mem offset from the first range or @@ -731,11 +915,6 @@ static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev)  	res->start = (res->start + offset) & mask;  	res->end = (res->end + offset) & mask; - -	pr_debug("PCI:%s            %016llx-%016llx\n", -		 pci_name(dev), -		 (unsigned long long)res->start, -		 (unsigned long long)res->end);  } @@ -781,6 +960,11 @@ static void __devinit pcibios_fixup_resources(struct pci_dev *dev)  			 (unsigned int)res->flags);  		fixup_resource(res, dev); + +		pr_debug("PCI:%s            %016llx-%016llx\n", +			 pci_name(dev), +			 (unsigned long long)res->start, +			 (unsigned long long)res->end);  	}  	/* 
Call machine specific resource fixup */
@@ -789,58 +973,127 @@ static void __devinit pcibios_fixup_resources(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources);
 
-static void __devinit __pcibios_fixup_bus(struct pci_bus *bus)
+/* This function tries to figure out if a bridge resource has been initialized
+ * by the firmware or not. It doesn't have to be absolutely bullet proof, but
+ * things go more smoothly when it gets it right. It should cover cases such
+ * as Apple "closed" bridge resources and bare-metal pSeries unassigned bridges
+ */
+static int __devinit pcibios_uninitialized_bridge_resource(struct pci_bus *bus,
+							   struct resource *res)
 {
 	struct pci_controller *hose = pci_bus_to_host(bus);
 	struct pci_dev *dev = bus->self;
+	resource_size_t offset;
+	u16 command;
+	int i;
 
-	pr_debug("PCI: Fixup bus %d (%s)\n", bus->number, dev ? pci_name(dev) : "PHB");
+	/* We don't do anything if PCI_PROBE_ONLY is set */
+	if (ppc_pci_flags & PPC_PCI_PROBE_ONLY)
+		return 0;
 
-	/* Fixup PCI<->PCI bridges. Host bridges are handled separately, for
-	 * now differently between 32 and 64 bits.
-	 */
-	if (dev != NULL) {
-		struct resource *res;
-		int i;
+	/* Job is a bit different between memory and IO */
+	if (res->flags & IORESOURCE_MEM) {
+		/* If the BAR is non-0 (res != pci_mem_offset) then it's probably been
+		 * initialized by somebody
+		 */
+		if (res->start != hose->pci_mem_offset)
+			return 0;
 
-		for (i = 0; i < PCI_BUS_NUM_RESOURCES; ++i) {
-			if ((res = bus->resource[i]) == NULL)
-				continue;
-			if (!res->flags)
-				continue;
-			if (i >= 3 && bus->self->transparent)
-				continue;
-			/* On PowerMac, Apple leaves bridge windows open over
-			 * an inaccessible region of memory space (0...fffff)
-			 * which is somewhat bogus, but that's what they think
-			 * means disabled...
-			 *
-			 * We clear those to force them to be reallocated later
-			 *
-			 * We detect such regions by the fact that the base is
-			 * equal to the pci_mem_offset of the host bridge and
-			 * their size is smaller than 1M.
-			 */
-			if (res->flags & IORESOURCE_MEM &&
-			    res->start == hose->pci_mem_offset &&
-			    res->end < 0x100000) {
-				printk(KERN_INFO
-				       "PCI: Closing bogus Apple Firmware"
-				       " region %d on bus 0x%02x\n",
-				       i, bus->number);
-				res->flags = 0;
-				continue;
-			}
+		/* The BAR is 0, let's check if memory decoding is enabled on
+		 * the bridge. If not, we consider it unassigned
+		 */
+		pci_read_config_word(dev, PCI_COMMAND, &command);
+		if ((command & PCI_COMMAND_MEMORY) == 0)
+			return 1;
 
-			pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x] fixup...\n",
-				 pci_name(dev), i,
-				 (unsigned long long)res->start,\
-				 (unsigned long long)res->end,
-				 (unsigned int)res->flags);
+		/* Memory decoding is enabled and the BAR is 0. If any of the bridge
+		 * resources covers that starting address (0), then it's good enough for
+		 * us for memory
+		 */
+		for (i = 0; i < 3; i++) {
+			if ((hose->mem_resources[i].flags & IORESOURCE_MEM) &&
+			    hose->mem_resources[i].start == hose->pci_mem_offset)
+				return 0;
+		}
 
-			fixup_resource(res, dev);
+		/* Well, it starts at 0 and we know it will collide so we may as
+		 * well consider it as unassigned. That covers the Apple case.
+		 */
+		return 1;
+	} else {
+		/* If the BAR is non-0, then we consider it assigned */
+		offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+		if (((res->start - offset) & 0xfffffffful) != 0)
+			return 0;
+
+		/* Here, we are a bit different than memory as typically IO space
+		 * starting at low addresses -is- valid. What we do instead is that
+		 * we consider as unassigned anything that doesn't have IO enabled
+		 * in the PCI command register, and that's it.
+		 */
+		pci_read_config_word(dev, PCI_COMMAND, &command);
+		if (command & PCI_COMMAND_IO)
+			return 0;
+
+		/* It's starting at 0 and IO is disabled in the bridge, consider
+		 * it unassigned
+		 */
+		return 1;
+	}
+}
+
+/* Fixup resources of a PCI<->PCI bridge */
+static void __devinit pcibios_fixup_bridge(struct pci_bus *bus)
+{
+	struct resource *res;
+	int i;
+
+	struct pci_dev *dev = bus->self;
+
+	for (i = 0; i < PCI_BUS_NUM_RESOURCES; ++i) {
+		if ((res = bus->resource[i]) == NULL)
+			continue;
+		if (!res->flags)
+			continue;
+		if (i >= 3 && bus->self->transparent)
+			continue;
+
+		pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x] fixup...\n",
+			 pci_name(dev), i,
+			 (unsigned long long)res->start,\
+			 (unsigned long long)res->end,
+			 (unsigned int)res->flags);
+
+		/* Perform fixup */
+		fixup_resource(res, dev);
+
+		/* Try to detect uninitialized P2P bridge resources,
+		 * and clear them out so they get re-assigned later
+		 */
+		if (pcibios_uninitialized_bridge_resource(bus, res)) {
+			res->flags = 0;
+			pr_debug("PCI:%s            (unassigned)\n", pci_name(dev));
+		} else {
+
+			pr_debug("PCI:%s            %016llx-%016llx\n",
+				 pci_name(dev),
+				 (unsigned long long)res->start,
+				 (unsigned long long)res->end);
 		}
 	}
+}
+
+static void __devinit __pcibios_fixup_bus(struct pci_bus *bus)
+{
+	struct pci_dev *dev = bus->self;
+
+	pr_debug("PCI: Fixup bus %d (%s)\n", bus->number, dev ? pci_name(dev) : "PHB");
+
+	/* Fixup PCI<->PCI bridges. Host bridges are handled separately, for
+	 * now differently between 32 and 64 bits.
+	 */
+	if (dev != NULL)
+		pcibios_fixup_bridge(bus);
 
 	/* Additional setup that is different between 32 and 64 bits for now */
 	pcibios_do_bus_setup(bus);
@@ -986,69 +1239,66 @@ static int __init reparent_resources(struct resource *parent,
  *	    as well.
  */
 
-static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
+void pcibios_allocate_bus_resources(struct pci_bus *bus)
 {
-	struct pci_bus *bus;
+	struct pci_bus *b;
 	int i;
 	struct resource *res, *pr;
 
-	/* Depth-First Search on bus tree */
-	list_for_each_entry(bus, bus_list, node) {
-		for (i = 0; i < PCI_BUS_NUM_RESOURCES; ++i) {
-			if ((res = bus->resource[i]) == NULL || !res->flags
-			    || res->start > res->end)
-				continue;
-			if (bus->parent == NULL)
-				pr = (res->flags & IORESOURCE_IO) ?
-					&ioport_resource : &iomem_resource;
-			else {
-				/* Don't bother with non-root busses when
-				 * re-assigning all resources.
We clear the +			 * resource flags as if they were colliding +			 * and as such ensure proper re-allocation +			 * later. +			 */ +			if (ppc_pci_flags & PPC_PCI_REASSIGN_ALL_RSRC) +				goto clear_resource; +			pr = pci_find_parent_resource(bus->self, res); +			if (pr == res) { +				/* this happens when the generic PCI +				 * code (wrongly) decides that this +				 * bridge is transparent  -- paulus  				 */ -				if (ppc_pci_flags & PPC_PCI_REASSIGN_ALL_RSRC) -					goto clear_resource; -				pr = pci_find_parent_resource(bus->self, res); -				if (pr == res) { -					/* this happens when the generic PCI -					 * code (wrongly) decides that this -					 * bridge is transparent  -- paulus -					 */ -					continue; -				} +				continue;  			} +		} -			DBG("PCI: %s (bus %d) bridge rsrc %d: %016llx-%016llx " -			    "[0x%x], parent %p (%s)\n", -			    bus->self ? pci_name(bus->self) : "PHB", -			    bus->number, i, -			    (unsigned long long)res->start, -			    (unsigned long long)res->end, -			    (unsigned int)res->flags, -			    pr, (pr && pr->name) ? pr->name : "nil"); - -			if (pr && !(pr->flags & IORESOURCE_UNSET)) { -				if (request_resource(pr, res) == 0) -					continue; -				/* -				 * Must be a conflict with an existing entry. -				 * Move that entry (or entries) under the -				 * bridge resource and try again. -				 */ -				if (reparent_resources(pr, res) == 0) -					continue; -			} -			printk(KERN_WARNING -			       "PCI: Cannot allocate resource region " -			       "%d of PCI bridge %d, will remap\n", -			       i, bus->number); -clear_resource: -			res->flags = 0; +		DBG("PCI: %s (bus %d) bridge rsrc %d: %016llx-%016llx " +		    "[0x%x], parent %p (%s)\n", +		    bus->self ? pci_name(bus->self) : "PHB", +		    bus->number, i, +		    (unsigned long long)res->start, +		    (unsigned long long)res->end, +		    (unsigned int)res->flags, +		    pr, (pr && pr->name) ? pr->name : "nil"); + +		if (pr && !(pr->flags & IORESOURCE_UNSET)) { +			if (request_resource(pr, res) == 0) +				continue; +			/* +			 * Must be a conflict with an existing entry. +			 * Move that entry (or entries) under the +			 * bridge resource and try again. +			 */ +			if (reparent_resources(pr, res) == 0) +				continue;  		} -		pcibios_allocate_bus_resources(&bus->children); +		printk(KERN_WARNING "PCI: Cannot allocate resource region " +		       "%d of PCI bridge %d, will remap\n", i, bus->number); +clear_resource: +		res->flags = 0;  	} + +	list_for_each_entry(b, &bus->children, node) +		pcibios_allocate_bus_resources(b);  }  static inline void __devinit alloc_resource(struct pci_dev *dev, int idx) @@ -1119,10 +1369,13 @@ static void __init pcibios_allocate_resources(int pass)  void __init pcibios_resource_survey(void)  { +	struct pci_bus *b; +  	/* Allocate and assign resources. If we re-assign everything, then  	 * we skip the allocate phase  	 */ -	pcibios_allocate_bus_resources(&pci_root_buses); +	list_for_each_entry(b, &pci_root_buses, node) +		pcibios_allocate_bus_resources(b);  	if (!(ppc_pci_flags & PPC_PCI_REASSIGN_ALL_RSRC)) {  		pcibios_allocate_resources(0); diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 88db4ffaf11..131b1dfa68c 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -53,12 +53,19 @@ LIST_HEAD(hose_list);  static int pci_bus_count; +/* This will remain NULL for now, until isa-bridge.c is made common + * to both 32-bit and 64-bit. 
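
[Editor's note] Since pcibios_allocate_bus_resources() above now recurses
from a given bus instead of walking a global bus list, a caller can target
a single subtree -- e.g. a freshly rescanned root bus. Hypothetical use:

	void example_realloc_one_bus(struct pci_bus *bus)
	{
		/* allocates this bus's windows, then its children's */
		pcibios_allocate_bus_resources(bus);
	}
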
+ */ +struct pci_dev *isa_bridge_pcidev; +EXPORT_SYMBOL_GPL(isa_bridge_pcidev); +  static void -fixup_hide_host_resource_fsl(struct pci_dev* dev) +fixup_hide_host_resource_fsl(struct pci_dev *dev)  {  	int i, class = dev->class >> 8; -	if ((class == PCI_CLASS_PROCESSOR_POWERPC) && +	if ((class == PCI_CLASS_PROCESSOR_POWERPC || +	     class == PCI_CLASS_BRIDGE_OTHER) &&  		(dev->hdr_type == PCI_HEADER_TYPE_NORMAL) &&  		(dev->bus->parent == NULL)) {  		for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { @@ -424,6 +431,7 @@ void __devinit pcibios_do_bus_setup(struct pci_bus *bus)  	unsigned long io_offset;  	struct resource *res;  	int i; +	struct pci_dev *dev;  	/* Hookup PHB resources */  	io_offset = (unsigned long)hose->io_base_virt - isa_io_base; @@ -457,6 +465,12 @@ void __devinit pcibios_do_bus_setup(struct pci_bus *bus)  			bus->resource[i+1] = res;  		}  	} + +	if (ppc_md.pci_dma_bus_setup) +		ppc_md.pci_dma_bus_setup(bus); + +	list_for_each_entry(dev, &bus->devices, bus_list) +		pcibios_setup_new_device(dev);  }  /* the next one is stolen from the alpha port... */ diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 30eedfc5a56..3502b9101e6 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -52,35 +52,6 @@ EXPORT_SYMBOL(pci_io_base);  LIST_HEAD(hose_list); -static struct dma_mapping_ops *pci_dma_ops; - -void set_pci_dma_ops(struct dma_mapping_ops *dma_ops) -{ -	pci_dma_ops = dma_ops; -} - -struct dma_mapping_ops *get_pci_dma_ops(void) -{ -	return pci_dma_ops; -} -EXPORT_SYMBOL(get_pci_dma_ops); - - -int pci_set_dma_mask(struct pci_dev *dev, u64 mask) -{ -	return dma_set_mask(&dev->dev, mask); -} - -int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) -{ -	int rc; - -	rc = dma_set_mask(&dev->dev, mask); -	dev->dev.coherent_dma_mask = dev->dma_mask; - -	return rc; -} -  static void fixup_broken_pcnet32(struct pci_dev* dev)  {  	if ((dev->class>>8 == PCI_CLASS_NETWORK_ETHERNET)) { @@ -455,7 +426,7 @@ int pcibios_unmap_io_space(struct pci_bus *bus)  		    pci_name(bus->self));  		__flush_hash_table_range(&init_mm, res->start + _IO_BASE, -					 res->end - res->start + 1); +					 res->end + _IO_BASE + 1);  		return 0;  	} @@ -548,26 +519,6 @@ int __devinit pcibios_map_io_space(struct pci_bus *bus)  }  EXPORT_SYMBOL_GPL(pcibios_map_io_space); -void __devinit pcibios_setup_new_device(struct pci_dev *dev) -{ -	struct dev_archdata *sd = &dev->dev.archdata; - -	sd->of_node = pci_device_to_OF_node(dev); - -	DBG("PCI: device %s OF node: %s\n", pci_name(dev), -	    sd->of_node ? 
sd->of_node->full_name : "<none>");
-
-	sd->dma_ops = pci_dma_ops;
-#ifdef CONFIG_NUMA
-	sd->numa_node = pcibus_to_node(dev->bus);
-#else
-	sd->numa_node = -1;
-#endif
-	if (ppc_md.pci_dma_dev_setup)
-		ppc_md.pci_dma_dev_setup(dev);
-}
-EXPORT_SYMBOL(pcibios_setup_new_device);
-
 void __devinit pcibios_do_bus_setup(struct pci_bus *bus)
 {
 	struct pci_dev *dev;
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index e1ea4fe5cfb..260089dccfb 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -68,7 +68,7 @@ EXPORT_SYMBOL(single_step_exception);
 EXPORT_SYMBOL(sys_sigreturn);
 #endif
 
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
 EXPORT_SYMBOL(_mcount);
 #endif
 
@@ -119,6 +119,9 @@ EXPORT_SYMBOL(flush_instruction_cache);
 EXPORT_SYMBOL(flush_tlb_kernel_range);
 EXPORT_SYMBOL(flush_tlb_page);
 EXPORT_SYMBOL(_tlbie);
+#if defined(CONFIG_4xx) || defined(CONFIG_8xx) || defined(CONFIG_FSL_BOOKE)
+EXPORT_SYMBOL(_tlbil_va);
+#endif
 #endif
 EXPORT_SYMBOL(__flush_icache_range);
 EXPORT_SYMBOL(flush_dcache_range);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 87d83c56b31..3a2dc7e6586 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -888,9 +888,10 @@ static u64 __init dt_mem_next_cell(int s, cell_t **cellp)
  */
 static int __init early_init_dt_scan_drconf_memory(unsigned long node)
 {
-	cell_t *dm, *ls;
+	cell_t *dm, *ls, *usm;
 	unsigned long l, n, flags;
 	u64 base, size, lmb_size;
+	unsigned int is_kexec_kdump = 0, rngs;
 
 	ls = (cell_t *)of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
 	if (ls == NULL || l < dt_root_size_cells * sizeof(cell_t))
@@ -905,6 +906,12 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
 	if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(cell_t))
 		return 0;
 
+	/* check if this is a kexec/kdump kernel. */
+	usm = (cell_t *)of_get_flat_dt_prop(node, "linux,drconf-usable-memory",
+						 &l);
+	if (usm != NULL)
+		is_kexec_kdump = 1;
+
 	for (; n != 0; --n) {
 		base = dt_mem_next_cell(dt_root_addr_cells, &dm);
 		flags = dm[3];
@@ -915,13 +922,34 @@ static int __init early_init_dt_scan_drconf_memory(unsigned long node)
 		if ((flags & 0x80) || !(flags & 0x8))
 			continue;
 		size = lmb_size;
-		if (iommu_is_off) {
-			if (base >= 0x80000000ul)
+		rngs = 1;
+		if (is_kexec_kdump) {
+			/*
+			 * For each lmb in ibm,dynamic-memory, a corresponding
+			 * entry in linux,drconf-usable-memory property contains
+			 * a counter 'p' followed by 'p' (base, size) pairs.
+			 * Now read the counter from
+			 * linux,drconf-usable-memory property
+			 */
+			rngs = dt_mem_next_cell(dt_root_size_cells, &usm);
+			if (!rngs) /* there are no (base, size) pairs */
 				continue;
-			if ((base + size) > 0x80000000ul)
-				size = 0x80000000ul - base;
 		}
-		lmb_add(base, size);
+		do {
+			if (is_kexec_kdump) {
+				base = dt_mem_next_cell(dt_root_addr_cells,
+							 &usm);
+				size = dt_mem_next_cell(dt_root_size_cells,
+							 &usm);
+			}
+			if (iommu_is_off) {
+				if (base >= 0x80000000ul)
+					continue;
+				if ((base + size) > 0x80000000ul)
+					size = 0x80000000ul - base;
+			}
+			lmb_add(base, size);
+		} while (--rngs);
 	}
 	lmb_dump_all();
 	return 0;
@@ -1164,6 +1192,9 @@ void __init early_init_devtree(void *params)
 
 	/* Reserve LMB regions used by kernel, initrd, dt, etc... */
 	lmb_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START);
+	/* If relocatable, reserve first 32k for interrupt vectors etc.
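
[Editor's note] Sketch of the linux,drconf-usable-memory layout walked in
the prom.c change above, one LMB at a time (read_cells() is a hypothetical
stand-in for dt_mem_next_cell()):

	/* property layout, per LMB: a count p, then p (base, size) pairs */
	static void add_usable_ranges_for_lmb(cell_t **usm)
	{
		unsigned int p = read_cells(dt_root_size_cells, usm);

		while (p--) {
			u64 base = read_cells(dt_root_addr_cells, usm);
			u64 size = read_cells(dt_root_size_cells, usm);
			lmb_add(base, size);	/* usable subranges only */
		}
	}
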
*/ +	if (PHYSICAL_START > MEMORY_START) +		lmb_reserve(MEMORY_START, 0x8000);  	reserve_kdump_trampoline();  	reserve_crashkernel();  	early_reserve_mem(); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index b72849ac7db..2445945d376 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -487,67 +487,6 @@ static int __init prom_setprop(phandle node, const char *nodename,  	return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd);  } -/* We can't use the standard versions because of RELOC headaches. */ -#define isxdigit(c)	(('0' <= (c) && (c) <= '9') \ -			 || ('a' <= (c) && (c) <= 'f') \ -			 || ('A' <= (c) && (c) <= 'F')) - -#define isdigit(c)	('0' <= (c) && (c) <= '9') -#define islower(c)	('a' <= (c) && (c) <= 'z') -#define toupper(c)	(islower(c) ? ((c) - 'a' + 'A') : (c)) - -unsigned long prom_strtoul(const char *cp, const char **endp) -{ -	unsigned long result = 0, base = 10, value; - -	if (*cp == '0') { -		base = 8; -		cp++; -		if (toupper(*cp) == 'X') { -			cp++; -			base = 16; -		} -	} - -	while (isxdigit(*cp) && -	       (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < base) { -		result = result * base + value; -		cp++; -	} - -	if (endp) -		*endp = cp; - -	return result; -} - -unsigned long prom_memparse(const char *ptr, const char **retptr) -{ -	unsigned long ret = prom_strtoul(ptr, retptr); -	int shift = 0; - -	/* -	 * We can't use a switch here because GCC *may* generate a -	 * jump table which won't work, because we're not running at -	 * the address we're linked at. -	 */ -	if ('G' == **retptr || 'g' == **retptr) -		shift = 30; - -	if ('M' == **retptr || 'm' == **retptr) -		shift = 20; - -	if ('K' == **retptr || 'k' == **retptr) -		shift = 10; - -	if (shift) { -		ret <<= shift; -		(*retptr)++; -	} - -	return ret; -} -  /*   * Early parsing of the command line passed to the kernel, used for   * "mem=x" and the options that affect the iommu @@ -1321,7 +1260,7 @@ static void __init prom_initialize_tce_table(void)   *   * -- Cort   */ -extern void __secondary_hold(void); +extern char __secondary_hold;  extern unsigned long __secondary_hold_spinloop;  extern unsigned long __secondary_hold_acknowledge; @@ -1342,13 +1281,7 @@ static void __init prom_hold_cpus(void)  		= (void *) LOW_ADDR(__secondary_hold_spinloop);  	unsigned long *acknowledge  		= (void *) LOW_ADDR(__secondary_hold_acknowledge); -#ifdef CONFIG_PPC64 -	/* __secondary_hold is actually a descriptor, not the text address */ -	unsigned long secondary_hold -		= __pa(*PTRRELOC((unsigned long *)__secondary_hold)); -#else  	unsigned long secondary_hold = LOW_ADDR(__secondary_hold); -#endif  	prom_debug("prom_hold_cpus: start...\n");  	prom_debug("    1) spinloop       = 0x%x\n", (unsigned long)spinloop); @@ -2315,13 +2248,14 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)  unsigned long __init prom_init(unsigned long r3, unsigned long r4,  			       unsigned long pp, -			       unsigned long r6, unsigned long r7) +			       unsigned long r6, unsigned long r7, +			       unsigned long kbase)  {	  	struct prom_t *_prom;  	unsigned long hdr; -	unsigned long offset = reloc_offset();  #ifdef CONFIG_PPC32 +	unsigned long offset = reloc_offset();  	reloc_got2(offset);  #endif @@ -2355,9 +2289,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,  	 */  	RELOC(of_platform) = prom_find_machine_type(); +#ifndef CONFIG_RELOCATABLE  	/* Bail if this is a kdump kernel. 
*/  	if (PHYSICAL_START > 0)  		prom_panic("Error: You can't boot a kdump kernel from OF!\n"); +#endif  	/*  	 * Check for an initrd @@ -2377,7 +2313,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,  	 * Copy the CPU hold code  	 */  	if (RELOC(of_platform) != PLATFORM_POWERMAC) -		copy_and_flush(0, KERNELBASE + offset, 0x100, 0); +		copy_and_flush(0, kbase, 0x100, 0);  	/*  	 * Do early parsing of command line @@ -2480,7 +2416,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,  	reloc_got2(-offset);  #endif -	__start(hdr, KERNELBASE + offset, 0); +	__start(hdr, kbase, 0);  	return 0;  } diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 2c7e8e87f77..ea3a2ec03ff 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -20,7 +20,7 @@ WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush  _end enter_prom memcpy memset reloc_offset __secondary_hold  __secondary_hold_acknowledge __secondary_hold_spinloop __start  strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224 -reloc_got2 kernstart_addr" +reloc_got2 kernstart_addr memstart_addr"  NM="$1"  OBJ="$2" diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S new file mode 100644 index 00000000000..b47a0e1ab00 --- /dev/null +++ b/arch/powerpc/kernel/reloc_64.S @@ -0,0 +1,87 @@ +/* + * Code to process dynamic relocations in the kernel. + * + * Copyright 2008 Paul Mackerras, IBM Corp. + * + *  This program is free software; you can redistribute it and/or + *  modify it under the terms of the GNU General Public License + *  as published by the Free Software Foundation; either version + *  2 of the License, or (at your option) any later version. + */ + +#include <asm/ppc_asm.h> + +RELA = 7 +RELACOUNT = 0x6ffffff9 +R_PPC64_RELATIVE = 22 + +/* + * r3 = desired final address of kernel + */ +_GLOBAL(relocate) +	mflr	r0 +	bcl	20,31,$+4 +0:	mflr	r12		/* r12 has runtime addr of label 0 */ +	mtlr	r0 +	ld	r11,(p_dyn - 0b)(r12) +	add	r11,r11,r12	/* r11 has runtime addr of .dynamic section */ +	ld	r9,(p_rela - 0b)(r12) +	add	r9,r9,r12	/* r9 has runtime addr of .rela.dyn section */ +	ld	r10,(p_st - 0b)(r12) +	add	r10,r10,r12	/* r10 has runtime addr of _stext */ + +	/* +	 * Scan the dynamic section for the RELA and RELACOUNT entries. +	 */ +	li	r7,0 +	li	r8,0 +1:	ld	r6,0(r11)	/* get tag */ +	cmpdi	r6,0 +	beq	4f		/* end of list */ +	cmpdi	r6,RELA +	bne	2f +	ld	r7,8(r11)	/* get RELA pointer in r7 */ +	b	3f +2:	addis	r6,r6,(-RELACOUNT)@ha +	cmpdi	r6,RELACOUNT@l +	bne	3f +	ld	r8,8(r11)	/* get RELACOUNT value in r8 */ +3:	addi	r11,r11,16 +	b	1b +4:	cmpdi	r7,0		/* check we have both RELA and RELACOUNT */ +	cmpdi	cr1,r8,0 +	beq	6f +	beq	cr1,6f + +	/* +	 * Work out linktime address of _stext and hence the +	 * relocation offset to be applied. +	 * cur_offset [r7] = rela.run [r9] - rela.link [r7] +	 * _stext.link [r10] = _stext.run [r10] - cur_offset [r7] +	 * final_offset [r3] = _stext.final [r3] - _stext.link [r10] +	 */ +	subf	r7,r7,r9	/* cur_offset */ +	subf	r10,r7,r10 +	subf	r3,r10,r3	/* final_offset */ + +	/* +	 * Run through the list of relocations and process the +	 * R_PPC64_RELATIVE ones. 
+	 */ +	mtctr	r8 +5:	lwz	r0,12(9)	/* ELF64_R_TYPE(reloc->r_info) */ +	cmpwi	r0,R_PPC64_RELATIVE +	bne	6f +	ld	r6,0(r9)	/* reloc->r_offset */ +	ld	r0,16(r9)	/* reloc->r_addend */ +	add	r0,r0,r3 +	stdx	r0,r7,r6 +	addi	r9,r9,24 +	bdnz	5b + +6:	blr + +p_dyn:	.llong	__dynamic_start - 0b +p_rela:	.llong	__rela_dyn_start - 0b +p_st:	.llong	_stext - 0b + diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9cc5a52711e..705fc4bf380 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -59,6 +59,7 @@  #include <asm/mmu.h>  #include <asm/xmon.h>  #include <asm/cputhreads.h> +#include <mm/mmu_decl.h>  #include "setup.h" @@ -190,6 +191,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)  		if (ppc_md.show_cpuinfo != NULL)  			ppc_md.show_cpuinfo(m); +#ifdef CONFIG_PPC32 +		/* Display the amount of memory */ +		seq_printf(m, "Memory\t\t: %d MB\n", +			   (unsigned int)(total_memory / (1024 * 1024))); +#endif +  		return 0;  	} @@ -254,8 +261,21 @@ static int show_cpuinfo(struct seq_file *m, void *v)  	/* If we are a Freescale core do a simple check so  	 * we dont have to keep adding cases in the future */  	if (PVR_VER(pvr) & 0x8000) { -		maj = PVR_MAJ(pvr); -		min = PVR_MIN(pvr); +		switch (PVR_VER(pvr)) { +		case 0x8000:	/* 7441/7450/7451, Voyager */ +		case 0x8001:	/* 7445/7455, Apollo 6 */ +		case 0x8002:	/* 7447/7457, Apollo 7 */ +		case 0x8003:	/* 7447A, Apollo 7 PM */ +		case 0x8004:	/* 7448, Apollo 8 */ +		case 0x800c:	/* 7410, Nitro */ +			maj = ((pvr >> 8) & 0xF); +			min = PVR_MIN(pvr); +			break; +		default:	/* e500/book-e */ +			maj = PVR_MAJ(pvr); +			min = PVR_MIN(pvr); +			break; +		}  	} else {  		switch (PVR_VER(pvr)) {  			case 0x0020:	/* 403 family */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 066e65c59b5..c1a27626a94 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -111,7 +111,7 @@ notrace unsigned long __init early_init(unsigned long dt_ptr)   * This is called very early on the boot process, after a minimal   * MMU environment has been set up but before MMU_init is called.   
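
[Editor's note] The relocate() routine in reloc_64.S above, transcribed
into C for readability (illustrative only -- the real code runs long
before C is usable; r3/r7 name the registers used in the assembly, and
R_PPC64_RELATIVE is 22 as defined there):

	struct rela { u64 r_offset; u64 r_info; u64 r_addend; };

	static void relocate_sketch(u64 final_minus_link,	/* r3 */
				    u64 run_minus_link,		/* r7 */
				    struct rela *rela, u64 relacount)
	{
		u64 i;

		for (i = 0; i < relacount; i++) {
			/* low word of r_info, as the lwz above reads it */
			if ((u32)rela[i].r_info != R_PPC64_RELATIVE)
				break;		/* asm bails out the same way */
			/* patch the target word at its current run address */
			*(u64 *)(rela[i].r_offset + run_minus_link) =
				rela[i].r_addend + final_minus_link;
		}
	}
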
 */
-notrace void __init machine_init(unsigned long dt_ptr, unsigned long phys)
+notrace void __init machine_init(unsigned long dt_ptr)
 {
 	/* Enable early debugging if any specified (see udbg.h) */
 	udbg_early_init();
@@ -209,23 +209,12 @@ EXPORT_SYMBOL(nvram_sync);
 
 #endif /* CONFIG_NVRAM */
 
-static DEFINE_PER_CPU(struct cpu, cpu_devices);
-
 int __init ppc_init(void)
 {
-	int cpu;
-
 	/* clear the progress line */
 	if (ppc_md.progress)
 		ppc_md.progress("             ", 0xffff);
 
-	/* register CPU devices */
-	for_each_possible_cpu(cpu) {
-		struct cpu *c = &per_cpu(cpu_devices, cpu);
-		c->hotpluggable = 1;
-		register_cpu(c, cpu);
-	}
-
 	/* call platform init */
 	if (ppc_md.init != NULL) {
 		ppc_md.init();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 8b25f51f03b..169d74cef15 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -255,9 +255,11 @@ void early_setup_secondary(void)
 #endif /* CONFIG_SMP */
 
 #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
+extern unsigned long __secondary_hold_spinloop;
+extern void generic_secondary_smp_init(void);
+
 void smp_release_cpus(void)
 {
-	extern unsigned long __secondary_hold_spinloop;
 	unsigned long *ptr;
 
 	DBG(" -> smp_release_cpus()\n");
@@ -266,12 +268,11 @@ void smp_release_cpus(void)
 	 * all now so they can start to spin on their individual paca
 	 * spinloops. For non SMP kernels, the secondary cpus never get out
 	 * of the common spinloop.
-	 * This is useless but harmless on iSeries, secondaries are already
-	 * waiting on their paca spinloops. */
+	 */
 	ptr  = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
 			- PHYSICAL_START);
-	*ptr = 1;
+	*ptr = __pa(generic_secondary_smp_init);
 	mb();
 
 	DBG(" <- smp_release_cpus()\n");
@@ -443,9 +444,9 @@ void __init setup_system(void)
 	if (htab_address)
 		printk("htab_address                  = 0x%p\n", htab_address);
 	printk("htab_hash_mask                = 0x%lx\n", htab_hash_mask);
-#if PHYSICAL_START > 0
-	printk("physical_start                = 0x%lx\n", PHYSICAL_START);
-#endif
+	if (PHYSICAL_START > 0)
+		printk("physical_start                = 0x%lx\n",
+		       PHYSICAL_START);
 	printk("-----------------------------------------------------\n");
 
 	DBG(" <- setup_system()\n");
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 28f4b9f5fe5..b427bf8e1d8 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -12,6 +12,8 @@
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
+extern void do_signal(struct pt_regs *regs, unsigned long thread_info_flags);
+
 extern void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
 				  size_t frame_size);
 extern void restore_sigmask(sigset_t *set);
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 3e80aa32b8b..b13abf30599 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -410,7 +410,7 @@ inline unsigned long copy_fpr_from_user(struct task_struct *task,
  * altivec/spe instructions at some point.
 */
 static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
-		int sigret)
+		int sigret, int ctx_has_vsx_region)
 {
 	unsigned long msr = regs->msr;
 
@@ -451,7 +451,7 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
 	 * the saved MSR value to indicate that frame->mc_vregs
 	 * contains valid data
 	 */
-	if (current->thread.used_vsr) {
+	if (current->thread.used_vsr && ctx_has_vsx_region) {
 		__giveup_vsx(current);
 		if (copy_vsx_to_user(&frame->mc_vsregs, current))
 			return 1;
@@ -858,11 +858,11 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
 	frame = &rt_sf->uc.uc_mcontext;
 	addr = frame;
 	if (vdso32_rt_sigtramp && current->mm->context.vdso_base) {
-		if (save_user_regs(regs, frame, 0))
+		if (save_user_regs(regs, frame, 0, 1))
 			goto badframe;
 		regs->link = current->mm->context.vdso_base + vdso32_rt_sigtramp;
 	} else {
-		if (save_user_regs(regs, frame, __NR_rt_sigreturn))
+		if (save_user_regs(regs, frame, __NR_rt_sigreturn, 1))
 			goto badframe;
 		regs->link = (unsigned long) frame->tramp;
 	}
@@ -936,13 +936,26 @@ long sys_swapcontext(struct ucontext __user *old_ctx,
 		     int ctx_size, int r6, int r7, int r8, struct pt_regs *regs)
 {
 	unsigned char tmp;
+	int ctx_has_vsx_region = 0;
 
 #ifdef CONFIG_PPC64
 	unsigned long new_msr = 0;
 
-	if (new_ctx &&
-	    __get_user(new_msr, &new_ctx->uc_mcontext.mc_gregs[PT_MSR]))
-		return -EFAULT;
+	if (new_ctx) {
+		struct mcontext __user *mcp;
+		u32 cmcp;
+
+		/*
+		 * Get pointer to the real mcontext.  No need for
+		 * access_ok since we are dealing with compat
+		 * pointers.
+		 */
+		if (__get_user(cmcp, &new_ctx->uc_regs))
+			return -EFAULT;
+		mcp = (struct mcontext __user *)(u64)cmcp;
+		if (__get_user(new_msr, &mcp->mc_gregs[PT_MSR]))
+			return -EFAULT;
+	}
 	/*
 	 * Check that the context is not smaller than the original
 	 * size (with VMX but without VSX)
@@ -956,16 +969,9 @@ long sys_swapcontext(struct ucontext __user *old_ctx,
 	if ((ctx_size < sizeof(struct ucontext)) &&
 	    (new_msr & MSR_VSX))
 		return -EINVAL;
-#ifdef CONFIG_VSX
-	/*
-	 * If userspace doesn't provide enough room for VSX data,
-	 * but current thread has used VSX, we don't have anywhere
-	 * to store the full context back into.
-	 */
-	if ((ctx_size < sizeof(struct ucontext)) &&
-	    (current->thread.used_vsr && old_ctx))
-		return -EINVAL;
-#endif
+	/* Does the context have enough room to store VSX data? */
+	if (ctx_size >= sizeof(struct ucontext))
+		ctx_has_vsx_region = 1;
 #else
 	/* Context size is for future use. Right now, we only make sure
 	 * we are passed something we understand
@@ -985,17 +991,17 @@ long sys_swapcontext(struct ucontext __user *old_ctx,
 		 */
 		mctx = (struct mcontext __user *)
 			((unsigned long) &old_ctx->uc_mcontext & ~0xfUL);
-		if (!access_ok(VERIFY_WRITE, old_ctx, sizeof(*old_ctx))
-		    || save_user_regs(regs, mctx, 0)
+		if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size)
+		    || save_user_regs(regs, mctx, 0, ctx_has_vsx_region)
 		    || put_sigset_t(&old_ctx->uc_sigmask, &current->blocked)
 		    || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs))
 			return -EFAULT;
 	}
 	if (new_ctx == NULL)
 		return 0;
-	if (!access_ok(VERIFY_READ, new_ctx, sizeof(*new_ctx))
+	if (!access_ok(VERIFY_READ, new_ctx, ctx_size)
 	    || __get_user(tmp, (u8 __user *) new_ctx)
-	    || __get_user(tmp, (u8 __user *) (new_ctx + 1) - 1))
+	    || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1))
 		return -EFAULT;
 
 	/*
@@ -1196,11 +1202,11 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
 		goto badframe;
 
 	if (vdso32_sigtramp && current->mm->context.vdso_base) {
-		if (save_user_regs(regs, &frame->mctx, 0))
+		if (save_user_regs(regs, &frame->mctx, 0, 1))
 			goto badframe;
 		regs->link = current->mm->context.vdso_base + vdso32_sigtramp;
 	} else {
-		if (save_user_regs(regs, &frame->mctx, __NR_sigreturn))
+		if (save_user_regs(regs, &frame->mctx, __NR_sigreturn, 1))
 			goto badframe;
 		regs->link = (unsigned long) frame->mctx.tramp;
 	}
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 65ad925c3a8..e132891d3ce 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -74,7 +74,8 @@ static const char fmt64[] = KERN_INFO \
  */
 
 static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
-		 int signr, sigset_t *set, unsigned long handler)
+		 int signr, sigset_t *set, unsigned long handler,
+		 int ctx_has_vsx_region)
 {
 	/* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
 	 * process never used altivec yet (MSR_VEC is zero in pt_regs of
@@ -121,7 +122,7 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
 	 * then out to userspace.  Update v_regs to point after the
 	 * VMX data.
 	 */
-	if (current->thread.used_vsr) {
+	if (current->thread.used_vsr && ctx_has_vsx_region) {
 		__giveup_vsx(current);
 		v_regs += ELF_NVRREG;
 		err |= copy_vsx_to_user(v_regs, current);
@@ -235,8 +236,6 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
 	else
 		for (i = 0; i < 32 ; i++)
 			current->thread.fpr[i][TS_VSRLOWOFFSET] = 0;
-
-#else
 #endif
 	return err;
 }
@@ -284,9 +283,10 @@ int sys_swapcontext(struct ucontext __user *old_ctx,
 	unsigned char tmp;
 	sigset_t set;
 	unsigned long new_msr = 0;
+	int ctx_has_vsx_region = 0;
 
 	if (new_ctx &&
-	    __get_user(new_msr, &new_ctx->uc_mcontext.gp_regs[PT_MSR]))
+	    get_user(new_msr, &new_ctx->uc_mcontext.gp_regs[PT_MSR]))
 		return -EFAULT;
 	/*
 	 * Check that the context is not smaller than the original
@@ -301,28 +301,23 @@ int sys_swapcontext(struct ucontext __user *old_ctx,
 	if ((ctx_size < sizeof(struct ucontext)) &&
 	    (new_msr & MSR_VSX))
 		return -EINVAL;
-#ifdef CONFIG_VSX
-	/*
-	 * If userspace doesn't provide enough room for VSX data,
-	 * but current thread has used VSX, we don't have anywhere
-	 * to store the full context back into.
-	 */
-	if ((ctx_size < sizeof(struct ucontext)) &&
-	    (current->thread.used_vsr && old_ctx))
-		return -EINVAL;
-#endif
+	/* Does the context have enough room to store VSX data? */
+	if (ctx_size >= sizeof(struct ucontext))
+		ctx_has_vsx_region = 1;
+
 	if (old_ctx != NULL) {
-		if (!access_ok(VERIFY_WRITE, old_ctx, sizeof(*old_ctx))
-		    || setup_sigcontext(&old_ctx->uc_mcontext, regs, 0, NULL, 0)
+		if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size)
+		    || setup_sigcontext(&old_ctx->uc_mcontext, regs, 0, NULL, 0,
+					ctx_has_vsx_region)
 		    || __copy_to_user(&old_ctx->uc_sigmask,
 				      &current->blocked, sizeof(sigset_t)))
 			return -EFAULT;
 	}
 	if (new_ctx == NULL)
 		return 0;
-	if (!access_ok(VERIFY_READ, new_ctx, sizeof(*new_ctx))
+	if (!access_ok(VERIFY_READ, new_ctx, ctx_size)
 	    || __get_user(tmp, (u8 __user *) new_ctx)
-	    || __get_user(tmp, (u8 __user *) (new_ctx + 1) - 1))
+	    || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1))
 		return -EFAULT;
 
 	/*
@@ -425,7 +420,7 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,
 			  &frame->uc.uc_stack.ss_flags);
 	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
 	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, signr, NULL,
-				(unsigned long)ka->sa.sa_handler);
+				(unsigned long)ka->sa.sa_handler, 1);
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 	if (err)
 		goto badframe;
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 5337ca7bb64..ff9f7010097 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -101,8 +101,7 @@ void smp_message_recv(int msg)
 		generic_smp_call_function_interrupt();
 		break;
 	case PPC_MSG_RESCHEDULE:
-		/* XXX Do we have to do this? */
-		set_need_resched();
+		/* we notice need_resched on exit */
 		break;
 	case PPC_MSG_CALL_FUNC_SINGLE:
 		generic_smp_call_function_single_interrupt();
 		break;
@@ -453,6 +452,7 @@ int __devinit start_secondary(void *unused)
 	secondary_cpu_time_init();
 
 	ipi_call_lock();
+	notify_cpu_starting(cpu);
 	cpu_set(cpu, cpu_online_map);
 	/* Update sibling maps */
 	base = cpu_first_thread_in_core(cpu);
diff --git a/arch/powerpc/kernel/softemu8xx.c b/arch/powerpc/kernel/softemu8xx.c
index c906c4bf683..23c8c5e7dc4 100644
--- a/arch/powerpc/kernel/softemu8xx.c
+++ b/arch/powerpc/kernel/softemu8xx.c
@@ -23,7 +23,6 @@
 #include <linux/ptrace.h>
 #include <linux/slab.h>
 #include <linux/user.h>
-#include <linux/a.out.h>
 #include <linux/interrupt.h>
 
 #include <asm/pgtable.h>
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index e092c3cbdb9..86ac1d90d02 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -133,7 +133,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	cmpdi	r12,0
 	beq-	nothing_to_copy
-	li	r15,512
+	li	r15,PAGE_SIZE>>3
 copyloop:
 	ld	r13,pbe_address(r12)
 	ld	r14,pbe_orig_address(r12)
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index d98634c7606..bb1cfcfdbbb 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -61,42 +61,6 @@ asmlinkage long ppc32_select(u32 n, compat_ulong_t __user *inp,
 	return compat_sys_select((int)n, inp, outp, exp, compat_ptr(tvp_x));
 }
 
-int cp_compat_stat(struct kstat *stat, struct compat_stat __user *statbuf)
-{
-	compat_ino_t ino;
-	long err;
-
-	if (stat->size > MAX_NON_LFS || !new_valid_dev(stat->dev) ||
-	    !new_valid_dev(stat->rdev))
-		return -EOVERFLOW;
-
-	ino = stat->ino;
-	if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino)
-		return -EOVERFLOW;
-
-	err  = access_ok(VERIFY_WRITE, statbuf, sizeof(*statbuf)) ? 0 : -EFAULT;
-	err |= __put_user(new_encode_dev(stat->dev), &statbuf->st_dev);
-	err |= __put_user(ino, &statbuf->st_ino);
-	err |= __put_user(stat->mode, &statbuf->st_mode);
-	err |= __put_user(stat->nlink, &statbuf->st_nlink);
-	err |= __put_user(stat->uid, &statbuf->st_uid);
-	err |= __put_user(stat->gid, &statbuf->st_gid);
-	err |= __put_user(new_encode_dev(stat->rdev), &statbuf->st_rdev);
-	err |= __put_user(stat->size, &statbuf->st_size);
-	err |= __put_user(stat->atime.tv_sec, &statbuf->st_atime);
-	err |= __put_user(stat->atime.tv_nsec, &statbuf->st_atime_nsec);
-	err |= __put_user(stat->mtime.tv_sec, &statbuf->st_mtime);
-	err |= __put_user(stat->mtime.tv_nsec, &statbuf->st_mtime_nsec);
-	err |= __put_user(stat->ctime.tv_sec, &statbuf->st_ctime);
-	err |= __put_user(stat->ctime.tv_nsec, &statbuf->st_ctime_nsec);
-	err |= __put_user(stat->blksize, &statbuf->st_blksize);
-	err |= __put_user(stat->blocks, &statbuf->st_blocks);
-	err |= __put_user(0, &statbuf->__unused4[0]);
-	err |= __put_user(0, &statbuf->__unused4[1]);
-
-	return err;
-}
-
 /* Note: it is necessary to treat option as an unsigned int,
  * with the corresponding cast to a signed int to insure that the
  * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
@@ -107,77 +71,6 @@ asmlinkage long compat_sys_sysfs(u32 option, u32 arg1, u32 arg2)
 	return sys_sysfs((int)option, arg1, arg2);
 }
 
-asmlinkage long compat_sys_pause(void)
-{
-	current->state = TASK_INTERRUPTIBLE;
-	schedule();
-	
-	return -ERESTARTNOHAND;
-}
-
-static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i)
-{
-	long usec;
-
-	if (!access_ok(VERIFY_READ, i, sizeof(*i)))
-		return -EFAULT;
-	if (__get_user(o->tv_sec, &i->tv_sec))
-		return -EFAULT;
-	if (__get_user(usec, &i->tv_usec))
-		return -EFAULT;
-	o->tv_nsec = usec * 1000;
-	return 0;
-}
-
-static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i)
-{
-	return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
-		(__put_user(i->tv_sec, &o->tv_sec) |
-		 __put_user(i->tv_usec, &o->tv_usec)));
-}
-
-
-
-
-/* Translations due to time_t size differences.  Which affects all
-   sorts of things, like timeval and itimerval.  */
-extern struct timezone sys_tz;
-
-asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
-{
-	if (tv) {
-		struct timeval ktv;
-		do_gettimeofday(&ktv);
-		if (put_tv32(tv, &ktv))
-			return -EFAULT;
-	}
-	if (tz) {
-		if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
-			return -EFAULT;
-	}
-	
-	return 0;
-}
-
-
-
-asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, struct timezone __user *tz)
-{
-	struct timespec kts;
-	struct timezone ktz;
-	
- 	if (tv) {
-		if (get_ts32(&kts, tv))
-			return -EFAULT;
-	}
-	if (tz) {
-		if (copy_from_user(&ktz, tz, sizeof(ktz)))
-			return -EFAULT;
-	}
-
-	return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
-}
-
 #ifdef CONFIG_SYSVIPC
 long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t ptr,
 	       u32 fifth)
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 56d172d16e5..86a2ffccef2 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -15,18 +15,24 @@
 #include <asm/firmware.h>
 #include <asm/hvcall.h>
 #include <asm/prom.h>
-#include <asm/paca.h>
-#include <asm/lppaca.h>
 #include <asm/machdep.h>
 #include <asm/smp.h>
 
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+#include <asm/lppaca.h>
+#endif
+
 static DEFINE_PER_CPU(struct cpu, cpu_devices);
 static DEFINE_PER_CPU(struct kobject *, cache_toplevel);
 
-/* SMT stuff */
+/*
+ * SMT snooze delay stuff, 64-bit only for now
+ */
+
+#ifdef CONFIG_PPC64
 
-#ifdef CONFIG_PPC_MULTIPLATFORM
 /* Time in microseconds we delay before sleeping in the idle loop */
 DEFINE_PER_CPU(unsigned long, smt_snooze_delay) = { 100 };
@@ -106,7 +112,7 @@ static int __init setup_smt_snooze_delay(char *str)
 }
 __setup("smt-snooze-delay=", setup_smt_snooze_delay);
 
-#endif /* CONFIG_PPC_MULTIPLATFORM */
+#endif /* CONFIG_PPC64 */
 
 /*
  * Enabling PMCs will slow partition context switch times so we only do
@@ -115,7 +121,7 @@ __setup("smt-snooze-delay=", setup_smt_snooze_delay);
 
 static DEFINE_PER_CPU(char, pmcs_enabled);
 
-void ppc64_enable_pmcs(void)
+void ppc_enable_pmcs(void)
 {
 	/* Only need to enable them once */
 	if (__get_cpu_var(pmcs_enabled))
@@ -126,8 +132,9 @@ void ppc64_enable_pmcs(void)
 	if (ppc_md.enable_pmcs)
 		ppc_md.enable_pmcs();
 }
-EXPORT_SYMBOL(ppc64_enable_pmcs);
+EXPORT_SYMBOL(ppc_enable_pmcs);
 
+#if defined(CONFIG_6xx) || defined(CONFIG_PPC64)
 /* XXX convert to rusty's on_one_cpu */
 static unsigned long run_on_cpu(unsigned long cpu,
 			        unsigned long (*func)(unsigned long),
@@ -146,6 +153,7 @@ static unsigned long run_on_cpu(unsigned long cpu,
 
 	return ret;
 }
+#endif
 
 #define SYSFS_PMCSETUP(NAME, ADDRESS) \
 static unsigned long read_##NAME(unsigned long junk) \
@@ -154,7 +162,7 @@ static unsigned long read_##NAME(unsigned long junk) \
 } \
 static unsigned long write_##NAME(unsigned long val) \
 { \
-	ppc64_enable_pmcs(); \
+	ppc_enable_pmcs(); \
 	mtspr(ADDRESS, val); \
 	return 0; \
 } \
@@ -184,28 +192,53 @@ static ssize_t __used \
  * that are implemented on the current processor
  */
 
+#if defined(CONFIG_PPC64)
+#define HAS_PPC_PMC_CLASSIC	1
+#define HAS_PPC_PMC_IBM		1
+#define HAS_PPC_PMC_PA6T	1
+#elif defined(CONFIG_6xx)
+#define HAS_PPC_PMC_CLASSIC	1
+#define HAS_PPC_PMC_IBM		1
+#define HAS_PPC_PMC_G4		1
+#endif
+
+
+#ifdef HAS_PPC_PMC_CLASSIC
 SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
 SYSFS_PMCSETUP(mmcr1, SPRN_MMCR1);
-SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
 SYSFS_PMCSETUP(pmc1, SPRN_PMC1);
 SYSFS_PMCSETUP(pmc2, SPRN_PMC2);
 SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
 SYSFS_PMCSETUP(pmc4, SPRN_PMC4);
 SYSFS_PMCSETUP(pmc5, SPRN_PMC5);
 SYSFS_PMCSETUP(pmc6, SPRN_PMC6);
+
+#ifdef HAS_PPC_PMC_G4
+SYSFS_PMCSETUP(mmcr2, SPRN_MMCR2);
+#endif
+
+#ifdef CONFIG_PPC64
 SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
 SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
+
+SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
 SYSFS_PMCSETUP(purr, SPRN_PURR);
 SYSFS_PMCSETUP(spurr, SPRN_SPURR);
 SYSFS_PMCSETUP(dscr, SPRN_DSCR);
 
+static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+static SYSDEV_ATTR(spurr, 0600, show_spurr, NULL);
+static SYSDEV_ATTR(dscr, 0600, show_dscr, store_dscr);
+static SYSDEV_ATTR(purr, 0600, show_purr, store_purr);
+#endif /* CONFIG_PPC64 */
+
+#ifdef HAS_PPC_PMC_PA6T
 SYSFS_PMCSETUP(pa6t_pmc0, SPRN_PA6T_PMC0);
 SYSFS_PMCSETUP(pa6t_pmc1, SPRN_PA6T_PMC1);
 SYSFS_PMCSETUP(pa6t_pmc2, SPRN_PA6T_PMC2);
 SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3);
 SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4);
 SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5);
-
 #ifdef CONFIG_DEBUG_KERNEL
 SYSFS_PMCSETUP(hid0, SPRN_HID0);
 SYSFS_PMCSETUP(hid1, SPRN_HID1);
@@ -236,28 +269,37 @@ SYSFS_PMCSETUP(tsr1, SPRN_PA6T_TSR1);
 SYSFS_PMCSETUP(tsr2, SPRN_PA6T_TSR2);
 SYSFS_PMCSETUP(tsr3, SPRN_PA6T_TSR3);
 #endif /* CONFIG_DEBUG_KERNEL */
+#endif /* HAS_PPC_PMC_PA6T */
 
-static SYSDEV_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
-static SYSDEV_ATTR(spurr, 0600, show_spurr, NULL);
-static SYSDEV_ATTR(dscr, 0600, show_dscr, store_dscr);
-static SYSDEV_ATTR(purr, 0600, show_purr, store_purr);
-
+#ifdef HAS_PPC_PMC_IBM
 static struct sysdev_attribute ibm_common_attrs[] = {
 	_SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
 	_SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
 };
+#endif /* HAS_PPC_PMC_IBM */
+
+#ifdef HAS_PPC_PMC_G4
+static struct sysdev_attribute g4_common_attrs[] = {
+	_SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
+	_SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
+	_SYSDEV_ATTR(mmcr2, 0600, show_mmcr2, store_mmcr2),
+};
+#endif /* HAS_PPC_PMC_G4 */
 
-static struct sysdev_attribute ibm_pmc_attrs[] = {
+static struct sysdev_attribute classic_pmc_attrs[] = {
 	_SYSDEV_ATTR(pmc1, 0600, show_pmc1, store_pmc1),
 	_SYSDEV_ATTR(pmc2, 0600, show_pmc2, store_pmc2),
 	_SYSDEV_ATTR(pmc3, 0600, show_pmc3, store_pmc3),
 	_SYSDEV_ATTR(pmc4, 0600, show_pmc4, store_pmc4),
 	_SYSDEV_ATTR(pmc5, 0600, show_pmc5, store_pmc5),
 	_SYSDEV_ATTR(pmc6, 0600, show_pmc6, store_pmc6),
+#ifdef CONFIG_PPC64
 	_SYSDEV_ATTR(pmc7, 0600, show_pmc7, store_pmc7),
 	_SYSDEV_ATTR(pmc8, 0600, show_pmc8, store_pmc8),
+#endif
 };
 
+#ifdef HAS_PPC_PMC_PA6T
 static struct sysdev_attribute pa6t_attrs[] = {
 	_SYSDEV_ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
 	_SYSDEV_ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
@@ -298,6 +340,8 @@ static struct sysdev_attribute pa6t_attrs[] = {
 	_SYSDEV_ATTR(tsr3, 0600, show_tsr3, store_tsr3),
 #endif /* CONFIG_DEBUG_KERNEL */
 };
+#endif /* HAS_PPC_PMC_PA6T */
+#endif /* HAS_PPC_PMC_CLASSIC */
 
 struct cache_desc {
 	struct kobject kobj;
@@ -588,23 +632,36 @@ static void __cpuinit register_cpu_online(unsigned int cpu)
 	struct sysdev_attribute *attrs, *pmc_attrs;
 	int i, nattrs;
 
+#ifdef CONFIG_PPC64
 	if (!firmware_has_feature(FW_FEATURE_ISERIES) &&
 			cpu_has_feature(CPU_FTR_SMT))
 		sysdev_create_file(s, &attr_smt_snooze_delay);
+#endif
 
 	/* PMC stuff */
 	switch (cur_cpu_spec->pmc_type) {
+#ifdef HAS_PPC_PMC_IBM
 	case PPC_PMC_IBM:
 		attrs = ibm_common_attrs;
 		nattrs = sizeof(ibm_common_attrs) / sizeof(struct sysdev_attribute);
-		pmc_attrs = ibm_pmc_attrs;
+		pmc_attrs = classic_pmc_attrs;
 		break;
+#endif /* HAS_PPC_PMC_IBM */
+#ifdef HAS_PPC_PMC_G4
+	case PPC_PMC_G4:
+		attrs = g4_common_attrs;
+		nattrs = sizeof(g4_common_attrs) / sizeof(struct sysdev_attribute);
+		pmc_attrs = classic_pmc_attrs;
+		break;
+#endif /* HAS_PPC_PMC_G4 */
+#ifdef HAS_PPC_PMC_PA6T
 	case PPC_PMC_PA6T:
 		/* PA Semi starts counting at PMC0 */
 		attrs = pa6t_attrs;
 		nattrs = sizeof(pa6t_attrs) / sizeof(struct sysdev_attribute);
 		pmc_attrs = NULL;
 		break;
+#endif /* HAS_PPC_PMC_PA6T */
 	default:
 		attrs = NULL;
 		nattrs = 0;
@@ -618,6 +675,7 @@ static void __cpuinit register_cpu_online(unsigned int cpu)
 		for (i = 0; i < cur_cpu_spec->num_pmcs; i++)
 			sysdev_create_file(s, &pmc_attrs[i]);
 
+#ifdef CONFIG_PPC64
 	if (cpu_has_feature(CPU_FTR_MMCRA))
 		sysdev_create_file(s, &attr_mmcra);
 
@@ -629,6 +687,7 @@ static void __cpuinit register_cpu_online(unsigned int cpu)
 	if (cpu_has_feature(CPU_FTR_DSCR))
 		sysdev_create_file(s, &attr_dscr);
+#endif /* CONFIG_PPC64 */
 
 	create_cache_info(s);
 }
@@ -641,16 +700,9 @@ static void remove_cache_info(struct sys_device *sysdev)
 	int cpu = sysdev->id;
 
 	cache_desc = per_cpu(cache_desc, cpu);
-	if (cache_desc != NULL) {
-		sysfs_remove_file(&cache_desc->kobj, &cache_size_attr.attr);
-		sysfs_remove_file(&cache_desc->kobj, &cache_line_size_attr.attr);
-		sysfs_remove_file(&cache_desc->kobj, &cache_type_attr.attr);
-		sysfs_remove_file(&cache_desc->kobj, &cache_level_attr.attr);
-		sysfs_remove_file(&cache_desc->kobj, &cache_nr_sets_attr.attr);
-		sysfs_remove_file(&cache_desc->kobj, &cache_assoc_attr.attr);
-
+	if (cache_desc != NULL)
 		kobject_put(&cache_desc->kobj);
-	}
+
 	cache_toplevel = per_cpu(cache_toplevel, cpu);
 	if (cache_toplevel != NULL)
 		kobject_put(cache_toplevel);
@@ -671,17 +723,28 @@ static void unregister_cpu_online(unsigned int cpu)
 
 	/* PMC stuff */
 	switch (cur_cpu_spec->pmc_type) {
+#ifdef HAS_PPC_PMC_IBM
 	case PPC_PMC_IBM:
 		attrs = ibm_common_attrs;
 		nattrs = sizeof(ibm_common_attrs) / sizeof(struct sysdev_attribute);
-		pmc_attrs = ibm_pmc_attrs;
+		pmc_attrs = classic_pmc_attrs;
+		break;
+#endif /* HAS_PPC_PMC_IBM */
+#ifdef HAS_PPC_PMC_G4
+	case PPC_PMC_G4:
+		attrs = g4_common_attrs;
+		nattrs = sizeof(g4_common_attrs) / sizeof(struct sysdev_attribute);
+		pmc_attrs = classic_pmc_attrs;
 		break;
+#endif /* HAS_PPC_PMC_G4 */
+#ifdef HAS_PPC_PMC_PA6T
 	case PPC_PMC_PA6T:
 		/* PA Semi starts counting at PMC0 */
 		attrs = pa6t_attrs;
 		nattrs = sizeof(pa6t_attrs) / sizeof(struct sysdev_attribute);
 		pmc_attrs = NULL;
 		break;
+#endif /* HAS_PPC_PMC_PA6T */
 	default:
 		attrs = NULL;
 		nattrs = 0;
@@ -695,6 +758,7 @@ static void unregister_cpu_online(unsigned int cpu)
 		for (i = 0; i < cur_cpu_spec->num_pmcs; i++)
 			sysdev_remove_file(s, &pmc_attrs[i]);
 
+#ifdef CONFIG_PPC64
 	if (cpu_has_feature(CPU_FTR_MMCRA))
 		sysdev_remove_file(s, &attr_mmcra);
 
@@ -706,6 +770,7 @@ static void unregister_cpu_online(unsigned int cpu)
 	if (cpu_has_feature(CPU_FTR_DSCR))
 		sysdev_remove_file(s, &attr_dscr);
+#endif /* CONFIG_PPC64 */
 
 	remove_cache_info(s);
 }
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 81ccb8dd1a5..f5def6cf5cd 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -23,7 +23,6 @@
 #include <linux/ptrace.h>
 #include <linux/slab.h>
 #include <linux/user.h>
-#include <linux/a.out.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/module.h>
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index cb01ebc5938..7b7da8cfd5e 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -142,7 +142,7 @@ unsigned int udbg_probe_uart_speed(void __iomem *comport, unsigned int clock)
 	speed = (clock / prescaler) / (divisor * 16);
 
 	/* sanity check */
-	if (speed < 0 || speed > (clock / 16))
+	if (speed > (clock / 16))
 		speed = 9600;
 
 	return speed;
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 2750fbab197..a11e6bc59b3 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -516,10 +516,10 @@ static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
 	vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
 }
 
-static dma_addr_t vio_dma_iommu_map_single(struct device *dev, void *vaddr,
-                                           size_t size,
-                                           enum dma_data_direction direction,
-                                           struct dma_attrs *attrs)
+static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
+                                         unsigned long offset, size_t size,
+                                         enum dma_data_direction direction,
+                                         struct dma_attrs *attrs)
 {
 	struct vio_dev *viodev = to_vio_dev(dev);
 	dma_addr_t ret = DMA_ERROR_CODE;
@@ -529,7 +529,7 @@ static dma_addr_t vio_dma_iommu_map_single(struct device *dev, void *vaddr,
 		return ret;
 	}
 
-	ret = dma_iommu_ops.map_single(dev, vaddr, size, direction, attrs);
+	ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs);
 	if (unlikely(dma_mapping_error(dev, ret))) {
 		vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
 		atomic_inc(&viodev->cmo.allocs_failed);
@@ -538,14 +538,14 @@ static dma_addr_t vio_dma_iommu_map_single(struct device *dev, void *vaddr,
 	return ret;
 }
 
-static void vio_dma_iommu_unmap_single(struct device *dev,
-		dma_addr_t dma_handle, size_t size,
-		enum dma_data_direction direction,
-		struct dma_attrs *attrs)
+static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
+				     size_t size,
+				     enum dma_data_direction direction,
+				     struct dma_attrs *attrs)
 {
 	struct vio_dev *viodev = to_vio_dev(dev);
 
-	dma_iommu_ops.unmap_single(dev, dma_handle, size, direction, attrs);
+	dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs);
 
 	vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
 }
@@ -603,10 +603,11 @@ static void vio_dma_iommu_unmap_sg(struct device *dev,
 struct dma_mapping_ops vio_dma_mapping_ops = {
 	.alloc_coherent = vio_dma_iommu_alloc_coherent,
 	.free_coherent  = vio_dma_iommu_free_coherent,
-	.map_single     = vio_dma_iommu_map_single,
-	.unmap_single   = vio_dma_iommu_unmap_single,
 	.map_sg         = vio_dma_iommu_map_sg,
 	.unmap_sg       = vio_dma_iommu_unmap_sg,
+	.map_page       = vio_dma_iommu_map_page,
+	.unmap_page     = vio_dma_iommu_unmap_page,
+
 };
 
 /**
@@ -1232,7 +1233,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
 	else
 		viodev->dev.archdata.dma_ops = &dma_iommu_ops;
 	viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev);
-	viodev->dev.archdata.numa_node = of_node_to_nid(of_node);
+	set_dev_node(&viodev->dev, of_node_to_nid(of_node));
 
 	/* init generic 'struct device' fields: */
 	viodev->dev.parent = &vio_bus_device.dev;
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 9f6c1ca1739..2412c056baa 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -187,6 +187,23 @@ SECTIONS
 		*(.machine.desc)
 		__machine_desc_end = . ;
 	}
+#ifdef CONFIG_RELOCATABLE
+	. = ALIGN(8);
+	.dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) { *(.dynsym) }
+	.dynstr : AT(ADDR(.dynstr) - LOAD_OFFSET) { *(.dynstr) }
+	.dynamic : AT(ADDR(.dynamic) - LOAD_OFFSET)
+	{
+		__dynamic_start = .;
+		*(.dynamic)
+	}
+	.hash : AT(ADDR(.hash) - LOAD_OFFSET) { *(.hash) }
+	.interp : AT(ADDR(.interp) - LOAD_OFFSET) { *(.interp) }
+	.rela.dyn : AT(ADDR(.rela.dyn) - LOAD_OFFSET)
+	{
+		__rela_dyn_start = .;
+		*(.rela*)
+	}
+#endif
 
 	/* freed after init ends here */
 	. = ALIGN(PAGE_SIZE);
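
A note for readers tracing the new reloc_64.S: the routine is compact enough that a C rendering helps. The sketch below mirrors its control flow (scan .dynamic for the RELA and RELACOUNT tags, derive the load-time and final offsets from _stext, then patch each R_PPC64_RELATIVE slot). It is a standalone illustration, not kernel code; the names relocate_c, dyn, rela_run, stext_run and final_addr are invented here, and the __dynamic_start/__rela_dyn_start symbols the assembly reads through p_dyn/p_rela come from the CONFIG_RELOCATABLE block added to vmlinux.lds.S above.

/*
 * Illustrative C equivalent of relocate() in reloc_64.S.
 * Standalone sketch; struct layouts follow the standard ELF64 definitions.
 */
#include <stdint.h>

#define DT_RELA           7           /* "RELA" in the asm */
#define DT_RELACOUNT      0x6ffffff9  /* "RELACOUNT" in the asm */
#define R_PPC64_RELATIVE  22

typedef struct { int64_t d_tag; uint64_t d_val; } Elf64_Dyn;
typedef struct { uint64_t r_offset; uint64_t r_info; int64_t r_addend; } Elf64_Rela;

static void relocate_c(Elf64_Dyn *dyn, Elf64_Rela *rela_run,
		       uint64_t stext_run, uint64_t final_addr)
{
	uint64_t rela_link = 0, count = 0;

	/* Scan .dynamic for the RELA pointer and RELACOUNT (labels 1-3). */
	for (; dyn->d_tag != 0; dyn++) {
		if (dyn->d_tag == DT_RELA)
			rela_link = dyn->d_val;	/* link-time addr of .rela.dyn */
		else if (dyn->d_tag == DT_RELACOUNT)
			count = dyn->d_val;
	}
	if (rela_link == 0 || count == 0)
		return;				/* label 4: need both entries */

	/*
	 * cur_offset   = rela.run - rela.link   (how far we slid at load)
	 * final_offset = final - stext.link     (how far we still must slide)
	 */
	uint64_t cur_offset   = (uint64_t)rela_run - rela_link;
	uint64_t final_offset = final_addr - (stext_run - cur_offset);

	/* Label 5: patch each R_PPC64_RELATIVE slot; like the asm, stop
	 * at the first entry of any other type. */
	for (uint64_t i = 0; i < count; i++, rela_run++) {
		if ((uint32_t)rela_run->r_info != R_PPC64_RELATIVE)
			break;
		*(uint64_t *)(rela_run->r_offset + cur_offset) =
			(uint64_t)rela_run->r_addend + final_offset;
	}
}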
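
The smp_release_cpus() change in setup_64.c is easy to misread as cosmetic. Previously the boot CPU wrote a bare "go" flag (1) and the held secondaries branched to a fixed, link-time kernel location; with a relocatable kernel that location may be wrong, so the released value is now the physical address to branch to. This is a conceptual C rendering only (the real wait loop is the __secondary_hold code in head_64.S, and secondary_hold_wait is an invented name):

extern volatile unsigned long __secondary_hold_spinloop;

static void secondary_hold_wait(void)	/* hypothetical name, sketch only */
{
	unsigned long target;

	/* Secondaries spin here until the boot CPU stores a release value. */
	while ((target = __secondary_hold_spinloop) == 0)
		;

	/*
	 * Old scheme: any non-zero value meant "go" and the branch target
	 * was hard-wired.  New scheme: the stored value is the physical
	 * address of generic_secondary_smp_init, so the handoff works
	 * wherever the relocated kernel actually sits.
	 */
	((void (*)(void))target)();
}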
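
The sys_swapcontext() rework in signal_32.c and signal_64.c replaces a hard failure (returning -EINVAL when the caller's context was too small while the thread had used VSX) with a flag: a small, pre-VSX context is accepted and the VSX region is simply skipped when saving. A standalone model of the sizing rule follows; UCONTEXT_SIZE_WITH_VSX is a stand-in constant for the real sizeof(struct ucontext), and check_ctx_size is an invented name:

#include <stddef.h>
#include <errno.h>

#define UCONTEXT_SIZE_WITH_VSX	1440UL		/* stand-in value */
#define MSR_VSX			(1UL << 23)	/* matches the kernel's MSR_VSX bit */

static int check_ctx_size(size_t ctx_size, unsigned long new_msr,
			  int *ctx_has_vsx_region)
{
	/* A context too small for VSX must not claim VSX in its MSR. */
	if (ctx_size < UCONTEXT_SIZE_WITH_VSX && (new_msr & MSR_VSX))
		return -EINVAL;

	/*
	 * Otherwise a small (pre-VSX) context is accepted; the flag just
	 * tells the save path to skip the VSX region entirely.
	 */
	*ctx_has_vsx_region = (ctx_size >= UCONTEXT_SIZE_WITH_VSX);
	return 0;
}

save_user_regs() and setup_sigcontext() receive the resulting ctx_has_vsx_region and skip copy_vsx_to_user() when it is clear; restoring a VSX-enabled MSR into a small context is still rejected, since there would be nowhere to reload the VSX state from.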
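
In sysfs.c, SYSFS_PMCSETUP(NAME, ADDRESS) generates a read/write helper pair per SPR, and the rename from ppc64_enable_pmcs() to ppc_enable_pmcs() threads through the write side. Hand-expanding the macro for mmcr0 gives roughly the following; the read body is elided in the hunk above, so the mfspr() call here is an assumption inferred from the write side:

/* Hand-expanded sketch of SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0). */
static unsigned long read_mmcr0(unsigned long junk)
{
	return mfspr(SPRN_MMCR0);	/* assumed: a bare SPR read */
}

static unsigned long write_mmcr0(unsigned long val)
{
	ppc_enable_pmcs();		/* renamed from ppc64_enable_pmcs() */
	mtspr(SPRN_MMCR0, val);
	return 0;
}

The generated show/store wrappers (the "static ssize_t __used" block named in the hunk header) route these helpers through run_on_cpu(), so the SPR access happens on the CPU whose sysfs file was touched.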
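
The udbg_16550.c change removes a test that could never fire: speed is an unsigned int, so the "speed < 0" half of the sanity check is always false (and draws a compiler warning), leaving the upper-bound test to do all the work. In isolation:

unsigned int speed = 115200;	/* unsigned, as in udbg_probe_uart_speed() */

if (speed < 0)		/* always false for an unsigned type */
	speed = 9600;	/* dead code; only the clock/16 bound can clamp */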
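
Finally, the vio.c conversion from map_single/unmap_single to map_page/unmap_page follows the generic dma_mapping_ops migration of this cycle; nothing is lost, because any kernel linear-mapping address decomposes into a page plus an offset. A kernel-style sketch of the equivalence (not from the patch; map_single_via_map_page is an invented helper, while dma_iommu_ops, virt_to_page() and offset_in_page() are the real kernel names):

#include <linux/mm.h>		/* virt_to_page(), offset_in_page() */
#include <linux/dma-mapping.h>

/* Hypothetical helper: how a map_single-style call maps onto map_page. */
static inline dma_addr_t map_single_via_map_page(struct device *dev,
		void *vaddr, size_t size, enum dma_data_direction dir,
		struct dma_attrs *attrs)
{
	return dma_iommu_ops.map_page(dev, virt_to_page(vaddr),
				      offset_in_page(vaddr), size, dir, attrs);
}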
