aboutsummaryrefslogtreecommitdiff
path: root/drivers/platform/goldfish/pdev_bus.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/platform/goldfish/pdev_bus.c')
-rw-r--r--drivers/platform/goldfish/pdev_bus.c11
1 files changed, 5 insertions, 6 deletions
diff --git a/drivers/platform/goldfish/pdev_bus.c b/drivers/platform/goldfish/pdev_bus.c
index 92cc4cfafde..8c43589c3ed 100644
--- a/drivers/platform/goldfish/pdev_bus.c
+++ b/drivers/platform/goldfish/pdev_bus.c
@@ -36,6 +36,7 @@
#define PDEV_BUS_IO_SIZE (0x14)
#define PDEV_BUS_IRQ (0x18)
#define PDEV_BUS_IRQ_COUNT (0x1c)
+#define PDEV_BUS_GET_NAME_HIGH (0x20)
struct pdev_bus_dev {
struct list_head list;
@@ -129,7 +130,10 @@ static int goldfish_new_pdev(void)
dev->pdev.dev.dma_mask = (void *)(dev->pdev.name + name_len + 1);
*dev->pdev.dev.dma_mask = ~0;
- writel((unsigned long)name, pdev_bus_base + PDEV_BUS_GET_NAME);
+#ifdef CONFIG_64BIT
+ writel((u32)((u64)name>>32), pdev_bus_base + PDEV_BUS_GET_NAME_HIGH);
+#endif
+ writel((u32)(unsigned long)name, pdev_bus_base + PDEV_BUS_GET_NAME);
name[name_len] = '\0';
dev->pdev.id = readl(pdev_bus_base + PDEV_BUS_ID);
dev->pdev.resource[0].start = base;
@@ -184,11 +188,6 @@ static int goldfish_pdev_bus_probe(struct platform_device *pdev)
pdev_bus_addr = r->start;
pdev_bus_len = resource_size(r);
- if (request_mem_region(pdev_bus_addr, pdev_bus_len, "goldfish")) {
- dev_err(&pdev->dev, "unable to reserve Goldfish MMIO.\n");
- return -EBUSY;
- }
-
pdev_bus_base = ioremap(pdev_bus_addr, pdev_bus_len);
if (pdev_bus_base == NULL) {
ret = -ENOMEM;
einfo.h?id2=cfedc920a7bca828fd4af8e203f7d8514990f999'>arch/powerpc/kernel/cacheinfo.h8
-rw-r--r--arch/powerpc/kernel/compat_audit.c5
-rw-r--r--arch/powerpc/kernel/cpu_setup_44x.S74
-rw-r--r--arch/powerpc/kernel/cpu_setup_6xx.S54
-rw-r--r--arch/powerpc/kernel/cpu_setup_fsl_booke.S225
-rw-r--r--arch/powerpc/kernel/cpu_setup_pa6t.S44
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S182
-rw-r--r--arch/powerpc/kernel/cpu_setup_ppc970.S44
-rw-r--r--arch/powerpc/kernel/cputable.c1212
-rw-r--r--arch/powerpc/kernel/crash.c393
-rw-r--r--arch/powerpc/kernel/crash_dump.c92
-rw-r--r--arch/powerpc/kernel/dbell.c57
-rw-r--r--arch/powerpc/kernel/dma-iommu.c119
-rw-r--r--arch/powerpc/kernel/dma-swiotlb.c127
-rw-r--r--arch/powerpc/kernel/dma.c243
-rw-r--r--arch/powerpc/kernel/dma_64.c151
-rw-r--r--arch/powerpc/kernel/eeh.c1183
-rw-r--r--arch/powerpc/kernel/eeh_cache.c310
-rw-r--r--arch/powerpc/kernel/eeh_dev.c112
-rw-r--r--arch/powerpc/kernel/eeh_driver.c870
-rw-r--r--arch/powerpc/kernel/eeh_event.c196
-rw-r--r--arch/powerpc/kernel/eeh_pe.c887
-rw-r--r--arch/powerpc/kernel/eeh_sysfs.c98
-rw-r--r--arch/powerpc/kernel/entry_32.S602
-rw-r--r--arch/powerpc/kernel/entry_64.S981
-rw-r--r--arch/powerpc/kernel/epapr_hcalls.S55
-rw-r--r--arch/powerpc/kernel/epapr_paravirt.c85
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S1650
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S1793
-rw-r--r--arch/powerpc/kernel/fadump.c1316
-rw-r--r--arch/powerpc/kernel/firmware.c5
-rw-r--r--arch/powerpc/kernel/fpu.S143
-rw-r--r--arch/powerpc/kernel/fsl_booke_entry_mapping.S237
-rw-r--r--arch/powerpc/kernel/ftrace.c591
-rw-r--r--arch/powerpc/kernel/head_32.S604
-rw-r--r--arch/powerpc/kernel/head_40x.S (renamed from arch/powerpc/kernel/head_4xx.S)279
-rw-r--r--arch/powerpc/kernel/head_44x.S1407
-rw-r--r--arch/powerpc/kernel/head_64.S2037
-rw-r--r--arch/powerpc/kernel/head_8xx.S403
-rw-r--r--arch/powerpc/kernel/head_booke.h298
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S1297
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c366
-rw-r--r--arch/powerpc/kernel/ibmebus.c840
-rw-r--r--arch/powerpc/kernel/idle.c108
-rw-r--r--arch/powerpc/kernel/idle_6xx.S6
-rw-r--r--arch/powerpc/kernel/idle_book3e.S101
-rw-r--r--arch/powerpc/kernel/idle_e500.S106
-rw-r--r--arch/powerpc/kernel/idle_power4.S30
-rw-r--r--arch/powerpc/kernel/idle_power7.S187
-rw-r--r--arch/powerpc/kernel/init_task.c36
-rw-r--r--arch/powerpc/kernel/io-workarounds.c212
-rw-r--r--arch/powerpc/kernel/io.c117
-rw-r--r--arch/powerpc/kernel/iomap.c73
-rw-r--r--arch/powerpc/kernel/iommu.c962
-rw-r--r--arch/powerpc/kernel/irq.c1155
-rw-r--r--arch/powerpc/kernel/isa-bridge.c266
-rw-r--r--arch/powerpc/kernel/jump_label.c25
-rw-r--r--arch/powerpc/kernel/kgdb.c494
-rw-r--r--arch/powerpc/kernel/kprobes.c189
-rw-r--r--arch/powerpc/kernel/kvm.c742
-rw-r--r--arch/powerpc/kernel/kvm_emul.S348
-rw-r--r--arch/powerpc/kernel/l2cr_6xx.S4
-rw-r--r--arch/powerpc/kernel/legacy_serial.c278
-rw-r--r--arch/powerpc/kernel/lparcfg.c620
-rw-r--r--arch/powerpc/kernel/lparmap.c31
-rw-r--r--arch/powerpc/kernel/machine_kexec.c252
-rw-r--r--arch/powerpc/kernel/machine_kexec_32.c10
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c254
-rw-r--r--arch/powerpc/kernel/mce.c352
-rw-r--r--arch/powerpc/kernel/mce_power.c313
-rw-r--r--arch/powerpc/kernel/misc.S77
-rw-r--r--arch/powerpc/kernel/misc_32.S816
-rw-r--r--arch/powerpc/kernel/misc_64.S387
-rw-r--r--arch/powerpc/kernel/module.c79
-rw-r--r--arch/powerpc/kernel/module_32.c181
-rw-r--r--arch/powerpc/kernel/module_64.c476
-rw-r--r--arch/powerpc/kernel/msi.c43
-rw-r--r--arch/powerpc/kernel/nvram_64.c584
-rw-r--r--arch/powerpc/kernel/of_device.c248
-rw-r--r--arch/powerpc/kernel/of_platform.c125
-rw-r--r--arch/powerpc/kernel/paca.c288
-rw-r--r--arch/powerpc/kernel/pci-common.c1681
-rw-r--r--arch/powerpc/kernel/pci-hotplug.c109
-rw-r--r--arch/powerpc/kernel/pci_32.c1797
-rw-r--r--arch/powerpc/kernel/pci_64.c1399
-rw-r--r--arch/powerpc/kernel/pci_direct_iommu.c98
-rw-r--r--arch/powerpc/kernel/pci_dn.c122
-rw-r--r--arch/powerpc/kernel/pci_iommu.c164
-rw-r--r--arch/powerpc/kernel/pci_of_scan.c404
-rw-r--r--arch/powerpc/kernel/perfmon_fsl_booke.c221
-rw-r--r--arch/powerpc/kernel/pmc.c48
-rw-r--r--arch/powerpc/kernel/ppc32.h78
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c103
-rw-r--r--arch/powerpc/kernel/ppc_save_regs.S75
-rw-r--r--arch/powerpc/kernel/proc_powerpc.c (renamed from arch/powerpc/kernel/proc_ppc64.c)114
-rw-r--r--arch/powerpc/kernel/process.c1243
-rw-r--r--arch/powerpc/kernel/prom.c1803
-rw-r--r--arch/powerpc/kernel/prom_init.c1572
-rw-r--r--arch/powerpc/kernel/prom_init_check.sh79
-rw-r--r--arch/powerpc/kernel/prom_parse.c977
-rw-r--r--arch/powerpc/kernel/ptrace-common.h161
-rw-r--r--arch/powerpc/kernel/ptrace.c1879
-rw-r--r--arch/powerpc/kernel/ptrace32.c301
-rw-r--r--arch/powerpc/kernel/reloc_32.S209
-rw-r--r--arch/powerpc/kernel/reloc_64.S88
-rw-r--r--arch/powerpc/kernel/rtas-proc.c65
-rw-r--r--arch/powerpc/kernel/rtas-rtc.c29
-rw-r--r--arch/powerpc/kernel/rtas.c586
-rw-r--r--arch/powerpc/kernel/rtas_flash.c635
-rw-r--r--arch/powerpc/kernel/rtas_pci.c226
-rw-r--r--arch/powerpc/kernel/rtasd.c602
-rw-r--r--arch/powerpc/kernel/semaphore.c135
-rw-r--r--arch/powerpc/kernel/setup-common.c446
-rw-r--r--arch/powerpc/kernel/setup.h9
-rw-r--r--arch/powerpc/kernel/setup_32.c214
-rw-r--r--arch/powerpc/kernel/setup_64.c557
-rw-r--r--arch/powerpc/kernel/signal.c212
-rw-r--r--arch/powerpc/kernel/signal.h62
-rw-r--r--arch/powerpc/kernel/signal_32.c1284
-rw-r--r--arch/powerpc/kernel/signal_64.c712
-rw-r--r--arch/powerpc/kernel/smp-tbsync.c29
-rw-r--r--arch/powerpc/kernel/smp.c830
-rw-r--r--arch/powerpc/kernel/stacktrace.c63
-rw-r--r--arch/powerpc/kernel/suspend.c25
-rw-r--r--arch/powerpc/kernel/swsusp.c38
-rw-r--r--arch/powerpc/kernel/swsusp_32.S12
-rw-r--r--arch/powerpc/kernel/swsusp_64.c24
-rw-r--r--arch/powerpc/kernel/swsusp_asm64.S273
-rw-r--r--arch/powerpc/kernel/swsusp_booke.S201
-rw-r--r--arch/powerpc/kernel/sys_ppc32.c781
-rw-r--r--arch/powerpc/kernel/syscalls.c212
-rw-r--r--arch/powerpc/kernel/sysfs.c987
-rw-r--r--arch/powerpc/kernel/systbl.S20
-rw-r--r--arch/powerpc/kernel/systbl_chk.c58
-rw-r--r--arch/powerpc/kernel/systbl_chk.sh33
-rw-r--r--arch/powerpc/kernel/tau_6xx.c6
-rw-r--r--arch/powerpc/kernel/time.c1331
-rw-r--r--arch/powerpc/kernel/tm.S481
-rw-r--r--arch/powerpc/kernel/traps.c1486
-rw-r--r--arch/powerpc/kernel/udbg.c102
-rw-r--r--arch/powerpc/kernel/udbg_16550.c299
-rw-r--r--arch/powerpc/kernel/uprobes.c207
-rw-r--r--arch/powerpc/kernel/vdso.c308
-rw-r--r--arch/powerpc/kernel/vdso32/.gitignore1
-rw-r--r--arch/powerpc/kernel/vdso32/Makefile31
-rw-r--r--arch/powerpc/kernel/vdso32/cacheflush.S41
-rw-r--r--arch/powerpc/kernel/vdso32/getcpu.S45
-rw-r--r--arch/powerpc/kernel/vdso32/gettimeofday.S272
-rw-r--r--arch/powerpc/kernel/vdso32/sigtramp.S2
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32.lds.S229
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32_wrapper.S6
-rw-r--r--arch/powerpc/kernel/vdso64/.gitignore1
-rw-r--r--arch/powerpc/kernel/vdso64/Makefile29
-rw-r--r--arch/powerpc/kernel/vdso64/cacheflush.S41
-rw-r--r--arch/powerpc/kernel/vdso64/getcpu.S45
-rw-r--r--arch/powerpc/kernel/vdso64/gettimeofday.S184
-rw-r--r--arch/powerpc/kernel/vdso64/sigtramp.S27
-rw-r--r--arch/powerpc/kernel/vdso64/vdso64.lds.S227
-rw-r--r--arch/powerpc/kernel/vdso64/vdso64_wrapper.S6
-rw-r--r--arch/powerpc/kernel/vecemu.c6
-rw-r--r--arch/powerpc/kernel/vector.S290
-rw-r--r--arch/powerpc/kernel/vio.c1613
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S249
171 files changed, 45219 insertions, 21299 deletions
diff --git a/arch/powerpc/kernel/.gitignore b/arch/powerpc/kernel/.gitignore
new file mode 100644
index 00000000000..c5f676c3c22
--- /dev/null
+++ b/arch/powerpc/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 8b133afbdc2..670c312d914 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -2,90 +2,167 @@
# Makefile for the linux kernel.
#
+CFLAGS_prom.o = -I$(src)/../../../scripts/dtc/libfdt
+CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
+
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
ifeq ($(CONFIG_PPC64),y)
-EXTRA_CFLAGS += -mno-minimal-toc
+CFLAGS_prom_init.o += $(NO_MINIMAL_TOC)
endif
ifeq ($(CONFIG_PPC32),y)
CFLAGS_prom_init.o += -fPIC
CFLAGS_btext.o += -fPIC
endif
-obj-y := semaphore.o cputable.o ptrace.o syscalls.o \
+ifdef CONFIG_FUNCTION_TRACER
+# Do not trace early boot code
+CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog
+CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog
+CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog
+CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog
+# do not trace tracer code
+CFLAGS_REMOVE_ftrace.o = -pg -mno-sched-epilog
+# timers used by tracing
+CFLAGS_REMOVE_time.o = -pg -mno-sched-epilog
+endif
+
+obj-y := cputable.o ptrace.o syscalls.o \
irq.o align.o signal_32.o pmc.o vdso.o \
- init_task.o process.o systbl.o idle.o
-obj-y += vdso32/
-obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \
+ process.o systbl.o idle.o \
+ signal.o sysfs.o cacheinfo.o time.o \
+ prom.o traps.o setup-common.o \
+ udbg.o misc.o io.o dma.o \
+ misc_$(CONFIG_WORD_SIZE).o vdso32/
+obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
- paca.o cpu_setup_ppc970.o \
- firmware.o sysfs.o
+ paca.o nvram_64.o firmware.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
+obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
+obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
+obj64-$(CONFIG_RELOCATABLE) += reloc_64.o
+obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
obj-$(CONFIG_PPC64) += vdso64/
-obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o
+obj-$(CONFIG_ALTIVEC) += vecemu.o
obj-$(CONFIG_PPC_970_NAP) += idle_power4.o
-obj-$(CONFIG_PPC_OF) += of_device.o prom_parse.o
-procfs-$(CONFIG_PPC64) := proc_ppc64.o
+obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o
+obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o
+procfs-y := proc_powerpc.o
obj-$(CONFIG_PROC_FS) += $(procfs-y)
-rtaspci-$(CONFIG_PPC64) := rtas_pci.o
-obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y)
+rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o
+obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y)
+obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o
obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
-obj-$(CONFIG_LPARCFG) += lparcfg.o
obj-$(CONFIG_IBMVIO) += vio.o
obj-$(CONFIG_IBMEBUS) += ibmebus.o
+obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
+ eeh_driver.o eeh_event.o eeh_sysfs.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
-obj64-$(CONFIG_PPC_MULTIPLATFORM) += nvram_64.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+obj-$(CONFIG_FA_DUMP) += fadump.o
+ifeq ($(CONFIG_PPC32),y)
+obj-$(CONFIG_E500) += idle_e500.o
+endif
obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o
obj-$(CONFIG_TAU) += tau_6xx.o
-obj32-$(CONFIG_SOFTWARE_SUSPEND) += swsusp_32.o
-obj32-$(CONFIG_MODULES) += module_32.o
-obj-$(CONFIG_E500) += perfmon_fsl_booke.o
-
-ifeq ($(CONFIG_PPC_MERGE),y)
+obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o
+ifeq ($(CONFIG_FSL_BOOKE),y)
+obj-$(CONFIG_HIBERNATION) += swsusp_booke.o
+else
+obj-$(CONFIG_HIBERNATION) += swsusp_$(CONFIG_WORD_SIZE).o
+endif
+obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o
+obj-$(CONFIG_MODULES) += module.o module_$(CONFIG_WORD_SIZE).o
+obj-$(CONFIG_44x) += cpu_setup_44x.o
+obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o
+obj-$(CONFIG_PPC_DOORBELL) += dbell.o
+obj-$(CONFIG_JUMP_LABEL) += jump_label.o
-extra-$(CONFIG_PPC_STD_MMU) := head_32.o
-extra-$(CONFIG_PPC64) := head_64.o
-extra-$(CONFIG_40x) := head_4xx.o
+extra-y := head_$(CONFIG_WORD_SIZE).o
+extra-$(CONFIG_40x) := head_40x.o
extra-$(CONFIG_44x) := head_44x.o
extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o
extra-$(CONFIG_8xx) := head_8xx.o
extra-y += vmlinux.lds
-obj-y += time.o prom.o traps.o setup-common.o \
- udbg.o misc.o io.o
-obj-$(CONFIG_PPC32) += entry_32.o setup_32.o misc_32.o
-obj-$(CONFIG_PPC64) += misc_64.o dma_64.o iommu.o
-obj-$(CONFIG_PPC_MULTIPLATFORM) += prom_init.o
+obj-$(CONFIG_RELOCATABLE_PPC32) += reloc_32.o
+
+obj-$(CONFIG_PPC32) += entry_32.o setup_32.o
+obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o
+obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_MODULES) += ppc_ksyms.o
obj-$(CONFIG_BOOTX_TEXT) += btext.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_UPROBES) += uprobes.o
obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
-module-$(CONFIG_PPC64) += module_64.o
-obj-$(CONFIG_MODULES) += $(module-y)
-
-pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o pci_iommu.o \
- pci_direct_iommu.o iomap.o
-pci32-$(CONFIG_PPC32) := pci_32.o
-obj-$(CONFIG_PCI) += $(pci64-y) $(pci32-y)
-kexec-$(CONFIG_PPC64) := machine_kexec_64.o
-kexec-$(CONFIG_PPC32) := machine_kexec_32.o
-obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o $(kexec-y)
+obj-$(CONFIG_STACKTRACE) += stacktrace.o
+obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o
+
+pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o
+obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \
+ pci-common.o pci_of_scan.o
+obj-$(CONFIG_PCI_MSI) += msi.o
+obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \
+ machine_kexec_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_AUDIT) += audit.o
obj64-$(CONFIG_AUDIT) += compat_audit.o
-ifeq ($(CONFIG_PPC_ISERIES),y)
-$(obj)/head_64.o: $(obj)/lparmap.s
-AFLAGS_head_64.o += -I$(obj)
-endif
+obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o
-else
-# stuff used from here for ARCH=ppc
-smpobj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
+obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
+ifneq ($(CONFIG_PPC_INDIRECT_PIO),y)
+obj-y += iomap.o
endif
-obj-$(CONFIG_PPC32) += $(obj32-y)
+obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o
+
obj-$(CONFIG_PPC64) += $(obj64-y)
+obj-$(CONFIG_PPC32) += $(obj32-y)
+
+ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),)
+obj-y += ppc_save_regs.o
+endif
+
+obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o
+obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
+
+# Disable GCOV in odd or sensitive code
+GCOV_PROFILE_prom_init.o := n
+GCOV_PROFILE_ftrace.o := n
+GCOV_PROFILE_machine_kexec_64.o := n
+GCOV_PROFILE_machine_kexec_32.o := n
+GCOV_PROFILE_kprobes.o := n
extra-$(CONFIG_PPC_FPU) += fpu.o
+extra-$(CONFIG_ALTIVEC) += vector.o
extra-$(CONFIG_PPC64) += entry_64.o
+extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o
+
+extra-y += systbl_chk.i
+$(obj)/systbl.o: systbl_chk
+
+quiet_cmd_systbl_chk = CALL $<
+ cmd_systbl_chk = $(CONFIG_SHELL) $< $(obj)/systbl_chk.i
+
+PHONY += systbl_chk
+systbl_chk: $(src)/systbl_chk.sh $(obj)/systbl_chk.i
+ $(call cmd,systbl_chk)
+
+ifeq ($(CONFIG_PPC_OF_BOOT_TRAMPOLINE),y)
+$(obj)/built-in.o: prom_init_check
+
+quiet_cmd_prom_init_check = CALL $<
+ cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" "$(obj)/prom_init.o"
+
+PHONY += prom_init_check
+prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o
+ $(call cmd,prom_init_check)
+endif
+
+clean-files := vmlinux.lds
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 4734b5de599..34f55524d45 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -21,24 +21,24 @@
#include <linux/mm.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
-#include <asm/system.h>
#include <asm/cache.h>
#include <asm/cputable.h>
+#include <asm/emulated_ops.h>
+#include <asm/switch_to.h>
+#include <asm/disassemble.h>
struct aligninfo {
unsigned char len;
unsigned char flags;
};
-#define IS_XFORM(inst) (((inst) >> 26) == 31)
-#define IS_DSFORM(inst) (((inst) >> 26) >= 56)
#define INVALID { 0, 0 }
/* Bits in the flags field */
#define LD 0 /* load */
#define ST 1 /* store */
-#define SE 2 /* sign-extend value */
+#define SE 2 /* sign-extend value, or FP ld/st as word */
#define F 4 /* to/from fp regs */
#define U 8 /* update index register */
#define M 0x10 /* multiple load/store */
@@ -46,12 +46,13 @@ struct aligninfo {
#define S 0x40 /* single-precision fp or... */
#define SX 0x40 /* ... byte count in XER */
#define HARD 0x80 /* string, stwcx. */
+#define E4 0x40 /* SPE endianness is word */
+#define E8 0x80 /* SPE endianness is double word */
+#define SPLT 0x80 /* VSX SPLAT load */
/* DSISR bits reported for a DCBZ instruction: */
#define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */
-#define SWAP(a, b) (t = (a), (a) = (b), (b) = t)
-
/*
* The PowerPC stores certain bits of the instruction that caused the
* alignment exception in the DSISR register. This array maps those
@@ -71,7 +72,7 @@ static struct aligninfo aligninfo[128] = {
{ 8, LD+F }, /* 00 0 1001: lfd */
{ 4, ST+F+S }, /* 00 0 1010: stfs */
{ 8, ST+F }, /* 00 0 1011: stfd */
- INVALID, /* 00 0 1100 */
+ { 16, LD }, /* 00 0 1100: lq */
{ 8, LD }, /* 00 0 1101: ld/ldu/lwa */
INVALID, /* 00 0 1110 */
{ 8, ST }, /* 00 0 1111: std/stdu */
@@ -87,9 +88,9 @@ static struct aligninfo aligninfo[128] = {
{ 8, LD+F+U }, /* 00 1 1001: lfdu */
{ 4, ST+F+S+U }, /* 00 1 1010: stfsu */
{ 8, ST+F+U }, /* 00 1 1011: stfdu */
- INVALID, /* 00 1 1100 */
+ { 16, LD+F }, /* 00 1 1100: lfdp */
INVALID, /* 00 1 1101 */
- INVALID, /* 00 1 1110 */
+ { 16, ST+F }, /* 00 1 1110: stfdp */
INVALID, /* 00 1 1111 */
{ 8, LD }, /* 01 0 0000: ldx */
INVALID, /* 01 0 0001 */
@@ -138,7 +139,7 @@ static struct aligninfo aligninfo[128] = {
{ 2, LD+SW }, /* 10 0 1100: lhbrx */
{ 4, LD+SE }, /* 10 0 1101 lwa */
{ 2, ST+SW }, /* 10 0 1110: sthbrx */
- INVALID, /* 10 0 1111 */
+ { 16, ST }, /* 10 0 1111: stq */
INVALID, /* 10 1 0000 */
INVALID, /* 10 1 0001 */
INVALID, /* 10 1 0010 */
@@ -167,10 +168,10 @@ static struct aligninfo aligninfo[128] = {
{ 8, LD+F }, /* 11 0 1001: lfdx */
{ 4, ST+F+S }, /* 11 0 1010: stfsx */
{ 8, ST+F }, /* 11 0 1011: stfdx */
- INVALID, /* 11 0 1100 */
- { 8, LD+M }, /* 11 0 1101: lmd */
- INVALID, /* 11 0 1110 */
- { 8, ST+M }, /* 11 0 1111: stmd */
+ { 16, LD+F }, /* 11 0 1100: lfdpx */
+ { 4, LD+F+SE }, /* 11 0 1101: lfiwax */
+ { 16, ST+F }, /* 11 0 1110: stfdpx */
+ { 4, ST+F }, /* 11 0 1111: stfiwx */
{ 4, LD+U }, /* 11 1 0000: lwzux */
INVALID, /* 11 1 0001 */
{ 4, ST+U }, /* 11 1 0010: stwux */
@@ -184,43 +185,12 @@ static struct aligninfo aligninfo[128] = {
{ 4, ST+F+S+U }, /* 11 1 1010: stfsux */
{ 8, ST+F+U }, /* 11 1 1011: stfdux */
INVALID, /* 11 1 1100 */
- INVALID, /* 11 1 1101 */
+ { 4, LD+F }, /* 11 1 1101: lfiwzx */
INVALID, /* 11 1 1110 */
INVALID, /* 11 1 1111 */
};
/*
- * Create a DSISR value from the instruction
- */
-static inline unsigned make_dsisr(unsigned instr)
-{
- unsigned dsisr;
-
-
- /* bits 6:15 --> 22:31 */
- dsisr = (instr & 0x03ff0000) >> 16;
-
- if (IS_XFORM(instr)) {
- /* bits 29:30 --> 15:16 */
- dsisr |= (instr & 0x00000006) << 14;
- /* bit 25 --> 17 */
- dsisr |= (instr & 0x00000040) << 8;
- /* bits 21:24 --> 18:21 */
- dsisr |= (instr & 0x00000780) << 3;
- } else {
- /* bit 5 --> 17 */
- dsisr |= (instr & 0x04000000) >> 12;
- /* bits 1: 4 --> 18:21 */
- dsisr |= (instr & 0x78000000) >> 17;
- /* bits 30:31 --> 12:13 */
- if (IS_DSFORM(instr))
- dsisr |= (instr & 0x00000003) << 18;
- }
-
- return dsisr;
-}
-
-/*
* The dcbz (data cache block zero) instruction
* gives an alignment fault if used on non-cacheable
* memory. We handle the fault mainly for the
@@ -241,7 +211,7 @@ static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, size))
return -EFAULT;
for (i = 0; i < size / sizeof(long); ++i)
- if (__put_user(0, p+i))
+ if (__put_user_inatomic(0, p+i))
return -EFAULT;
return 1;
}
@@ -252,11 +222,17 @@ static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
* bottom 4 bytes of each register, and the loads clear the
* top 4 bytes of the affected register.
*/
+#ifdef __BIG_ENDIAN__
#ifdef CONFIG_PPC64
#define REG_BYTE(rp, i) *((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4)
#else
#define REG_BYTE(rp, i) *((u8 *)(rp) + (i))
#endif
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define REG_BYTE(rp, i) (*(((u8 *)((rp) + ((i)>>2)) + ((i)&3))))
+#endif
#define SWIZ_PTR(p) ((unsigned char __user *)((p) ^ swiz))
@@ -288,7 +264,8 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
} else {
unsigned long pc = regs->nip ^ (swiz & 4);
- if (__get_user(instr, (unsigned int __user *)pc))
+ if (__get_user_inatomic(instr,
+ (unsigned int __user *)pc))
return -EFAULT;
if (swiz == 0 && (flags & SW))
instr = cpu_to_le32(instr);
@@ -300,6 +277,15 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
nb0 = nb + reg * 4 - 128;
nb = 128 - reg * 4;
}
+#ifdef __LITTLE_ENDIAN__
+ /*
+ * String instructions are endian neutral but the code
+ * below is not. Force byte swapping on so that the
+ * effects of swizzling are undone in the load/store
+ * loops below.
+ */
+ flags ^= SW;
+#endif
} else {
/* lwm, stmw */
nb = (32 - reg) * 4;
@@ -324,33 +310,420 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
((nb0 + 3) / 4) * sizeof(unsigned long));
for (i = 0; i < nb; ++i, ++p)
- if (__get_user(REG_BYTE(rptr, i ^ bswiz), SWIZ_PTR(p)))
+ if (__get_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
+ SWIZ_PTR(p)))
return -EFAULT;
if (nb0 > 0) {
rptr = &regs->gpr[0];
addr += nb;
for (i = 0; i < nb0; ++i, ++p)
- if (__get_user(REG_BYTE(rptr, i ^ bswiz),
- SWIZ_PTR(p)))
+ if (__get_user_inatomic(REG_BYTE(rptr,
+ i ^ bswiz),
+ SWIZ_PTR(p)))
return -EFAULT;
}
} else {
for (i = 0; i < nb; ++i, ++p)
- if (__put_user(REG_BYTE(rptr, i ^ bswiz), SWIZ_PTR(p)))
+ if (__put_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
+ SWIZ_PTR(p)))
return -EFAULT;
if (nb0 > 0) {
rptr = &regs->gpr[0];
addr += nb;
for (i = 0; i < nb0; ++i, ++p)
- if (__put_user(REG_BYTE(rptr, i ^ bswiz),
- SWIZ_PTR(p)))
+ if (__put_user_inatomic(REG_BYTE(rptr,
+ i ^ bswiz),
+ SWIZ_PTR(p)))
return -EFAULT;
}
}
return 1;
}
+/*
+ * Emulate floating-point pair loads and stores.
+ * Only POWER6 has these instructions, and it does true little-endian,
+ * so we don't need the address swizzling.
+ */
+static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg,
+ unsigned int flags)
+{
+ char *ptr0 = (char *) &current->thread.TS_FPR(reg);
+ char *ptr1 = (char *) &current->thread.TS_FPR(reg+1);
+ int i, ret, sw = 0;
+
+ if (reg & 1)
+ return 0; /* invalid form: FRS/FRT must be even */
+ if (flags & SW)
+ sw = 7;
+ ret = 0;
+ for (i = 0; i < 8; ++i) {
+ if (!(flags & ST)) {
+ ret |= __get_user(ptr0[i^sw], addr + i);
+ ret |= __get_user(ptr1[i^sw], addr + i + 8);
+ } else {
+ ret |= __put_user(ptr0[i^sw], addr + i);
+ ret |= __put_user(ptr1[i^sw], addr + i + 8);
+ }
+ }
+ if (ret)
+ return -EFAULT;
+ return 1; /* exception handled and fixed up */
+}
+
+#ifdef CONFIG_PPC64
+static int emulate_lq_stq(struct pt_regs *regs, unsigned char __user *addr,
+ unsigned int reg, unsigned int flags)
+{
+ char *ptr0 = (char *)&regs->gpr[reg];
+ char *ptr1 = (char *)&regs->gpr[reg+1];
+ int i, ret, sw = 0;
+
+ if (reg & 1)
+ return 0; /* invalid form: GPR must be even */
+ if (flags & SW)
+ sw = 7;
+ ret = 0;
+ for (i = 0; i < 8; ++i) {
+ if (!(flags & ST)) {
+ ret |= __get_user(ptr0[i^sw], addr + i);
+ ret |= __get_user(ptr1[i^sw], addr + i + 8);
+ } else {
+ ret |= __put_user(ptr0[i^sw], addr + i);
+ ret |= __put_user(ptr1[i^sw], addr + i + 8);
+ }
+ }
+ if (ret)
+ return -EFAULT;
+ return 1; /* exception handled and fixed up */
+}
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_SPE
+
+static struct aligninfo spe_aligninfo[32] = {
+ { 8, LD+E8 }, /* 0 00 00: evldd[x] */
+ { 8, LD+E4 }, /* 0 00 01: evldw[x] */
+ { 8, LD }, /* 0 00 10: evldh[x] */
+ INVALID, /* 0 00 11 */
+ { 2, LD }, /* 0 01 00: evlhhesplat[x] */
+ INVALID, /* 0 01 01 */
+ { 2, LD }, /* 0 01 10: evlhhousplat[x] */
+ { 2, LD+SE }, /* 0 01 11: evlhhossplat[x] */
+ { 4, LD }, /* 0 10 00: evlwhe[x] */
+ INVALID, /* 0 10 01 */
+ { 4, LD }, /* 0 10 10: evlwhou[x] */
+ { 4, LD+SE }, /* 0 10 11: evlwhos[x] */
+ { 4, LD+E4 }, /* 0 11 00: evlwwsplat[x] */
+ INVALID, /* 0 11 01 */
+ { 4, LD }, /* 0 11 10: evlwhsplat[x] */
+ INVALID, /* 0 11 11 */
+
+ { 8, ST+E8 }, /* 1 00 00: evstdd[x] */
+ { 8, ST+E4 }, /* 1 00 01: evstdw[x] */
+ { 8, ST }, /* 1 00 10: evstdh[x] */
+ INVALID, /* 1 00 11 */
+ INVALID, /* 1 01 00 */
+ INVALID, /* 1 01 01 */
+ INVALID, /* 1 01 10 */
+ INVALID, /* 1 01 11 */
+ { 4, ST }, /* 1 10 00: evstwhe[x] */
+ INVALID, /* 1 10 01 */
+ { 4, ST }, /* 1 10 10: evstwho[x] */
+ INVALID, /* 1 10 11 */
+ { 4, ST+E4 }, /* 1 11 00: evstwwe[x] */
+ INVALID, /* 1 11 01 */
+ { 4, ST+E4 }, /* 1 11 10: evstwwo[x] */
+ INVALID, /* 1 11 11 */
+};
+
+#define EVLDD 0x00
+#define EVLDW 0x01
+#define EVLDH 0x02
+#define EVLHHESPLAT 0x04
+#define EVLHHOUSPLAT 0x06
+#define EVLHHOSSPLAT 0x07
+#define EVLWHE 0x08
+#define EVLWHOU 0x0A
+#define EVLWHOS 0x0B
+#define EVLWWSPLAT 0x0C
+#define EVLWHSPLAT 0x0E
+#define EVSTDD 0x10
+#define EVSTDW 0x11
+#define EVSTDH 0x12
+#define EVSTWHE 0x18
+#define EVSTWHO 0x1A
+#define EVSTWWE 0x1C
+#define EVSTWWO 0x1E
+
+/*
+ * Emulate SPE loads and stores.
+ * Only Book-E has these instructions, and it does true little-endian,
+ * so we don't need the address swizzling.
+ */
+static int emulate_spe(struct pt_regs *regs, unsigned int reg,
+ unsigned int instr)
+{
+ int ret;
+ union {
+ u64 ll;
+ u32 w[2];
+ u16 h[4];
+ u8 v[8];
+ } data, temp;
+ unsigned char __user *p, *addr;
+ unsigned long *evr = &current->thread.evr[reg];
+ unsigned int nb, flags;
+
+ instr = (instr >> 1) & 0x1f;
+
+ /* DAR has the operand effective address */
+ addr = (unsigned char __user *)regs->dar;
+
+ nb = spe_aligninfo[instr].len;
+ flags = spe_aligninfo[instr].flags;
+
+ /* Verify the address of the operand */
+ if (unlikely(user_mode(regs) &&
+ !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ),
+ addr, nb)))
+ return -EFAULT;
+
+ /* userland only */
+ if (unlikely(!user_mode(regs)))
+ return 0;
+
+ flush_spe_to_thread(current);
+
+ /* If we are loading, get the data from user space, else
+ * get it from register values
+ */
+ if (flags & ST) {
+ data.ll = 0;
+ switch (instr) {
+ case EVSTDD:
+ case EVSTDW:
+ case EVSTDH:
+ data.w[0] = *evr;
+ data.w[1] = regs->gpr[reg];
+ break;
+ case EVSTWHE:
+ data.h[2] = *evr >> 16;
+ data.h[3] = regs->gpr[reg] >> 16;
+ break;
+ case EVSTWHO:
+ data.h[2] = *evr & 0xffff;
+ data.h[3] = regs->gpr[reg] & 0xffff;
+ break;
+ case EVSTWWE:
+ data.w[1] = *evr;
+ break;
+ case EVSTWWO:
+ data.w[1] = regs->gpr[reg];
+ break;
+ default:
+ return -EINVAL;
+ }
+ } else {
+ temp.ll = data.ll = 0;
+ ret = 0;
+ p = addr;
+
+ switch (nb) {
+ case 8:
+ ret |= __get_user_inatomic(temp.v[0], p++);
+ ret |= __get_user_inatomic(temp.v[1], p++);
+ ret |= __get_user_inatomic(temp.v[2], p++);
+ ret |= __get_user_inatomic(temp.v[3], p++);
+ case 4:
+ ret |= __get_user_inatomic(temp.v[4], p++);
+ ret |= __get_user_inatomic(temp.v[5], p++);
+ case 2:
+ ret |= __get_user_inatomic(temp.v[6], p++);
+ ret |= __get_user_inatomic(temp.v[7], p++);
+ if (unlikely(ret))
+ return -EFAULT;
+ }
+
+ switch (instr) {
+ case EVLDD:
+ case EVLDW:
+ case EVLDH:
+ data.ll = temp.ll;
+ break;
+ case EVLHHESPLAT:
+ data.h[0] = temp.h[3];
+ data.h[2] = temp.h[3];
+ break;
+ case EVLHHOUSPLAT:
+ case EVLHHOSSPLAT:
+ data.h[1] = temp.h[3];
+ data.h[3] = temp.h[3];
+ break;
+ case EVLWHE:
+ data.h[0] = temp.h[2];
+ data.h[2] = temp.h[3];
+ break;
+ case EVLWHOU:
+ case EVLWHOS:
+ data.h[1] = temp.h[2];
+ data.h[3] = temp.h[3];
+ break;
+ case EVLWWSPLAT:
+ data.w[0] = temp.w[1];
+ data.w[1] = temp.w[1];
+ break;
+ case EVLWHSPLAT:
+ data.h[0] = temp.h[2];
+ data.h[1] = temp.h[2];
+ data.h[2] = temp.h[3];
+ data.h[3] = temp.h[3];
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ if (flags & SW) {
+ switch (flags & 0xf0) {
+ case E8:
+ data.ll = swab64(data.ll);
+ break;
+ case E4:
+ data.w[0] = swab32(data.w[0]);
+ data.w[1] = swab32(data.w[1]);
+ break;
+ /* Its half word endian */
+ default:
+ data.h[0] = swab16(data.h[0]);
+ data.h[1] = swab16(data.h[1]);
+ data.h[2] = swab16(data.h[2]);
+ data.h[3] = swab16(data.h[3]);
+ break;
+ }
+ }
+
+ if (flags & SE) {
+ data.w[0] = (s16)data.h[1];
+ data.w[1] = (s16)data.h[3];
+ }
+
+ /* Store result to memory or update registers */
+ if (flags & ST) {
+ ret = 0;
+ p = addr;
+ switch (nb) {
+ case 8:
+ ret |= __put_user_inatomic(data.v[0], p++);
+ ret |= __put_user_inatomic(data.v[1], p++);
+ ret |= __put_user_inatomic(data.v[2], p++);
+ ret |= __put_user_inatomic(data.v[3], p++);
+ case 4:
+ ret |= __put_user_inatomic(data.v[4], p++);
+ ret |= __put_user_inatomic(data.v[5], p++);
+ case 2:
+ ret |= __put_user_inatomic(data.v[6], p++);
+ ret |= __put_user_inatomic(data.v[7], p++);
+ }
+ if (unlikely(ret))
+ return -EFAULT;
+ } else {
+ *evr = data.w[0];
+ regs->gpr[reg] = data.w[1];
+ }
+
+ return 1;
+}
+#endif /* CONFIG_SPE */
+
+#ifdef CONFIG_VSX
+/*
+ * Emulate VSX instructions...
+ */
+static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
+ unsigned int areg, struct pt_regs *regs,
+ unsigned int flags, unsigned int length,
+ unsigned int elsize)
+{
+ char *ptr;
+ unsigned long *lptr;
+ int ret = 0;
+ int sw = 0;
+ int i, j;
+
+ /* userland only */
+ if (unlikely(!user_mode(regs)))
+ return 0;
+
+ flush_vsx_to_thread(current);
+
+ if (reg < 32)
+ ptr = (char *) &current->thread.fp_state.fpr[reg][0];
+ else
+ ptr = (char *) &current->thread.vr_state.vr[reg - 32];
+
+ lptr = (unsigned long *) ptr;
+
+#ifdef __LITTLE_ENDIAN__
+ if (flags & SW) {
+ elsize = length;
+ sw = length-1;
+ } else {
+ /*
+ * The elements are BE ordered, even in LE mode, so process
+ * them in reverse order.
+ */
+ addr += length - elsize;
+
+ /* 8 byte memory accesses go in the top 8 bytes of the VR */
+ if (length == 8)
+ ptr += 8;
+ }
+#else
+ if (flags & SW)
+ sw = elsize-1;
+#endif
+
+ for (j = 0; j < length; j += elsize) {
+ for (i = 0; i < elsize; ++i) {
+ if (flags & ST)
+ ret |= __put_user(ptr[i^sw], addr + i);
+ else
+ ret |= __get_user(ptr[i^sw], addr + i);
+ }
+ ptr += elsize;
+#ifdef __LITTLE_ENDIAN__
+ addr -= elsize;
+#else
+ addr += elsize;
+#endif
+ }
+
+#ifdef __BIG_ENDIAN__
+#define VSX_HI 0
+#define VSX_LO 1
+#else
+#define VSX_HI 1
+#define VSX_LO 0
+#endif
+
+ if (!ret) {
+ if (flags & U)
+ regs->gpr[areg] = regs->dar;
+
+ /* Splat load copies the same data to top and bottom 8 bytes */
+ if (flags & SPLT)
+ lptr[VSX_LO] = lptr[VSX_HI];
+ /* For 8 byte loads, zero the low 8 bytes */
+ else if (!(flags & ST) && (8 == length))
+ lptr[VSX_LO] = 0;
+ } else
+ return -EFAULT;
+
+ return 1;
+}
+#endif
/*
* Called on alignment exception. Attempts to fixup
@@ -362,23 +735,33 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
int fix_alignment(struct pt_regs *regs)
{
- unsigned int instr, nb, flags;
+ unsigned int instr, nb, flags, instruction = 0;
unsigned int reg, areg;
unsigned int dsisr;
unsigned char __user *addr;
unsigned long p, swiz;
- int ret, t;
- union {
+ int ret, i;
+ union data {
u64 ll;
double dd;
unsigned char v[8];
struct {
+#ifdef __LITTLE_ENDIAN__
+ int low32;
+ unsigned hi32;
+#else
unsigned hi32;
int low32;
+#endif
} x32;
struct {
+#ifdef __LITTLE_ENDIAN__
+ short low16;
+ unsigned char hi48[6];
+#else
unsigned char hi48[6];
short low16;
+#endif
} x16;
} data;
@@ -398,16 +781,26 @@ int fix_alignment(struct pt_regs *regs)
if (cpu_has_feature(CPU_FTR_PPC_LE) && (regs->msr & MSR_LE))
pc ^= 4;
- if (unlikely(__get_user(instr, (unsigned int __user *)pc)))
+ if (unlikely(__get_user_inatomic(instr,
+ (unsigned int __user *)pc)))
return -EFAULT;
if (cpu_has_feature(CPU_FTR_REAL_LE) && (regs->msr & MSR_LE))
instr = cpu_to_le32(instr);
dsisr = make_dsisr(instr);
+ instruction = instr;
}
/* extract the operation and registers from the dsisr */
reg = (dsisr >> 5) & 0x1f; /* source/dest register */
areg = dsisr & 0x1f; /* register to update */
+
+#ifdef CONFIG_SPE
+ if ((instr >> 26) == 0x4) {
+ PPC_WARN_ALIGNMENT(spe, regs);
+ return emulate_spe(regs, reg, instr);
+ }
+#endif
+
instr = (dsisr >> 10) & 0x7f;
instr |= (dsisr >> 13) & 0x60;
@@ -415,10 +808,21 @@ int fix_alignment(struct pt_regs *regs)
nb = aligninfo[instr].len;
flags = aligninfo[instr].flags;
+ /* ldbrx/stdbrx overlap lfs/stfs in the DSISR unfortunately */
+ if (IS_XFORM(instruction) && ((instruction >> 1) & 0x3ff) == 532) {
+ nb = 8;
+ flags = LD+SW;
+ } else if (IS_XFORM(instruction) &&
+ ((instruction >> 1) & 0x3ff) == 660) {
+ nb = 8;
+ flags = ST+SW;
+ }
+
/* Byteswap little endian loads and stores */
swiz = 0;
- if (regs->msr & MSR_LE) {
+ if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
flags ^= SW;
+#ifdef __BIG_ENDIAN__
/*
* So-called "PowerPC little endian" mode works by
* swizzling addresses rather than by actually doing
@@ -431,25 +835,64 @@ int fix_alignment(struct pt_regs *regs)
*/
if (cpu_has_feature(CPU_FTR_PPC_LE))
swiz = 7;
+#endif
}
/* DAR has the operand effective address */
addr = (unsigned char __user *)regs->dar;
+#ifdef CONFIG_VSX
+ if ((instruction & 0xfc00003e) == 0x7c000018) {
+ unsigned int elsize;
+
+ /* Additional register addressing bit (64 VSX vs 32 FPR/GPR) */
+ reg |= (instruction & 0x1) << 5;
+ /* Simple inline decoder instead of a table */
+ /* VSX has only 8 and 16 byte memory accesses */
+ nb = 8;
+ if (instruction & 0x200)
+ nb = 16;
+
+ /* Vector stores in little-endian mode swap individual
+ elements, so process them separately */
+ elsize = 4;
+ if (instruction & 0x80)
+ elsize = 8;
+
+ flags = 0;
+ if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE))
+ flags |= SW;
+ if (instruction & 0x100)
+ flags |= ST;
+ if (instruction & 0x040)
+ flags |= U;
+ /* splat load needs a special decoder */
+ if ((instruction & 0x400) == 0){
+ flags |= SPLT;
+ nb = 8;
+ }
+ PPC_WARN_ALIGNMENT(vsx, regs);
+ return emulate_vsx(addr, reg, areg, regs, flags, nb, elsize);
+ }
+#endif
/* A size of 0 indicates an instruction we don't support, with
* the exception of DCBZ which is handled as a special case here
*/
- if (instr == DCBZ)
+ if (instr == DCBZ) {
+ PPC_WARN_ALIGNMENT(dcbz, regs);
return emulate_dcbz(regs, addr);
+ }
if (unlikely(nb == 0))
return 0;
/* Load/Store Multiple instructions are handled in their own
* function
*/
- if (flags & M)
+ if (flags & M) {
+ PPC_WARN_ALIGNMENT(multiple, regs);
return emulate_multiple(regs, addr, reg, nb,
flags, instr, swiz);
+ }
/* Verify the address of the operand */
if (unlikely(user_mode(regs) &&
@@ -465,36 +908,58 @@ int fix_alignment(struct pt_regs *regs)
flush_fp_to_thread(current);
}
+ if ((nb == 16)) {
+ if (flags & F) {
+ /* Special case for 16-byte FP loads and stores */
+ PPC_WARN_ALIGNMENT(fp_pair, regs);
+ return emulate_fp_pair(addr, reg, flags);
+ } else {
+#ifdef CONFIG_PPC64
+ /* Special case for 16-byte loads and stores */
+ PPC_WARN_ALIGNMENT(lq_stq, regs);
+ return emulate_lq_stq(regs, addr, reg, flags);
+#else
+ return 0;
+#endif
+ }
+ }
+
+ PPC_WARN_ALIGNMENT(unaligned, regs);
+
/* If we are loading, get the data from user space, else
* get it from register values
*/
if (!(flags & ST)) {
- data.ll = 0;
- ret = 0;
- p = (unsigned long) addr;
+ unsigned int start = 0;
+
switch (nb) {
- case 8:
- ret |= __get_user(data.v[0], SWIZ_PTR(p++));
- ret |= __get_user(data.v[1], SWIZ_PTR(p++));
- ret |= __get_user(data.v[2], SWIZ_PTR(p++));
- ret |= __get_user(data.v[3], SWIZ_PTR(p++));
case 4:
- ret |= __get_user(data.v[4], SWIZ_PTR(p++));
- ret |= __get_user(data.v[5], SWIZ_PTR(p++));
+ start = offsetof(union data, x32.low32);
+ break;
case 2:
- ret |= __get_user(data.v[6], SWIZ_PTR(p++));
- ret |= __get_user(data.v[7], SWIZ_PTR(p++));
- if (unlikely(ret))
- return -EFAULT;
+ start = offsetof(union data, x16.low16);
+ break;
}
+
+ data.ll = 0;
+ ret = 0;
+ p = (unsigned long)addr;
+
+ for (i = 0; i < nb; i++)
+ ret |= __get_user_inatomic(data.v[start + i],
+ SWIZ_PTR(p++));
+
+ if (unlikely(ret))
+ return -EFAULT;
+
} else if (flags & F) {
- data.dd = current->thread.fpr[reg];
+ data.ll = current->thread.TS_FPR(reg);
if (flags & S) {
/* Single-precision FP store requires conversion... */
#ifdef CONFIG_PPC_FPU
preempt_disable();
enable_kernel_fp();
- cvt_df(&data.dd, (float *)&data.v[4], &current->thread);
+ cvt_df(&data.dd, (float *)&data.x32.low32);
preempt_enable();
#else
return 0;
@@ -506,17 +971,13 @@ int fix_alignment(struct pt_regs *regs)
if (flags & SW) {
switch (nb) {
case 8:
- SWAP(data.v[0], data.v[7]);
- SWAP(data.v[1], data.v[6]);
- SWAP(data.v[2], data.v[5]);
- SWAP(data.v[3], data.v[4]);
+ data.ll = swab64(data.ll);
break;
case 4:
- SWAP(data.v[4], data.v[7]);
- SWAP(data.v[5], data.v[6]);
+ data.x32.low32 = swab32(data.x32.low32);
break;
case 2:
- SWAP(data.v[6], data.v[7]);
+ data.x16.low16 = swab16(data.x16.low16);
break;
}
}
@@ -525,7 +986,8 @@ int fix_alignment(struct pt_regs *regs)
* or floating point single precision conversion
*/
switch (flags & ~(U|SW)) {
- case LD+SE: /* sign extend */
+ case LD+SE: /* sign extending integer loads */
+ case LD+F+SE: /* sign extend for lfiwax */
if ( nb == 2 )
data.ll = data.x16.low16;
else /* nb must be 4 */
@@ -537,7 +999,7 @@ int fix_alignment(struct pt_regs *regs)
#ifdef CONFIG_PPC_FPU
preempt_disable();
enable_kernel_fp();
- cvt_fd((float *)&data.v[4], &data.dd, &current->thread);
+ cvt_fd((float *)&data.x32.low32, &data.dd);
preempt_enable();
#else
return 0;
@@ -547,25 +1009,28 @@ int fix_alignment(struct pt_regs *regs)
/* Store result to memory or update registers */
if (flags & ST) {
- ret = 0;
- p = (unsigned long) addr;
+ unsigned int start = 0;
+
switch (nb) {
- case 8:
- ret |= __put_user(data.v[0], SWIZ_PTR(p++));
- ret |= __put_user(data.v[1], SWIZ_PTR(p++));
- ret |= __put_user(data.v[2], SWIZ_PTR(p++));
- ret |= __put_user(data.v[3], SWIZ_PTR(p++));
case 4:
- ret |= __put_user(data.v[4], SWIZ_PTR(p++));
- ret |= __put_user(data.v[5], SWIZ_PTR(p++));
+ start = offsetof(union data, x32.low32);
+ break;
case 2:
- ret |= __put_user(data.v[6], SWIZ_PTR(p++));
- ret |= __put_user(data.v[7], SWIZ_PTR(p++));
+ start = offsetof(union data, x16.low16);
+ break;
}
+
+ ret = 0;
+ p = (unsigned long)addr;
+
+ for (i = 0; i < nb; i++)
+ ret |= __put_user_inatomic(data.v[start + i],
+ SWIZ_PTR(p++));
+
if (unlikely(ret))
return -EFAULT;
} else if (flags & F)
- current->thread.fpr[reg] = data.dd;
+ current->thread.TS_FPR(reg) = data.ll;
else
regs->gpr[reg] = data.ll;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index d06f378597b..f5995a91221 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -21,13 +21,13 @@
#include <linux/types.h>
#include <linux/mman.h>
#include <linux/mm.h>
+#include <linux/suspend.h>
+#include <linux/hrtimer.h>
#ifdef CONFIG_PPC64
#include <linux/time.h>
#include <linux/hardirq.h>
-#else
-#include <linux/ptrace.h>
-#include <linux/suspend.h>
#endif
+#include <linux/kbuild.h>
#include <asm/io.h>
#include <asm/page.h>
@@ -44,44 +44,70 @@
#include <asm/compat.h>
#include <asm/mmu.h>
#include <asm/hvcall.h>
+#include <asm/xics.h>
+#endif
+#ifdef CONFIG_PPC_POWERNV
+#include <asm/opal.h>
+#endif
+#if defined(CONFIG_KVM) || defined(CONFIG_KVM_GUEST)
+#include <linux/kvm_host.h>
+#endif
+#if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S)
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
#endif
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+#ifdef CONFIG_PPC32
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#include "head_booke.h"
+#endif
+#endif
-#define BLANK() asm volatile("\n->" : : )
+#if defined(CONFIG_PPC_FSL_BOOK3E)
+#include "../mm/mmu_decl.h"
+#endif
int main(void)
{
DEFINE(THREAD, offsetof(struct task_struct, thread));
DEFINE(MM, offsetof(struct task_struct, mm));
+ DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id));
#ifdef CONFIG_PPC64
DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context));
+ DEFINE(SIGSEGV, SIGSEGV);
+ DEFINE(NMI_MASK, NMI_MASK);
+ DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr));
+ DEFINE(THREAD_DSCR_INHERIT, offsetof(struct thread_struct, dscr_inherit));
+ DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr));
#else
- DEFINE(THREAD_INFO, offsetof(struct task_struct, thread_info));
- DEFINE(PTRACE, offsetof(struct task_struct, ptrace));
+ DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
+ DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16));
+ DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
#endif /* CONFIG_PPC64 */
DEFINE(KSP, offsetof(struct thread_struct, ksp));
DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
+#ifdef CONFIG_BOOKE
+ DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0]));
+#endif
DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode));
- DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0]));
- DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr));
+ DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state));
+ DEFINE(THREAD_FPSAVEAREA, offsetof(struct thread_struct, fp_save_area));
+ DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr));
#ifdef CONFIG_ALTIVEC
- DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0]));
+ DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state));
+ DEFINE(THREAD_VRSAVEAREA, offsetof(struct thread_struct, vr_save_area));
DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave));
- DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr));
DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
+ DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr));
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+ DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr));
+#endif /* CONFIG_VSX */
#ifdef CONFIG_PPC64
DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid));
#else /* CONFIG_PPC64 */
DEFINE(PGDIR, offsetof(struct thread_struct, pgdir));
- DEFINE(LAST_SYSCALL, offsetof(struct thread_struct, last_syscall));
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0));
- DEFINE(PT_PTRACED, PT_PTRACED);
-#endif
#ifdef CONFIG_SPE
DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0]));
DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc));
@@ -89,15 +115,52 @@ int main(void)
DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe));
#endif /* CONFIG_SPE */
#endif /* CONFIG_PPC64 */
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+ DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, debug.dbcr0));
+#endif
+#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
+ DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
+#endif
+#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
+ DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu));
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ DEFINE(THREAD_TAR, offsetof(struct thread_struct, tar));
+ DEFINE(THREAD_BESCR, offsetof(struct thread_struct, bescr));
+ DEFINE(THREAD_EBBHR, offsetof(struct thread_struct, ebbhr));
+ DEFINE(THREAD_EBBRR, offsetof(struct thread_struct, ebbrr));
+ DEFINE(THREAD_SIAR, offsetof(struct thread_struct, siar));
+ DEFINE(THREAD_SDAR, offsetof(struct thread_struct, sdar));
+ DEFINE(THREAD_SIER, offsetof(struct thread_struct, sier));
+ DEFINE(THREAD_MMCR0, offsetof(struct thread_struct, mmcr0));
+ DEFINE(THREAD_MMCR2, offsetof(struct thread_struct, mmcr2));
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ DEFINE(PACATMSCRATCH, offsetof(struct paca_struct, tm_scratch));
+ DEFINE(THREAD_TM_TFHAR, offsetof(struct thread_struct, tm_tfhar));
+ DEFINE(THREAD_TM_TEXASR, offsetof(struct thread_struct, tm_texasr));
+ DEFINE(THREAD_TM_TFIAR, offsetof(struct thread_struct, tm_tfiar));
+ DEFINE(THREAD_TM_TAR, offsetof(struct thread_struct, tm_tar));
+ DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr));
+ DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr));
+ DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs));
+ DEFINE(THREAD_TRANSACT_VRSTATE, offsetof(struct thread_struct,
+ transact_vr));
+ DEFINE(THREAD_TRANSACT_VRSAVE, offsetof(struct thread_struct,
+ transact_vrsave));
+ DEFINE(THREAD_TRANSACT_FPSTATE, offsetof(struct thread_struct,
+ transact_fp));
+ /* Local pt_regs on stack for Transactional Memory funcs. */
+ DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD +
+ sizeof(struct pt_regs) + 16);
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
DEFINE(TI_TASK, offsetof(struct thread_info, task));
-#ifdef CONFIG_PPC32
- DEFINE(TI_EXECDOMAIN, offsetof(struct thread_info, exec_domain));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
-#endif /* CONFIG_PPC32 */
#ifdef CONFIG_PPC64
DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size));
@@ -108,47 +171,91 @@ int main(void)
DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page));
/* paca */
DEFINE(PACA_SIZE, sizeof(struct paca_struct));
+ DEFINE(PACA_LOCK_TOKEN, offsetof(struct paca_struct, lock_token));
DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index));
DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start));
DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack));
DEFINE(PACACURRENT, offsetof(struct paca_struct, __current));
DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr));
- DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real));
- DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr));
DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr));
DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1));
DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc));
- DEFINE(PACAPROCENABLED, offsetof(struct paca_struct, proc_enabled));
+ DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase));
+ DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
+ DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
+ DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened));
+ DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
+#ifdef CONFIG_PPC_MM_SLICES
+ DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
+ context.low_slices_psize));
+ DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct,
+ context.high_slices_psize));
+ DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#ifdef CONFIG_PPC_BOOK3E
+ DEFINE(PACAPGD, offsetof(struct paca_struct, pgd));
+ DEFINE(PACA_KERNELPGD, offsetof(struct paca_struct, kernel_pgd));
+ DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
+ DEFINE(PACA_EXTLB, offsetof(struct paca_struct, extlb));
+ DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
+ DEFINE(PACA_EXCRIT, offsetof(struct paca_struct, excrit));
+ DEFINE(PACA_EXDBG, offsetof(struct paca_struct, exdbg));
+ DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack));
+ DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack));
+ DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack));
+ DEFINE(PACA_TCD_PTR, offsetof(struct paca_struct, tcd_ptr));
+
+ DEFINE(TCD_ESEL_NEXT,
+ offsetof(struct tlb_core_data, esel_next));
+ DEFINE(TCD_ESEL_MAX,
+ offsetof(struct tlb_core_data, esel_max));
+ DEFINE(TCD_ESEL_FIRST,
+ offsetof(struct tlb_core_data, esel_first));
+ DEFINE(TCD_LOCK, offsetof(struct tlb_core_data, lock));
+#endif /* CONFIG_PPC_BOOK3E */
+
+#ifdef CONFIG_PPC_STD_MMU_64
+ DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real));
+ DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr));
DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
- DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
- DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp));
DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp));
-#ifdef CONFIG_HUGETLB_PAGE
- DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
- DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
-#endif /* CONFIG_HUGETLB_PAGE */
+#ifdef CONFIG_PPC_MM_SLICES
+ DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp));
+#else
+ DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp));
+#endif /* CONFIG_PPC_MM_SLICES */
DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb));
- DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
DEFINE(PACALPPACAPTR, offsetof(struct paca_struct, lppaca_ptr));
- DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
- DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr));
- DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
- DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr));
- DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset));
-
DEFINE(SLBSHADOW_STACKVSID,
offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid));
DEFINE(SLBSHADOW_STACKESID,
offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid));
- DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
- DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
- DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int));
- DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int));
DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area));
+ DEFINE(LPPACA_PMCINUSE, offsetof(struct lppaca, pmcregs_in_use));
+ DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx));
+ DEFINE(LPPACA_YIELDCOUNT, offsetof(struct lppaca, yield_count));
+ DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx));
+#endif /* CONFIG_PPC_STD_MMU_64 */
+ DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
+#ifdef CONFIG_PPC_BOOK3S_64
+ DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp));
+ DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce));
+#endif
+ DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
+ DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
+ DEFINE(PACA_DSCR, offsetof(struct paca_struct, dscr_default));
+ DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
+ DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user));
+ DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
+ DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
+ DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));
+ DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost));
+ DEFINE(PACA_SPRG_VDSO, offsetof(struct paca_struct, sprg_vdso));
#endif /* CONFIG_PPC64 */
/* RTAS */
@@ -156,14 +263,9 @@ int main(void)
DEFINE(RTASENTRY, offsetof(struct rtas_t, entry));
/* Interrupt register frame */
- DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD);
-#ifndef CONFIG_PPC64
- DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
-#else /* CONFIG_PPC64 */
+ DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
- /* 288 = # of volatile regs, int & fp, for leaf routines */
- /* which do not stack a frame. See the PPC64 ABI. */
- DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 288);
+#ifdef CONFIG_PPC64
/* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
@@ -241,6 +343,26 @@ int main(void)
DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8);
#endif /* CONFIG_PPC64 */
+#if defined(CONFIG_PPC32)
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+ DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE);
+ DEFINE(MAS0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
+ /* we overload MMUCR for 44x on MAS0 since they are mutually exclusive */
+ DEFINE(MMUCR, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
+ DEFINE(MAS1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas1));
+ DEFINE(MAS2, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas2));
+ DEFINE(MAS3, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas3));
+ DEFINE(MAS6, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas6));
+ DEFINE(MAS7, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas7));
+ DEFINE(_SRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr0));
+ DEFINE(_SRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr1));
+ DEFINE(_CSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr0));
+ DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1));
+ DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0));
+ DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1));
+ DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit));
+#endif
+#endif
DEFINE(CLONE_VM, CLONE_VM);
DEFINE(CLONE_UNTRACED, CLONE_UNTRACED);
@@ -249,18 +371,15 @@ int main(void)
#endif /* ! CONFIG_PPC64 */
/* About the CPU features table */
- DEFINE(CPU_SPEC_ENTRY_SIZE, sizeof(struct cpu_spec));
- DEFINE(CPU_SPEC_PVR_MASK, offsetof(struct cpu_spec, pvr_mask));
- DEFINE(CPU_SPEC_PVR_VALUE, offsetof(struct cpu_spec, pvr_value));
DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features));
DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup));
DEFINE(CPU_SPEC_RESTORE, offsetof(struct cpu_spec, cpu_restore));
-#ifndef CONFIG_PPC64
DEFINE(pbe_address, offsetof(struct pbe, address));
DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
DEFINE(pbe_next, offsetof(struct pbe, next));
+#ifndef CONFIG_PPC64
DEFINE(TASK_SIZE, TASK_SIZE);
DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28);
#endif /* ! CONFIG_PPC64 */
@@ -276,6 +395,12 @@ int main(void)
DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32));
DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec));
DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
+ DEFINE(STAMP_XTIME, offsetof(struct vdso_data, stamp_xtime));
+ DEFINE(STAMP_SEC_FRAC, offsetof(struct vdso_data, stamp_sec_fraction));
+ DEFINE(CFG_ICACHE_BLOCKSZ, offsetof(struct vdso_data, icache_block_size));
+ DEFINE(CFG_DCACHE_BLOCKSZ, offsetof(struct vdso_data, dcache_block_size));
+ DEFINE(CFG_ICACHE_LOGBLOCKSZ, offsetof(struct vdso_data, icache_log_block_size));
+ DEFINE(CFG_DCACHE_LOGBLOCKSZ, offsetof(struct vdso_data, dcache_log_block_size));
#ifdef CONFIG_PPC64
DEFINE(CFG_SYSCALL_MAP64, offsetof(struct vdso_data, syscall_map_64));
DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec));
@@ -300,7 +425,313 @@ int main(void)
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
- DEFINE(CLOCK_REALTIME_RES, TICK_NSEC);
+ DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
+
+#ifdef CONFIG_BUG
+ DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
+#endif
+
+ DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
+ DEFINE(PTE_SIZE, sizeof(pte_t));
+
+#ifdef CONFIG_KVM
+ DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
+ DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
+ DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid));
+ DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
+ DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave));
+ DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fp.fpr));
+#ifdef CONFIG_ALTIVEC
+ DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr.vr));
+#endif
+ DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
+ DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
+ DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
+#ifdef CONFIG_PPC_BOOK3S
+ DEFINE(VCPU_TAR, offsetof(struct kvm_vcpu, arch.tar));
+#endif
+ DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
+ DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr));
+ DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0));
+ DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1));
+ DEFINE(VCPU_SPRG0, offsetof(struct kvm_vcpu, arch.shregs.sprg0));
+ DEFINE(VCPU_SPRG1, offsetof(struct kvm_vcpu, arch.shregs.sprg1));
+ DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
+ DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
+#endif
+ DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3));
+ DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
+ DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5));
+ DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6));
+ DEFINE(VCPU_SHARED_SPRG7, offsetof(struct kvm_vcpu_arch_shared, sprg7));
+ DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid));
+ DEFINE(VCPU_SHADOW_PID1, offsetof(struct kvm_vcpu, arch.shadow_pid1));
+ DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));
+ DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
+ DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
+ DEFINE(VCPU_SHAREDBE, offsetof(struct kvm_vcpu, arch.shared_big_endian));
+#endif
+
+ DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0));
+ DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1));
+ DEFINE(VCPU_SHARED_MAS2, offsetof(struct kvm_vcpu_arch_shared, mas2));
+ DEFINE(VCPU_SHARED_MAS7_3, offsetof(struct kvm_vcpu_arch_shared, mas7_3));
+ DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4));
+ DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6));
+
+ DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
+ DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
+
+ /* book3s */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
+ DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
+ DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
+ DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
+ DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
+ DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
+ DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
+ DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
+ DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
+ DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
+ DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
+ DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
+ DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
+#endif
+#ifdef CONFIG_PPC_BOOK3S
+ DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
+ DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr));
+ DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr));
+ DEFINE(VCPU_IC, offsetof(struct kvm_vcpu, arch.ic));
+ DEFINE(VCPU_VTB, offsetof(struct kvm_vcpu, arch.vtb));
+ DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr));
+ DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr));
+ DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor));
+ DEFINE(VCPU_IAMR, offsetof(struct kvm_vcpu, arch.iamr));
+ DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl));
+ DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr));
+ DEFINE(VCPU_DABRX, offsetof(struct kvm_vcpu, arch.dabrx));
+ DEFINE(VCPU_DAWR, offsetof(struct kvm_vcpu, arch.dawr));
+ DEFINE(VCPU_DAWRX, offsetof(struct kvm_vcpu, arch.dawrx));
+ DEFINE(VCPU_CIABR, offsetof(struct kvm_vcpu, arch.ciabr));
+ DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
+ DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec));
+ DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires));
+ DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions));
+ DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded));
+ DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
+ DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
+ DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
+ DEFINE(VCPU_SPMC, offsetof(struct kvm_vcpu, arch.spmc));
+ DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar));
+ DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar));
+ DEFINE(VCPU_SIER, offsetof(struct kvm_vcpu, arch.sier));
+ DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
+ DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));
+ DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
+ DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
+ DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
+ DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));
+ DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
+ DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
+ DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar));
+ DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr));
+ DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr));
+ DEFINE(VCPU_SHADOW_FSCR, offsetof(struct kvm_vcpu, arch.shadow_fscr));
+ DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb));
+ DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr));
+ DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr));
+ DEFINE(VCPU_BESCR, offsetof(struct kvm_vcpu, arch.bescr));
+ DEFINE(VCPU_CSIGR, offsetof(struct kvm_vcpu, arch.csigr));
+ DEFINE(VCPU_TACR, offsetof(struct kvm_vcpu, arch.tacr));
+ DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr));
+ DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop));
+ DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort));
+ DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
+ DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
+ DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
+ DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
+ DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
+ DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm));
+ DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset));
+ DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr));
+ DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr));
+ DEFINE(VCORE_DPDES, offsetof(struct kvmppc_vcore, dpdes));
+ DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
+ DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv));
+ DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb));
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ DEFINE(VCPU_TFHAR, offsetof(struct kvm_vcpu, arch.tfhar));
+ DEFINE(VCPU_TFIAR, offsetof(struct kvm_vcpu, arch.tfiar));
+ DEFINE(VCPU_TEXASR, offsetof(struct kvm_vcpu, arch.texasr));
+ DEFINE(VCPU_GPR_TM, offsetof(struct kvm_vcpu, arch.gpr_tm));
+ DEFINE(VCPU_FPRS_TM, offsetof(struct kvm_vcpu, arch.fp_tm.fpr));
+ DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr));
+ DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm));
+ DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm));
+ DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm));
+ DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm));
+ DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm));
+ DEFINE(VCPU_PPR_TM, offsetof(struct kvm_vcpu, arch.ppr_tm));
+ DEFINE(VCPU_DSCR_TM, offsetof(struct kvm_vcpu, arch.dscr_tm));
+ DEFINE(VCPU_TAR_TM, offsetof(struct kvm_vcpu, arch.tar_tm));
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ DEFINE(PACA_SVCPU, offsetof(struct paca_struct, shadow_vcpu));
+# define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f))
+#else
+# define SVCPU_FIELD(x, f)
+#endif
+# define HSTATE_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, kvm_hstate.f))
+#else /* 32-bit */
+# define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct kvmppc_book3s_shadow_vcpu, f))
+# define HSTATE_FIELD(x, f) DEFINE(x, offsetof(struct kvmppc_book3s_shadow_vcpu, hstate.f))
+#endif
+
+ SVCPU_FIELD(SVCPU_CR, cr);
+ SVCPU_FIELD(SVCPU_XER, xer);
+ SVCPU_FIELD(SVCPU_CTR, ctr);
+ SVCPU_FIELD(SVCPU_LR, lr);
+ SVCPU_FIELD(SVCPU_PC, pc);
+ SVCPU_FIELD(SVCPU_R0, gpr[0]);
+ SVCPU_FIELD(SVCPU_R1, gpr[1]);
+ SVCPU_FIELD(SVCPU_R2, gpr[2]);
+ SVCPU_FIELD(SVCPU_R3, gpr[3]);
+ SVCPU_FIELD(SVCPU_R4, gpr[4]);
+ SVCPU_FIELD(SVCPU_R5, gpr[5]);
+ SVCPU_FIELD(SVCPU_R6, gpr[6]);
+ SVCPU_FIELD(SVCPU_R7, gpr[7]);
+ SVCPU_FIELD(SVCPU_R8, gpr[8]);
+ SVCPU_FIELD(SVCPU_R9, gpr[9]);
+ SVCPU_FIELD(SVCPU_R10, gpr[10]);
+ SVCPU_FIELD(SVCPU_R11, gpr[11]);
+ SVCPU_FIELD(SVCPU_R12, gpr[12]);
+ SVCPU_FIELD(SVCPU_R13, gpr[13]);
+ SVCPU_FIELD(SVCPU_FAULT_DSISR, fault_dsisr);
+ SVCPU_FIELD(SVCPU_FAULT_DAR, fault_dar);
+ SVCPU_FIELD(SVCPU_LAST_INST, last_inst);
+ SVCPU_FIELD(SVCPU_SHADOW_SRR1, shadow_srr1);
+#ifdef CONFIG_PPC_BOOK3S_32
+ SVCPU_FIELD(SVCPU_SR, sr);
+#endif
+#ifdef CONFIG_PPC64
+ SVCPU_FIELD(SVCPU_SLB, slb);
+ SVCPU_FIELD(SVCPU_SLB_MAX, slb_max);
+ SVCPU_FIELD(SVCPU_SHADOW_FSCR, shadow_fscr);
+#endif
+
+ HSTATE_FIELD(HSTATE_HOST_R1, host_r1);
+ HSTATE_FIELD(HSTATE_HOST_R2, host_r2);
+ HSTATE_FIELD(HSTATE_HOST_MSR, host_msr);
+ HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler);
+ HSTATE_FIELD(HSTATE_SCRATCH0, scratch0);
+ HSTATE_FIELD(HSTATE_SCRATCH1, scratch1);
+ HSTATE_FIELD(HSTATE_SCRATCH2, scratch2);
+ HSTATE_FIELD(HSTATE_IN_GUEST, in_guest);
+ HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
+ HSTATE_FIELD(HSTATE_NAPPING, napping);
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req);
+ HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
+ HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
+ HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
+ HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
+ HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
+ HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
+ HSTATE_FIELD(HSTATE_PTID, ptid);
+ HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
+ HSTATE_FIELD(HSTATE_PMC, host_pmc);
+ HSTATE_FIELD(HSTATE_PURR, host_purr);
+ HSTATE_FIELD(HSTATE_SPURR, host_spurr);
+ HSTATE_FIELD(HSTATE_DSCR, host_dscr);
+ HSTATE_FIELD(HSTATE_DABR, dabr);
+ HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
+ DEFINE(IPI_PRIORITY, IPI_PRIORITY);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ HSTATE_FIELD(HSTATE_CFAR, cfar);
+ HSTATE_FIELD(HSTATE_PPR, ppr);
+ HSTATE_FIELD(HSTATE_HOST_FSCR, host_fscr);
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#else /* CONFIG_PPC_BOOK3S */
+ DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
+ DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
+ DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
+ DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
+ DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
+ DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
+ DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
+ DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
+ DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save));
+#endif /* CONFIG_PPC_BOOK3S */
+#endif /* CONFIG_KVM */
+
+#ifdef CONFIG_KVM_GUEST
+ DEFINE(KVM_MAGIC_SCRATCH1, offsetof(struct kvm_vcpu_arch_shared,
+ scratch1));
+ DEFINE(KVM_MAGIC_SCRATCH2, offsetof(struct kvm_vcpu_arch_shared,
+ scratch2));
+ DEFINE(KVM_MAGIC_SCRATCH3, offsetof(struct kvm_vcpu_arch_shared,
+ scratch3));
+ DEFINE(KVM_MAGIC_INT, offsetof(struct kvm_vcpu_arch_shared,
+ int_pending));
+ DEFINE(KVM_MAGIC_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
+ DEFINE(KVM_MAGIC_CRITICAL, offsetof(struct kvm_vcpu_arch_shared,
+ critical));
+ DEFINE(KVM_MAGIC_SR, offsetof(struct kvm_vcpu_arch_shared, sr));
+#endif
+
+#ifdef CONFIG_44x
+ DEFINE(PGD_T_LOG2, PGD_T_LOG2);
+ DEFINE(PTE_T_LOG2, PTE_T_LOG2);
+#endif
+#ifdef CONFIG_PPC_FSL_BOOK3E
+ DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam));
+ DEFINE(TLBCAM_MAS0, offsetof(struct tlbcam, MAS0));
+ DEFINE(TLBCAM_MAS1, offsetof(struct tlbcam, MAS1));
+ DEFINE(TLBCAM_MAS2, offsetof(struct tlbcam, MAS2));
+ DEFINE(TLBCAM_MAS3, offsetof(struct tlbcam, MAS3));
+ DEFINE(TLBCAM_MAS7, offsetof(struct tlbcam, MAS7));
+#endif
+
+#if defined(CONFIG_KVM) && defined(CONFIG_SPE)
+ DEFINE(VCPU_EVR, offsetof(struct kvm_vcpu, arch.evr[0]));
+ DEFINE(VCPU_ACC, offsetof(struct kvm_vcpu, arch.acc));
+ DEFINE(VCPU_SPEFSCR, offsetof(struct kvm_vcpu, arch.spefscr));
+ DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr));
+#endif
+
+#ifdef CONFIG_KVM_BOOKE_HV
+ DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4));
+ DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6));
+ DEFINE(VCPU_EPLC, offsetof(struct kvm_vcpu, arch.eplc));
+#endif
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+ DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
+ arch.timing_exit.tv32.tbu));
+ DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu,
+ arch.timing_exit.tv32.tbl));
+ DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu,
+ arch.timing_last_enter.tv32.tbu));
+ DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu,
+ arch.timing_last_enter.tv32.tbl));
+#endif
+
+#ifdef CONFIG_PPC_POWERNV
+ DEFINE(OPAL_MC_GPR3, offsetof(struct opal_machine_check_event, gpr3));
+ DEFINE(OPAL_MC_SRR0, offsetof(struct opal_machine_check_event, srr0));
+ DEFINE(OPAL_MC_SRR1, offsetof(struct opal_machine_check_event, srr1));
+ DEFINE(PACA_OPAL_MC_EVT, offsetof(struct paca_struct, opal_mc_evt));
+#endif
return 0;
}
diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c
index 7fe5e6300e9..a4dab7cab34 100644
--- a/arch/powerpc/kernel/audit.c
+++ b/arch/powerpc/kernel/audit.c
@@ -23,6 +23,20 @@ static unsigned chattr_class[] = {
~0U
};
+static unsigned signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int audit_classify_arch(int arch)
+{
+#ifdef CONFIG_PPC64
+ if (arch == AUDIT_ARCH_PPC)
+ return 1;
+#endif
+ return 0;
+}
+
int audit_classify_syscall(int abi, unsigned syscall)
{
#ifdef CONFIG_PPC64
@@ -51,15 +65,18 @@ static int __init audit_classes_init(void)
extern __u32 ppc32_write_class[];
extern __u32 ppc32_read_class[];
extern __u32 ppc32_chattr_class[];
+ extern __u32 ppc32_signal_class[];
audit_register_class(AUDIT_CLASS_WRITE_32, ppc32_write_class);
audit_register_class(AUDIT_CLASS_READ_32, ppc32_read_class);
audit_register_class(AUDIT_CLASS_DIR_WRITE_32, ppc32_dir_class);
audit_register_class(AUDIT_CLASS_CHATTR_32, ppc32_chattr_class);
+ audit_register_class(AUDIT_CLASS_SIGNAL_32, ppc32_signal_class);
#endif
audit_register_class(AUDIT_CLASS_WRITE, write_class);
audit_register_class(AUDIT_CLASS_READ, read_class);
audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
audit_register_class(AUDIT_CLASS_CHATTR, chattr_class);
+ audit_register_class(AUDIT_CLASS_SIGNAL, signal_class);
return 0;
}
diff --git a/arch/powerpc/kernel/binfmt_elf32.c b/arch/powerpc/kernel/binfmt_elf32.c
deleted file mode 100644
index 5cb58757e1b..00000000000
--- a/arch/powerpc/kernel/binfmt_elf32.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * binfmt_elf32.c: Support 32-bit PPC ELF binaries on Power3 and followons.
- * based on the SPARC64 version.
- * Copyright (C) 1995, 1996, 1997, 1998 David S. Miller (davem@redhat.com)
- * Copyright (C) 1995, 1996, 1997, 1998 Jakub Jelinek (jj@ultra.linux.cz)
- *
- * Copyright (C) 2000,2001 Ken Aaker (kdaaker@rchland.vnet.ibm.com), IBM Corp
- * Copyright (C) 2001 Anton Blanchard (anton@au.ibm.com), IBM
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#define ELF_ARCH EM_PPC
-#define ELF_CLASS ELFCLASS32
-#define ELF_DATA ELFDATA2MSB;
-
-#include <asm/processor.h>
-#include <linux/module.h>
-#include <linux/elfcore.h>
-#include <linux/compat.h>
-
-#define elf_prstatus elf_prstatus32
-struct elf_prstatus32
-{
- struct elf_siginfo pr_info; /* Info associated with signal */
- short pr_cursig; /* Current signal */
- unsigned int pr_sigpend; /* Set of pending signals */
- unsigned int pr_sighold; /* Set of held signals */
- pid_t pr_pid;
- pid_t pr_ppid;
- pid_t pr_pgrp;
- pid_t pr_sid;
- struct compat_timeval pr_utime; /* User time */
- struct compat_timeval pr_stime; /* System time */
- struct compat_timeval pr_cutime; /* Cumulative user time */
- struct compat_timeval pr_cstime; /* Cumulative system time */
- elf_gregset_t pr_reg; /* General purpose registers. */
- int pr_fpvalid; /* True if math co-processor being used. */
-};
-
-#define elf_prpsinfo elf_prpsinfo32
-struct elf_prpsinfo32
-{
- char pr_state; /* numeric process state */
- char pr_sname; /* char for pr_state */
- char pr_zomb; /* zombie */
- char pr_nice; /* nice val */
- unsigned int pr_flag; /* flags */
- u32 pr_uid;
- u32 pr_gid;
- pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid;
- /* Lots missing */
- char pr_fname[16]; /* filename of executable */
- char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */
-};
-
-#include <linux/time.h>
-
-#undef cputime_to_timeval
-#define cputime_to_timeval cputime_to_compat_timeval
-static __inline__ void
-cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value)
-{
- unsigned long jiffies = cputime_to_jiffies(cputime);
- value->tv_usec = (jiffies % HZ) * (1000000L / HZ);
- value->tv_sec = jiffies / HZ;
-}
-
-#define init_elf_binfmt init_elf32_binfmt
-
-#include "../../../fs/binfmt_elf.c"
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 995fcef156f..41c011cb607 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -6,18 +6,18 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/init.h>
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/memblock.h>
#include <asm/sections.h>
#include <asm/prom.h>
#include <asm/btext.h>
-#include <asm/prom.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/pgtable.h>
#include <asm/io.h>
-#include <asm/lmb.h>
#include <asm/processor.h>
+#include <asm/udbg.h>
#define NO_SCROLL
@@ -25,11 +25,6 @@
static void scrollscreen(void);
#endif
-static void draw_byte(unsigned char c, long locX, long locY);
-static void draw_byte_32(unsigned char *bits, unsigned int *base, int rb);
-static void draw_byte_16(unsigned char *bits, unsigned int *base, int rb);
-static void draw_byte_8(unsigned char *bits, unsigned int *base, int rb);
-
#define __force_data __attribute__((__section__(".data")))
static int g_loc_X __force_data;
@@ -52,6 +47,26 @@ static unsigned char vga_font[cmapsz];
int boot_text_mapped __force_data = 0;
int force_printk_to_btext = 0;
+extern void rmci_on(void);
+extern void rmci_off(void);
+
+static inline void rmci_maybe_on(void)
+{
+#if defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) && defined(CONFIG_PPC64)
+ if (!(mfmsr() & MSR_DR))
+ rmci_on();
+#endif
+}
+
+static inline void rmci_maybe_off(void)
+{
+#if defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) && defined(CONFIG_PPC64)
+ if (!(mfmsr() & MSR_DR))
+ rmci_off();
+#endif
+}
+
+
#ifdef CONFIG_PPC32
/* Calc BAT values for mapping the display and store them
* in disp_BAT. Those values are then used from head.S to map
@@ -99,7 +114,7 @@ void __init btext_prepare_BAT(void)
/* This function can be used to enable the early boot text when doing
* OF booting or within bootx init. It must be followed by a btext_unmap()
- * call before the logical address becomes unuseable
+ * call before the logical address becomes unusable
*/
void __init btext_setup_display(int width, int height, int depth, int pitch,
unsigned long address)
@@ -134,7 +149,7 @@ void __init btext_unmap(void)
* changes.
*/
-static void map_boot_text(void)
+void btext_map(void)
{
unsigned long base, offset, size;
unsigned char *vbase;
@@ -160,33 +175,35 @@ int btext_initialize(struct device_node *np)
unsigned long address = 0;
const u32 *prop;
- prop = get_property(np, "linux,bootx-width", NULL);
+ prop = of_get_property(np, "linux,bootx-width", NULL);
if (prop == NULL)
- prop = get_property(np, "width", NULL);
+ prop = of_get_property(np, "width", NULL);
if (prop == NULL)
return -EINVAL;
width = *prop;
- prop = get_property(np, "linux,bootx-height", NULL);
+ prop = of_get_property(np, "linux,bootx-height", NULL);
if (prop == NULL)
- prop = get_property(np, "height", NULL);
+ prop = of_get_property(np, "height", NULL);
if (prop == NULL)
return -EINVAL;
height = *prop;
- prop = get_property(np, "linux,bootx-depth", NULL);
+ prop = of_get_property(np, "linux,bootx-depth", NULL);
if (prop == NULL)
- prop = get_property(np, "depth", NULL);
+ prop = of_get_property(np, "depth", NULL);
if (prop == NULL)
return -EINVAL;
depth = *prop;
pitch = width * ((depth + 7) / 8);
- prop = get_property(np, "linux,bootx-linebytes", NULL);
+ prop = of_get_property(np, "linux,bootx-linebytes", NULL);
if (prop == NULL)
- prop = get_property(np, "linebytes", NULL);
- if (prop)
+ prop = of_get_property(np, "linebytes", NULL);
+ if (prop && *prop != 0xffffffffu)
pitch = *prop;
if (pitch == 1)
pitch = 0x1000;
- prop = get_property(np, "address", NULL);
+ prop = of_get_property(np, "linux,bootx-addr", NULL);
+ if (prop == NULL)
+ prop = of_get_property(np, "address", NULL);
if (prop)
address = *prop;
@@ -207,7 +224,7 @@ int btext_initialize(struct device_node *np)
dispDeviceRect[2] = width;
dispDeviceRect[3] = height;
- map_boot_text();
+ btext_map();
return 0;
}
@@ -218,7 +235,7 @@ int __init btext_find_display(int allow_nonstdout)
struct device_node *np = NULL;
int rc = -ENODEV;
- name = get_property(of_chosen, "linux,stdout-path", NULL);
+ name = of_get_property(of_chosen, "linux,stdout-path", NULL);
if (name != NULL) {
np = of_find_node_by_path(name);
if (np != NULL) {
@@ -234,8 +251,8 @@ int __init btext_find_display(int allow_nonstdout)
if (rc == 0 || !allow_nonstdout)
return rc;
- for (np = NULL; (np = of_find_node_by_type(np, "display"));) {
- if (get_property(np, "linux,opened", NULL)) {
+ for_each_node_by_type(np, "display") {
+ if (of_get_property(np, "linux,opened", NULL)) {
printk("trying %s ...\n", np->full_name);
rc = btext_initialize(np);
printk("result: %d\n", rc);
@@ -281,7 +298,7 @@ void btext_update_display(unsigned long phys, int width, int height,
iounmap(logicalDisplayBase);
boot_text_mapped = 0;
}
- map_boot_text();
+ btext_map();
g_loc_X = 0;
g_loc_Y = 0;
g_max_loc_X = width / 8;
@@ -296,6 +313,7 @@ void btext_clearscreen(void)
(dispDeviceDepth >> 3)) >> 2;
int i,j;
+ rmci_maybe_on();
for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1]); i++)
{
unsigned int *ptr = base;
@@ -303,6 +321,7 @@ void btext_clearscreen(void)
*(ptr++) = 0;
base += (dispDeviceRowBytes >> 2);
}
+ rmci_maybe_off();
}
void btext_flushscreen(void)
@@ -353,6 +372,8 @@ static void scrollscreen(void)
(dispDeviceDepth >> 3)) >> 2;
int i,j;
+ rmci_maybe_on();
+
for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1] - 16); i++)
{
unsigned int *src_ptr = src;
@@ -369,122 +390,10 @@ static void scrollscreen(void)
*(dst_ptr++) = 0;
dst += (dispDeviceRowBytes >> 2);
}
-}
-#endif /* ndef NO_SCROLL */
-
-void btext_drawchar(char c)
-{
- int cline = 0;
-#ifdef NO_SCROLL
- int x;
-#endif
- if (!boot_text_mapped)
- return;
-
- switch (c) {
- case '\b':
- if (g_loc_X > 0)
- --g_loc_X;
- break;
- case '\t':
- g_loc_X = (g_loc_X & -8) + 8;
- break;
- case '\r':
- g_loc_X = 0;
- break;
- case '\n':
- g_loc_X = 0;
- g_loc_Y++;
- cline = 1;
- break;
- default:
- draw_byte(c, g_loc_X++, g_loc_Y);
- }
- if (g_loc_X >= g_max_loc_X) {
- g_loc_X = 0;
- g_loc_Y++;
- cline = 1;
- }
-#ifndef NO_SCROLL
- while (g_loc_Y >= g_max_loc_Y) {
- scrollscreen();
- g_loc_Y--;
- }
-#else
- /* wrap around from bottom to top of screen so we don't
- waste time scrolling each line. -- paulus. */
- if (g_loc_Y >= g_max_loc_Y)
- g_loc_Y = 0;
- if (cline) {
- for (x = 0; x < g_max_loc_X; ++x)
- draw_byte(' ', x, g_loc_Y);
- }
-#endif
-}
-void btext_drawstring(const char *c)
-{
- if (!boot_text_mapped)
- return;
- while (*c)
- btext_drawchar(*c++);
-}
-
-void btext_drawtext(const char *c, unsigned int len)
-{
- if (!boot_text_mapped)
- return;
- while (len--)
- btext_drawchar(*c++);
-}
-
-void btext_drawhex(unsigned long v)
-{
- char *hex_table = "0123456789abcdef";
-
- if (!boot_text_mapped)
- return;
-#ifdef CONFIG_PPC64
- btext_drawchar(hex_table[(v >> 60) & 0x0000000FUL]);
- btext_drawchar(hex_table[(v >> 56) & 0x0000000FUL]);
- btext_drawchar(hex_table[(v >> 52) & 0x0000000FUL]);
- btext_drawchar(hex_table[(v >> 48) & 0x0000000FUL]);
- btext_drawchar(hex_table[(v >> 44) & 0x0000000FUL]);
- btext_drawchar(hex_table[(v >> 40) & 0x0000000FUL]);
- btext_drawchar(hex_table[(v >> 36) & 0x0000000FUL]);
- btext_drawchar(hex_table[(v >> 32) & 0x0000000FUL]);
-#endif
- btext_drawchar(hex_table[(v >> 28) & 0x0000000FUL]);
- btext_drawchar(hex_table[(v >> 24) & 0x0000000FUL]);
- btext_drawchar(hex_table[(v >> 20) & 0x0000000FUL]);
- btext_drawchar(hex_table[(v >> 16) & 0x0000000FUL]);
- btext_drawchar(hex_table[(v >> 12) & 0x0000000FUL]);
- btext_drawchar(hex_table[(v >> 8) & 0x0000000FUL]);
- btext_drawchar(hex_table[(v >> 4) & 0x0000000FUL]);
- btext_drawchar(hex_table[(v >> 0) & 0x0000000FUL]);
- btext_drawchar(' ');
-}
-
-static void draw_byte(unsigned char c, long locX, long locY)
-{
- unsigned char *base = calc_base(locX << 3, locY << 4);
- unsigned char *font = &vga_font[((unsigned int)c) * 16];
- int rb = dispDeviceRowBytes;
-
- switch(dispDeviceDepth) {
- case 24:
- case 32:
- draw_byte_32(font, (unsigned int *)base, rb);
- break;
- case 15:
- case 16:
- draw_byte_16(font, (unsigned int *)base, rb);
- break;
- case 8:
- draw_byte_8(font, (unsigned int *)base, rb);
- break;
- }
+ rmci_maybe_off();
}
+#endif /* ndef NO_SCROLL */
static unsigned int expand_bits_8[16] = {
0x00000000,
@@ -534,7 +443,7 @@ static void draw_byte_32(unsigned char *font, unsigned int *base, int rb)
}
}
-static void draw_byte_16(unsigned char *font, unsigned int *base, int rb)
+static inline void draw_byte_16(unsigned char *font, unsigned int *base, int rb)
{
int l, bits;
int fg = 0xFFFFFFFFUL;
@@ -552,7 +461,7 @@ static void draw_byte_16(unsigned char *font, unsigned int *base, int rb)
}
}
-static void draw_byte_8(unsigned char *font, unsigned int *base, int rb)
+static inline void draw_byte_8(unsigned char *font, unsigned int *base, int rb)
{
int l, bits;
int fg = 0x0F0F0F0FUL;
@@ -568,6 +477,128 @@ static void draw_byte_8(unsigned char *font, unsigned int *base, int rb)
}
}
+static noinline void draw_byte(unsigned char c, long locX, long locY)
+{
+ unsigned char *base = calc_base(locX << 3, locY << 4);
+ unsigned char *font = &vga_font[((unsigned int)c) * 16];
+ int rb = dispDeviceRowBytes;
+
+ rmci_maybe_on();
+ switch(dispDeviceDepth) {
+ case 24:
+ case 32:
+ draw_byte_32(font, (unsigned int *)base, rb);
+ break;
+ case 15:
+ case 16:
+ draw_byte_16(font, (unsigned int *)base, rb);
+ break;
+ case 8:
+ draw_byte_8(font, (unsigned int *)base, rb);
+ break;
+ }
+ rmci_maybe_off();
+}
+
+void btext_drawchar(char c)
+{
+ int cline = 0;
+#ifdef NO_SCROLL
+ int x;
+#endif
+ if (!boot_text_mapped)
+ return;
+
+ switch (c) {
+ case '\b':
+ if (g_loc_X > 0)
+ --g_loc_X;
+ break;
+ case '\t':
+ g_loc_X = (g_loc_X & -8) + 8;
+ break;
+ case '\r':
+ g_loc_X = 0;
+ break;
+ case '\n':
+ g_loc_X = 0;
+ g_loc_Y++;
+ cline = 1;
+ break;
+ default:
+ draw_byte(c, g_loc_X++, g_loc_Y);
+ }
+ if (g_loc_X >= g_max_loc_X) {
+ g_loc_X = 0;
+ g_loc_Y++;
+ cline = 1;
+ }
+#ifndef NO_SCROLL
+ while (g_loc_Y >= g_max_loc_Y) {
+ scrollscreen();
+ g_loc_Y--;
+ }
+#else
+ /* wrap around from bottom to top of screen so we don't
+ waste time scrolling each line. -- paulus. */
+ if (g_loc_Y >= g_max_loc_Y)
+ g_loc_Y = 0;
+ if (cline) {
+ for (x = 0; x < g_max_loc_X; ++x)
+ draw_byte(' ', x, g_loc_Y);
+ }
+#endif
+}
+
+void btext_drawstring(const char *c)
+{
+ if (!boot_text_mapped)
+ return;
+ while (*c)
+ btext_drawchar(*c++);
+}
+
+void btext_drawtext(const char *c, unsigned int len)
+{
+ if (!boot_text_mapped)
+ return;
+ while (len--)
+ btext_drawchar(*c++);
+}
+
+void btext_drawhex(unsigned long v)
+{
+ if (!boot_text_mapped)
+ return;
+#ifdef CONFIG_PPC64
+ btext_drawchar(hex_asc_hi(v >> 56));
+ btext_drawchar(hex_asc_lo(v >> 56));
+ btext_drawchar(hex_asc_hi(v >> 48));
+ btext_drawchar(hex_asc_lo(v >> 48));
+ btext_drawchar(hex_asc_hi(v >> 40));
+ btext_drawchar(hex_asc_lo(v >> 40));
+ btext_drawchar(hex_asc_hi(v >> 32));
+ btext_drawchar(hex_asc_lo(v >> 32));
+#endif
+ btext_drawchar(hex_asc_hi(v >> 24));
+ btext_drawchar(hex_asc_lo(v >> 24));
+ btext_drawchar(hex_asc_hi(v >> 16));
+ btext_drawchar(hex_asc_lo(v >> 16));
+ btext_drawchar(hex_asc_hi(v >> 8));
+ btext_drawchar(hex_asc_lo(v >> 8));
+ btext_drawchar(hex_asc_hi(v));
+ btext_drawchar(hex_asc_lo(v));
+ btext_drawchar(' ');
+}
+
+void __init udbg_init_btext(void)
+{
+ /* If btext is enabled, we might have a BAT setup for early display,
+ * thus we do enable some very basic udbg output
+ */
+ udbg_putc = btext_drawchar;
+}
+
static unsigned char vga_font[cmapsz] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd,
@@ -912,3 +943,4 @@ static unsigned char vga_font[cmapsz] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
};
+
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
new file mode 100644
index 00000000000..40198d50b4c
--- /dev/null
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -0,0 +1,849 @@
+/*
+ * Processor cache information made available to userspace via sysfs;
+ * intended to be compatible with x86 intel_cacheinfo implementation.
+ *
+ * Copyright 2008 IBM Corporation
+ * Author: Nathan Lynch
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/list.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <asm/prom.h>
+
+#include "cacheinfo.h"
+
+/* per-cpu object for tracking:
+ * - a "cache" kobject for the top-level directory
+ * - a list of "index" objects representing the cpu's local cache hierarchy
+ */
+struct cache_dir {
+ struct kobject *kobj; /* bare (not embedded) kobject for cache
+ * directory */
+ struct cache_index_dir *index; /* list of index objects */
+};
+
+/* "index" object: each cpu's cache directory has an index
+ * subdirectory corresponding to a cache object associated with the
+ * cpu. This object's lifetime is managed via the embedded kobject.
+ */
+struct cache_index_dir {
+ struct kobject kobj;
+ struct cache_index_dir *next; /* next index in parent directory */
+ struct cache *cache;
+};
+
+/* Template for determining which OF properties to query for a given
+ * cache type */
+struct cache_type_info {
+ const char *name;
+ const char *size_prop;
+
+ /* Allow for both [di]-cache-line-size and
+ * [di]-cache-block-size properties. According to the PowerPC
+ * Processor binding, -line-size should be provided if it
+ * differs from the cache block size (that which is operated
+ * on by cache instructions), so we look for -line-size first.
+ * See cache_get_line_size(). */
+
+ const char *line_size_props[2];
+ const char *nr_sets_prop;
+};
+
+/* These are used to index the cache_type_info array. */
+#define CACHE_TYPE_UNIFIED 0
+#define CACHE_TYPE_INSTRUCTION 1
+#define CACHE_TYPE_DATA 2
+
+static const struct cache_type_info cache_type_info[] = {
+ {
+ /* PowerPC Processor binding says the [di]-cache-*
+ * must be equal on unified caches, so just use
+ * d-cache properties. */
+ .name = "Unified",
+ .size_prop = "d-cache-size",
+ .line_size_props = { "d-cache-line-size",
+ "d-cache-block-size", },
+ .nr_sets_prop = "d-cache-sets",
+ },
+ {
+ .name = "Instruction",
+ .size_prop = "i-cache-size",
+ .line_size_props = { "i-cache-line-size",
+ "i-cache-block-size", },
+ .nr_sets_prop = "i-cache-sets",
+ },
+ {
+ .name = "Data",
+ .size_prop = "d-cache-size",
+ .line_size_props = { "d-cache-line-size",
+ "d-cache-block-size", },
+ .nr_sets_prop = "d-cache-sets",
+ },
+};
+
+/* Cache object: each instance of this corresponds to a distinct cache
+ * in the system. There are separate objects for Harvard caches: one
+ * each for instruction and data, and each refers to the same OF node.
+ * The refcount of the OF node is elevated for the lifetime of the
+ * cache object. A cache object is released when its shared_cpu_map
+ * is cleared (see cache_cpu_clear).
+ *
+ * A cache object is on two lists: an unsorted global list
+ * (cache_list) of cache objects; and a singly-linked list
+ * representing the local cache hierarchy, which is ordered by level
+ * (e.g. L1d -> L1i -> L2 -> L3).
+ */
+struct cache {
+ struct device_node *ofnode; /* OF node for this cache, may be cpu */
+ struct cpumask shared_cpu_map; /* online CPUs using this cache */
+ int type; /* split cache disambiguation */
+ int level; /* level not explicit in device tree */
+ struct list_head list; /* global list of cache objects */
+ struct cache *next_local; /* next cache of >= level */
+};
+
+static DEFINE_PER_CPU(struct cache_dir *, cache_dir_pcpu);
+
+/* traversal/modification of this list occurs only at cpu hotplug time;
+ * access is serialized by cpu hotplug locking
+ */
+static LIST_HEAD(cache_list);
+
+static struct cache_index_dir *kobj_to_cache_index_dir(struct kobject *k)
+{
+ return container_of(k, struct cache_index_dir, kobj);
+}
+
+static const char *cache_type_string(const struct cache *cache)
+{
+ return cache_type_info[cache->type].name;
+}
+
+static void cache_init(struct cache *cache, int type, int level,
+ struct device_node *ofnode)
+{
+ cache->type = type;
+ cache->level = level;
+ cache->ofnode = of_node_get(ofnode);
+ INIT_LIST_HEAD(&cache->list);
+ list_add(&cache->list, &cache_list);
+}
+
+static struct cache *new_cache(int type, int level, struct device_node *ofnode)
+{
+ struct cache *cache;
+
+ cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+ if (cache)
+ cache_init(cache, type, level, ofnode);
+
+ return cache;
+}
+
+static void release_cache_debugcheck(struct cache *cache)
+{
+ struct cache *iter;
+
+ list_for_each_entry(iter, &cache_list, list)
+ WARN_ONCE(iter->next_local == cache,
+ "cache for %s(%s) refers to cache for %s(%s)\n",
+ iter->ofnode->full_name,
+ cache_type_string(iter),
+ cache->ofnode->full_name,
+ cache_type_string(cache));
+}
+
+static void release_cache(struct cache *cache)
+{
+ if (!cache)
+ return;
+
+ pr_debug("freeing L%d %s cache for %s\n", cache->level,
+ cache_type_string(cache), cache->ofnode->full_name);
+
+ release_cache_debugcheck(cache);
+ list_del(&cache->list);
+ of_node_put(cache->ofnode);
+ kfree(cache);
+}
+
+static void cache_cpu_set(struct cache *cache, int cpu)
+{
+ struct cache *next = cache;
+
+ while (next) {
+ WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map),
+ "CPU %i already accounted in %s(%s)\n",
+ cpu, next->ofnode->full_name,
+ cache_type_string(next));
+ cpumask_set_cpu(cpu, &next->shared_cpu_map);
+ next = next->next_local;
+ }
+}
+
+static int cache_size(const struct cache *cache, unsigned int *ret)
+{
+ const char *propname;
+ const __be32 *cache_size;
+
+ propname = cache_type_info[cache->type].size_prop;
+
+ cache_size = of_get_property(cache->ofnode, propname, NULL);
+ if (!cache_size)
+ return -ENODEV;
+
+ *ret = of_read_number(cache_size, 1);
+ return 0;
+}
+
+static int cache_size_kb(const struct cache *cache, unsigned int *ret)
+{
+ unsigned int size;
+
+ if (cache_size(cache, &size))
+ return -ENODEV;
+
+ *ret = size / 1024;
+ return 0;
+}
+
+/* not cache_line_size() because that's a macro in include/linux/cache.h */
+static int cache_get_line_size(const struct cache *cache, unsigned int *ret)
+{
+ const __be32 *line_size;
+ int i, lim;
+
+ lim = ARRAY_SIZE(cache_type_info[cache->type].line_size_props);
+
+ for (i = 0; i < lim; i++) {
+ const char *propname;
+
+ propname = cache_type_info[cache->type].line_size_props[i];
+ line_size = of_get_property(cache->ofnode, propname, NULL);
+ if (line_size)
+ break;
+ }
+
+ if (!line_size)
+ return -ENODEV;
+
+ *ret = of_read_number(line_size, 1);
+ return 0;
+}
+
+static int cache_nr_sets(const struct cache *cache, unsigned int *ret)
+{
+ const char *propname;
+ const __be32 *nr_sets;
+
+ propname = cache_type_info[cache->type].nr_sets_prop;
+
+ nr_sets = of_get_property(cache->ofnode, propname, NULL);
+ if (!nr_sets)
+ return -ENODEV;
+
+ *ret = of_read_number(nr_sets, 1);
+ return 0;
+}
+
+static int cache_associativity(const struct cache *cache, unsigned int *ret)
+{
+ unsigned int line_size;
+ unsigned int nr_sets;
+ unsigned int size;
+
+ if (cache_nr_sets(cache, &nr_sets))
+ goto err;
+
+ /* If the cache is fully associative, there is no need to
+ * check the other properties.
+ */
+ if (nr_sets == 1) {
+ *ret = 0;
+ return 0;
+ }
+
+ if (cache_get_line_size(cache, &line_size))
+ goto err;
+ if (cache_size(cache, &size))
+ goto err;
+
+ if (!(nr_sets > 0 && size > 0 && line_size > 0))
+ goto err;
+
+ *ret = (size / nr_sets) / line_size;
+ return 0;
+err:
+ return -ENODEV;
+}
+
+/* helper for dealing with split caches */
+static struct cache *cache_find_first_sibling(struct cache *cache)
+{
+ struct cache *iter;
+
+ if (cache->type == CACHE_TYPE_UNIFIED)
+ return cache;
+
+ list_for_each_entry(iter, &cache_list, list)
+ if (iter->ofnode == cache->ofnode && iter->next_local == cache)
+ return iter;
+
+ return cache;
+}
+
+/* return the first cache on a local list matching node */
+static struct cache *cache_lookup_by_node(const struct device_node *node)
+{
+ struct cache *cache = NULL;
+ struct cache *iter;
+
+ list_for_each_entry(iter, &cache_list, list) {
+ if (iter->ofnode != node)
+ continue;
+ cache = cache_find_first_sibling(iter);
+ break;
+ }
+
+ return cache;
+}
+
+static bool cache_node_is_unified(const struct device_node *np)
+{
+ return of_get_property(np, "cache-unified", NULL);
+}
+
+static struct cache *cache_do_one_devnode_unified(struct device_node *node,
+ int level)
+{
+ struct cache *cache;
+
+ pr_debug("creating L%d ucache for %s\n", level, node->full_name);
+
+ cache = new_cache(CACHE_TYPE_UNIFIED, level, node);
+
+ return cache;
+}
+
+static struct cache *cache_do_one_devnode_split(struct device_node *node,
+ int level)
+{
+ struct cache *dcache, *icache;
+
+ pr_debug("creating L%d dcache and icache for %s\n", level,
+ node->full_name);
+
+ dcache = new_cache(CACHE_TYPE_DATA, level, node);
+ icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node);
+
+ if (!dcache || !icache)
+ goto err;
+
+ dcache->next_local = icache;
+
+ return dcache;
+err:
+ release_cache(dcache);
+ release_cache(icache);
+ return NULL;
+}
+
+static struct cache *cache_do_one_devnode(struct device_node *node, int level)
+{
+ struct cache *cache;
+
+ if (cache_node_is_unified(node))
+ cache = cache_do_one_devnode_unified(node, level);
+ else
+ cache = cache_do_one_devnode_split(node, level);
+
+ return cache;
+}
+
+static struct cache *cache_lookup_or_instantiate(struct device_node *node,
+ int level)
+{
+ struct cache *cache;
+
+ cache = cache_lookup_by_node(node);
+
+ WARN_ONCE(cache && cache->level != level,
+ "cache level mismatch on lookup (got %d, expected %d)\n",
+ cache->level, level);
+
+ if (!cache)
+ cache = cache_do_one_devnode(node, level);
+
+ return cache;
+}
+
+static void link_cache_lists(struct cache *smaller, struct cache *bigger)
+{
+ while (smaller->next_local) {
+ if (smaller->next_local == bigger)
+ return; /* already linked */
+ smaller = smaller->next_local;
+ }
+
+ smaller->next_local = bigger;
+}
+
+static void do_subsidiary_caches_debugcheck(struct cache *cache)
+{
+ WARN_ON_ONCE(cache->level != 1);
+ WARN_ON_ONCE(strcmp(cache->ofnode->type, "cpu"));
+}
+
+static void do_subsidiary_caches(struct cache *cache)
+{
+ struct device_node *subcache_node;
+ int level = cache->level;
+
+ do_subsidiary_caches_debugcheck(cache);
+
+ while ((subcache_node = of_find_next_cache_node(cache->ofnode))) {
+ struct cache *subcache;
+
+ level++;
+ subcache = cache_lookup_or_instantiate(subcache_node, level);
+ of_node_put(subcache_node);
+ if (!subcache)
+ break;
+
+ link_cache_lists(cache, subcache);
+ cache = subcache;
+ }
+}
+
+static struct cache *cache_chain_instantiate(unsigned int cpu_id)
+{
+ struct device_node *cpu_node;
+ struct cache *cpu_cache = NULL;
+
+ pr_debug("creating cache object(s) for CPU %i\n", cpu_id);
+
+ cpu_node = of_get_cpu_node(cpu_id, NULL);
+ WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
+ if (!cpu_node)
+ goto out;
+
+ cpu_cache = cache_lookup_or_instantiate(cpu_node, 1);
+ if (!cpu_cache)
+ goto out;
+
+ do_subsidiary_caches(cpu_cache);
+
+ cache_cpu_set(cpu_cache, cpu_id);
+out:
+ of_node_put(cpu_node);
+
+ return cpu_cache;
+}
+
+static struct cache_dir *cacheinfo_create_cache_dir(unsigned int cpu_id)
+{
+ struct cache_dir *cache_dir;
+ struct device *dev;
+ struct kobject *kobj = NULL;
+
+ dev = get_cpu_device(cpu_id);
+ WARN_ONCE(!dev, "no dev for CPU %i\n", cpu_id);
+ if (!dev)
+ goto err;
+
+ kobj = kobject_create_and_add("cache", &dev->kobj);
+ if (!kobj)
+ goto err;
+
+ cache_dir = kzalloc(sizeof(*cache_dir), GFP_KERNEL);
+ if (!cache_dir)
+ goto err;
+
+ cache_dir->kobj = kobj;
+
+ WARN_ON_ONCE(per_cpu(cache_dir_pcpu, cpu_id) != NULL);
+
+ per_cpu(cache_dir_pcpu, cpu_id) = cache_dir;
+
+ return cache_dir;
+err:
+ kobject_put(kobj);
+ return NULL;
+}
+
+static void cache_index_release(struct kobject *kobj)
+{
+ struct cache_index_dir *index;
+
+ index = kobj_to_cache_index_dir(kobj);
+
+ pr_debug("freeing index directory for L%d %s cache\n",
+ index->cache->level, cache_type_string(index->cache));
+
+ kfree(index);
+}
+
+static ssize_t cache_index_show(struct kobject *k, struct attribute *attr, char *buf)
+{
+ struct kobj_attribute *kobj_attr;
+
+ kobj_attr = container_of(attr, struct kobj_attribute, attr);
+
+ return kobj_attr->show(k, kobj_attr, buf);
+}
+
+static struct cache *index_kobj_to_cache(struct kobject *k)
+{
+ struct cache_index_dir *index;
+
+ index = kobj_to_cache_index_dir(k);
+
+ return index->cache;
+}
+
+static ssize_t size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ unsigned int size_kb;
+ struct cache *cache;
+
+ cache = index_kobj_to_cache(k);
+
+ if (cache_size_kb(cache, &size_kb))
+ return -ENODEV;
+
+ return sprintf(buf, "%uK\n", size_kb);
+}
+
+static struct kobj_attribute cache_size_attr =
+ __ATTR(size, 0444, size_show, NULL);
+
+
+static ssize_t line_size_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ unsigned int line_size;
+ struct cache *cache;
+
+ cache = index_kobj_to_cache(k);
+
+ if (cache_get_line_size(cache, &line_size))
+ return -ENODEV;
+
+ return sprintf(buf, "%u\n", line_size);
+}
+
+static struct kobj_attribute cache_line_size_attr =
+ __ATTR(coherency_line_size, 0444, line_size_show, NULL);
+
+static ssize_t nr_sets_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ unsigned int nr_sets;
+ struct cache *cache;
+
+ cache = index_kobj_to_cache(k);
+
+ if (cache_nr_sets(cache, &nr_sets))
+ return -ENODEV;
+
+ return sprintf(buf, "%u\n", nr_sets);
+}
+
+static struct kobj_attribute cache_nr_sets_attr =
+ __ATTR(number_of_sets, 0444, nr_sets_show, NULL);
+
+static ssize_t associativity_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ unsigned int associativity;
+ struct cache *cache;
+
+ cache = index_kobj_to_cache(k);
+
+ if (cache_associativity(cache, &associativity))
+ return -ENODEV;
+
+ return sprintf(buf, "%u\n", associativity);
+}
+
+static struct kobj_attribute cache_assoc_attr =
+ __ATTR(ways_of_associativity, 0444, associativity_show, NULL);
+
+static ssize_t type_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ struct cache *cache;
+
+ cache = index_kobj_to_cache(k);
+
+ return sprintf(buf, "%s\n", cache_type_string(cache));
+}
+
+static struct kobj_attribute cache_type_attr =
+ __ATTR(type, 0444, type_show, NULL);
+
+static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ struct cache_index_dir *index;
+ struct cache *cache;
+
+ index = kobj_to_cache_index_dir(k);
+ cache = index->cache;
+
+ return sprintf(buf, "%d\n", cache->level);
+}
+
+static struct kobj_attribute cache_level_attr =
+ __ATTR(level, 0444, level_show, NULL);
+
+static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ struct cache_index_dir *index;
+ struct cache *cache;
+ int len;
+ int n = 0;
+
+ index = kobj_to_cache_index_dir(k);
+ cache = index->cache;
+ len = PAGE_SIZE - 2;
+
+ if (len > 1) {
+ n = cpumask_scnprintf(buf, len, &cache->shared_cpu_map);
+ buf[n++] = '\n';
+ buf[n] = '\0';
+ }
+ return n;
+}
+
+static struct kobj_attribute cache_shared_cpu_map_attr =
+ __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL);
+
+/* Attributes which should always be created -- the kobject/sysfs core
+ * does this automatically via kobj_type->default_attrs. This is the
+ * minimum data required to uniquely identify a cache.
+ */
+static struct attribute *cache_index_default_attrs[] = {
+ &cache_type_attr.attr,
+ &cache_level_attr.attr,
+ &cache_shared_cpu_map_attr.attr,
+ NULL,
+};
+
+/* Attributes which should be created if the cache device node has the
+ * right properties -- see cacheinfo_create_index_opt_attrs
+ */
+static struct kobj_attribute *cache_index_opt_attrs[] = {
+ &cache_size_attr,
+ &cache_line_size_attr,
+ &cache_nr_sets_attr,
+ &cache_assoc_attr,
+};
+
+static const struct sysfs_ops cache_index_ops = {
+ .show = cache_index_show,
+};
+
+static struct kobj_type cache_index_type = {
+ .release = cache_index_release,
+ .sysfs_ops = &cache_index_ops,
+ .default_attrs = cache_index_default_attrs,
+};
+
+static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
+{
+ const char *cache_name;
+ const char *cache_type;
+ struct cache *cache;
+ char *buf;
+ int i;
+
+ buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!buf)
+ return;
+
+ cache = dir->cache;
+ cache_name = cache->ofnode->full_name;
+ cache_type = cache_type_string(cache);
+
+ /* We don't want to create an attribute that can't provide a
+ * meaningful value. Check the return value of each optional
+ * attribute's ->show method before registering the
+ * attribute.
+ */
+ for (i = 0; i < ARRAY_SIZE(cache_index_opt_attrs); i++) {
+ struct kobj_attribute *attr;
+ ssize_t rc;
+
+ attr = cache_index_opt_attrs[i];
+
+ rc = attr->show(&dir->kobj, attr, buf);
+ if (rc <= 0) {
+ pr_debug("not creating %s attribute for "
+ "%s(%s) (rc = %zd)\n",
+ attr->attr.name, cache_name,
+ cache_type, rc);
+ continue;
+ }
+ if (sysfs_create_file(&dir->kobj, &attr->attr))
+ pr_debug("could not create %s attribute for %s(%s)\n",
+ attr->attr.name, cache_name, cache_type);
+ }
+
+ kfree(buf);
+}
+
+static void cacheinfo_create_index_dir(struct cache *cache, int index,
+ struct cache_dir *cache_dir)
+{
+ struct cache_index_dir *index_dir;
+ int rc;
+
+ index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL);
+ if (!index_dir)
+ goto err;
+
+ index_dir->cache = cache;
+
+ rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type,
+ cache_dir->kobj, "index%d", index);
+ if (rc)
+ goto err;
+
+ index_dir->next = cache_dir->index;
+ cache_dir->index = index_dir;
+
+ cacheinfo_create_index_opt_attrs(index_dir);
+
+ return;
+err:
+ kfree(index_dir);
+}
+
+static void cacheinfo_sysfs_populate(unsigned int cpu_id,
+ struct cache *cache_list)
+{
+ struct cache_dir *cache_dir;
+ struct cache *cache;
+ int index = 0;
+
+ cache_dir = cacheinfo_create_cache_dir(cpu_id);
+ if (!cache_dir)
+ return;
+
+ cache = cache_list;
+ while (cache) {
+ cacheinfo_create_index_dir(cache, index, cache_dir);
+ index++;
+ cache = cache->next_local;
+ }
+}
+
+void cacheinfo_cpu_online(unsigned int cpu_id)
+{
+ struct cache *cache;
+
+ cache = cache_chain_instantiate(cpu_id);
+ if (!cache)
+ return;
+
+ cacheinfo_sysfs_populate(cpu_id, cache);
+}
+
+/* functions needed to remove cache entry for cpu offline or suspend/resume */
+
+#if (defined(CONFIG_PPC_PSERIES) && defined(CONFIG_SUSPEND)) || \
+ defined(CONFIG_HOTPLUG_CPU)
+
+static struct cache *cache_lookup_by_cpu(unsigned int cpu_id)
+{
+ struct device_node *cpu_node;
+ struct cache *cache;
+
+ cpu_node = of_get_cpu_node(cpu_id, NULL);
+ WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
+ if (!cpu_node)
+ return NULL;
+
+ cache = cache_lookup_by_node(cpu_node);
+ of_node_put(cpu_node);
+
+ return cache;
+}
+
+static void remove_index_dirs(struct cache_dir *cache_dir)
+{
+ struct cache_index_dir *index;
+
+ index = cache_dir->index;
+
+ while (index) {
+ struct cache_index_dir *next;
+
+ next = index->next;
+ kobject_put(&index->kobj);
+ index = next;
+ }
+}
+
+static void remove_cache_dir(struct cache_dir *cache_dir)
+{
+ remove_index_dirs(cache_dir);
+
+ /* Remove cache dir from sysfs */
+ kobject_del(cache_dir->kobj);
+
+ kobject_put(cache_dir->kobj);
+
+ kfree(cache_dir);
+}
+
+static void cache_cpu_clear(struct cache *cache, int cpu)
+{
+ while (cache) {
+ struct cache *next = cache->next_local;
+
+ WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map),
+ "CPU %i not accounted in %s(%s)\n",
+ cpu, cache->ofnode->full_name,
+ cache_type_string(cache));
+
+ cpumask_clear_cpu(cpu, &cache->shared_cpu_map);
+
+ /* Release the cache object if all the cpus using it
+ * are offline */
+ if (cpumask_empty(&cache->shared_cpu_map))
+ release_cache(cache);
+
+ cache = next;
+ }
+}
+
+void cacheinfo_cpu_offline(unsigned int cpu_id)
+{
+ struct cache_dir *cache_dir;
+ struct cache *cache;
+
+ /* Prevent userspace from seeing inconsistent state - remove
+ * the sysfs hierarchy first */
+ cache_dir = per_cpu(cache_dir_pcpu, cpu_id);
+
+ /* careful, sysfs population may have failed */
+ if (cache_dir)
+ remove_cache_dir(cache_dir);
+
+ per_cpu(cache_dir_pcpu, cpu_id) = NULL;
+
+ /* clear the CPU's bit in its cache chain, possibly freeing
+ * cache objects */
+ cache = cache_lookup_by_cpu(cpu_id);
+ if (cache)
+ cache_cpu_clear(cache, cpu_id);
+}
+#endif /* (CONFIG_PPC_PSERIES && CONFIG_SUSPEND) || CONFIG_HOTPLUG_CPU */
diff --git a/arch/powerpc/kernel/cacheinfo.h b/arch/powerpc/kernel/cacheinfo.h
new file mode 100644
index 00000000000..a7b74d36acd
--- /dev/null
+++ b/arch/powerpc/kernel/cacheinfo.h
@@ -0,0 +1,8 @@
+#ifndef _PPC_CACHEINFO_H
+#define _PPC_CACHEINFO_H
+
+/* These are just hooks for sysfs.c to use. */
+extern void cacheinfo_cpu_online(unsigned int cpu_id);
+extern void cacheinfo_cpu_offline(unsigned int cpu_id);
+
+#endif /* _PPC_CACHEINFO_H */
diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c
index 640d4bb2932..108ff14e212 100644
--- a/arch/powerpc/kernel/compat_audit.c
+++ b/arch/powerpc/kernel/compat_audit.c
@@ -21,6 +21,11 @@ unsigned ppc32_read_class[] = {
~0U
};
+unsigned ppc32_signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
int ppc32_classify_syscall(unsigned syscall)
{
switch(syscall) {
diff --git a/arch/powerpc/kernel/cpu_setup_44x.S b/arch/powerpc/kernel/cpu_setup_44x.S
new file mode 100644
index 00000000000..e32b4a9a2c2
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_44x.S
@@ -0,0 +1,74 @@
+/*
+ * This file contains low level CPU setup functions.
+ * Valentine Barshak <vbarshak@ru.mvista.com>
+ * MontaVista Software, Inc (c) 2007
+ *
+ * Based on cpu_setup_6xx code by
+ * Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+
+_GLOBAL(__setup_cpu_440ep)
+ b __init_fpu_44x
+_GLOBAL(__setup_cpu_440epx)
+ mflr r4
+ bl __init_fpu_44x
+ bl __plb_disable_wrp
+ bl __fixup_440A_mcheck
+ mtlr r4
+ blr
+_GLOBAL(__setup_cpu_440grx)
+ mflr r4
+ bl __plb_disable_wrp
+ bl __fixup_440A_mcheck
+ mtlr r4
+ blr
+_GLOBAL(__setup_cpu_460ex)
+_GLOBAL(__setup_cpu_460gt)
+_GLOBAL(__setup_cpu_460sx)
+_GLOBAL(__setup_cpu_apm821xx)
+ mflr r4
+ bl __init_fpu_44x
+ bl __fixup_440A_mcheck
+ mtlr r4
+ blr
+
+_GLOBAL(__setup_cpu_440x5)
+_GLOBAL(__setup_cpu_440gx)
+_GLOBAL(__setup_cpu_440spe)
+ b __fixup_440A_mcheck
+
+/* enable APU between CPU and FPU */
+_GLOBAL(__init_fpu_44x)
+ mfspr r3,SPRN_CCR0
+ /* Clear DAPUIB flag in CCR0 */
+ rlwinm r3,r3,0,12,10
+ mtspr SPRN_CCR0,r3
+ isync
+ blr
+
+/*
+ * Workaround for the incorrect write to DDR SDRAM errata.
+ * The write address can be corrupted during writes to
+ * DDR SDRAM when write pipelining is enabled on PLB0.
+ * Disable write pipelining here.
+ */
+#define DCRN_PLB4A0_ACR 0x81
+
+_GLOBAL(__plb_disable_wrp)
+ mfdcr r3,DCRN_PLB4A0_ACR
+ /* clear WRP bit in PLB4A0_ACR */
+ rlwinm r3,r3,0,8,6
+ mtdcr DCRN_PLB4A0_ACR,r3
+ isync
+ blr
+
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index 8b4a4ee85ec..f8cd9fba4d3 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -15,61 +15,72 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cache.h>
+#include <asm/mmu.h>
_GLOBAL(__setup_cpu_603)
- b setup_common_caches
+ mflr r5
+BEGIN_MMU_FTR_SECTION
+ li r10,0
+ mtspr SPRN_SPRG_603_LRU,r10 /* init SW LRU tracking */
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+BEGIN_FTR_SECTION
+ bl __init_fpu_registers
+END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE)
+ bl setup_common_caches
+ mtlr r5
+ blr
_GLOBAL(__setup_cpu_604)
- mflr r4
+ mflr r5
bl setup_common_caches
bl setup_604_hid0
- mtlr r4
+ mtlr r5
blr
_GLOBAL(__setup_cpu_750)
- mflr r4
+ mflr r5
bl __init_fpu_registers
bl setup_common_caches
bl setup_750_7400_hid0
- mtlr r4
+ mtlr r5
blr
_GLOBAL(__setup_cpu_750cx)
- mflr r4
+ mflr r5
bl __init_fpu_registers
bl setup_common_caches
bl setup_750_7400_hid0
bl setup_750cx
- mtlr r4
+ mtlr r5
blr
_GLOBAL(__setup_cpu_750fx)
- mflr r4
+ mflr r5
bl __init_fpu_registers
bl setup_common_caches
bl setup_750_7400_hid0
bl setup_750fx
- mtlr r4
+ mtlr r5
blr
_GLOBAL(__setup_cpu_7400)
- mflr r4
+ mflr r5
bl __init_fpu_registers
bl setup_7400_workarounds
bl setup_common_caches
bl setup_750_7400_hid0
- mtlr r4
+ mtlr r5
blr
_GLOBAL(__setup_cpu_7410)
- mflr r4
+ mflr r5
bl __init_fpu_registers
bl setup_7410_workarounds
bl setup_common_caches
bl setup_750_7400_hid0
li r3,0
mtspr SPRN_L2CR2,r3
- mtlr r4
+ mtlr r5
blr
_GLOBAL(__setup_cpu_745x)
- mflr r4
+ mflr r5
bl setup_common_caches
bl setup_745x_specifics
- mtlr r4
+ mtlr r5
blr
/* Enable caches for 603's, 604, 750 & 7400 */
@@ -113,7 +124,7 @@ setup_604_hid0:
* around #3 and with the same fix we use. We may want to
* check if the CPU is using 60x bus mode in which case
* the workaround for errata #4 is useless. Also, we may
- * want to explicitely clear HID0_NOPDST as this is not
+ * want to explicitly clear HID0_NOPDST as this is not
* needed once we have applied workaround #5 (though it's
* not set by Apple's firmware at least).
*/
@@ -183,10 +194,10 @@ setup_750cx:
cror 4*cr0+eq,4*cr0+eq,4*cr1+eq
cror 4*cr0+eq,4*cr0+eq,4*cr2+eq
bnelr
- lwz r6,CPU_SPEC_FEATURES(r5)
+ lwz r6,CPU_SPEC_FEATURES(r4)
li r7,CPU_FTR_CAN_NAP
andc r6,r6,r7
- stw r6,CPU_SPEC_FEATURES(r5)
+ stw r6,CPU_SPEC_FEATURES(r4)
blr
/* 750fx specific
@@ -214,12 +225,12 @@ BEGIN_FTR_SECTION
andis. r11,r11,L3CR_L3E@h
beq 1f
END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
- lwz r6,CPU_SPEC_FEATURES(r5)
+ lwz r6,CPU_SPEC_FEATURES(r4)
andi. r0,r6,CPU_FTR_L3_DISABLE_NAP
beq 1f
li r7,CPU_FTR_CAN_NAP
andc r6,r6,r7
- stw r6,CPU_SPEC_FEATURES(r5)
+ stw r6,CPU_SPEC_FEATURES(r4)
1:
mfspr r11,SPRN_HID0
@@ -228,6 +239,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
ori r11,r11,HID0_SGE | HID0_FOLD | HID0_BHTE
ori r11,r11,HID0_LRSTK | HID0_BTIC
oris r11,r11,HID0_DPM@h
+BEGIN_MMU_FTR_SECTION
+ oris r11,r11,HID0_HIGH_BAT@h
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
BEGIN_FTR_SECTION
xori r11,r11,HID0_BTIC
END_FTR_SECTION_IFSET(CPU_FTR_NO_BTIC)
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
new file mode 100644
index 00000000000..4f1393d2007
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -0,0 +1,225 @@
+/*
+ * This file contains low level CPU setup functions.
+ * Kumar Gala <galak@kernel.crashing.org>
+ * Copyright 2009 Freescale Semiconductor, Inc.
+ *
+ * Based on cpu_setup_6xx code by
+ * Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/mmu-book3e.h>
+#include <asm/asm-offsets.h>
+
+_GLOBAL(__e500_icache_setup)
+ mfspr r0, SPRN_L1CSR1
+ andi. r3, r0, L1CSR1_ICE
+ bnelr /* Already enabled */
+ oris r0, r0, L1CSR1_CPE@h
+ ori r0, r0, (L1CSR1_ICFI | L1CSR1_ICLFR | L1CSR1_ICE)
+ mtspr SPRN_L1CSR1, r0 /* Enable I-Cache */
+ isync
+ blr
+
+_GLOBAL(__e500_dcache_setup)
+ mfspr r0, SPRN_L1CSR0
+ andi. r3, r0, L1CSR0_DCE
+ bnelr /* Already enabled */
+ msync
+ isync
+ li r0, 0
+ mtspr SPRN_L1CSR0, r0 /* Disable */
+ msync
+ isync
+ li r0, (L1CSR0_DCFI | L1CSR0_CLFC)
+ mtspr SPRN_L1CSR0, r0 /* Invalidate */
+ isync
+1: mfspr r0, SPRN_L1CSR0
+ andi. r3, r0, L1CSR0_CLFC
+ bne+ 1b /* Wait for lock bits reset */
+ oris r0, r0, L1CSR0_CPE@h
+ ori r0, r0, L1CSR0_DCE
+ msync
+ isync
+ mtspr SPRN_L1CSR0, r0 /* Enable */
+ isync
+ blr
+
+/*
+ * FIXME - we haven't yet done testing to determine a reasonable default
+ * value for PW20_WAIT_IDLE_BIT.
+ */
+#define PW20_WAIT_IDLE_BIT 50 /* 1ms, TB frequency is 41.66MHZ */
+_GLOBAL(setup_pw20_idle)
+ mfspr r3, SPRN_PWRMGTCR0
+
+ /* Set PW20_WAIT bit, enable pw20 state*/
+ ori r3, r3, PWRMGTCR0_PW20_WAIT
+ li r11, PW20_WAIT_IDLE_BIT
+
+ /* Set Automatic PW20 Core Idle Count */
+ rlwimi r3, r11, PWRMGTCR0_PW20_ENT_SHIFT, PWRMGTCR0_PW20_ENT
+
+ mtspr SPRN_PWRMGTCR0, r3
+
+ blr
+
+/*
+ * FIXME - we haven't yet done testing to determine a reasonable default
+ * value for AV_WAIT_IDLE_BIT.
+ */
+#define AV_WAIT_IDLE_BIT 50 /* 1ms, TB frequency is 41.66MHZ */
+_GLOBAL(setup_altivec_idle)
+ mfspr r3, SPRN_PWRMGTCR0
+
+ /* Enable Altivec Idle */
+ oris r3, r3, PWRMGTCR0_AV_IDLE_PD_EN@h
+ li r11, AV_WAIT_IDLE_BIT
+
+ /* Set Automatic AltiVec Idle Count */
+ rlwimi r3, r11, PWRMGTCR0_AV_IDLE_CNT_SHIFT, PWRMGTCR0_AV_IDLE_CNT
+
+ mtspr SPRN_PWRMGTCR0, r3
+
+ blr
+
+_GLOBAL(__setup_cpu_e6500)
+ mflr r6
+#ifdef CONFIG_PPC64
+ bl setup_altivec_ivors
+ /* Touch IVOR42 only if the CPU supports E.HV category */
+ mfspr r10,SPRN_MMUCFG
+ rlwinm. r10,r10,0,MMUCFG_LPIDSIZE
+ beq 1f
+ bl setup_lrat_ivor
+1:
+#endif
+ bl setup_pw20_idle
+ bl setup_altivec_idle
+ bl __setup_cpu_e5500
+ mtlr r6
+ blr
+
+#ifdef CONFIG_PPC32
+_GLOBAL(__setup_cpu_e200)
+ /* enable dedicated debug exception handling resources (Debug APU) */
+ mfspr r3,SPRN_HID0
+ ori r3,r3,HID0_DAPUEN@l
+ mtspr SPRN_HID0,r3
+ b __setup_e200_ivors
+_GLOBAL(__setup_cpu_e500v1)
+_GLOBAL(__setup_cpu_e500v2)
+ mflr r4
+ bl __e500_icache_setup
+ bl __e500_dcache_setup
+ bl __setup_e500_ivors
+#if defined(CONFIG_FSL_RIO) || defined(CONFIG_FSL_PCI)
+ /* Ensure that RFXE is set */
+ mfspr r3,SPRN_HID1
+ oris r3,r3,HID1_RFXE@h
+ mtspr SPRN_HID1,r3
+#endif
+ mtlr r4
+ blr
+_GLOBAL(__setup_cpu_e500mc)
+_GLOBAL(__setup_cpu_e5500)
+ mflr r5
+ bl __e500_icache_setup
+ bl __e500_dcache_setup
+ bl __setup_e500mc_ivors
+ /*
+ * We only want to touch IVOR38-41 if we're running on hardware
+ * that supports category E.HV. The architectural way to determine
+ * this is MMUCFG[LPIDSIZE].
+ */
+ mfspr r3, SPRN_MMUCFG
+ rlwinm. r3, r3, 0, MMUCFG_LPIDSIZE
+ beq 1f
+ bl __setup_ehv_ivors
+ b 2f
+1:
+ lwz r3, CPU_SPEC_FEATURES(r4)
+ /* We need this check as cpu_setup is also called for
+ * the secondary cores. So, if we have already cleared
+ * the feature on the primary core, avoid doing it on the
+ * secondary core.
+ */
+ andis. r6, r3, CPU_FTR_EMB_HV@h
+ beq 2f
+ rlwinm r3, r3, 0, ~CPU_FTR_EMB_HV
+ stw r3, CPU_SPEC_FEATURES(r4)
+2:
+ mtlr r5
+ blr
+#endif
+
+#ifdef CONFIG_PPC_BOOK3E_64
+_GLOBAL(__restore_cpu_e6500)
+ mflr r5
+ bl setup_altivec_ivors
+ /* Touch IVOR42 only if the CPU supports E.HV category */
+ mfspr r10,SPRN_MMUCFG
+ rlwinm. r10,r10,0,MMUCFG_LPIDSIZE
+ beq 1f
+ bl setup_lrat_ivor
+1:
+ bl setup_pw20_idle
+ bl setup_altivec_idle
+ bl __restore_cpu_e5500
+ mtlr r5
+ blr
+
+_GLOBAL(__restore_cpu_e5500)
+ mflr r4
+ bl __e500_icache_setup
+ bl __e500_dcache_setup
+ bl __setup_base_ivors
+ bl setup_perfmon_ivor
+ bl setup_doorbell_ivors
+ /*
+ * We only want to touch IVOR38-41 if we're running on hardware
+ * that supports category E.HV. The architectural way to determine
+ * this is MMUCFG[LPIDSIZE].
+ */
+ mfspr r10,SPRN_MMUCFG
+ rlwinm. r10,r10,0,MMUCFG_LPIDSIZE
+ beq 1f
+ bl setup_ehv_ivors
+1:
+ mtlr r4
+ blr
+
+_GLOBAL(__setup_cpu_e5500)
+ mflr r5
+ bl __e500_icache_setup
+ bl __e500_dcache_setup
+ bl __setup_base_ivors
+ bl setup_perfmon_ivor
+ bl setup_doorbell_ivors
+ /*
+ * We only want to touch IVOR38-41 if we're running on hardware
+ * that supports category E.HV. The architectural way to determine
+ * this is MMUCFG[LPIDSIZE].
+ */
+ mfspr r10,SPRN_MMUCFG
+ rlwinm. r10,r10,0,MMUCFG_LPIDSIZE
+ beq 1f
+ bl setup_ehv_ivors
+ b 2f
+1:
+ ld r10,CPU_SPEC_FEATURES(r4)
+ LOAD_REG_IMMEDIATE(r9,CPU_FTR_EMB_HV)
+ andc r10,r10,r9
+ std r10,CPU_SPEC_FEATURES(r4)
+2:
+ mtlr r5
+ blr
+#endif
diff --git a/arch/powerpc/kernel/cpu_setup_pa6t.S b/arch/powerpc/kernel/cpu_setup_pa6t.S
new file mode 100644
index 00000000000..d62cb9cae4e
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_pa6t.S
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2006-2007 PA Semi, Inc
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+
+/* Right now, restore and setup are the same thing */
+_GLOBAL(__restore_cpu_pa6t)
+_GLOBAL(__setup_cpu_pa6t)
+ /* Do nothing if not running in HV mode */
+ mfmsr r0
+ rldicl. r0,r0,4,63
+ beqlr
+
+ mfspr r0,SPRN_HID5
+ ori r0,r0,0x38
+ mtspr SPRN_HID5,r0
+
+ mfspr r0,SPRN_LPCR
+ ori r0,r0,0x7000
+ mtspr SPRN_LPCR,r0
+
+ blr
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
new file mode 100644
index 00000000000..46733535cc0
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -0,0 +1,182 @@
+/*
+ * This file contains low level CPU setup functions.
+ * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+
+/* Entry: r3 = crap, r4 = ptr to cputable entry
+ *
+ * Note that we can be called twice for pseudo-PVRs
+ */
+_GLOBAL(__setup_cpu_power7)
+ mflr r11
+ bl __init_hvmode_206
+ mtlr r11
+ beqlr
+ li r0,0
+ mtspr SPRN_LPID,r0
+ mfspr r3,SPRN_LPCR
+ bl __init_LPCR
+ bl __init_tlb_power7
+ mtlr r11
+ blr
+
+_GLOBAL(__restore_cpu_power7)
+ mflr r11
+ mfmsr r3
+ rldicl. r0,r3,4,63
+ beqlr
+ li r0,0
+ mtspr SPRN_LPID,r0
+ mfspr r3,SPRN_LPCR
+ bl __init_LPCR
+ bl __init_tlb_power7
+ mtlr r11
+ blr
+
+_GLOBAL(__setup_cpu_power8)
+ mflr r11
+ bl __init_FSCR
+ bl __init_PMU
+ bl __init_hvmode_206
+ mtlr r11
+ beqlr
+ li r0,0
+ mtspr SPRN_LPID,r0
+ mfspr r3,SPRN_LPCR
+ ori r3, r3, LPCR_PECEDH
+ bl __init_LPCR
+ bl __init_HFSCR
+ bl __init_tlb_power8
+ bl __init_PMU_HV
+ mtlr r11
+ blr
+
+_GLOBAL(__restore_cpu_power8)
+ mflr r11
+ bl __init_FSCR
+ bl __init_PMU
+ mfmsr r3
+ rldicl. r0,r3,4,63
+ mtlr r11
+ beqlr
+ li r0,0
+ mtspr SPRN_LPID,r0
+ mfspr r3,SPRN_LPCR
+ ori r3, r3, LPCR_PECEDH
+ bl __init_LPCR
+ bl __init_HFSCR
+ bl __init_tlb_power8
+ bl __init_PMU_HV
+ mtlr r11
+ blr
+
+__init_hvmode_206:
+ /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */
+ mfmsr r3
+ rldicl. r0,r3,4,63
+ bnelr
+ ld r5,CPU_SPEC_FEATURES(r4)
+ LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE)
+ xor r5,r5,r6
+ std r5,CPU_SPEC_FEATURES(r4)
+ blr
+
+__init_LPCR:
+ /* Setup a sane LPCR:
+ * Called with initial LPCR in R3
+ *
+ * LPES = 0b01 (HSRR0/1 used for 0x500)
+ * PECE = 0b111
+ * DPFD = 4
+ * HDICE = 0
+ * VC = 0b100 (VPM0=1, VPM1=0, ISL=0)
+ * VRMASD = 0b10000 (L=1, LP=00)
+ *
+ * Other bits untouched for now
+ */
+ li r5,1
+ rldimi r3,r5, LPCR_LPES_SH, 64-LPCR_LPES_SH-2
+ ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2)
+ li r5,4
+ rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3
+ clrrdi r3,r3,1 /* clear HDICE */
+ li r5,4
+ rldimi r3,r5, LPCR_VC_SH, 0
+ li r5,0x10
+ rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5
+ mtspr SPRN_LPCR,r3
+ isync
+ blr
+
+__init_FSCR:
+ mfspr r3,SPRN_FSCR
+ ori r3,r3,FSCR_TAR|FSCR_DSCR|FSCR_EBB
+ mtspr SPRN_FSCR,r3
+ blr
+
+__init_HFSCR:
+ mfspr r3,SPRN_HFSCR
+ ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\
+ HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB
+ mtspr SPRN_HFSCR,r3
+ blr
+
+/*
+ * Clear the TLB using the specified IS form of tlbiel instruction
+ * (invalidate by congruence class). P7 has 128 CCs., P8 has 512.
+ *
+ * r3 = IS field
+ */
+__init_tlb_power7:
+ li r3,0xc00 /* IS field = 0b11 */
+_GLOBAL(__flush_tlb_power7)
+ li r6,128
+ mtctr r6
+ mr r7,r3 /* IS field */
+ ptesync
+2: tlbiel r7
+ addi r7,r7,0x1000
+ bdnz 2b
+ ptesync
+1: blr
+
+__init_tlb_power8:
+ li r3,0xc00 /* IS field = 0b11 */
+_GLOBAL(__flush_tlb_power8)
+ li r6,512
+ mtctr r6
+ mr r7,r3 /* IS field */
+ ptesync
+2: tlbiel r7
+ addi r7,r7,0x1000
+ bdnz 2b
+ ptesync
+1: blr
+
+__init_PMU_HV:
+ li r5,0
+ mtspr SPRN_MMCRC,r5
+ mtspr SPRN_MMCRH,r5
+ blr
+
+__init_PMU:
+ li r5,0
+ mtspr SPRN_MMCRS,r5
+ mtspr SPRN_MMCRA,r5
+ mtspr SPRN_MMCR0,r5
+ mtspr SPRN_MMCR1,r5
+ mtspr SPRN_MMCR2,r5
+ blr
diff --git a/arch/powerpc/kernel/cpu_setup_ppc970.S b/arch/powerpc/kernel/cpu_setup_ppc970.S
index 652594891d5..12fac8df01c 100644
--- a/arch/powerpc/kernel/cpu_setup_ppc970.S
+++ b/arch/powerpc/kernel/cpu_setup_ppc970.S
@@ -76,13 +76,29 @@ _GLOBAL(__setup_cpu_ppc970)
/* Do nothing if not running in HV mode */
mfmsr r0
rldicl. r0,r0,4,63
- beqlr
+ beq no_hv_mode
mfspr r0,SPRN_HID0
li r11,5 /* clear DOZE and SLEEP */
rldimi r0,r11,52,8 /* set NAP and DPM */
li r11,0
rldimi r0,r11,32,31 /* clear EN_ATTN */
+ b load_hids /* Jump to shared code */
+
+
+_GLOBAL(__setup_cpu_ppc970MP)
+ /* Do nothing if not running in HV mode */
+ mfmsr r0
+ rldicl. r0,r0,4,63
+ beq no_hv_mode
+
+ mfspr r0,SPRN_HID0
+ li r11,0x15 /* clear DOZE and SLEEP */
+ rldimi r0,r11,52,6 /* set DEEPNAP, NAP and DPM */
+ li r11,0
+ rldimi r0,r11,32,31 /* clear EN_ATTN */
+
+load_hids:
mtspr SPRN_HID0,r0
mfspr r0,SPRN_HID0
mfspr r0,SPRN_HID0
@@ -93,19 +109,37 @@ _GLOBAL(__setup_cpu_ppc970)
sync
isync
+ /* Try to set LPES = 01 in HID4 */
+ mfspr r0,SPRN_HID4
+ clrldi r0,r0,1 /* clear LPES0 */
+ ori r0,r0,HID4_LPES1 /* set LPES1 */
+ sync
+ mtspr SPRN_HID4,r0
+ isync
+
/* Save away cpu state */
- LOAD_REG_IMMEDIATE(r5,cpu_state_storage)
+ LOAD_REG_ADDR(r5,cpu_state_storage)
/* Save HID0,1,4 and 5 */
mfspr r3,SPRN_HID0
std r3,CS_HID0(r5)
mfspr r3,SPRN_HID1
std r3,CS_HID1(r5)
- mfspr r3,SPRN_HID4
- std r3,CS_HID4(r5)
+ mfspr r4,SPRN_HID4
+ std r4,CS_HID4(r5)
mfspr r3,SPRN_HID5
std r3,CS_HID5(r5)
+ /* See if we successfully set LPES1 to 1; if not we are in Apple mode */
+ andi. r4,r4,HID4_LPES1
+ bnelr
+
+no_hv_mode:
+ /* Disable CPU_FTR_HVMODE and exit, since we don't have HV mode */
+ ld r5,CPU_SPEC_FEATURES(r4)
+ LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE)
+ andc r5,r5,r6
+ std r5,CPU_SPEC_FEATURES(r4)
blr
/* Called with no MMU context (typically MSR:IR/DR off) to
@@ -118,7 +152,7 @@ _GLOBAL(__restore_cpu_ppc970)
rldicl. r0,r0,4,63
beqlr
- LOAD_REG_IMMEDIATE(r5,cpu_state_storage)
+ LOAD_REG_ADDR(r5,cpu_state_storage)
/* Before accessing memory, we make sure rm_ci is clear */
li r0,0
mfspr r3,SPRN_HID4
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 190a57e2076..0c157642c2a 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -14,14 +14,20 @@
#include <linux/sched.h>
#include <linux/threads.h>
#include <linux/init.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <asm/oprofile_impl.h>
#include <asm/cputable.h>
+#include <asm/prom.h> /* for PTRRELOC on ARCH=ppc */
+#include <asm/mmu.h>
+#include <asm/setup.h>
struct cpu_spec* cur_cpu_spec = NULL;
EXPORT_SYMBOL(cur_cpu_spec);
+/* The platform string corresponding to the real PVR */
+const char *powerpc_base_platform;
+
/* NOTE:
* Unlike ppc32, ppc64 will only call this once for the boot CPU, it's
* the responsibility of the appropriate CPU save/restore functions to
@@ -30,6 +36,20 @@ EXPORT_SYMBOL(cur_cpu_spec);
* and ppc64
*/
#ifdef CONFIG_PPC32
+extern void __setup_cpu_e200(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_e500v1(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_e500v2(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_e500mc(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_440ep(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_440epx(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_440gx(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_440grx(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_440spe(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_440x5(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_460gt(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_460sx(unsigned long offset, struct cpu_spec *spec);
+extern void __setup_cpu_apm821xx(unsigned long offset, struct cpu_spec *spec);
extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_750(unsigned long offset, struct cpu_spec* spec);
@@ -41,8 +61,27 @@ extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec);
#endif /* CONFIG_PPC32 */
#ifdef CONFIG_PPC64
extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_a2(unsigned long offset, struct cpu_spec* spec);
+extern void __restore_cpu_pa6t(void);
extern void __restore_cpu_ppc970(void);
+extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec);
+extern void __restore_cpu_power7(void);
+extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
+extern void __restore_cpu_power8(void);
+extern void __restore_cpu_a2(void);
+extern void __flush_tlb_power7(unsigned long inval_selector);
+extern void __flush_tlb_power8(unsigned long inval_selector);
+extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
+extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
#endif /* CONFIG_PPC64 */
+#if defined(CONFIG_E500)
+extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_e6500(unsigned long offset, struct cpu_spec* spec);
+extern void __restore_cpu_e5500(void);
+extern void __restore_cpu_e6500(void);
+#endif /* CONFIG_E500 */
/* This table only contains "desktop" CPUs, it need to be filled with embedded
* ones as well...
@@ -57,33 +96,44 @@ extern void __restore_cpu_ppc970(void);
PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP)
#define COMMON_USER_POWER6 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\
PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
- PPC_FEATURE_TRUE_LE)
+ PPC_FEATURE_TRUE_LE | \
+ PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER_POWER7 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
+ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+ PPC_FEATURE_TRUE_LE | \
+ PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER2_POWER7 (PPC_FEATURE2_DSCR)
+#define COMMON_USER_POWER8 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
+ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+ PPC_FEATURE_TRUE_LE | \
+ PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \
+ PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \
+ PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
+ PPC_FEATURE2_VEC_CRYPTO)
#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
PPC_FEATURE_TRUE_LE | \
PPC_FEATURE_HAS_ALTIVEC_COMP)
+#ifdef CONFIG_PPC_BOOK3E_64
+#define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE)
+#else
#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
PPC_FEATURE_BOOKE)
-
-/* We only set the spe features if the kernel was compiled with
- * spe support
- */
-#ifdef CONFIG_SPE
-#define PPC_FEATURE_SPE_COMP PPC_FEATURE_HAS_SPE
-#else
-#define PPC_FEATURE_SPE_COMP 0
#endif
-struct cpu_spec cpu_specs[] = {
-#ifdef CONFIG_PPC64
+static struct cpu_spec __initdata cpu_specs[] = {
+#ifdef CONFIG_PPC_BOOK3S_64
{ /* Power3 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00400000,
.cpu_name = "POWER3 (630)",
.cpu_features = CPU_FTRS_POWER3,
.cpu_user_features = COMMON_USER_PPC64|PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power3",
.oprofile_type = PPC_OPROFILE_RS64,
.platform = "power3",
@@ -94,9 +144,11 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "POWER3 (630+)",
.cpu_features = CPU_FTRS_POWER3,
.cpu_user_features = COMMON_USER_PPC64|PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power3",
.oprofile_type = PPC_OPROFILE_RS64,
.platform = "power3",
@@ -107,9 +159,11 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "RS64-II (northstar)",
.cpu_features = CPU_FTRS_RS64,
.cpu_user_features = COMMON_USER_PPC64,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/rs64",
.oprofile_type = PPC_OPROFILE_RS64,
.platform = "rs64",
@@ -120,9 +174,11 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "RS64-III (pulsar)",
.cpu_features = CPU_FTRS_RS64,
.cpu_user_features = COMMON_USER_PPC64,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/rs64",
.oprofile_type = PPC_OPROFILE_RS64,
.platform = "rs64",
@@ -133,9 +189,11 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "RS64-III (icestar)",
.cpu_features = CPU_FTRS_RS64,
.cpu_user_features = COMMON_USER_PPC64,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/rs64",
.oprofile_type = PPC_OPROFILE_RS64,
.platform = "rs64",
@@ -146,9 +204,11 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "RS64-IV (sstar)",
.cpu_features = CPU_FTRS_RS64,
.cpu_user_features = COMMON_USER_PPC64,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/rs64",
.oprofile_type = PPC_OPROFILE_RS64,
.platform = "rs64",
@@ -159,9 +219,11 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "POWER4 (gp)",
.cpu_features = CPU_FTRS_POWER4,
.cpu_user_features = COMMON_USER_POWER4,
+ .mmu_features = MMU_FTRS_POWER4,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power4",
.oprofile_type = PPC_OPROFILE_POWER4,
.platform = "power4",
@@ -172,9 +234,11 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "POWER4+ (gq)",
.cpu_features = CPU_FTRS_POWER4,
.cpu_user_features = COMMON_USER_POWER4,
+ .mmu_features = MMU_FTRS_POWER4,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power4",
.oprofile_type = PPC_OPROFILE_POWER4,
.platform = "power4",
@@ -186,9 +250,11 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_PPC970,
.cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
.cpu_setup = __setup_cpu_ppc970,
.cpu_restore = __restore_cpu_ppc970,
.oprofile_cpu_type = "ppc64/970",
@@ -202,15 +268,35 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_PPC970,
.cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
.cpu_setup = __setup_cpu_ppc970,
.cpu_restore = __restore_cpu_ppc970,
.oprofile_cpu_type = "ppc64/970",
.oprofile_type = PPC_OPROFILE_POWER4,
.platform = "ppc970",
},
+ { /* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x00440100,
+ .cpu_name = "PPC970MP",
+ .cpu_features = CPU_FTRS_PPC970,
+ .cpu_user_features = COMMON_USER_POWER4 |
+ PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_ppc970,
+ .cpu_restore = __restore_cpu_ppc970,
+ .oprofile_cpu_type = "ppc64/970MP",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .platform = "ppc970",
+ },
{ /* PPC970MP */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00440000,
@@ -218,11 +304,30 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_PPC970,
.cpu_user_features = COMMON_USER_POWER4 |
PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 8,
- .cpu_setup = __setup_cpu_ppc970,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_ppc970MP,
.cpu_restore = __restore_cpu_ppc970,
+ .oprofile_cpu_type = "ppc64/970MP",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .platform = "ppc970",
+ },
+ { /* PPC970GX */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00450000,
+ .cpu_name = "PPC970GX",
+ .cpu_features = CPU_FTRS_PPC970,
+ .cpu_user_features = COMMON_USER_POWER4 |
+ PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_ppc970,
.oprofile_cpu_type = "ppc64/970",
.oprofile_type = PPC_OPROFILE_POWER4,
.platform = "ppc970",
@@ -233,9 +338,11 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "POWER5 (gr)",
.cpu_features = CPU_FTRS_POWER5,
.cpu_user_features = COMMON_USER_POWER5,
+ .mmu_features = MMU_FTRS_POWER5,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power5",
.oprofile_type = PPC_OPROFILE_POWER4,
/* SIHV / SIPR bits are implemented on POWER4+ (GQ)
@@ -245,38 +352,221 @@ struct cpu_spec cpu_specs[] = {
.oprofile_mmcra_sipr = MMCRA_SIPR,
.platform = "power5",
},
+ { /* Power5++ */
+ .pvr_mask = 0xffffff00,
+ .pvr_value = 0x003b0300,
+ .cpu_name = "POWER5+ (gs)",
+ .cpu_features = CPU_FTRS_POWER5,
+ .cpu_user_features = COMMON_USER_POWER5_PLUS,
+ .mmu_features = MMU_FTRS_POWER5,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .oprofile_cpu_type = "ppc64/power5++",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .oprofile_mmcra_sihv = MMCRA_SIHV,
+ .oprofile_mmcra_sipr = MMCRA_SIPR,
+ .platform = "power5+",
+ },
{ /* Power5 GS */
.pvr_mask = 0xffff0000,
.pvr_value = 0x003b0000,
.cpu_name = "POWER5+ (gs)",
.cpu_features = CPU_FTRS_POWER5,
.cpu_user_features = COMMON_USER_POWER5_PLUS,
+ .mmu_features = MMU_FTRS_POWER5,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power5+",
.oprofile_type = PPC_OPROFILE_POWER4,
.oprofile_mmcra_sihv = MMCRA_SIHV,
.oprofile_mmcra_sipr = MMCRA_SIPR,
.platform = "power5+",
},
+ { /* POWER6 in P5+ mode; 2.04-compliant processor */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000001,
+ .cpu_name = "POWER5+",
+ .cpu_features = CPU_FTRS_POWER5,
+ .cpu_user_features = COMMON_USER_POWER5_PLUS,
+ .mmu_features = MMU_FTRS_POWER5,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .oprofile_cpu_type = "ppc64/ibm-compat-v1",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .platform = "power5+",
+ },
{ /* Power6 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x003e0000,
- .cpu_name = "POWER6",
+ .cpu_name = "POWER6 (raw)",
.cpu_features = CPU_FTRS_POWER6,
- .cpu_user_features = COMMON_USER_POWER6,
+ .cpu_user_features = COMMON_USER_POWER6 |
+ PPC_FEATURE_POWER6_EXT,
+ .mmu_features = MMU_FTRS_POWER6,
.icache_bsize = 128,
.dcache_bsize = 128,
- .num_pmcs = 8,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
.oprofile_cpu_type = "ppc64/power6",
.oprofile_type = PPC_OPROFILE_POWER4,
- .oprofile_mmcra_sihv = POWER6_MMCRA_SIHV,
- .oprofile_mmcra_sipr = POWER6_MMCRA_SIPR,
- .oprofile_mmcra_clear = POWER6_MMCRA_THRM |
- POWER6_MMCRA_OTHER,
+ .oprofile_mmcra_sihv = POWER6_MMCRA_SIHV,
+ .oprofile_mmcra_sipr = POWER6_MMCRA_SIPR,
+ .oprofile_mmcra_clear = POWER6_MMCRA_THRM |
+ POWER6_MMCRA_OTHER,
+ .platform = "power6x",
+ },
+ { /* 2.05-compliant processor, i.e. Power6 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000002,
+ .cpu_name = "POWER6 (architected)",
+ .cpu_features = CPU_FTRS_POWER6,
+ .cpu_user_features = COMMON_USER_POWER6,
+ .mmu_features = MMU_FTRS_POWER6,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .oprofile_cpu_type = "ppc64/ibm-compat-v1",
+ .oprofile_type = PPC_OPROFILE_POWER4,
.platform = "power6",
},
+ { /* 2.06-compliant processor, i.e. Power7 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000003,
+ .cpu_name = "POWER7 (architected)",
+ .cpu_features = CPU_FTRS_POWER7,
+ .cpu_user_features = COMMON_USER_POWER7,
+ .cpu_user_features2 = COMMON_USER2_POWER7,
+ .mmu_features = MMU_FTRS_POWER7,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .oprofile_cpu_type = "ppc64/ibm-compat-v1",
+ .cpu_setup = __setup_cpu_power7,
+ .cpu_restore = __restore_cpu_power7,
+ .flush_tlb = __flush_tlb_power7,
+ .machine_check_early = __machine_check_early_realmode_p7,
+ .platform = "power7",
+ },
+ { /* 2.07-compliant processor, i.e. Power8 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000004,
+ .cpu_name = "POWER8 (architected)",
+ .cpu_features = CPU_FTRS_POWER8,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .oprofile_cpu_type = "ppc64/ibm-compat-v1",
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .flush_tlb = __flush_tlb_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
+ .platform = "power8",
+ },
+ { /* Power7 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003f0000,
+ .cpu_name = "POWER7 (raw)",
+ .cpu_features = CPU_FTRS_POWER7,
+ .cpu_user_features = COMMON_USER_POWER7,
+ .cpu_user_features2 = COMMON_USER2_POWER7,
+ .mmu_features = MMU_FTRS_POWER7,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power7",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .cpu_setup = __setup_cpu_power7,
+ .cpu_restore = __restore_cpu_power7,
+ .flush_tlb = __flush_tlb_power7,
+ .machine_check_early = __machine_check_early_realmode_p7,
+ .platform = "power7",
+ },
+ { /* Power7+ */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004A0000,
+ .cpu_name = "POWER7+ (raw)",
+ .cpu_features = CPU_FTRS_POWER7,
+ .cpu_user_features = COMMON_USER_POWER7,
+ .cpu_user_features2 = COMMON_USER2_POWER7,
+ .mmu_features = MMU_FTRS_POWER7,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power7",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .cpu_setup = __setup_cpu_power7,
+ .cpu_restore = __restore_cpu_power7,
+ .flush_tlb = __flush_tlb_power7,
+ .machine_check_early = __machine_check_early_realmode_p7,
+ .platform = "power7+",
+ },
+ { /* Power8E */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004b0000,
+ .cpu_name = "POWER8E (raw)",
+ .cpu_features = CPU_FTRS_POWER8E,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power8",
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .flush_tlb = __flush_tlb_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
+ .platform = "power8",
+ },
+ { /* Power8 DD1: Does not support doorbell IPIs */
+ .pvr_mask = 0xffffff00,
+ .pvr_value = 0x004d0100,
+ .cpu_name = "POWER8 (raw)",
+ .cpu_features = CPU_FTRS_POWER8_DD1,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power8",
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .flush_tlb = __flush_tlb_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
+ .platform = "power8",
+ },
+ { /* Power8 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004d0000,
+ .cpu_name = "POWER8 (raw)",
+ .cpu_features = CPU_FTRS_POWER8,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power8",
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .flush_tlb = __flush_tlb_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
+ .platform = "power8",
+ },
{ /* Cell Broadband Engine */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00700000,
@@ -285,8 +575,13 @@ struct cpu_spec cpu_specs[] = {
.cpu_user_features = COMMON_USER_PPC64 |
PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP |
PPC_FEATURE_SMT,
+ .mmu_features = MMU_FTRS_CELL,
.icache_bsize = 128,
.dcache_bsize = 128,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/cell-be",
+ .oprofile_type = PPC_OPROFILE_CELL,
.platform = "ppc-cell-be",
},
{ /* PA Semi PA6T */
@@ -295,9 +590,15 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "PA6T",
.cpu_features = CPU_FTRS_PA6T,
.cpu_user_features = COMMON_USER_PA6T,
+ .mmu_features = MMU_FTRS_PA6T,
.icache_bsize = 64,
.dcache_bsize = 64,
.num_pmcs = 6,
+ .pmc_type = PPC_PMC_PA6T,
+ .cpu_setup = __setup_cpu_pa6t,
+ .cpu_restore = __restore_cpu_pa6t,
+ .oprofile_cpu_type = "ppc64/pa6t",
+ .oprofile_type = PPC_OPROFILE_PA6T,
.platform = "pa6t",
},
{ /* default match */
@@ -306,12 +607,15 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "POWER4 (compatible)",
.cpu_features = CPU_FTRS_COMPATIBLE,
.cpu_user_features = COMMON_USER_PPC64,
+ .mmu_features = MMU_FTRS_DEFAULT_HPTE_ARCH_V2,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
.platform = "power4",
}
-#endif /* CONFIG_PPC64 */
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
#ifdef CONFIG_PPC32
#if CLASSIC_PPC
{ /* 601 */
@@ -321,8 +625,10 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_PPC601,
.cpu_user_features = COMMON_USER | PPC_FEATURE_601_INSTR |
PPC_FEATURE_UNIFIED_CACHE | PPC_FEATURE_NO_TB,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .machine_check = machine_check_generic,
.platform = "ppc601",
},
{ /* 603 */
@@ -331,9 +637,11 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "603",
.cpu_features = CPU_FTRS_603,
.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = 0,
.icache_bsize = 32,
.dcache_bsize = 32,
.cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
.platform = "ppc603",
},
{ /* 603e */
@@ -342,9 +650,11 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "603e",
.cpu_features = CPU_FTRS_603,
.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = 0,
.icache_bsize = 32,
.dcache_bsize = 32,
.cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
.platform = "ppc603",
},
{ /* 603ev */
@@ -353,9 +663,11 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "603ev",
.cpu_features = CPU_FTRS_603,
.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = 0,
.icache_bsize = 32,
.dcache_bsize = 32,
.cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
.platform = "ppc603",
},
{ /* 604 */
@@ -364,10 +676,12 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "604",
.cpu_features = CPU_FTRS_604,
.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 2,
.cpu_setup = __setup_cpu_604,
+ .machine_check = machine_check_generic,
.platform = "ppc604",
},
{ /* 604e */
@@ -376,10 +690,12 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "604e",
.cpu_features = CPU_FTRS_604,
.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
.cpu_setup = __setup_cpu_604,
+ .machine_check = machine_check_generic,
.platform = "ppc604",
},
{ /* 604r */
@@ -388,10 +704,12 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "604r",
.cpu_features = CPU_FTRS_604,
.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
.cpu_setup = __setup_cpu_604,
+ .machine_check = machine_check_generic,
.platform = "ppc604",
},
{ /* 604ev */
@@ -400,10 +718,12 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "604ev",
.cpu_features = CPU_FTRS_604,
.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
.cpu_setup = __setup_cpu_604,
+ .machine_check = machine_check_generic,
.platform = "ppc604",
},
{ /* 740/750 (0x4202, don't support TAU ?) */
@@ -412,10 +732,12 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "740/750",
.cpu_features = CPU_FTRS_740_NOTAU,
.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
.cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
.platform = "ppc750",
},
{ /* 750CX (80100 and 8010x?) */
@@ -424,10 +746,12 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "750CX",
.cpu_features = CPU_FTRS_750,
.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
.cpu_setup = __setup_cpu_750cx,
+ .machine_check = machine_check_generic,
.platform = "ppc750",
},
{ /* 750CX (82201 and 82202) */
@@ -436,10 +760,13 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "750CX",
.cpu_features = CPU_FTRS_750,
.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
.cpu_setup = __setup_cpu_750cx,
+ .machine_check = machine_check_generic,
.platform = "ppc750",
},
{ /* 750CXe (82214) */
@@ -448,10 +775,13 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "750CXe",
.cpu_features = CPU_FTRS_750,
.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
.cpu_setup = __setup_cpu_750cx,
+ .machine_check = machine_check_generic,
.platform = "ppc750",
},
{ /* 750CXe "Gekko" (83214) */
@@ -460,22 +790,45 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "750CXe",
.cpu_features = CPU_FTRS_750,
.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
.cpu_setup = __setup_cpu_750cx,
+ .machine_check = machine_check_generic,
.platform = "ppc750",
},
+ { /* 750CL (and "Broadway") */
+ .pvr_mask = 0xfffff0e0,
+ .pvr_value = 0x00087000,
+ .cpu_name = "750CL",
+ .cpu_features = CPU_FTRS_750CL,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ .oprofile_cpu_type = "ppc/750",
+ .oprofile_type = PPC_OPROFILE_G4,
+ },
{ /* 745/755 */
.pvr_mask = 0xfffff000,
.pvr_value = 0x00083000,
.cpu_name = "745/755",
.cpu_features = CPU_FTRS_750,
.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
.cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
.platform = "ppc750",
},
{ /* 750FX rev 1.x */
@@ -484,11 +837,16 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "750FX",
.cpu_features = CPU_FTRS_750FX1,
.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
.cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
.platform = "ppc750",
+ .oprofile_cpu_type = "ppc/750",
+ .oprofile_type = PPC_OPROFILE_G4,
},
{ /* 750FX rev 2.0 must disable HID0[DPM] */
.pvr_mask = 0xffffffff,
@@ -496,11 +854,16 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "750FX",
.cpu_features = CPU_FTRS_750FX2,
.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
.cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
.platform = "ppc750",
+ .oprofile_cpu_type = "ppc/750",
+ .oprofile_type = PPC_OPROFILE_G4,
},
{ /* 750FX (All revs except 2.0) */
.pvr_mask = 0xffff0000,
@@ -508,11 +871,16 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "750FX",
.cpu_features = CPU_FTRS_750FX,
.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
.cpu_setup = __setup_cpu_750fx,
+ .machine_check = machine_check_generic,
.platform = "ppc750",
+ .oprofile_cpu_type = "ppc/750",
+ .oprofile_type = PPC_OPROFILE_G4,
},
{ /* 750GX */
.pvr_mask = 0xffff0000,
@@ -520,11 +888,16 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "750GX",
.cpu_features = CPU_FTRS_750GX,
.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
.cpu_setup = __setup_cpu_750fx,
+ .machine_check = machine_check_generic,
.platform = "ppc750",
+ .oprofile_cpu_type = "ppc/750",
+ .oprofile_type = PPC_OPROFILE_G4,
},
{ /* 740/750 (L2CR bit need fixup for 740) */
.pvr_mask = 0xffff0000,
@@ -532,10 +905,13 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "740/750",
.cpu_features = CPU_FTRS_740,
.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
.cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
.platform = "ppc750",
},
{ /* 7400 rev 1.1 ? (no TAU) */
@@ -545,10 +921,13 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_7400_NOTAU,
.cpu_user_features = COMMON_USER |
PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
+ .pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_7400,
+ .machine_check = machine_check_generic,
.platform = "ppc7400",
},
{ /* 7400 */
@@ -558,10 +937,13 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_7400,
.cpu_user_features = COMMON_USER |
PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
+ .pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_7400,
+ .machine_check = machine_check_generic,
.platform = "ppc7400",
},
{ /* 7410 */
@@ -571,10 +953,13 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_7400,
.cpu_user_features = COMMON_USER |
PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
+ .pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_7410,
+ .machine_check = machine_check_generic,
.platform = "ppc7400",
},
{ /* 7450 2.0 - no doze/nap */
@@ -584,12 +969,15 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_7450_20,
.cpu_user_features = COMMON_USER |
PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
.oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
.platform = "ppc7450",
},
{ /* 7450 2.1 */
@@ -599,12 +987,15 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_7450_21,
.cpu_user_features = COMMON_USER |
PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
.oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
.platform = "ppc7450",
},
{ /* 7450 2.3 and newer */
@@ -614,12 +1005,15 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_7450_23,
.cpu_user_features = COMMON_USER |
PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
.oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
.platform = "ppc7450",
},
{ /* 7455 rev 1.x */
@@ -629,12 +1023,15 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_7455_1,
.cpu_user_features = COMMON_USER |
PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
.oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
.platform = "ppc7450",
},
{ /* 7455 rev 2.0 */
@@ -644,12 +1041,15 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_7455_20,
.cpu_user_features = COMMON_USER |
PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
.oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
.platform = "ppc7450",
},
{ /* 7455 others */
@@ -659,12 +1059,15 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_7455,
.cpu_user_features = COMMON_USER |
PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
.oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
.platform = "ppc7450",
},
{ /* 7447/7457 Rev 1.0 */
@@ -674,12 +1077,15 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_7447_10,
.cpu_user_features = COMMON_USER |
PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
.oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
.platform = "ppc7450",
},
{ /* 7447/7457 Rev 1.1 */
@@ -689,12 +1095,15 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_7447_10,
.cpu_user_features = COMMON_USER |
PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
.oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
.platform = "ppc7450",
},
{ /* 7447/7457 Rev 1.2 and later */
@@ -703,12 +1112,15 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "7447/7457",
.cpu_features = CPU_FTRS_7447,
.cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
.oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
.platform = "ppc7450",
},
{ /* 7447A */
@@ -718,27 +1130,33 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_7447A,
.cpu_user_features = COMMON_USER |
PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
.oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
.platform = "ppc7450",
},
{ /* 7448 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x80040000,
.cpu_name = "7448",
- .cpu_features = CPU_FTRS_7447A,
+ .cpu_features = CPU_FTRS_7448,
.cpu_user_features = COMMON_USER |
PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
.cpu_setup = __setup_cpu_745x,
.oprofile_cpu_type = "ppc/7450",
.oprofile_type = PPC_OPROFILE_G4,
+ .machine_check = machine_check_generic,
.platform = "ppc7450",
},
{ /* 82xx (8240, 8245, 8260 are all 603e cores) */
@@ -747,9 +1165,11 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "82xx",
.cpu_features = CPU_FTRS_82XX,
.cpu_user_features = COMMON_USER,
+ .mmu_features = 0,
.icache_bsize = 32,
.dcache_bsize = 32,
.cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
.platform = "ppc603",
},
{ /* All G2_LE (603e core, plus some) have the same pvr */
@@ -758,20 +1178,71 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "G2_LE",
.cpu_features = CPU_FTRS_G2_LE,
.cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS,
.icache_bsize = 32,
.dcache_bsize = 32,
.cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
.platform = "ppc603",
},
- { /* e300 (a 603e core, plus some) on 83xx */
+ { /* e300c1 (a 603e core, plus some) on 83xx */
.pvr_mask = 0x7fff0000,
.pvr_value = 0x00830000,
- .cpu_name = "e300",
+ .cpu_name = "e300c1",
+ .cpu_features = CPU_FTRS_E300,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* e300c2 (an e300c1 core, plus some, minus FPU) on 83xx */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00840000,
+ .cpu_name = "e300c2",
+ .cpu_features = CPU_FTRS_E300C2,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS |
+ MMU_FTR_NEED_DTLB_SW_LRU,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* e300c3 (e300c1, plus one IU, half cache size) on 83xx */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00850000,
+ .cpu_name = "e300c3",
.cpu_features = CPU_FTRS_E300,
.cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS |
+ MMU_FTR_NEED_DTLB_SW_LRU,
.icache_bsize = 32,
.dcache_bsize = 32,
.cpu_setup = __setup_cpu_603,
+ .num_pmcs = 4,
+ .oprofile_cpu_type = "ppc/e300",
+ .oprofile_type = PPC_OPROFILE_FSL_EMB,
+ .platform = "ppc603",
+ },
+ { /* e300c4 (e300c1, plus one IU) */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00860000,
+ .cpu_name = "e300c4",
+ .cpu_features = CPU_FTRS_E300,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS |
+ MMU_FTR_NEED_DTLB_SW_LRU,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .num_pmcs = 4,
+ .oprofile_cpu_type = "ppc/e300",
+ .oprofile_type = PPC_OPROFILE_FSL_EMB,
.platform = "ppc603",
},
{ /* default match, we assume split I/D cache & TB (non-601)... */
@@ -780,8 +1251,10 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "(generic PPC)",
.cpu_features = CPU_FTRS_CLASSIC32,
.cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .machine_check = machine_check_generic,
.platform = "ppc603",
},
#endif /* CLASSIC_PPC */
@@ -794,6 +1267,7 @@ struct cpu_spec cpu_specs[] = {
* if the 8xx code is there.... */
.cpu_features = CPU_FTRS_8XX,
.cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+ .mmu_features = MMU_FTR_TYPE_8xx,
.icache_bsize = 16,
.dcache_bsize = 16,
.platform = "ppc823",
@@ -806,8 +1280,10 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "403GC",
.cpu_features = CPU_FTRS_40X,
.cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+ .mmu_features = MMU_FTR_TYPE_40x,
.icache_bsize = 16,
.dcache_bsize = 16,
+ .machine_check = machine_check_4xx,
.platform = "ppc403",
},
{ /* 403GCX */
@@ -817,8 +1293,10 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_40X,
.cpu_user_features = PPC_FEATURE_32 |
PPC_FEATURE_HAS_MMU | PPC_FEATURE_NO_TB,
+ .mmu_features = MMU_FTR_TYPE_40x,
.icache_bsize = 16,
.dcache_bsize = 16,
+ .machine_check = machine_check_4xx,
.platform = "ppc403",
},
{ /* 403G ?? */
@@ -827,8 +1305,10 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "403G ??",
.cpu_features = CPU_FTRS_40X,
.cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+ .mmu_features = MMU_FTR_TYPE_40x,
.icache_bsize = 16,
.dcache_bsize = 16,
+ .machine_check = machine_check_4xx,
.platform = "ppc403",
},
{ /* 405GP */
@@ -838,8 +1318,10 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_40X,
.cpu_user_features = PPC_FEATURE_32 |
PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
.platform = "ppc405",
},
{ /* STB 03xxx */
@@ -849,8 +1331,10 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_40X,
.cpu_user_features = PPC_FEATURE_32 |
PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
.platform = "ppc405",
},
{ /* STB 04xxx */
@@ -860,8 +1344,10 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_40X,
.cpu_user_features = PPC_FEATURE_32 |
PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
.platform = "ppc405",
},
{ /* NP405L */
@@ -871,8 +1357,10 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_40X,
.cpu_user_features = PPC_FEATURE_32 |
PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
.platform = "ppc405",
},
{ /* NP4GS3 */
@@ -882,8 +1370,10 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_40X,
.cpu_user_features = PPC_FEATURE_32 |
PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
.platform = "ppc405",
},
{ /* NP405H */
@@ -893,8 +1383,10 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_40X,
.cpu_user_features = PPC_FEATURE_32 |
PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
.platform = "ppc405",
},
{ /* 405GPr */
@@ -904,8 +1396,10 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_40X,
.cpu_user_features = PPC_FEATURE_32 |
PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
.platform = "ppc405",
},
{ /* STBx25xx */
@@ -915,8 +1409,10 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_40X,
.cpu_user_features = PPC_FEATURE_32 |
PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
.platform = "ppc405",
},
{ /* 405LP */
@@ -925,8 +1421,10 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "405LP",
.cpu_features = CPU_FTRS_40X,
.cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+ .mmu_features = MMU_FTR_TYPE_40x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
.platform = "ppc405",
},
{ /* Xilinx Virtex-II Pro */
@@ -936,8 +1434,10 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_40X,
.cpu_user_features = PPC_FEATURE_32 |
PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
.platform = "ppc405",
},
{ /* Xilinx Virtex-4 FX */
@@ -947,8 +1447,10 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_40X,
.cpu_user_features = PPC_FEATURE_32 |
PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
.platform = "ppc405",
},
{ /* 405EP */
@@ -958,31 +1460,272 @@ struct cpu_spec cpu_specs[] = {
.cpu_features = CPU_FTRS_40X,
.cpu_user_features = PPC_FEATURE_32 |
PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EX Rev. A/B with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910007,
+ .cpu_name = "405EX Rev. A/B",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EX Rev. C without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x1291000d,
+ .cpu_name = "405EX Rev. C",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EX Rev. C with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x1291000f,
+ .cpu_name = "405EX Rev. C",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EX Rev. D without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910003,
+ .cpu_name = "405EX Rev. D",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EX Rev. D with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910005,
+ .cpu_name = "405EX Rev. D",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. A/B without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910001,
+ .cpu_name = "405EXr Rev. A/B",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. C without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910009,
+ .cpu_name = "405EXr Rev. C",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. C with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x1291000b,
+ .cpu_name = "405EXr Rev. C",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. D without Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910000,
+ .cpu_name = "405EXr Rev. D",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* 405EXr Rev. D with Security */
+ .pvr_mask = 0xffff000f,
+ .pvr_value = 0x12910002,
+ .cpu_name = "405EXr Rev. D",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ {
+ /* 405EZ */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x41510000,
+ .cpu_name = "405EZ",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ },
+ { /* APM8018X */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x7ff11432,
+ .cpu_name = "APM8018X",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
.platform = "ppc405",
},
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "(generic 40x PPC)",
+ .cpu_features = CPU_FTRS_40X,
+ .cpu_user_features = PPC_FEATURE_32 |
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
+ .mmu_features = MMU_FTR_TYPE_40x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc405",
+ }
#endif /* CONFIG_40x */
#ifdef CONFIG_44x
{
.pvr_mask = 0xf0000fff,
.pvr_value = 0x40000850,
+ .cpu_name = "440GR Rev. A",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x40000858,
.cpu_name = "440EP Rev. A",
.cpu_features = CPU_FTRS_44X,
.cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440ep,
+ .machine_check = machine_check_4xx,
.platform = "ppc440",
},
{
.pvr_mask = 0xf0000fff,
.pvr_value = 0x400008d3,
+ .cpu_name = "440GR Rev. B",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* Matches both physical and logical PVR for 440EP (logical pvr = pvr | 0x8) */
+ .pvr_mask = 0xf0000ff7,
+ .pvr_value = 0x400008d4,
+ .cpu_name = "440EP Rev. C",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440ep,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x400008db,
.cpu_name = "440EP Rev. B",
.cpu_features = CPU_FTRS_44X,
.cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440ep,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* 440GRX */
+ .pvr_mask = 0xf0000ffb,
+ .pvr_value = 0x200008D0,
+ .cpu_name = "440GRX",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440grx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* Use logical PVR for 440EPx (logical pvr = pvr | 0x8) */
+ .pvr_mask = 0xf0000ffb,
+ .pvr_value = 0x200008D8,
+ .cpu_name = "440EPX",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440epx,
+ .machine_check = machine_check_440A,
.platform = "ppc440",
},
{ /* 440GP Rev. B */
@@ -991,8 +1734,10 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "440GP Rev. B",
.cpu_features = CPU_FTRS_44X,
.cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
.platform = "ppc440gp",
},
{ /* 440GP Rev. C */
@@ -1001,8 +1746,10 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "440GP Rev. C",
.cpu_features = CPU_FTRS_44X,
.cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
.platform = "ppc440gp",
},
{ /* 440GX Rev. A */
@@ -1011,8 +1758,11 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "440GX Rev. A",
.cpu_features = CPU_FTRS_44X,
.cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440gx,
+ .machine_check = machine_check_440A,
.platform = "ppc440",
},
{ /* 440GX Rev. B */
@@ -1021,8 +1771,11 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "440GX Rev. B",
.cpu_features = CPU_FTRS_44X,
.cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440gx,
+ .machine_check = machine_check_440A,
.platform = "ppc440",
},
{ /* 440GX Rev. C */
@@ -1031,8 +1784,11 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "440GX Rev. C",
.cpu_features = CPU_FTRS_44X,
.cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440gx,
+ .machine_check = machine_check_440A,
.platform = "ppc440",
},
{ /* 440GX Rev. F */
@@ -1041,33 +1797,213 @@ struct cpu_spec cpu_specs[] = {
.cpu_name = "440GX Rev. F",
.cpu_features = CPU_FTRS_44X,
.cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440gx,
+ .machine_check = machine_check_440A,
.platform = "ppc440",
},
{ /* 440SP Rev. A */
- .pvr_mask = 0xff000fff,
- .pvr_value = 0x53000891,
+ .pvr_mask = 0xfff00fff,
+ .pvr_value = 0x53200891,
.cpu_name = "440SP Rev. A",
.cpu_features = CPU_FTRS_44X,
.cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
.platform = "ppc440",
},
{ /* 440SPe Rev. A */
- .pvr_mask = 0xff000fff,
- .pvr_value = 0x53000890,
- .cpu_name = "440SPe Rev. A",
- .cpu_features = CPU_FTR_SPLIT_ID_CACHE |
- CPU_FTR_USE_TB,
+ .pvr_mask = 0xfff00fff,
+ .pvr_value = 0x53400890,
+ .cpu_name = "440SPe Rev. A",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440spe,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440SPe Rev. B */
+ .pvr_mask = 0xfff00fff,
+ .pvr_value = 0x53400891,
+ .cpu_name = "440SPe Rev. B",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440spe,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440 in Xilinx Virtex-5 FXT */
+ .pvr_mask = 0xfffffff0,
+ .pvr_value = 0x7ff21910,
+ .cpu_name = "440 in Virtex-5 FXT",
+ .cpu_features = CPU_FTRS_44X,
.cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440x5,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460EX */
+ .pvr_mask = 0xffff0006,
+ .pvr_value = 0x13020002,
+ .cpu_name = "460EX",
+ .cpu_features = CPU_FTRS_440x6,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460ex,
+ .machine_check = machine_check_440A,
.platform = "ppc440",
},
+ { /* 460EX Rev B */
+ .pvr_mask = 0xffff0007,
+ .pvr_value = 0x13020004,
+ .cpu_name = "460EX Rev. B",
+ .cpu_features = CPU_FTRS_440x6,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460ex,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460GT */
+ .pvr_mask = 0xffff0006,
+ .pvr_value = 0x13020000,
+ .cpu_name = "460GT",
+ .cpu_features = CPU_FTRS_440x6,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460gt,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460GT Rev B */
+ .pvr_mask = 0xffff0007,
+ .pvr_value = 0x13020005,
+ .cpu_name = "460GT Rev. B",
+ .cpu_features = CPU_FTRS_440x6,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460gt,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460SX */
+ .pvr_mask = 0xffffff00,
+ .pvr_value = 0x13541800,
+ .cpu_name = "460SX",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460sx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 464 in APM821xx */
+ .pvr_mask = 0xfffffff0,
+ .pvr_value = 0x12C41C80,
+ .cpu_name = "APM821XX",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_apm821xx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 476 DD2 core */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x11a52080,
+ .cpu_name = "476",
+ .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_47x |
+ MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ },
+ { /* 476fpe */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x7ff50000,
+ .cpu_name = "476fpe",
+ .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_47x |
+ MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ },
+ { /* 476 iss */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00050000,
+ .cpu_name = "476",
+ .cpu_features = CPU_FTRS_47X,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_47x |
+ MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ },
+ { /* 476 others */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x11a50000,
+ .cpu_name = "476",
+ .cpu_features = CPU_FTRS_47X,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_47x |
+ MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ },
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "(generic 44x PPC)",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ }
#endif /* CONFIG_44x */
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_E200
{ /* e200z5 */
.pvr_mask = 0xfff00000,
.pvr_value = 0x81000000,
@@ -1077,7 +2013,9 @@ struct cpu_spec cpu_specs[] = {
.cpu_user_features = COMMON_USER_BOOKE |
PPC_FEATURE_HAS_EFP_SINGLE |
PPC_FEATURE_UNIFIED_CACHE,
+ .mmu_features = MMU_FTR_TYPE_FSL_E,
.dcache_bsize = 32,
+ .machine_check = machine_check_e200,
.platform = "ppc5554",
},
{ /* e200z6 */
@@ -1087,57 +2025,235 @@ struct cpu_spec cpu_specs[] = {
/* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */
.cpu_features = CPU_FTRS_E200,
.cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_SPE_COMP |
- PPC_FEATURE_HAS_EFP_SINGLE |
+ PPC_FEATURE_HAS_SPE_COMP |
+ PPC_FEATURE_HAS_EFP_SINGLE_COMP |
PPC_FEATURE_UNIFIED_CACHE,
+ .mmu_features = MMU_FTR_TYPE_FSL_E,
.dcache_bsize = 32,
+ .machine_check = machine_check_e200,
.platform = "ppc5554",
},
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "(generic E200 PPC)",
+ .cpu_features = CPU_FTRS_E200,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_EFP_SINGLE |
+ PPC_FEATURE_UNIFIED_CACHE,
+ .mmu_features = MMU_FTR_TYPE_FSL_E,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_e200,
+ .machine_check = machine_check_e200,
+ .platform = "ppc5554",
+ }
+#endif /* CONFIG_E200 */
+#endif /* CONFIG_PPC32 */
+#ifdef CONFIG_E500
+#ifdef CONFIG_PPC32
{ /* e500 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x80200000,
.cpu_name = "e500",
- /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */
.cpu_features = CPU_FTRS_E500,
.cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_SPE_COMP |
- PPC_FEATURE_HAS_EFP_SINGLE,
+ PPC_FEATURE_HAS_SPE_COMP |
+ PPC_FEATURE_HAS_EFP_SINGLE_COMP,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
+ .mmu_features = MMU_FTR_TYPE_FSL_E,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
.oprofile_cpu_type = "ppc/e500",
- .oprofile_type = PPC_OPROFILE_BOOKE,
+ .oprofile_type = PPC_OPROFILE_FSL_EMB,
+ .cpu_setup = __setup_cpu_e500v1,
+ .machine_check = machine_check_e500,
.platform = "ppc8540",
},
{ /* e500v2 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x80210000,
.cpu_name = "e500v2",
- /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */
.cpu_features = CPU_FTRS_E500_2,
.cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_SPE_COMP |
- PPC_FEATURE_HAS_EFP_SINGLE |
- PPC_FEATURE_HAS_EFP_DOUBLE,
+ PPC_FEATURE_HAS_SPE_COMP |
+ PPC_FEATURE_HAS_EFP_SINGLE_COMP |
+ PPC_FEATURE_HAS_EFP_DOUBLE_COMP,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
+ .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS,
.icache_bsize = 32,
.dcache_bsize = 32,
.num_pmcs = 4,
.oprofile_cpu_type = "ppc/e500",
- .oprofile_type = PPC_OPROFILE_BOOKE,
+ .oprofile_type = PPC_OPROFILE_FSL_EMB,
+ .cpu_setup = __setup_cpu_e500v2,
+ .machine_check = machine_check_e500,
.platform = "ppc8548",
},
+ { /* e500mc */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80230000,
+ .cpu_name = "e500mc",
+ .cpu_features = CPU_FTRS_E500MC,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
+ .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
+ MMU_FTR_USE_TLBILX,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 4,
+ .oprofile_cpu_type = "ppc/e500mc",
+ .oprofile_type = PPC_OPROFILE_FSL_EMB,
+ .cpu_setup = __setup_cpu_e500mc,
+ .machine_check = machine_check_e500mc,
+ .platform = "ppce500mc",
+ },
+#endif /* CONFIG_PPC32 */
+ { /* e5500 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80240000,
+ .cpu_name = "e5500",
+ .cpu_features = CPU_FTRS_E5500,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
+ .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
+ MMU_FTR_USE_TLBILX,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 4,
+ .oprofile_cpu_type = "ppc/e500mc",
+ .oprofile_type = PPC_OPROFILE_FSL_EMB,
+ .cpu_setup = __setup_cpu_e5500,
+#ifndef CONFIG_PPC32
+ .cpu_restore = __restore_cpu_e5500,
+#endif
+ .machine_check = machine_check_e500mc,
+ .platform = "ppce5500",
+ },
+ { /* e6500 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80400000,
+ .cpu_name = "e6500",
+ .cpu_features = CPU_FTRS_E6500,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU |
+ PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
+ .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
+ MMU_FTR_USE_TLBILX,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 6,
+ .oprofile_cpu_type = "ppc/e6500",
+ .oprofile_type = PPC_OPROFILE_FSL_EMB,
+ .cpu_setup = __setup_cpu_e6500,
+#ifndef CONFIG_PPC32
+ .cpu_restore = __restore_cpu_e6500,
#endif
-#if !CLASSIC_PPC
+ .machine_check = machine_check_e500mc,
+ .platform = "ppce6500",
+ },
+#ifdef CONFIG_PPC32
{ /* default match */
.pvr_mask = 0x00000000,
.pvr_value = 0x00000000,
- .cpu_name = "(generic PPC)",
- .cpu_features = CPU_FTRS_GENERIC_32,
- .cpu_user_features = PPC_FEATURE_32,
+ .cpu_name = "(generic E500 PPC)",
+ .cpu_features = CPU_FTRS_E500,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_SPE_COMP |
+ PPC_FEATURE_HAS_EFP_SINGLE_COMP,
+ .mmu_features = MMU_FTR_TYPE_FSL_E,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .machine_check = machine_check_e500,
.platform = "powerpc",
}
-#endif /* !CLASSIC_PPC */
#endif /* CONFIG_PPC32 */
+#endif /* CONFIG_E500 */
};
+
+static struct cpu_spec the_cpu_spec;
+
+static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
+ struct cpu_spec *s)
+{
+ struct cpu_spec *t = &the_cpu_spec;
+ struct cpu_spec old;
+
+ t = PTRRELOC(t);
+ old = *t;
+
+ /* Copy everything, then do fixups */
+ *t = *s;
+
+ /*
+ * If we are overriding a previous value derived from the real
+ * PVR with a new value obtained using a logical PVR value,
+ * don't modify the performance monitor fields.
+ */
+ if (old.num_pmcs && !s->num_pmcs) {
+ t->num_pmcs = old.num_pmcs;
+ t->pmc_type = old.pmc_type;
+ t->oprofile_type = old.oprofile_type;
+ t->oprofile_mmcra_sihv = old.oprofile_mmcra_sihv;
+ t->oprofile_mmcra_sipr = old.oprofile_mmcra_sipr;
+ t->oprofile_mmcra_clear = old.oprofile_mmcra_clear;
+
+ /*
+ * If we have passed through this logic once before and
+ * have pulled the default case because the real PVR was
+ * not found inside cpu_specs[], then we are possibly
+ * running in compatibility mode. In that case, let the
+ * oprofiler know which set of compatibility counters to
+ * pull from by making sure the oprofile_cpu_type string
+ * is set to that of compatibility mode. If the
+ * oprofile_cpu_type already has a value, then we are
+ * possibly overriding a real PVR with a logical one,
+ * and, in that case, keep the current value for
+ * oprofile_cpu_type.
+ */
+ if (old.oprofile_cpu_type != NULL) {
+ t->oprofile_cpu_type = old.oprofile_cpu_type;
+ t->oprofile_type = old.oprofile_type;
+ }
+ }
+
+ *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
+
+ /*
+ * Set the base platform string once; assumes
+ * we're called with real pvr first.
+ */
+ if (*PTRRELOC(&powerpc_base_platform) == NULL)
+ *PTRRELOC(&powerpc_base_platform) = t->platform;
+
+#if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE)
+ /* ppc64 and booke expect identify_cpu to also call setup_cpu for
+ * that processor. I will consolidate that at a later time, for now,
+ * just use #ifdef. We also don't need to PTRRELOC the function
+ * pointer on ppc64 and booke as we are running at 0 in real mode
+ * on ppc64 and reloc_offset is always 0 on booke.
+ */
+ if (t->cpu_setup) {
+ t->cpu_setup(offset, t);
+ }
+#endif /* CONFIG_PPC64 || CONFIG_BOOKE */
+
+ return t;
+}
+
+struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
+{
+ struct cpu_spec *s = cpu_specs;
+ int i;
+
+ s = PTRRELOC(s);
+
+ for (i = 0; i < ARRAY_SIZE(cpu_specs); i++,s++) {
+ if ((pvr & s->pvr_mask) == s->pvr_value)
+ return setup_cpu_spec(offset, s);
+ }
+
+ BUG();
+
+ return NULL;
+}
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 1af41f7616d..51dbace3269 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -10,132 +10,84 @@
*
*/
-#undef DEBUG
-
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/reboot.h>
#include <linux/kexec.h>
-#include <linux/bootmem.h>
+#include <linux/export.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
-#include <linux/elf.h>
-#include <linux/elfcore.h>
-#include <linux/init.h>
#include <linux/irq.h>
#include <linux/types.h>
-#include <linux/irq.h>
#include <asm/processor.h>
#include <asm/machdep.h>
#include <asm/kexec.h>
#include <asm/kdump.h>
-#include <asm/lmb.h>
-#include <asm/firmware.h>
+#include <asm/prom.h>
#include <asm/smp.h>
+#include <asm/setjmp.h>
+#include <asm/debug.h>
-#ifdef DEBUG
-#include <asm/udbg.h>
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
+/*
+ * The primary CPU waits a while for all secondary CPUs to enter. This is to
+ * avoid sending an IPI if the secondary CPUs are entering
+ * crash_kexec_secondary on their own (eg via a system reset).
+ *
+ * The secondary timeout has to be longer than the primary. Both timeouts are
+ * in milliseconds.
+ */
+#define PRIMARY_TIMEOUT 500
+#define SECONDARY_TIMEOUT 1000
-/* This keeps a track of which one is crashing cpu. */
+#define IPI_TIMEOUT 10000
+#define REAL_MODE_TIMEOUT 10000
+
+/* This keeps a track of which one is the crashing cpu. */
int crashing_cpu = -1;
-static cpumask_t cpus_in_crash = CPU_MASK_NONE;
-cpumask_t cpus_in_sr = CPU_MASK_NONE;
+static int time_to_dump;
-static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
- size_t data_len)
-{
- struct elf_note note;
-
- note.n_namesz = strlen(name) + 1;
- note.n_descsz = data_len;
- note.n_type = type;
- memcpy(buf, &note, sizeof(note));
- buf += (sizeof(note) +3)/4;
- memcpy(buf, name, note.n_namesz);
- buf += (note.n_namesz + 3)/4;
- memcpy(buf, data, note.n_descsz);
- buf += (note.n_descsz + 3)/4;
-
- return buf;
-}
+#define CRASH_HANDLER_MAX 3
+/* NULL terminated list of shutdown handles */
+static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1];
+static DEFINE_SPINLOCK(crash_handlers_lock);
-static void final_note(u32 *buf)
-{
- struct elf_note note;
+static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
+static int crash_shutdown_cpu = -1;
- note.n_namesz = 0;
- note.n_descsz = 0;
- note.n_type = 0;
- memcpy(buf, &note, sizeof(note));
-}
-
-static void crash_save_this_cpu(struct pt_regs *regs, int cpu)
+static int handle_fault(struct pt_regs *regs)
{
- struct elf_prstatus prstatus;
- u32 *buf;
-
- if ((cpu < 0) || (cpu >= NR_CPUS))
- return;
-
- /* Using ELF notes here is opportunistic.
- * I need a well defined structure format
- * for the data I pass, and I need tags
- * on the data to indicate what information I have
- * squirrelled away. ELF notes happen to provide
- * all of that that no need to invent something new.
- */
- buf = (u32*)per_cpu_ptr(crash_notes, cpu);
- if (!buf)
- return;
-
- memset(&prstatus, 0, sizeof(prstatus));
- prstatus.pr_pid = current->pid;
- elf_core_copy_regs(&prstatus.pr_reg, regs);
- buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
- sizeof(prstatus));
- final_note(buf);
+ if (crash_shutdown_cpu == smp_processor_id())
+ longjmp(crash_shutdown_buf, 1);
+ return 0;
}
#ifdef CONFIG_SMP
-static atomic_t enter_on_soft_reset = ATOMIC_INIT(0);
+static atomic_t cpus_in_crash;
void crash_ipi_callback(struct pt_regs *regs)
{
+ static cpumask_t cpus_state_saved = CPU_MASK_NONE;
+
int cpu = smp_processor_id();
if (!cpu_online(cpu))
return;
- local_irq_disable();
- if (!cpu_isset(cpu, cpus_in_crash))
- crash_save_this_cpu(regs, cpu);
- cpu_set(cpu, cpus_in_crash);
-
- /*
- * Entered via soft-reset - could be the kdump
- * process is invoked using soft-reset or user activated
- * it if some CPU did not respond to an IPI.
- * For soft-reset, the secondary CPU can enter this func
- * twice. 1 - using IPI, and 2. soft-reset.
- * Tell the kexec CPU that entered via soft-reset and ready
- * to go down.
- */
- if (cpu_isset(cpu, cpus_in_sr)) {
- cpu_clear(cpu, cpus_in_sr);
- atomic_inc(&enter_on_soft_reset);
+ hard_irq_disable();
+ if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
+ crash_save_cpu(regs, cpu);
+ cpumask_set_cpu(cpu, &cpus_state_saved);
}
+ atomic_inc(&cpus_in_crash);
+ smp_mb__after_atomic();
+
/*
* Starting the kdump boot.
* This barrier is needed to make sure that all CPUs are stopped.
- * If not, soft-reset will be invoked to bring other CPUs.
*/
- while (!cpu_isset(crashing_cpu, cpus_in_crash))
+ while (!time_to_dump)
cpu_relax();
if (ppc_md.kexec_cpu_down)
@@ -150,114 +102,108 @@ void crash_ipi_callback(struct pt_regs *regs)
/* NOTREACHED */
}
-/*
- * Wait until all CPUs are entered via soft-reset.
- */
-static void crash_soft_reset_check(int cpu)
-{
- unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
-
- cpu_clear(cpu, cpus_in_sr);
- while (atomic_read(&enter_on_soft_reset) != ncpus)
- cpu_relax();
-}
-
-
static void crash_kexec_prepare_cpus(int cpu)
{
unsigned int msecs;
-
unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
+ int tries = 0;
+ int (*old_handler)(struct pt_regs *regs);
+
+ printk(KERN_EMERG "Sending IPI to other CPUs\n");
crash_send_ipi(crash_ipi_callback);
smp_wmb();
+again:
/*
- * FIXME: Until we will have the way to stop other CPUSs reliabally,
+ * FIXME: Until we will have the way to stop other CPUs reliably,
* the crash CPU will send an IPI and wait for other CPUs to
* respond.
- * Delay of at least 10 seconds.
*/
- printk(KERN_EMERG "Sending IPI to other cpus...\n");
- msecs = 10000;
- while ((cpus_weight(cpus_in_crash) < ncpus) && (--msecs > 0)) {
- cpu_relax();
+ msecs = IPI_TIMEOUT;
+ while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0))
mdelay(1);
- }
/* Would it be better to replace the trap vector here? */
+ if (atomic_read(&cpus_in_crash) >= ncpus) {
+ printk(KERN_EMERG "IPI complete\n");
+ return;
+ }
+
+ printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n",
+ ncpus - atomic_read(&cpus_in_crash));
+
/*
- * FIXME: In case if we do not get all CPUs, one possibility: ask the
- * user to do soft reset such that we get all.
- * Soft-reset will be used until better mechanism is implemented.
+ * If we have a panic timeout set then we can't wait indefinitely
+ * for someone to activate system reset. We also give up on the
+ * second time through if system reset fail to work.
*/
- if (cpus_weight(cpus_in_crash) < ncpus) {
- printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n",
- ncpus - cpus_weight(cpus_in_crash));
- printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n");
- cpus_in_sr = CPU_MASK_NONE;
- atomic_set(&enter_on_soft_reset, 0);
- while (cpus_weight(cpus_in_crash) < ncpus)
- cpu_relax();
- }
+ if ((panic_timeout > 0) || (tries > 0))
+ return;
+
/*
- * Make sure all CPUs are entered via soft-reset if the kdump is
- * invoked using soft-reset.
+ * A system reset will cause all CPUs to take an 0x100 exception.
+ * The primary CPU returns here via setjmp, and the secondary
+ * CPUs reexecute the crash_kexec_secondary path.
*/
- if (cpu_isset(cpu, cpus_in_sr))
- crash_soft_reset_check(cpu);
- /* Leave the IPI callback set */
+ old_handler = __debugger;
+ __debugger = handle_fault;
+ crash_shutdown_cpu = smp_processor_id();
+
+ if (setjmp(crash_shutdown_buf) == 0) {
+ printk(KERN_EMERG "Activate system reset (dumprestart) "
+ "to stop other cpu(s)\n");
+
+ /*
+ * A system reset will force all CPUs to execute the
+ * crash code again. We need to reset cpus_in_crash so we
+ * wait for everyone to do this.
+ */
+ atomic_set(&cpus_in_crash, 0);
+ smp_mb();
+
+ while (atomic_read(&cpus_in_crash) < ncpus)
+ cpu_relax();
+ }
+
+ crash_shutdown_cpu = -1;
+ __debugger = old_handler;
+
+ tries++;
+ goto again;
}
/*
- * This function will be called by secondary cpus or by kexec cpu
- * if soft-reset is activated to stop some CPUs.
+ * This function will be called by secondary cpus.
*/
void crash_kexec_secondary(struct pt_regs *regs)
{
- int cpu = smp_processor_id();
unsigned long flags;
- int msecs = 5;
+ int msecs = SECONDARY_TIMEOUT;
local_irq_save(flags);
- /* Wait 5ms if the kexec CPU is not entered yet. */
+
+ /* Wait for the primary crash CPU to signal its progress */
while (crashing_cpu < 0) {
if (--msecs < 0) {
- /*
- * Either kdump image is not loaded or
- * kdump process is not started - Probably xmon
- * exited using 'x'(exit and recover) or
- * kexec_should_crash() failed for all running tasks.
- */
- cpu_clear(cpu, cpus_in_sr);
+ /* No response, kdump image may not have been loaded */
local_irq_restore(flags);
return;
}
+
mdelay(1);
- cpu_relax();
- }
- if (cpu == crashing_cpu) {
- /*
- * Panic CPU will enter this func only via soft-reset.
- * Wait until all secondary CPUs entered and
- * then start kexec boot.
- */
- crash_soft_reset_check(cpu);
- cpu_set(crashing_cpu, cpus_in_crash);
- if (ppc_md.kexec_cpu_down)
- ppc_md.kexec_cpu_down(1, 0);
- machine_kexec(kexec_crash_image);
- /* NOTREACHED */
}
+
crash_ipi_callback(regs);
}
-#else
+#else /* ! CONFIG_SMP */
+
static void crash_kexec_prepare_cpus(int cpu)
{
/*
- * move the secondarys to us so that we can copy
+ * move the secondaries to us so that we can copy
* the new kernel 0-0x100 safely
*
* do this if kexec in setup.c ?
@@ -271,13 +217,92 @@ static void crash_kexec_prepare_cpus(int cpu)
void crash_kexec_secondary(struct pt_regs *regs)
{
- cpus_in_sr = CPU_MASK_NONE;
}
-#endif
+#endif /* CONFIG_SMP */
+
+/* wait for all the CPUs to hit real mode but timeout if they don't come in */
+#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64)
+static void crash_kexec_wait_realmode(int cpu)
+{
+ unsigned int msecs;
+ int i;
+
+ msecs = REAL_MODE_TIMEOUT;
+ for (i=0; i < nr_cpu_ids && msecs > 0; i++) {
+ if (i == cpu)
+ continue;
+
+ while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
+ barrier();
+ if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
+ break;
+ msecs--;
+ mdelay(1);
+ }
+ }
+ mb();
+}
+#else
+static inline void crash_kexec_wait_realmode(int cpu) {}
+#endif /* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */
+
+/*
+ * Register a function to be called on shutdown. Only use this if you
+ * can't reset your device in the second kernel.
+ */
+int crash_shutdown_register(crash_shutdown_t handler)
+{
+ unsigned int i, rc;
+
+ spin_lock(&crash_handlers_lock);
+ for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
+ if (!crash_shutdown_handles[i]) {
+ /* Insert handle at first empty entry */
+ crash_shutdown_handles[i] = handler;
+ rc = 0;
+ break;
+ }
+
+ if (i == CRASH_HANDLER_MAX) {
+ printk(KERN_ERR "Crash shutdown handles full, "
+ "not registered.\n");
+ rc = 1;
+ }
+
+ spin_unlock(&crash_handlers_lock);
+ return rc;
+}
+EXPORT_SYMBOL(crash_shutdown_register);
+
+int crash_shutdown_unregister(crash_shutdown_t handler)
+{
+ unsigned int i, rc;
+
+ spin_lock(&crash_handlers_lock);
+ for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
+ if (crash_shutdown_handles[i] == handler)
+ break;
+
+ if (i == CRASH_HANDLER_MAX) {
+ printk(KERN_ERR "Crash shutdown handle not found\n");
+ rc = 1;
+ } else {
+ /* Shift handles down */
+ for (; crash_shutdown_handles[i]; i++)
+ crash_shutdown_handles[i] =
+ crash_shutdown_handles[i+1];
+ rc = 0;
+ }
+
+ spin_unlock(&crash_handlers_lock);
+ return rc;
+}
+EXPORT_SYMBOL(crash_shutdown_unregister);
void default_machine_crash_shutdown(struct pt_regs *regs)
{
- unsigned int irq;
+ unsigned int i;
+ int (*old_handler)(struct pt_regs *regs);
/*
* This function is only called after the system
@@ -289,26 +314,54 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
* an SMP system.
* The kernel is broken so disable interrupts.
*/
- local_irq_disable();
-
- for_each_irq(irq) {
- struct irq_desc *desc = irq_desc + irq;
-
- if (desc->status & IRQ_INPROGRESS)
- desc->chip->eoi(irq);
-
- if (!(desc->status & IRQ_DISABLED))
- desc->chip->disable(irq);
- }
+ hard_irq_disable();
/*
* Make a note of crashing cpu. Will be used in machine_kexec
* such that another IPI will not be sent.
*/
crashing_cpu = smp_processor_id();
- crash_save_this_cpu(regs, crashing_cpu);
+
+ /*
+ * If we came in via system reset, wait a while for the secondary
+ * CPUs to enter.
+ */
+ if (TRAP(regs) == 0x100)
+ mdelay(PRIMARY_TIMEOUT);
+
crash_kexec_prepare_cpus(crashing_cpu);
- cpu_set(crashing_cpu, cpus_in_crash);
+
+ crash_save_cpu(regs, crashing_cpu);
+
+ time_to_dump = 1;
+
+ crash_kexec_wait_realmode(crashing_cpu);
+
+ machine_kexec_mask_interrupts();
+
+ /*
+ * Call registered shutdown routines safely. Swap out
+ * __debugger_fault_handler, and replace on exit.
+ */
+ old_handler = __debugger_fault_handler;
+ __debugger_fault_handler = handle_fault;
+ crash_shutdown_cpu = smp_processor_id();
+ for (i = 0; crash_shutdown_handles[i]; i++) {
+ if (setjmp(crash_shutdown_buf) == 0) {
+ /*
+ * Insert syncs and delay to ensure
+ * instructions in the dangerous region don't
+ * leak away from this protected region.
+ */
+ asm volatile("sync; isync");
+ /* dangerous region */
+ crash_shutdown_handles[i]();
+ asm volatile("sync; isync");
+ }
+ }
+ crash_shutdown_cpu = -1;
+ __debugger_fault_handler = old_handler;
+
if (ppc_md.kexec_cpu_down)
ppc_md.kexec_cpu_down(1, 0);
}
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 2f6f5a7bc69..7a13f378ca2 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -13,10 +13,13 @@
#include <linux/crash_dump.h>
#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <asm/code-patching.h>
#include <asm/kdump.h>
-#include <asm/lmb.h>
+#include <asm/prom.h>
#include <asm/firmware.h>
#include <asm/uaccess.h>
+#include <asm/rtas.h>
#ifdef DEBUG
#include <asm/udbg.h>
@@ -25,13 +28,16 @@
#define DBG(fmt...)
#endif
-void reserve_kdump_trampoline(void)
+#ifndef CONFIG_NONSTATIC_KERNEL
+void __init reserve_kdump_trampoline(void)
{
- lmb_reserve(0, KDUMP_RESERVE_LIMIT);
+ memblock_reserve(0, KDUMP_RESERVE_LIMIT);
}
static void __init create_trampoline(unsigned long addr)
{
+ unsigned int *p = (unsigned int *)addr;
+
/* The maximum range of a single instruction branch, is the current
* instruction's address + (32 MB - 4) bytes. For the trampoline we
* need to branch to current address + 32 MB. So we insert a nop at
@@ -40,8 +46,8 @@ static void __init create_trampoline(unsigned long addr)
* branch to "addr" we jump to ("addr" + 32 MB). Although it requires
* two instructions it doesn't require any registers.
*/
- create_instruction(addr, 0x60000000); /* nop */
- create_branch(addr + 4, addr + PHYSICAL_START, 0);
+ patch_instruction(p, PPC_INST_NOP);
+ patch_branch(++p, addr + PHYSICAL_START, 0);
}
void __init setup_kdump_trampoline(void)
@@ -54,31 +60,26 @@ void __init setup_kdump_trampoline(void)
create_trampoline(i);
}
+#ifdef CONFIG_PPC_PSERIES
create_trampoline(__pa(system_reset_fwnmi) - PHYSICAL_START);
create_trampoline(__pa(machine_check_fwnmi) - PHYSICAL_START);
+#endif /* CONFIG_PPC_PSERIES */
DBG(" <- setup_kdump_trampoline()\n");
}
+#endif /* CONFIG_NONSTATIC_KERNEL */
-#ifdef CONFIG_PROC_VMCORE
-static int __init parse_elfcorehdr(char *p)
+static size_t copy_oldmem_vaddr(void *vaddr, char *buf, size_t csize,
+ unsigned long offset, int userbuf)
{
- if (p)
- elfcorehdr_addr = memparse(p, &p);
-
- return 1;
-}
-__setup("elfcorehdr=", parse_elfcorehdr);
-#endif
-
-static int __init parse_savemaxmem(char *p)
-{
- if (p)
- saved_max_pfn = (memparse(p, &p) >> PAGE_SHIFT) - 1;
+ if (userbuf) {
+ if (copy_to_user((char __user *)buf, (vaddr + offset), csize))
+ return -EFAULT;
+ } else
+ memcpy(buf, (vaddr + offset), csize);
- return 1;
+ return csize;
}
-__setup("savemaxmem=", parse_savemaxmem);
/**
* copy_oldmem_page - copy one page from "oldmem"
@@ -97,20 +98,51 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
size_t csize, unsigned long offset, int userbuf)
{
void *vaddr;
+ phys_addr_t paddr;
if (!csize)
return 0;
- vaddr = __ioremap(pfn << PAGE_SHIFT, PAGE_SIZE, 0);
+ csize = min_t(size_t, csize, PAGE_SIZE);
+ paddr = pfn << PAGE_SHIFT;
- if (userbuf) {
- if (copy_to_user((char __user *)buf, (vaddr + offset), csize)) {
- iounmap(vaddr);
- return -EFAULT;
- }
- } else
- memcpy(buf, (vaddr + offset), csize);
+ if (memblock_is_region_memory(paddr, csize)) {
+ vaddr = __va(paddr);
+ csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
+ } else {
+ vaddr = __ioremap(paddr, PAGE_SIZE, 0);
+ csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
+ iounmap(vaddr);
+ }
- iounmap(vaddr);
return csize;
}
+
+#ifdef CONFIG_PPC_RTAS
+/*
+ * The crashkernel region will almost always overlap the RTAS region, so
+ * we have to be careful when shrinking the crashkernel region.
+ */
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
+{
+ unsigned long addr;
+ const __be32 *basep, *sizep;
+ unsigned int rtas_start = 0, rtas_end = 0;
+
+ basep = of_get_property(rtas.dev, "linux,rtas-base", NULL);
+ sizep = of_get_property(rtas.dev, "rtas-size", NULL);
+
+ if (basep && sizep) {
+ rtas_start = be32_to_cpup(basep);
+ rtas_end = rtas_start + be32_to_cpup(sizep);
+ }
+
+ for (addr = begin; addr < end; addr += PAGE_SIZE) {
+ /* Does this page overlap with the RTAS region? */
+ if (addr <= rtas_end && ((addr + PAGE_SIZE) > rtas_start))
+ continue;
+
+ free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
+ }
+}
+#endif
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
new file mode 100644
index 00000000000..d55c76c571f
--- /dev/null
+++ b/arch/powerpc/kernel/dbell.c
@@ -0,0 +1,57 @@
+/*
+ * Author: Kumar Gala <galak@kernel.crashing.org>
+ *
+ * Copyright 2009 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/threads.h>
+#include <linux/hardirq.h>
+
+#include <asm/dbell.h>
+#include <asm/irq_regs.h>
+
+#ifdef CONFIG_SMP
+void doorbell_setup_this_cpu(void)
+{
+ unsigned long tag = mfspr(SPRN_DOORBELL_CPUTAG) & PPC_DBELL_TAG_MASK;
+
+ smp_muxed_ipi_set_data(smp_processor_id(), tag);
+}
+
+void doorbell_cause_ipi(int cpu, unsigned long data)
+{
+ /* Order previous accesses vs. msgsnd, which is treated as a store */
+ mb();
+ ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, data);
+}
+
+void doorbell_exception(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ irq_enter();
+
+ may_hard_irq_enable();
+
+ __get_cpu_var(irq_stat).doorbell_irqs++;
+
+ smp_ipi_demux();
+
+ irq_exit();
+ set_irq_regs(old_regs);
+}
+#else /* CONFIG_SMP */
+void doorbell_exception(struct pt_regs *regs)
+{
+ printk(KERN_WARNING "Received doorbell on non-smp system\n");
+}
+#endif /* CONFIG_SMP */
+
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
new file mode 100644
index 00000000000..54d0116256f
--- /dev/null
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * Provide default implementations of the DMA mapping callbacks for
+ * busses using the iommu infrastructure
+ */
+
+#include <linux/export.h>
+#include <asm/iommu.h>
+
+/*
+ * Generic iommu implementation
+ */
+
+/* Allocates a contiguous real buffer and creates mappings over it.
+ * Returns the virtual address of the buffer and sets dma_handle
+ * to the dma address (mapping) of the first page.
+ */
+static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag,
+ struct dma_attrs *attrs)
+{
+ return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
+ dma_handle, dev->coherent_dma_mask, flag,
+ dev_to_node(dev));
+}
+
+static void dma_iommu_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+ iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
+}
+
+/* Creates TCEs for a user provided buffer. The user buffer must be
+ * contiguous real kernel storage (not vmalloc). The address passed here
+ * comprises a page address and offset into that page. The dma_addr_t
+ * returned will point to the same byte within the page as was passed in.
+ */
+static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ return iommu_map_page(dev, get_iommu_table_base(dev), page, offset,
+ size, device_to_mask(dev), direction, attrs);
+}
+
+
+static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction,
+ attrs);
+}
+
+
+static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ return iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
+ device_to_mask(dev), direction, attrs);
+}
+
+static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, direction,
+ attrs);
+}
+
+/* We support DMA to/from any memory page via the iommu */
+static int dma_iommu_dma_supported(struct device *dev, u64 mask)
+{
+ struct iommu_table *tbl = get_iommu_table_base(dev);
+
+ if (!tbl) {
+ dev_info(dev, "Warning: IOMMU dma not supported: mask 0x%08llx"
+ ", table unavailable\n", mask);
+ return 0;
+ }
+
+ if (tbl->it_offset > (mask >> tbl->it_page_shift)) {
+ dev_info(dev, "Warning: IOMMU offset too big for device mask\n");
+ dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",
+ mask, tbl->it_offset << tbl->it_page_shift);
+ return 0;
+ } else
+ return 1;
+}
+
+static u64 dma_iommu_get_required_mask(struct device *dev)
+{
+ struct iommu_table *tbl = get_iommu_table_base(dev);
+ u64 mask;
+ if (!tbl)
+ return 0;
+
+ mask = 1ULL < (fls_long(tbl->it_offset + tbl->it_size) - 1);
+ mask += mask - 1;
+
+ return mask;
+}
+
+struct dma_map_ops dma_iommu_ops = {
+ .alloc = dma_iommu_alloc_coherent,
+ .free = dma_iommu_free_coherent,
+ .mmap = dma_direct_mmap_coherent,
+ .map_sg = dma_iommu_map_sg,
+ .unmap_sg = dma_iommu_unmap_sg,
+ .dma_supported = dma_iommu_dma_supported,
+ .map_page = dma_iommu_map_page,
+ .unmap_page = dma_iommu_unmap_page,
+ .get_required_mask = dma_iommu_get_required_mask,
+};
+EXPORT_SYMBOL(dma_iommu_ops);
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
new file mode 100644
index 00000000000..bd1a2aba599
--- /dev/null
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -0,0 +1,127 @@
+/*
+ * Contains routines needed to support swiotlb for ppc.
+ *
+ * Copyright (C) 2009-2010 Freescale Semiconductor, Inc.
+ * Author: Becky Bruce
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/memblock.h>
+#include <linux/pfn.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pci.h>
+
+#include <asm/machdep.h>
+#include <asm/swiotlb.h>
+#include <asm/dma.h>
+
+unsigned int ppc_swiotlb_enable;
+
+static u64 swiotlb_powerpc_get_required(struct device *dev)
+{
+ u64 end, mask, max_direct_dma_addr = dev->archdata.max_direct_dma_addr;
+
+ end = memblock_end_of_DRAM();
+ if (max_direct_dma_addr && end > max_direct_dma_addr)
+ end = max_direct_dma_addr;
+ end += get_dma_offset(dev);
+
+ mask = 1ULL << (fls64(end) - 1);
+ mask += mask - 1;
+
+ return mask;
+}
+
+/*
+ * At the moment, all platforms that use this code only require
+ * swiotlb to be used if we're operating on HIGHMEM. Since
+ * we don't ever call anything other than map_sg, unmap_sg,
+ * map_page, and unmap_page on highmem, use normal dma_ops
+ * for everything else.
+ */
+struct dma_map_ops swiotlb_dma_ops = {
+ .alloc = dma_direct_alloc_coherent,
+ .free = dma_direct_free_coherent,
+ .mmap = dma_direct_mmap_coherent,
+ .map_sg = swiotlb_map_sg_attrs,
+ .unmap_sg = swiotlb_unmap_sg_attrs,
+ .dma_supported = swiotlb_dma_supported,
+ .map_page = swiotlb_map_page,
+ .unmap_page = swiotlb_unmap_page,
+ .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+ .sync_single_for_device = swiotlb_sync_single_for_device,
+ .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+ .sync_sg_for_device = swiotlb_sync_sg_for_device,
+ .mapping_error = swiotlb_dma_mapping_error,
+ .get_required_mask = swiotlb_powerpc_get_required,
+};
+
+void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev)
+{
+ struct pci_controller *hose;
+ struct dev_archdata *sd;
+
+ hose = pci_bus_to_host(pdev->bus);
+ sd = &pdev->dev.archdata;
+ sd->max_direct_dma_addr =
+ hose->dma_window_base_cur + hose->dma_window_size;
+}
+
+static int ppc_swiotlb_bus_notify(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+ struct dev_archdata *sd;
+
+ /* We are only intereted in device addition */
+ if (action != BUS_NOTIFY_ADD_DEVICE)
+ return 0;
+
+ sd = &dev->archdata;
+ sd->max_direct_dma_addr = 0;
+
+ /* May need to bounce if the device can't address all of DRAM */
+ if ((dma_get_mask(dev) + 1) < memblock_end_of_DRAM())
+ set_dma_ops(dev, &swiotlb_dma_ops);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ppc_swiotlb_plat_bus_notifier = {
+ .notifier_call = ppc_swiotlb_bus_notify,
+ .priority = 0,
+};
+
+int __init swiotlb_setup_bus_notifier(void)
+{
+ bus_register_notifier(&platform_bus_type,
+ &ppc_swiotlb_plat_bus_notifier);
+ return 0;
+}
+
+void swiotlb_detect_4g(void)
+{
+ if ((memblock_end_of_DRAM() - 1) > 0xffffffff)
+ ppc_swiotlb_enable = 1;
+}
+
+static int __init swiotlb_late_init(void)
+{
+ if (ppc_swiotlb_enable) {
+ swiotlb_print_info();
+ set_pci_dma_ops(&swiotlb_dma_ops);
+ ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
+ } else {
+ swiotlb_free();
+ }
+
+ return 0;
+}
+subsys_initcall(swiotlb_late_init);
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
new file mode 100644
index 00000000000..ee78f6e49d6
--- /dev/null
+++ b/arch/powerpc/kernel/dma.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * Provide default implementations of the DMA mapping callbacks for
+ * directly mapped busses.
+ */
+
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-debug.h>
+#include <linux/gfp.h>
+#include <linux/memblock.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <asm/vio.h>
+#include <asm/bug.h>
+#include <asm/machdep.h>
+
+/*
+ * Generic direct DMA implementation
+ *
+ * This implementation supports a per-device offset that can be applied if
+ * the address at which memory is visible to devices is not 0. Platform code
+ * can set archdata.dma_data to an unsigned long holding the offset. By
+ * default the offset is PCI_DRAM_OFFSET.
+ */
+
+
+void *dma_direct_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag,
+ struct dma_attrs *attrs)
+{
+ void *ret;
+#ifdef CONFIG_NOT_COHERENT_CACHE
+ ret = __dma_alloc_coherent(dev, size, dma_handle, flag);
+ if (ret == NULL)
+ return NULL;
+ *dma_handle += get_dma_offset(dev);
+ return ret;
+#else
+ struct page *page;
+ int node = dev_to_node(dev);
+
+ /* ignore region specifiers */
+ flag &= ~(__GFP_HIGHMEM);
+
+ page = alloc_pages_node(node, flag, get_order(size));
+ if (page == NULL)
+ return NULL;
+ ret = page_address(page);
+ memset(ret, 0, size);
+ *dma_handle = __pa(ret) + get_dma_offset(dev);
+
+ return ret;
+#endif
+}
+
+void dma_direct_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+#ifdef CONFIG_NOT_COHERENT_CACHE
+ __dma_free_coherent(size, vaddr);
+#else
+ free_pages((unsigned long)vaddr, get_order(size));
+#endif
+}
+
+int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t handle, size_t size,
+ struct dma_attrs *attrs)
+{
+ unsigned long pfn;
+
+#ifdef CONFIG_NOT_COHERENT_CACHE
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr);
+#else
+ pfn = page_to_pfn(virt_to_page(cpu_addr));
+#endif
+ return remap_pfn_range(vma, vma->vm_start,
+ pfn + vma->vm_pgoff,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+}
+
+static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nents, i) {
+ sg->dma_address = sg_phys(sg) + get_dma_offset(dev);
+ sg->dma_length = sg->length;
+ __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
+ }
+
+ return nents;
+}
+
+static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+}
+
+static int dma_direct_dma_supported(struct device *dev, u64 mask)
+{
+#ifdef CONFIG_PPC64
+ /* Could be improved so platforms can set the limit in case
+ * they have limited DMA windows
+ */
+ return mask >= get_dma_offset(dev) + (memblock_end_of_DRAM() - 1);
+#else
+ return 1;
+#endif
+}
+
+static u64 dma_direct_get_required_mask(struct device *dev)
+{
+ u64 end, mask;
+
+ end = memblock_end_of_DRAM() + get_dma_offset(dev);
+
+ mask = 1ULL << (fls64(end) - 1);
+ mask += mask - 1;
+
+ return mask;
+}
+
+static inline dma_addr_t dma_direct_map_page(struct device *dev,
+ struct page *page,
+ unsigned long offset,
+ size_t size,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ BUG_ON(dir == DMA_NONE);
+ __dma_sync_page(page, offset, size, dir);
+ return page_to_phys(page) + offset + get_dma_offset(dev);
+}
+
+static inline void dma_direct_unmap_page(struct device *dev,
+ dma_addr_t dma_address,
+ size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
+{
+}
+
+#ifdef CONFIG_NOT_COHERENT_CACHE
+static inline void dma_direct_sync_sg(struct device *dev,
+ struct scatterlist *sgl, int nents,
+ enum dma_data_direction direction)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nents, i)
+ __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
+}
+
+static inline void dma_direct_sync_single(struct device *dev,
+ dma_addr_t dma_handle, size_t size,
+ enum dma_data_direction direction)
+{
+ __dma_sync(bus_to_virt(dma_handle), size, direction);
+}
+#endif
+
+struct dma_map_ops dma_direct_ops = {
+ .alloc = dma_direct_alloc_coherent,
+ .free = dma_direct_free_coherent,
+ .mmap = dma_direct_mmap_coherent,
+ .map_sg = dma_direct_map_sg,
+ .unmap_sg = dma_direct_unmap_sg,
+ .dma_supported = dma_direct_dma_supported,
+ .map_page = dma_direct_map_page,
+ .unmap_page = dma_direct_unmap_page,
+ .get_required_mask = dma_direct_get_required_mask,
+#ifdef CONFIG_NOT_COHERENT_CACHE
+ .sync_single_for_cpu = dma_direct_sync_single,
+ .sync_single_for_device = dma_direct_sync_single,
+ .sync_sg_for_cpu = dma_direct_sync_sg,
+ .sync_sg_for_device = dma_direct_sync_sg,
+#endif
+};
+EXPORT_SYMBOL(dma_direct_ops);
+
+#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
+
+int __dma_set_mask(struct device *dev, u64 dma_mask)
+{
+ struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+ if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL))
+ return dma_ops->set_dma_mask(dev, dma_mask);
+ if (!dev->dma_mask || !dma_supported(dev, dma_mask))
+ return -EIO;
+ *dev->dma_mask = dma_mask;
+ return 0;
+}
+int dma_set_mask(struct device *dev, u64 dma_mask)
+{
+ if (ppc_md.dma_set_mask)
+ return ppc_md.dma_set_mask(dev, dma_mask);
+ return __dma_set_mask(dev, dma_mask);
+}
+EXPORT_SYMBOL(dma_set_mask);
+
+u64 dma_get_required_mask(struct device *dev)
+{
+ struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+ if (ppc_md.dma_get_required_mask)
+ return ppc_md.dma_get_required_mask(dev);
+
+ if (unlikely(dma_ops == NULL))
+ return 0;
+
+ if (dma_ops->get_required_mask)
+ return dma_ops->get_required_mask(dev);
+
+ return DMA_BIT_MASK(8 * sizeof(dma_addr_t));
+}
+EXPORT_SYMBOL_GPL(dma_get_required_mask);
+
+static int __init dma_init(void)
+{
+ dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+#ifdef CONFIG_PCI
+ dma_debug_add_bus(&pci_bus_type);
+#endif
+#ifdef CONFIG_IBMVIO
+ dma_debug_add_bus(&vio_bus_type);
+#endif
+
+ return 0;
+}
+fs_initcall(dma_init);
+
diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c
deleted file mode 100644
index 6c168f6ea14..00000000000
--- a/arch/powerpc/kernel/dma_64.c
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Copyright (C) 2004 IBM Corporation
- *
- * Implements the generic device dma API for ppc64. Handles
- * the pci and vio busses
- */
-
-#include <linux/device.h>
-#include <linux/dma-mapping.h>
-/* Include the busses we support */
-#include <linux/pci.h>
-#include <asm/vio.h>
-#include <asm/ibmebus.h>
-#include <asm/scatterlist.h>
-#include <asm/bug.h>
-
-static struct dma_mapping_ops *get_dma_ops(struct device *dev)
-{
-#ifdef CONFIG_PCI
- if (dev->bus == &pci_bus_type)
- return &pci_dma_ops;
-#endif
-#ifdef CONFIG_IBMVIO
- if (dev->bus == &vio_bus_type)
- return &vio_dma_ops;
-#endif
-#ifdef CONFIG_IBMEBUS
- if (dev->bus == &ibmebus_bus_type)
- return &ibmebus_dma_ops;
-#endif
- return NULL;
-}
-
-int dma_supported(struct device *dev, u64 mask)
-{
- struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
- BUG_ON(!dma_ops);
-
- return dma_ops->dma_supported(dev, mask);
-}
-EXPORT_SYMBOL(dma_supported);
-
-int dma_set_mask(struct device *dev, u64 dma_mask)
-{
-#ifdef CONFIG_PCI
- if (dev->bus == &pci_bus_type)
- return pci_set_dma_mask(to_pci_dev(dev), dma_mask);
-#endif
-#ifdef CONFIG_IBMVIO
- if (dev->bus == &vio_bus_type)
- return -EIO;
-#endif /* CONFIG_IBMVIO */
-#ifdef CONFIG_IBMEBUS
- if (dev->bus == &ibmebus_bus_type)
- return -EIO;
-#endif
- BUG();
- return 0;
-}
-EXPORT_SYMBOL(dma_set_mask);
-
-void *dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag)
-{
- struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
- BUG_ON(!dma_ops);
-
- return dma_ops->alloc_coherent(dev, size, dma_handle, flag);
-}
-EXPORT_SYMBOL(dma_alloc_coherent);
-
-void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
- dma_addr_t dma_handle)
-{
- struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
- BUG_ON(!dma_ops);
-
- dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
-}
-EXPORT_SYMBOL(dma_free_coherent);
-
-dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size,
- enum dma_data_direction direction)
-{
- struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
- BUG_ON(!dma_ops);
-
- return dma_ops->map_single(dev, cpu_addr, size, direction);
-}
-EXPORT_SYMBOL(dma_map_single);
-
-void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
- enum dma_data_direction direction)
-{
- struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
- BUG_ON(!dma_ops);
-
- dma_ops->unmap_single(dev, dma_addr, size, direction);
-}
-EXPORT_SYMBOL(dma_unmap_single);
-
-dma_addr_t dma_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction)
-{
- struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
- BUG_ON(!dma_ops);
-
- return dma_ops->map_single(dev, page_address(page) + offset, size,
- direction);
-}
-EXPORT_SYMBOL(dma_map_page);
-
-void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
- enum dma_data_direction direction)
-{
- struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
- BUG_ON(!dma_ops);
-
- dma_ops->unmap_single(dev, dma_address, size, direction);
-}
-EXPORT_SYMBOL(dma_unmap_page);
-
-int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
- BUG_ON(!dma_ops);
-
- return dma_ops->map_sg(dev, sg, nents, direction);
-}
-EXPORT_SYMBOL(dma_map_sg);
-
-void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
- enum dma_data_direction direction)
-{
- struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
- BUG_ON(!dma_ops);
-
- dma_ops->unmap_sg(dev, sg, nhwentries, direction);
-}
-EXPORT_SYMBOL(dma_unmap_sg);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
new file mode 100644
index 00000000000..86e25702aac
--- /dev/null
+++ b/arch/powerpc/kernel/eeh.c
@@ -0,0 +1,1183 @@
+/*
+ * Copyright IBM Corporation 2001, 2005, 2006
+ * Copyright Dave Engebretsen & Todd Inglett 2001
+ * Copyright Linas Vepstas 2005, 2006
+ * Copyright 2001-2012 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/debugfs.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/reboot.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/export.h>
+#include <linux/of.h>
+
+#include <linux/atomic.h>
+#include <asm/debug.h>
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+#include <asm/rtas.h>
+
+
+/** Overview:
+ * EEH, or "Extended Error Handling" is a PCI bridge technology for
+ * dealing with PCI bus errors that can't be dealt with within the
+ * usual PCI framework, except by check-stopping the CPU. Systems
+ * that are designed for high-availability/reliability cannot afford
+ * to crash due to a "mere" PCI error, thus the need for EEH.
+ * An EEH-capable bridge operates by converting a detected error
+ * into a "slot freeze", taking the PCI adapter off-line, making
+ * the slot behave, from the OS'es point of view, as if the slot
+ * were "empty": all reads return 0xff's and all writes are silently
+ * ignored. EEH slot isolation events can be triggered by parity
+ * errors on the address or data busses (e.g. during posted writes),
+ * which in turn might be caused by low voltage on the bus, dust,
+ * vibration, humidity, radioactivity or plain-old failed hardware.
+ *
+ * Note, however, that one of the leading causes of EEH slot
+ * freeze events are buggy device drivers, buggy device microcode,
+ * or buggy device hardware. This is because any attempt by the
+ * device to bus-master data to a memory address that is not
+ * assigned to the device will trigger a slot freeze. (The idea
+ * is to prevent devices-gone-wild from corrupting system memory).
+ * Buggy hardware/drivers will have a miserable time co-existing
+ * with EEH.
+ *
+ * Ideally, a PCI device driver, when suspecting that an isolation
+ * event has occurred (e.g. by reading 0xff's), will then ask EEH
+ * whether this is the case, and then take appropriate steps to
+ * reset the PCI slot, the PCI device, and then resume operations.
+ * However, until that day, the checking is done here, with the
+ * eeh_check_failure() routine embedded in the MMIO macros. If
+ * the slot is found to be isolated, an "EEH Event" is synthesized
+ * and sent out for processing.
+ */
+
+/* If a device driver keeps reading an MMIO register in an interrupt
+ * handler after a slot isolation event, it might be broken.
+ * This sets the threshold for how many read attempts we allow
+ * before printing an error message.
+ */
+#define EEH_MAX_FAILS 2100000
+
+/* Time to wait for a PCI slot to report status, in milliseconds */
+#define PCI_BUS_RESET_WAIT_MSEC (5*60*1000)
+
+/*
+ * EEH probe mode support, which is part of the flags,
+ * is to support multiple platforms for EEH. Some platforms
+ * like pSeries do PCI emunation based on device tree.
+ * However, other platforms like powernv probe PCI devices
+ * from hardware. The flag is used to distinguish that.
+ * In addition, struct eeh_ops::probe would be invoked for
+ * particular OF node or PCI device so that the corresponding
+ * PE would be created there.
+ */
+int eeh_subsystem_flags;
+EXPORT_SYMBOL(eeh_subsystem_flags);
+
+/* Platform dependent EEH operations */
+struct eeh_ops *eeh_ops = NULL;
+
+/* Lock to avoid races due to multiple reports of an error */
+DEFINE_RAW_SPINLOCK(confirm_error_lock);
+
+/* Buffer for reporting pci register dumps. Its here in BSS, and
+ * not dynamically alloced, so that it ends up in RMO where RTAS
+ * can access it.
+ */
+#define EEH_PCI_REGS_LOG_LEN 4096
+static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
+
+/*
+ * The struct is used to maintain the EEH global statistic
+ * information. Besides, the EEH global statistics will be
+ * exported to user space through procfs
+ */
+struct eeh_stats {
+ u64 no_device; /* PCI device not found */
+ u64 no_dn; /* OF node not found */
+ u64 no_cfg_addr; /* Config address not found */
+ u64 ignored_check; /* EEH check skipped */
+ u64 total_mmio_ffs; /* Total EEH checks */
+ u64 false_positives; /* Unnecessary EEH checks */
+ u64 slot_resets; /* PE reset */
+};
+
+static struct eeh_stats eeh_stats;
+
+#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
+
+static int __init eeh_setup(char *str)
+{
+ if (!strcmp(str, "off"))
+ eeh_subsystem_flags |= EEH_FORCE_DISABLED;
+
+ return 1;
+}
+__setup("eeh=", eeh_setup);
+
+/**
+ * eeh_gather_pci_data - Copy assorted PCI config space registers to buff
+ * @edev: device to report data for
+ * @buf: point to buffer in which to log
+ * @len: amount of room in buffer
+ *
+ * This routine captures assorted PCI configuration space data,
+ * and puts them into a buffer for RTAS error logging.
+ */
+static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
+{
+ struct device_node *dn = eeh_dev_to_of_node(edev);
+ u32 cfg;
+ int cap, i;
+ int n = 0;
+
+ n += scnprintf(buf+n, len-n, "%s\n", dn->full_name);
+ pr_warn("EEH: of node=%s\n", dn->full_name);
+
+ eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
+ pr_warn("EEH: PCI device/vendor: %08x\n", cfg);
+
+ eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
+ pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);
+
+ /* Gather bridge-specific registers */
+ if (edev->mode & EEH_DEV_BRIDGE) {
+ eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg);
+ n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
+ pr_warn("EEH: Bridge secondary status: %04x\n", cfg);
+
+ eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg);
+ n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
+ pr_warn("EEH: Bridge control: %04x\n", cfg);
+ }
+
+ /* Dump out the PCI-X command and status regs */
+ cap = edev->pcix_cap;
+ if (cap) {
+ eeh_ops->read_config(dn, cap, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
+ pr_warn("EEH: PCI-X cmd: %08x\n", cfg);
+
+ eeh_ops->read_config(dn, cap+4, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
+ pr_warn("EEH: PCI-X status: %08x\n", cfg);
+ }
+
+ /* If PCI-E capable, dump PCI-E cap 10 */
+ cap = edev->pcie_cap;
+ if (cap) {
+ n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
+ pr_warn("EEH: PCI-E capabilities and status follow:\n");
+
+ for (i=0; i<=8; i++) {
+ eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
+ pr_warn("EEH: PCI-E %02x: %08x\n", i, cfg);
+ }
+ }
+
+ /* If AER capable, dump it */
+ cap = edev->aer_cap;
+ if (cap) {
+ n += scnprintf(buf+n, len-n, "pci-e AER:\n");
+ pr_warn("EEH: PCI-E AER capability register set follows:\n");
+
+ for (i=0; i<14; i++) {
+ eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
+ n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
+ pr_warn("EEH: PCI-E AER %02x: %08x\n", i, cfg);
+ }
+ }
+
+ return n;
+}
+
+/**
+ * eeh_slot_error_detail - Generate combined log including driver log and error log
+ * @pe: EEH PE
+ * @severity: temporary or permanent error log
+ *
+ * This routine should be called to generate the combined log, which
+ * is comprised of driver log and error log. The driver log is figured
+ * out from the config space of the corresponding PCI device, while
+ * the error log is fetched through platform dependent function call.
+ */
+void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
+{
+ size_t loglen = 0;
+ struct eeh_dev *edev, *tmp;
+
+ /*
+ * When the PHB is fenced or dead, it's pointless to collect
+ * the data from PCI config space because it should return
+ * 0xFF's. For ER, we still retrieve the data from the PCI
+ * config space.
+ *
+ * For pHyp, we have to enable IO for log retrieval. Otherwise,
+ * 0xFF's is always returned from PCI config space.
+ */
+ if (!(pe->type & EEH_PE_PHB)) {
+ if (eeh_probe_mode_devtree())
+ eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+ eeh_ops->configure_bridge(pe);
+ eeh_pe_restore_bars(pe);
+
+ pci_regs_buf[0] = 0;
+ eeh_pe_for_each_dev(pe, edev, tmp) {
+ loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen,
+ EEH_PCI_REGS_LOG_LEN - loglen);
+ }
+ }
+
+ eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
+}
+
+/**
+ * eeh_token_to_phys - Convert EEH address token to phys address
+ * @token: I/O token, should be address in the form 0xA....
+ *
+ * This routine should be called to convert virtual I/O address
+ * to physical one.
+ */
+static inline unsigned long eeh_token_to_phys(unsigned long token)
+{
+ pte_t *ptep;
+ unsigned long pa;
+ int hugepage_shift;
+
+ /*
+ * We won't find hugepages here, iomem
+ */
+ ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
+ if (!ptep)
+ return token;
+ WARN_ON(hugepage_shift);
+ pa = pte_pfn(*ptep) << PAGE_SHIFT;
+
+ return pa | (token & (PAGE_SIZE-1));
+}
+
+/*
+ * On PowerNV platform, we might already have fenced PHB there.
+ * For that case, it's meaningless to recover frozen PE. Intead,
+ * We have to handle fenced PHB firstly.
+ */
+static int eeh_phb_check_failure(struct eeh_pe *pe)
+{
+ struct eeh_pe *phb_pe;
+ unsigned long flags;
+ int ret;
+
+ if (!eeh_probe_mode_dev())
+ return -EPERM;
+
+ /* Find the PHB PE */
+ phb_pe = eeh_phb_pe_get(pe->phb);
+ if (!phb_pe) {
+ pr_warning("%s Can't find PE for PHB#%d\n",
+ __func__, pe->phb->global_number);
+ return -EEXIST;
+ }
+
+ /* If the PHB has been in problematic state */
+ eeh_serialize_lock(&flags);
+ if (phb_pe->state & EEH_PE_ISOLATED) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Check PHB state */
+ ret = eeh_ops->get_state(phb_pe, NULL);
+ if ((ret < 0) ||
+ (ret == EEH_STATE_NOT_SUPPORT) ||
+ (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
+ (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Isolate the PHB and send event */
+ eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
+ eeh_serialize_unlock(flags);
+
+ pr_err("EEH: PHB#%x failure detected, location: %s\n",
+ phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
+ dump_stack();
+ eeh_send_failure_event(phb_pe);
+
+ return 1;
+out:
+ eeh_serialize_unlock(flags);
+ return ret;
+}
+
+/**
+ * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
+ * @edev: eeh device
+ *
+ * Check for an EEH failure for the given device node. Call this
+ * routine if the result of a read was all 0xff's and you want to
+ * find out if this is due to an EEH slot freeze. This routine
+ * will query firmware for the EEH status.
+ *
+ * Returns 0 if there has not been an EEH error; otherwise returns
+ * a non-zero value and queues up a slot isolation event notification.
+ *
+ * It is safe to call this routine in an interrupt context.
+ */
+int eeh_dev_check_failure(struct eeh_dev *edev)
+{
+ int ret;
+ int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+ unsigned long flags;
+ struct device_node *dn;
+ struct pci_dev *dev;
+ struct eeh_pe *pe, *parent_pe, *phb_pe;
+ int rc = 0;
+ const char *location;
+
+ eeh_stats.total_mmio_ffs++;
+
+ if (!eeh_enabled())
+ return 0;
+
+ if (!edev) {
+ eeh_stats.no_dn++;
+ return 0;
+ }
+ dn = eeh_dev_to_of_node(edev);
+ dev = eeh_dev_to_pci_dev(edev);
+ pe = edev->pe;
+
+ /* Access to IO BARs might get this far and still not want checking. */
+ if (!pe) {
+ eeh_stats.ignored_check++;
+ pr_debug("EEH: Ignored check for %s %s\n",
+ eeh_pci_name(dev), dn->full_name);
+ return 0;
+ }
+
+ if (!pe->addr && !pe->config_addr) {
+ eeh_stats.no_cfg_addr++;
+ return 0;
+ }
+
+ /*
+ * On PowerNV platform, we might already have fenced PHB
+ * there and we need take care of that firstly.
+ */
+ ret = eeh_phb_check_failure(pe);
+ if (ret > 0)
+ return ret;
+
+ /* If we already have a pending isolation event for this
+ * slot, we know it's bad already, we don't need to check.
+ * Do this checking under a lock; as multiple PCI devices
+ * in one slot might report errors simultaneously, and we
+ * only want one error recovery routine running.
+ */
+ eeh_serialize_lock(&flags);
+ rc = 1;
+ if (pe->state & EEH_PE_ISOLATED) {
+ pe->check_count++;
+ if (pe->check_count % EEH_MAX_FAILS == 0) {
+ location = of_get_property(dn, "ibm,loc-code", NULL);
+ printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
+ "location=%s driver=%s pci addr=%s\n",
+ pe->check_count, location,
+ eeh_driver_name(dev), eeh_pci_name(dev));
+ printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
+ eeh_driver_name(dev));
+ dump_stack();
+ }
+ goto dn_unlock;
+ }
+
+ /*
+ * Now test for an EEH failure. This is VERY expensive.
+ * Note that the eeh_config_addr may be a parent device
+ * in the case of a device behind a bridge, or it may be
+ * function zero of a multi-function device.
+ * In any case they must share a common PHB.
+ */
+ ret = eeh_ops->get_state(pe, NULL);
+
+ /* Note that config-io to empty slots may fail;
+ * they are empty when they don't have children.
+ * We will punt with the following conditions: Failure to get
+ * PE's state, EEH not support and Permanently unavailable
+ * state, PE is in good state.
+ */
+ if ((ret < 0) ||
+ (ret == EEH_STATE_NOT_SUPPORT) ||
+ ((ret & active_flags) == active_flags)) {
+ eeh_stats.false_positives++;
+ pe->false_positives++;
+ rc = 0;
+ goto dn_unlock;
+ }
+
+ /*
+ * It should be corner case that the parent PE has been
+ * put into frozen state as well. We should take care
+ * that at first.
+ */
+ parent_pe = pe->parent;
+ while (parent_pe) {
+ /* Hit the ceiling ? */
+ if (parent_pe->type & EEH_PE_PHB)
+ break;
+
+ /* Frozen parent PE ? */
+ ret = eeh_ops->get_state(parent_pe, NULL);
+ if (ret > 0 &&
+ (ret & active_flags) != active_flags)
+ pe = parent_pe;
+
+ /* Next parent level */
+ parent_pe = parent_pe->parent;
+ }
+
+ eeh_stats.slot_resets++;
+
+ /* Avoid repeated reports of this failure, including problems
+ * with other functions on this device, and functions under
+ * bridges.
+ */
+ eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+ eeh_serialize_unlock(flags);
+
+ /* Most EEH events are due to device driver bugs. Having
+ * a stack trace will help the device-driver authors figure
+ * out what happened. So print that out.
+ */
+ phb_pe = eeh_phb_pe_get(pe->phb);
+ pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+ pe->phb->global_number, pe->addr);
+ pr_err("EEH: PE location: %s, PHB location: %s\n",
+ eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
+ dump_stack();
+
+ eeh_send_failure_event(pe);
+
+ return 1;
+
+dn_unlock:
+ eeh_serialize_unlock(flags);
+ return rc;
+}
+
+EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
+
+/**
+ * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
+ * @token: I/O token, should be address in the form 0xA....
+ * @val: value, should be all 1's (XXX why do we need this arg??)
+ *
+ * Check for an EEH failure at the given token address. Call this
+ * routine if the result of a read was all 0xff's and you want to
+ * find out if this is due to an EEH slot freeze event. This routine
+ * will query firmware for the EEH status.
+ *
+ * Note this routine is safe to call in an interrupt context.
+ */
+unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
+{
+ unsigned long addr;
+ struct eeh_dev *edev;
+
+ /* Finding the phys addr + pci device; this is pretty quick. */
+ addr = eeh_token_to_phys((unsigned long __force) token);
+ edev = eeh_addr_cache_get_dev(addr);
+ if (!edev) {
+ eeh_stats.no_device++;
+ return val;
+ }
+
+ eeh_dev_check_failure(edev);
+ return val;
+}
+
+EXPORT_SYMBOL(eeh_check_failure);
+
+
+/**
+ * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
+ * @pe: EEH PE
+ *
+ * This routine should be called to reenable frozen MMIO or DMA
+ * so that it would work correctly again. It's useful while doing
+ * recovery or log collection on the indicated device.
+ */
+int eeh_pci_enable(struct eeh_pe *pe, int function)
+{
+ int rc, flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+
+ /*
+ * pHyp doesn't allow to enable IO or DMA on unfrozen PE.
+ * Also, it's pointless to enable them on unfrozen PE. So
+ * we have the check here.
+ */
+ if (function == EEH_OPT_THAW_MMIO ||
+ function == EEH_OPT_THAW_DMA) {
+ rc = eeh_ops->get_state(pe, NULL);
+ if (rc < 0)
+ return rc;
+
+ /* Needn't to enable or already enabled */
+ if ((rc == EEH_STATE_NOT_SUPPORT) ||
+ ((rc & flags) == flags))
+ return 0;
+ }
+
+ rc = eeh_ops->set_option(pe, function);
+ if (rc)
+ pr_warn("%s: Unexpected state change %d on "
+ "PHB#%d-PE#%x, err=%d\n",
+ __func__, function, pe->phb->global_number,
+ pe->addr, rc);
+
+ rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
+ if (rc <= 0)
+ return rc;
+
+ if ((function == EEH_OPT_THAW_MMIO) &&
+ (rc & EEH_STATE_MMIO_ENABLED))
+ return 0;
+
+ if ((function == EEH_OPT_THAW_DMA) &&
+ (rc & EEH_STATE_DMA_ENABLED))
+ return 0;
+
+ return rc;
+}
+
+/**
+ * pcibios_set_pcie_slot_reset - Set PCI-E reset state
+ * @dev: pci device struct
+ * @state: reset state to enter
+ *
+ * Return value:
+ * 0 if success
+ */
+int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
+{
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
+ struct eeh_pe *pe = edev->pe;
+
+ if (!pe) {
+ pr_err("%s: No PE found on PCI device %s\n",
+ __func__, pci_name(dev));
+ return -EINVAL;
+ }
+
+ switch (state) {
+ case pcie_deassert_reset:
+ eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
+ break;
+ case pcie_hot_reset:
+ eeh_ops->reset(pe, EEH_RESET_HOT);
+ break;
+ case pcie_warm_reset:
+ eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
+ break;
+ default:
+ return -EINVAL;
+ };
+
+ return 0;
+}
+
+/**
+ * eeh_set_pe_freset - Check the required reset for the indicated device
+ * @data: EEH device
+ * @flag: return value
+ *
+ * Each device might have its preferred reset type: fundamental or
+ * hot reset. The routine is used to collected the information for
+ * the indicated device and its children so that the bunch of the
+ * devices could be reset properly.
+ */
+static void *eeh_set_dev_freset(void *data, void *flag)
+{
+ struct pci_dev *dev;
+ unsigned int *freset = (unsigned int *)flag;
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+
+ dev = eeh_dev_to_pci_dev(edev);
+ if (dev)
+ *freset |= dev->needs_freset;
+
+ return NULL;
+}
+
+/**
+ * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
+ * @pe: EEH PE
+ *
+ * Assert the PCI #RST line for 1/4 second.
+ */
+static void eeh_reset_pe_once(struct eeh_pe *pe)
+{
+ unsigned int freset = 0;
+
+ /* Determine type of EEH reset required for
+ * Partitionable Endpoint, a hot-reset (1)
+ * or a fundamental reset (3).
+ * A fundamental reset required by any device under
+ * Partitionable Endpoint trumps hot-reset.
+ */
+ eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
+
+ if (freset)
+ eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
+ else
+ eeh_ops->reset(pe, EEH_RESET_HOT);
+
+ eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
+}
+
+/**
+ * eeh_reset_pe - Reset the indicated PE
+ * @pe: EEH PE
+ *
+ * This routine should be called to reset indicated device, including
+ * PE. A PE might include multiple PCI devices and sometimes PCI bridges
+ * might be involved as well.
+ */
+int eeh_reset_pe(struct eeh_pe *pe)
+{
+ int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+ int i, rc;
+
+ /* Take three shots at resetting the bus */
+ for (i=0; i<3; i++) {
+ eeh_reset_pe_once(pe);
+
+ /*
+ * EEH_PE_ISOLATED is expected to be removed after
+ * BAR restore.
+ */
+ rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
+ if ((rc & flags) == flags)
+ return 0;
+
+ if (rc < 0) {
+ pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x",
+ __func__, pe->phb->global_number, pe->addr);
+ return -1;
+ }
+ pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n",
+ i+1, pe->phb->global_number, pe->addr, rc);
+ }
+
+ return -1;
+}
+
+/**
+ * eeh_save_bars - Save device bars
+ * @edev: PCI device associated EEH device
+ *
+ * Save the values of the device bars. Unlike the restore
+ * routine, this routine is *not* recursive. This is because
+ * PCI devices are added individually; but, for the restore,
+ * an entire slot is reset at a time.
+ */
+void eeh_save_bars(struct eeh_dev *edev)
+{
+ int i;
+ struct device_node *dn;
+
+ if (!edev)
+ return;
+ dn = eeh_dev_to_of_node(edev);
+
+ for (i = 0; i < 16; i++)
+ eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]);
+
+ /*
+ * For PCI bridges including root port, we need enable bus
+ * master explicitly. Otherwise, it can't fetch IODA table
+ * entries correctly. So we cache the bit in advance so that
+ * we can restore it after reset, either PHB range or PE range.
+ */
+ if (edev->mode & EEH_DEV_BRIDGE)
+ edev->config_space[1] |= PCI_COMMAND_MASTER;
+}
+
+/**
+ * eeh_ops_register - Register platform dependent EEH operations
+ * @ops: platform dependent EEH operations
+ *
+ * Register the platform dependent EEH operation callback
+ * functions. The platform should call this function before
+ * any other EEH operations.
+ */
+int __init eeh_ops_register(struct eeh_ops *ops)
+{
+ if (!ops->name) {
+ pr_warning("%s: Invalid EEH ops name for %p\n",
+ __func__, ops);
+ return -EINVAL;
+ }
+
+ if (eeh_ops && eeh_ops != ops) {
+ pr_warning("%s: EEH ops of platform %s already existing (%s)\n",
+ __func__, eeh_ops->name, ops->name);
+ return -EEXIST;
+ }
+
+ eeh_ops = ops;
+
+ return 0;
+}
+
+/**
+ * eeh_ops_unregister - Unreigster platform dependent EEH operations
+ * @name: name of EEH platform operations
+ *
+ * Unregister the platform dependent EEH operation callback
+ * functions.
+ */
+int __exit eeh_ops_unregister(const char *name)
+{
+ if (!name || !strlen(name)) {
+ pr_warning("%s: Invalid EEH ops name\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ if (eeh_ops && !strcmp(eeh_ops->name, name)) {
+ eeh_ops = NULL;
+ return 0;
+ }
+
+ return -EEXIST;
+}
+
+static int eeh_reboot_notifier(struct notifier_block *nb,
+ unsigned long action, void *unused)
+{
+ eeh_set_enable(false);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block eeh_reboot_nb = {
+ .notifier_call = eeh_reboot_notifier,
+};
+
+/**
+ * eeh_init - EEH initialization
+ *
+ * Initialize EEH by trying to enable it for all of the adapters in the system.
+ * As a side effect we can determine here if eeh is supported at all.
+ * Note that we leave EEH on so failed config cycles won't cause a machine
+ * check. If a user turns off EEH for a particular adapter they are really
+ * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
+ * grant access to a slot if EEH isn't enabled, and so we always enable
+ * EEH for all slots/all devices.
+ *
+ * The eeh-force-off option disables EEH checking globally, for all slots.
+ * Even if force-off is set, the EEH hardware is still enabled, so that
+ * newer systems can boot.
+ */
+int eeh_init(void)
+{
+ struct pci_controller *hose, *tmp;
+ struct device_node *phb;
+ static int cnt = 0;
+ int ret = 0;
+
+ /*
+ * We have to delay the initialization on PowerNV after
+ * the PCI hierarchy tree has been built because the PEs
+ * are figured out based on PCI devices instead of device
+ * tree nodes
+ */
+ if (machine_is(powernv) && cnt++ <= 0)
+ return ret;
+
+ /* Register reboot notifier */
+ ret = register_reboot_notifier(&eeh_reboot_nb);
+ if (ret) {
+ pr_warn("%s: Failed to register notifier (%d)\n",
+ __func__, ret);
+ return ret;
+ }
+
+ /* call platform initialization function */
+ if (!eeh_ops) {
+ pr_warning("%s: Platform EEH operation not found\n",
+ __func__);
+ return -EEXIST;
+ } else if ((ret = eeh_ops->init())) {
+ pr_warning("%s: Failed to call platform init function (%d)\n",
+ __func__, ret);
+ return ret;
+ }
+
+ /* Initialize EEH event */
+ ret = eeh_event_init();
+ if (ret)
+ return ret;
+
+ /* Enable EEH for all adapters */
+ if (eeh_probe_mode_devtree()) {
+ list_for_each_entry_safe(hose, tmp,
+ &hose_list, list_node) {
+ phb = hose->dn;
+ traverse_pci_devices(phb, eeh_ops->of_probe, NULL);
+ }
+ } else if (eeh_probe_mode_dev()) {
+ list_for_each_entry_safe(hose, tmp,
+ &hose_list, list_node)
+ pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL);
+ } else {
+ pr_warn("%s: Invalid probe mode %x",
+ __func__, eeh_subsystem_flags);
+ return -EINVAL;
+ }
+
+ /*
+ * Call platform post-initialization. Actually, It's good chance
+ * to inform platform that EEH is ready to supply service if the
+ * I/O cache stuff has been built up.
+ */
+ if (eeh_ops->post_init) {
+ ret = eeh_ops->post_init();
+ if (ret)
+ return ret;
+ }
+
+ if (eeh_enabled())
+ pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
+ else
+ pr_warning("EEH: No capable adapters found\n");
+
+ return ret;
+}
+
+core_initcall_sync(eeh_init);
+
+/**
+ * eeh_add_device_early - Enable EEH for the indicated device_node
+ * @dn: device node for which to set up EEH
+ *
+ * This routine must be used to perform EEH initialization for PCI
+ * devices that were added after system boot (e.g. hotplug, dlpar).
+ * This routine must be called before any i/o is performed to the
+ * adapter (inluding any config-space i/o).
+ * Whether this actually enables EEH or not for this device depends
+ * on the CEC architecture, type of the device, on earlier boot
+ * command-line arguments & etc.
+ */
+void eeh_add_device_early(struct device_node *dn)
+{
+ struct pci_controller *phb;
+
+ /*
+ * If we're doing EEH probe based on PCI device, we
+ * would delay the probe until late stage because
+ * the PCI device isn't available this moment.
+ */
+ if (!eeh_probe_mode_devtree())
+ return;
+
+ if (!of_node_to_eeh_dev(dn))
+ return;
+ phb = of_node_to_eeh_dev(dn)->phb;
+
+ /* USB Bus children of PCI devices will not have BUID's */
+ if (NULL == phb || 0 == phb->buid)
+ return;
+
+ eeh_ops->of_probe(dn, NULL);
+}
+
+/**
+ * eeh_add_device_tree_early - Enable EEH for the indicated device
+ * @dn: device node
+ *
+ * This routine must be used to perform EEH initialization for the
+ * indicated PCI device that was added after system boot (e.g.
+ * hotplug, dlpar).
+ */
+void eeh_add_device_tree_early(struct device_node *dn)
+{
+ struct device_node *sib;
+
+ for_each_child_of_node(dn, sib)
+ eeh_add_device_tree_early(sib);
+ eeh_add_device_early(dn);
+}
+EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
+
+/**
+ * eeh_add_device_late - Perform EEH initialization for the indicated pci device
+ * @dev: pci device for which to set up EEH
+ *
+ * This routine must be used to complete EEH initialization for PCI
+ * devices that were added after system boot (e.g. hotplug, dlpar).
+ */
+void eeh_add_device_late(struct pci_dev *dev)
+{
+ struct device_node *dn;
+ struct eeh_dev *edev;
+
+ if (!dev || !eeh_enabled())
+ return;
+
+ pr_debug("EEH: Adding device %s\n", pci_name(dev));
+
+ dn = pci_device_to_OF_node(dev);
+ edev = of_node_to_eeh_dev(dn);
+ if (edev->pdev == dev) {
+ pr_debug("EEH: Already referenced !\n");
+ return;
+ }
+
+ /*
+ * The EEH cache might not be removed correctly because of
+ * unbalanced kref to the device during unplug time, which
+ * relies on pcibios_release_device(). So we have to remove
+ * that here explicitly.
+ */
+ if (edev->pdev) {
+ eeh_rmv_from_parent_pe(edev);
+ eeh_addr_cache_rmv_dev(edev->pdev);
+ eeh_sysfs_remove_device(edev->pdev);
+ edev->mode &= ~EEH_DEV_SYSFS;
+
+ /*
+ * We definitely should have the PCI device removed
+ * though it wasn't correctly. So we needn't call
+ * into error handler afterwards.
+ */
+ edev->mode |= EEH_DEV_NO_HANDLER;
+
+ edev->pdev = NULL;
+ dev->dev.archdata.edev = NULL;
+ }
+
+ edev->pdev = dev;
+ dev->dev.archdata.edev = edev;
+
+ /*
+ * We have to do the EEH probe here because the PCI device
+ * hasn't been created yet in the early stage.
+ */
+ if (eeh_probe_mode_dev())
+ eeh_ops->dev_probe(dev, NULL);
+
+ eeh_addr_cache_insert_dev(dev);
+}
+
+/**
+ * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
+ * @bus: PCI bus
+ *
+ * This routine must be used to perform EEH initialization for PCI
+ * devices which are attached to the indicated PCI bus. The PCI bus
+ * is added after system boot through hotplug or dlpar.
+ */
+void eeh_add_device_tree_late(struct pci_bus *bus)
+{
+ struct pci_dev *dev;
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ eeh_add_device_late(dev);
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+ struct pci_bus *subbus = dev->subordinate;
+ if (subbus)
+ eeh_add_device_tree_late(subbus);
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
+
+/**
+ * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
+ * @bus: PCI bus
+ *
+ * This routine must be used to add EEH sysfs files for PCI
+ * devices which are attached to the indicated PCI bus. The PCI bus
+ * is added after system boot through hotplug or dlpar.
+ */
+void eeh_add_sysfs_files(struct pci_bus *bus)
+{
+ struct pci_dev *dev;
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ eeh_sysfs_add_device(dev);
+ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+ struct pci_bus *subbus = dev->subordinate;
+ if (subbus)
+ eeh_add_sysfs_files(subbus);
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
+
+/**
+ * eeh_remove_device - Undo EEH setup for the indicated pci device
+ * @dev: pci device to be removed
+ *
+ * This routine should be called when a device is removed from
+ * a running system (e.g. by hotplug or dlpar). It unregisters
+ * the PCI device from the EEH subsystem. I/O errors affecting
+ * this device will no longer be detected after this call; thus,
+ * i/o errors affecting this slot may leave this device unusable.
+ */
+void eeh_remove_device(struct pci_dev *dev)
+{
+ struct eeh_dev *edev;
+
+ if (!dev || !eeh_enabled())
+ return;
+ edev = pci_dev_to_eeh_dev(dev);
+
+ /* Unregister the device with the EEH/PCI address search system */
+ pr_debug("EEH: Removing device %s\n", pci_name(dev));
+
+ if (!edev || !edev->pdev || !edev->pe) {
+ pr_debug("EEH: Not referenced !\n");
+ return;
+ }
+
+ /*
+ * During the hotplug for EEH error recovery, we need the EEH
+ * device attached to the parent PE in order for BAR restore
+ * a bit later. So we keep it for BAR restore and remove it
+ * from the parent PE during the BAR resotre.
+ */
+ edev->pdev = NULL;
+ dev->dev.archdata.edev = NULL;
+ if (!(edev->pe->state & EEH_PE_KEEP))
+ eeh_rmv_from_parent_pe(edev);
+ else
+ edev->mode |= EEH_DEV_DISCONNECTED;
+
+ /*
+ * We're removing from the PCI subsystem, that means
+ * the PCI device driver can't support EEH or not
+ * well. So we rely on hotplug completely to do recovery
+ * for the specific PCI device.
+ */
+ edev->mode |= EEH_DEV_NO_HANDLER;
+
+ eeh_addr_cache_rmv_dev(dev);
+ eeh_sysfs_remove_device(dev);
+ edev->mode &= ~EEH_DEV_SYSFS;
+}
+
+static int proc_eeh_show(struct seq_file *m, void *v)
+{
+ if (!eeh_enabled()) {
+ seq_printf(m, "EEH Subsystem is globally disabled\n");
+ seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
+ } else {
+ seq_printf(m, "EEH Subsystem is enabled\n");
+ seq_printf(m,
+ "no device=%llu\n"
+ "no device node=%llu\n"
+ "no config address=%llu\n"
+ "check not wanted=%llu\n"
+ "eeh_total_mmio_ffs=%llu\n"
+ "eeh_false_positives=%llu\n"
+ "eeh_slot_resets=%llu\n",
+ eeh_stats.no_device,
+ eeh_stats.no_dn,
+ eeh_stats.no_cfg_addr,
+ eeh_stats.ignored_check,
+ eeh_stats.total_mmio_ffs,
+ eeh_stats.false_positives,
+ eeh_stats.slot_resets);
+ }
+
+ return 0;
+}
+
+static int proc_eeh_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, proc_eeh_show, NULL);
+}
+
+static const struct file_operations proc_eeh_operations = {
+ .open = proc_eeh_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+#ifdef CONFIG_DEBUG_FS
+static int eeh_enable_dbgfs_set(void *data, u64 val)
+{
+ if (val)
+ eeh_subsystem_flags &= ~EEH_FORCE_DISABLED;
+ else
+ eeh_subsystem_flags |= EEH_FORCE_DISABLED;
+
+ /* Notify the backend */
+ if (eeh_ops->post_init)
+ eeh_ops->post_init();
+
+ return 0;
+}
+
+static int eeh_enable_dbgfs_get(void *data, u64 *val)
+{
+ if (eeh_enabled())
+ *val = 0x1ul;
+ else
+ *val = 0x0ul;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
+ eeh_enable_dbgfs_set, "0x%llx\n");
+#endif
+
+static int __init eeh_init_proc(void)
+{
+ if (machine_is(pseries) || machine_is(powernv)) {
+ proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
+#ifdef CONFIG_DEBUG_FS
+ debugfs_create_file("eeh_enable", 0600,
+ powerpc_debugfs_root, NULL,
+ &eeh_enable_dbgfs_ops);
+#endif
+ }
+
+ return 0;
+}
+__initcall(eeh_init_proc);
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
new file mode 100644
index 00000000000..e8c9fd546a5
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -0,0 +1,310 @@
+/*
+ * PCI address cache; allows the lookup of PCI devices based on I/O address
+ *
+ * Copyright IBM Corporation 2004
+ * Copyright Linas Vepstas <linas@austin.ibm.com> 2004
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+
+/**
+ * The pci address cache subsystem. This subsystem places
+ * PCI device address resources into a red-black tree, sorted
+ * according to the address range, so that given only an i/o
+ * address, the corresponding PCI device can be **quickly**
+ * found. It is safe to perform an address lookup in an interrupt
+ * context; this ability is an important feature.
+ *
+ * Currently, the only customer of this code is the EEH subsystem;
+ * thus, this code has been somewhat tailored to suit EEH better.
+ * In particular, the cache does *not* hold the addresses of devices
+ * for which EEH is not enabled.
+ *
+ * (Implementation Note: The RB tree seems to be better/faster
+ * than any hash algo I could think of for this problem, even
+ * with the penalty of slow pointer chases for d-cache misses).
+ */
+struct pci_io_addr_range {
+ struct rb_node rb_node;
+ unsigned long addr_lo;
+ unsigned long addr_hi;
+ struct eeh_dev *edev;
+ struct pci_dev *pcidev;
+ unsigned int flags;
+};
+
+static struct pci_io_addr_cache {
+ struct rb_root rb_root;
+ spinlock_t piar_lock;
+} pci_io_addr_cache_root;
+
+static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr)
+{
+ struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
+
+ while (n) {
+ struct pci_io_addr_range *piar;
+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+
+ if (addr < piar->addr_lo)
+ n = n->rb_left;
+ else if (addr > piar->addr_hi)
+ n = n->rb_right;
+ else
+ return piar->edev;
+ }
+
+ return NULL;
+}
+
+/**
+ * eeh_addr_cache_get_dev - Get device, given only address
+ * @addr: mmio (PIO) phys address or i/o port number
+ *
+ * Given an mmio phys address, or a port number, find a pci device
+ * that implements this address. Be sure to pci_dev_put the device
+ * when finished. I/O port numbers are assumed to be offset
+ * from zero (that is, they do *not* have pci_io_addr added in).
+ * It is safe to call this function within an interrupt.
+ */
+struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr)
+{
+ struct eeh_dev *edev;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+ edev = __eeh_addr_cache_get_device(addr);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+ return edev;
+}
+
+#ifdef DEBUG
+/*
+ * Handy-dandy debug print routine, does nothing more
+ * than print out the contents of our addr cache.
+ */
+static void eeh_addr_cache_print(struct pci_io_addr_cache *cache)
+{
+ struct rb_node *n;
+ int cnt = 0;
+
+ n = rb_first(&cache->rb_root);
+ while (n) {
+ struct pci_io_addr_range *piar;
+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+ pr_debug("PCI: %s addr range %d [%lx-%lx]: %s\n",
+ (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
+ piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev));
+ cnt++;
+ n = rb_next(n);
+ }
+}
+#endif
+
+/* Insert address range into the rb tree. */
+static struct pci_io_addr_range *
+eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
+ unsigned long ahi, unsigned int flags)
+{
+ struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ struct pci_io_addr_range *piar;
+
+ /* Walk tree, find a place to insert into tree */
+ while (*p) {
+ parent = *p;
+ piar = rb_entry(parent, struct pci_io_addr_range, rb_node);
+ if (ahi < piar->addr_lo) {
+ p = &parent->rb_left;
+ } else if (alo > piar->addr_hi) {
+ p = &parent->rb_right;
+ } else {
+ if (dev != piar->pcidev ||
+ alo != piar->addr_lo || ahi != piar->addr_hi) {
+ pr_warning("PIAR: overlapping address range\n");
+ }
+ return piar;
+ }
+ }
+ piar = kzalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
+ if (!piar)
+ return NULL;
+
+ piar->addr_lo = alo;
+ piar->addr_hi = ahi;
+ piar->edev = pci_dev_to_eeh_dev(dev);
+ piar->pcidev = dev;
+ piar->flags = flags;
+
+#ifdef DEBUG
+ pr_debug("PIAR: insert range=[%lx:%lx] dev=%s\n",
+ alo, ahi, pci_name(dev));
+#endif
+
+ rb_link_node(&piar->rb_node, parent, p);
+ rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
+
+ return piar;
+}
+
+static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
+{
+ struct device_node *dn;
+ struct eeh_dev *edev;
+ int i;
+
+ dn = pci_device_to_OF_node(dev);
+ if (!dn) {
+ pr_warning("PCI: no pci dn found for dev=%s\n", pci_name(dev));
+ return;
+ }
+
+ edev = of_node_to_eeh_dev(dn);
+ if (!edev) {
+ pr_warning("PCI: no EEH dev found for dn=%s\n",
+ dn->full_name);
+ return;
+ }
+
+ /* Skip any devices for which EEH is not enabled. */
+ if (!eeh_probe_mode_dev() && !edev->pe) {
+#ifdef DEBUG
+ pr_info("PCI: skip building address cache for=%s - %s\n",
+ pci_name(dev), dn->full_name);
+#endif
+ return;
+ }
+
+ /* Walk resources on this device, poke them into the tree */
+ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+ unsigned long start = pci_resource_start(dev,i);
+ unsigned long end = pci_resource_end(dev,i);
+ unsigned int flags = pci_resource_flags(dev,i);
+
+ /* We are interested only bus addresses, not dma or other stuff */
+ if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))
+ continue;
+ if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
+ continue;
+ eeh_addr_cache_insert(dev, start, end, flags);
+ }
+}
+
+/**
+ * eeh_addr_cache_insert_dev - Add a device to the address cache
+ * @dev: PCI device whose I/O addresses we are interested in.
+ *
+ * In order to support the fast lookup of devices based on addresses,
+ * we maintain a cache of devices that can be quickly searched.
+ * This routine adds a device to that cache.
+ */
+void eeh_addr_cache_insert_dev(struct pci_dev *dev)
+{
+ unsigned long flags;
+
+ /* Ignore PCI bridges */
+ if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)
+ return;
+
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+ __eeh_addr_cache_insert_dev(dev);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+}
+
+static inline void __eeh_addr_cache_rmv_dev(struct pci_dev *dev)
+{
+ struct rb_node *n;
+
+restart:
+ n = rb_first(&pci_io_addr_cache_root.rb_root);
+ while (n) {
+ struct pci_io_addr_range *piar;
+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);
+
+ if (piar->pcidev == dev) {
+ rb_erase(n, &pci_io_addr_cache_root.rb_root);
+ kfree(piar);
+ goto restart;
+ }
+ n = rb_next(n);
+ }
+}
+
+/**
+ * eeh_addr_cache_rmv_dev - remove pci device from addr cache
+ * @dev: device to remove
+ *
+ * Remove a device from the addr-cache tree.
+ * This is potentially expensive, since it will walk
+ * the tree multiple times (once per resource).
+ * But so what; device removal doesn't need to be that fast.
+ */
+void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+ __eeh_addr_cache_rmv_dev(dev);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
+}
+
+/**
+ * eeh_addr_cache_build - Build a cache of I/O addresses
+ *
+ * Build a cache of pci i/o addresses. This cache will be used to
+ * find the pci device that corresponds to a given address.
+ * This routine scans all pci busses to build the cache.
+ * Must be run late in boot process, after the pci controllers
+ * have been scanned for devices (after all device resources are known).
+ */
+void eeh_addr_cache_build(void)
+{
+ struct device_node *dn;
+ struct eeh_dev *edev;
+ struct pci_dev *dev = NULL;
+
+ spin_lock_init(&pci_io_addr_cache_root.piar_lock);
+
+ for_each_pci_dev(dev) {
+ dn = pci_device_to_OF_node(dev);
+ if (!dn)
+ continue;
+
+ edev = of_node_to_eeh_dev(dn);
+ if (!edev)
+ continue;
+
+ dev->dev.archdata.edev = edev;
+ edev->pdev = dev;
+
+ eeh_addr_cache_insert_dev(dev);
+ eeh_sysfs_add_device(dev);
+ }
+
+#ifdef DEBUG
+ /* Verify tree built up above, echo back the list of addrs. */
+ eeh_addr_cache_print(&pci_io_addr_cache_root);
+#endif
+}
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
new file mode 100644
index 00000000000..1efa28f5fc5
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -0,0 +1,112 @@
+/*
+ * The file intends to implement dynamic creation of EEH device, which will
+ * be bound with OF node and PCI device simutaneously. The EEH devices would
+ * be foundamental information for EEH core components to work proerly. Besides,
+ * We have to support multiple situations where dynamic creation of EEH device
+ * is required:
+ *
+ * 1) Before PCI emunation starts, we need create EEH devices according to the
+ * PCI sensitive OF nodes.
+ * 2) When PCI emunation is done, we need do the binding between PCI device and
+ * the associated EEH device.
+ * 3) DR (Dynamic Reconfiguration) would create PCI sensitive OF node. EEH device
+ * will be created while PCI sensitive OF node is detected from DR.
+ * 4) PCI hotplug needs redoing the binding between PCI device and EEH device. If
+ * PHB is newly inserted, we also need create EEH devices accordingly.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/export.h>
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+/**
+ * eeh_dev_init - Create EEH device according to OF node
+ * @dn: device node
+ * @data: PHB
+ *
+ * It will create EEH device according to the given OF node. The function
+ * might be called by PCI emunation, DR, PHB hotplug.
+ */
+void *eeh_dev_init(struct device_node *dn, void *data)
+{
+ struct pci_controller *phb = data;
+ struct eeh_dev *edev;
+
+ /* Allocate EEH device */
+ edev = kzalloc(sizeof(*edev), GFP_KERNEL);
+ if (!edev) {
+ pr_warning("%s: out of memory\n", __func__);
+ return NULL;
+ }
+
+ /* Associate EEH device with OF node */
+ PCI_DN(dn)->edev = edev;
+ edev->dn = dn;
+ edev->phb = phb;
+ INIT_LIST_HEAD(&edev->list);
+
+ return NULL;
+}
+
+/**
+ * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB
+ * @phb: PHB
+ *
+ * Scan the PHB OF node and its child association, then create the
+ * EEH devices accordingly
+ */
+void eeh_dev_phb_init_dynamic(struct pci_controller *phb)
+{
+ struct device_node *dn = phb->dn;
+
+ /* EEH PE for PHB */
+ eeh_phb_pe_create(phb);
+
+ /* EEH device for PHB */
+ eeh_dev_init(dn, phb);
+
+ /* EEH devices for children OF nodes */
+ traverse_pci_devices(dn, eeh_dev_init, phb);
+}
+
+/**
+ * eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs
+ *
+ * Scan all the existing PHBs and create EEH devices for their OF
+ * nodes and their children OF nodes
+ */
+static int __init eeh_dev_phb_init(void)
+{
+ struct pci_controller *phb, *tmp;
+
+ list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
+ eeh_dev_phb_init_dynamic(phb);
+
+ pr_info("EEH: devices created\n");
+
+ return 0;
+}
+
+core_initcall(eeh_dev_phb_init);
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
new file mode 100644
index 00000000000..420da61d4ce
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -0,0 +1,870 @@
+/*
+ * PCI Error Recovery Driver for RPA-compliant PPC64 platform.
+ * Copyright IBM Corp. 2004 2005
+ * Copyright Linas Vepstas <linas@linas.org> 2004, 2005
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
+ */
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/ppc-pci.h>
+#include <asm/pci-bridge.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+
+/**
+ * eeh_pcid_name - Retrieve name of PCI device driver
+ * @pdev: PCI device
+ *
+ * This routine is used to retrieve the name of PCI device driver
+ * if that's valid.
+ */
+static inline const char *eeh_pcid_name(struct pci_dev *pdev)
+{
+ if (pdev && pdev->dev.driver)
+ return pdev->dev.driver->name;
+ return "";
+}
+
+/**
+ * eeh_pcid_get - Get the PCI device driver
+ * @pdev: PCI device
+ *
+ * The function is used to retrieve the PCI device driver for
+ * the indicated PCI device. Besides, we will increase the reference
+ * of the PCI device driver to prevent that being unloaded on
+ * the fly. Otherwise, kernel crash would be seen.
+ */
+static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
+{
+ if (!pdev || !pdev->driver)
+ return NULL;
+
+ if (!try_module_get(pdev->driver->driver.owner))
+ return NULL;
+
+ return pdev->driver;
+}
+
+/**
+ * eeh_pcid_put - Dereference on the PCI device driver
+ * @pdev: PCI device
+ *
+ * The function is called to do dereference on the PCI device
+ * driver of the indicated PCI device.
+ */
+static inline void eeh_pcid_put(struct pci_dev *pdev)
+{
+ if (!pdev || !pdev->driver)
+ return;
+
+ module_put(pdev->driver->driver.owner);
+}
+
+#if 0
+static void print_device_node_tree(struct pci_dn *pdn, int dent)
+{
+ int i;
+ struct device_node *pc;
+
+ if (!pdn)
+ return;
+ for (i = 0; i < dent; i++)
+ printk(" ");
+ printk("dn=%s mode=%x \tcfg_addr=%x pe_addr=%x \tfull=%s\n",
+ pdn->node->name, pdn->eeh_mode, pdn->eeh_config_addr,
+ pdn->eeh_pe_config_addr, pdn->node->full_name);
+ dent += 3;
+ pc = pdn->node->child;
+ while (pc) {
+ print_device_node_tree(PCI_DN(pc), dent);
+ pc = pc->sibling;
+ }
+}
+#endif
+
+/**
+ * eeh_disable_irq - Disable interrupt for the recovering device
+ * @dev: PCI device
+ *
+ * This routine must be called when reporting temporary or permanent
+ * error to the particular PCI device to disable interrupt of that
+ * device. If the device has enabled MSI or MSI-X interrupt, we needn't
+ * do real work because EEH should freeze DMA transfers for those PCI
+ * devices encountering EEH errors, which includes MSI or MSI-X.
+ */
+static void eeh_disable_irq(struct pci_dev *dev)
+{
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
+
+ /* Don't disable MSI and MSI-X interrupts. They are
+ * effectively disabled by the DMA Stopped state
+ * when an EEH error occurs.
+ */
+ if (dev->msi_enabled || dev->msix_enabled)
+ return;
+
+ if (!irq_has_action(dev->irq))
+ return;
+
+ edev->mode |= EEH_DEV_IRQ_DISABLED;
+ disable_irq_nosync(dev->irq);
+}
+
+/**
+ * eeh_enable_irq - Enable interrupt for the recovering device
+ * @dev: PCI device
+ *
+ * This routine must be called to enable interrupt while failed
+ * device could be resumed.
+ */
+static void eeh_enable_irq(struct pci_dev *dev)
+{
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
+
+ if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
+ edev->mode &= ~EEH_DEV_IRQ_DISABLED;
+ /*
+ * FIXME !!!!!
+ *
+ * This is just ass backwards. This maze has
+ * unbalanced irq_enable/disable calls. So instead of
+ * finding the root cause it works around the warning
+ * in the irq_enable code by conditionally calling
+ * into it.
+ *
+ * That's just wrong.The warning in the core code is
+ * there to tell people to fix their assymetries in
+ * their own code, not by abusing the core information
+ * to avoid it.
+ *
+ * I so wish that the assymetry would be the other way
+ * round and a few more irq_disable calls render that
+ * shit unusable forever.
+ *
+ * tglx
+ */
+ if (irqd_irq_disabled(irq_get_irq_data(dev->irq)))
+ enable_irq(dev->irq);
+ }
+}
+
+static bool eeh_dev_removed(struct eeh_dev *edev)
+{
+ /* EEH device removed ? */
+ if (!edev || (edev->mode & EEH_DEV_REMOVED))
+ return true;
+
+ return false;
+}
+
+/**
+ * eeh_report_error - Report pci error to each device driver
+ * @data: eeh device
+ * @userdata: return value
+ *
+ * Report an EEH error to each device driver, collect up and
+ * merge the device driver responses. Cumulative response
+ * passed back in "userdata".
+ */
+static void *eeh_report_error(void *data, void *userdata)
+{
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ enum pci_ers_result rc, *res = userdata;
+ struct pci_driver *driver;
+
+ if (!dev || eeh_dev_removed(edev))
+ return NULL;
+ dev->error_state = pci_channel_io_frozen;
+
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
+
+ eeh_disable_irq(dev);
+
+ if (!driver->err_handler ||
+ !driver->err_handler->error_detected) {
+ eeh_pcid_put(dev);
+ return NULL;
+ }
+
+ rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
+
+ /* A driver that needs a reset trumps all others */
+ if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
+ if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+
+ eeh_pcid_put(dev);
+ return NULL;
+}
+
+/**
+ * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
+ * @data: eeh device
+ * @userdata: return value
+ *
+ * Tells each device driver that IO ports, MMIO and config space I/O
+ * are now enabled. Collects up and merges the device driver responses.
+ * Cumulative response passed back in "userdata".
+ */
+static void *eeh_report_mmio_enabled(void *data, void *userdata)
+{
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ enum pci_ers_result rc, *res = userdata;
+ struct pci_driver *driver;
+
+ if (!dev || eeh_dev_removed(edev))
+ return NULL;
+
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
+
+ if (!driver->err_handler ||
+ !driver->err_handler->mmio_enabled ||
+ (edev->mode & EEH_DEV_NO_HANDLER)) {
+ eeh_pcid_put(dev);
+ return NULL;
+ }
+
+ rc = driver->err_handler->mmio_enabled(dev);
+
+ /* A driver that needs a reset trumps all others */
+ if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
+ if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+
+ eeh_pcid_put(dev);
+ return NULL;
+}
+
+/**
+ * eeh_report_reset - Tell device that slot has been reset
+ * @data: eeh device
+ * @userdata: return value
+ *
+ * This routine must be called while EEH tries to reset particular
+ * PCI device so that the associated PCI device driver could take
+ * some actions, usually to save data the driver needs so that the
+ * driver can work again while the device is recovered.
+ */
+static void *eeh_report_reset(void *data, void *userdata)
+{
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ enum pci_ers_result rc, *res = userdata;
+ struct pci_driver *driver;
+
+ if (!dev || eeh_dev_removed(edev))
+ return NULL;
+ dev->error_state = pci_channel_io_normal;
+
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
+
+ eeh_enable_irq(dev);
+
+ if (!driver->err_handler ||
+ !driver->err_handler->slot_reset ||
+ (edev->mode & EEH_DEV_NO_HANDLER)) {
+ eeh_pcid_put(dev);
+ return NULL;
+ }
+
+ rc = driver->err_handler->slot_reset(dev);
+ if ((*res == PCI_ERS_RESULT_NONE) ||
+ (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc;
+ if (*res == PCI_ERS_RESULT_DISCONNECT &&
+ rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
+
+ eeh_pcid_put(dev);
+ return NULL;
+}
+
+/**
+ * eeh_report_resume - Tell device to resume normal operations
+ * @data: eeh device
+ * @userdata: return value
+ *
+ * This routine must be called to notify the device driver that it
+ * could resume so that the device driver can do some initialization
+ * to make the recovered device work again.
+ */
+static void *eeh_report_resume(void *data, void *userdata)
+{
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ struct pci_driver *driver;
+
+ if (!dev || eeh_dev_removed(edev))
+ return NULL;
+ dev->error_state = pci_channel_io_normal;
+
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
+
+ eeh_enable_irq(dev);
+
+ if (!driver->err_handler ||
+ !driver->err_handler->resume ||
+ (edev->mode & EEH_DEV_NO_HANDLER)) {
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+ eeh_pcid_put(dev);
+ return NULL;
+ }
+
+ driver->err_handler->resume(dev);
+
+ eeh_pcid_put(dev);
+ return NULL;
+}
+
+/**
+ * eeh_report_failure - Tell device driver that device is dead.
+ * @data: eeh device
+ * @userdata: return value
+ *
+ * This informs the device driver that the device is permanently
+ * dead, and that no further recovery attempts will be made on it.
+ */
+static void *eeh_report_failure(void *data, void *userdata)
+{
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ struct pci_driver *driver;
+
+ if (!dev || eeh_dev_removed(edev))
+ return NULL;
+ dev->error_state = pci_channel_io_perm_failure;
+
+ driver = eeh_pcid_get(dev);
+ if (!driver) return NULL;
+
+ eeh_disable_irq(dev);
+
+ if (!driver->err_handler ||
+ !driver->err_handler->error_detected) {
+ eeh_pcid_put(dev);
+ return NULL;
+ }
+
+ driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
+
+ eeh_pcid_put(dev);
+ return NULL;
+}
+
+static void *eeh_rmv_device(void *data, void *userdata)
+{
+ struct pci_driver *driver;
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+ int *removed = (int *)userdata;
+
+ /*
+ * Actually, we should remove the PCI bridges as well.
+ * However, that's lots of complexity to do that,
+ * particularly some of devices under the bridge might
+ * support EEH. So we just care about PCI devices for
+ * simplicity here.
+ */
+ if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
+ return NULL;
+
+ /*
+ * We rely on count-based pcibios_release_device() to
+ * detach permanently offlined PEs. Unfortunately, that's
+ * not reliable enough. We might have the permanently
+ * offlined PEs attached, but we needn't take care of
+ * them and their child devices.
+ */
+ if (eeh_dev_removed(edev))
+ return NULL;
+
+ driver = eeh_pcid_get(dev);
+ if (driver) {
+ eeh_pcid_put(dev);
+ if (driver->err_handler)
+ return NULL;
+ }
+
+ /* Remove it from PCI subsystem */
+ pr_debug("EEH: Removing %s without EEH sensitive driver\n",
+ pci_name(dev));
+ edev->bus = dev->bus;
+ edev->mode |= EEH_DEV_DISCONNECTED;
+ (*removed)++;
+
+ pci_lock_rescan_remove();
+ pci_stop_and_remove_bus_device(dev);
+ pci_unlock_rescan_remove();
+
+ return NULL;
+}
+
+static void *eeh_pe_detach_dev(void *data, void *userdata)
+{
+ struct eeh_pe *pe = (struct eeh_pe *)data;
+ struct eeh_dev *edev, *tmp;
+
+ eeh_pe_for_each_dev(pe, edev, tmp) {
+ if (!(edev->mode & EEH_DEV_DISCONNECTED))
+ continue;
+
+ edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
+ eeh_rmv_from_parent_pe(edev);
+ }
+
+ return NULL;
+}
+
+/*
+ * Explicitly clear PE's frozen state for PowerNV where
+ * we have frozen PE until BAR restore is completed. It's
+ * harmless to clear it for pSeries. To be consistent with
+ * PE reset (for 3 times), we try to clear the frozen state
+ * for 3 times as well.
+ */
+static void *__eeh_clear_pe_frozen_state(void *data, void *flag)
+{
+ struct eeh_pe *pe = (struct eeh_pe *)data;
+ int i, rc;
+
+ for (i = 0; i < 3; i++) {
+ rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+ if (rc)
+ continue;
+ rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
+ if (!rc)
+ break;
+ }
+
+ /* The PE has been isolated, clear it */
+ if (rc) {
+ pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n",
+ __func__, pe->phb->global_number, pe->addr, rc);
+ return (void *)pe;
+ }
+
+ return NULL;
+}
+
+static int eeh_clear_pe_frozen_state(struct eeh_pe *pe)
+{
+ void *rc;
+
+ rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL);
+ if (!rc)
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+
+ return rc ? -EIO : 0;
+}
+
+/**
+ * eeh_reset_device - Perform actual reset of a pci slot
+ * @pe: EEH PE
+ * @bus: PCI bus corresponding to the isolcated slot
+ *
+ * This routine must be called to do reset on the indicated PE.
+ * During the reset, udev might be invoked because those affected
+ * PCI devices will be removed and then added.
+ */
+static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
+{
+ struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
+ struct timeval tstamp;
+ int cnt, rc, removed = 0;
+
+ /* pcibios will clear the counter; save the value */
+ cnt = pe->freeze_count;
+ tstamp = pe->tstamp;
+
+ /*
+ * We don't remove the corresponding PE instances because
+ * we need the information afterwords. The attached EEH
+ * devices are expected to be attached soon when calling
+ * into pcibios_add_pci_devices().
+ */
+ eeh_pe_state_mark(pe, EEH_PE_KEEP);
+ if (bus) {
+ pci_lock_rescan_remove();
+ pcibios_remove_pci_devices(bus);
+ pci_unlock_rescan_remove();
+ } else if (frozen_bus) {
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
+ }
+
+ /*
+ * Reset the pci controller. (Asserts RST#; resets config space).
+ * Reconfigure bridges and devices. Don't try to bring the system
+ * up if the reset failed for some reason.
+ *
+ * During the reset, it's very dangerous to have uncontrolled PCI
+ * config accesses. So we prefer to block them. However, controlled
+ * PCI config accesses initiated from EEH itself are allowed.
+ */
+ eeh_pe_state_mark(pe, EEH_PE_RESET);
+ rc = eeh_reset_pe(pe);
+ if (rc) {
+ eeh_pe_state_clear(pe, EEH_PE_RESET);
+ return rc;
+ }
+
+ pci_lock_rescan_remove();
+
+ /* Restore PE */
+ eeh_ops->configure_bridge(pe);
+ eeh_pe_restore_bars(pe);
+ eeh_pe_state_clear(pe, EEH_PE_RESET);
+
+ /* Clear frozen state */
+ rc = eeh_clear_pe_frozen_state(pe);
+ if (rc)
+ return rc;
+
+ /* Give the system 5 seconds to finish running the user-space
+ * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes,
+ * this is a hack, but if we don't do this, and try to bring
+ * the device up before the scripts have taken it down,
+ * potentially weird things happen.
+ */
+ if (bus) {
+ pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
+ ssleep(5);
+
+ /*
+ * The EEH device is still connected with its parent
+ * PE. We should disconnect it so the binding can be
+ * rebuilt when adding PCI devices.
+ */
+ eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
+ pcibios_add_pci_devices(bus);
+ } else if (frozen_bus && removed) {
+ pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
+ ssleep(5);
+
+ eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
+ pcibios_add_pci_devices(frozen_bus);
+ }
+ eeh_pe_state_clear(pe, EEH_PE_KEEP);
+
+ pe->tstamp = tstamp;
+ pe->freeze_count = cnt;
+
+ pci_unlock_rescan_remove();
+ return 0;
+}
+
+/* The longest amount of time to wait for a pci device
+ * to come back on line, in seconds.
+ */
+#define MAX_WAIT_FOR_RECOVERY 300
+
+static void eeh_handle_normal_event(struct eeh_pe *pe)
+{
+ struct pci_bus *frozen_bus;
+ int rc = 0;
+ enum pci_ers_result result = PCI_ERS_RESULT_NONE;
+
+ frozen_bus = eeh_pe_bus_get(pe);
+ if (!frozen_bus) {
+ pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n",
+ __func__, pe->phb->global_number, pe->addr);
+ return;
+ }
+
+ eeh_pe_update_time_stamp(pe);
+ pe->freeze_count++;
+ if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES)
+ goto excess_failures;
+ pr_warning("EEH: This PCI device has failed %d times in the last hour\n",
+ pe->freeze_count);
+
+ /* Walk the various device drivers attached to this slot through
+ * a reset sequence, giving each an opportunity to do what it needs
+ * to accomplish the reset. Each child gets a report of the
+ * status ... if any child can't handle the reset, then the entire
+ * slot is dlpar removed and added.
+ */
+ pr_info("EEH: Notify device drivers to shutdown\n");
+ eeh_pe_dev_traverse(pe, eeh_report_error, &result);
+
+ /* Get the current PCI slot state. This can take a long time,
+ * sometimes over 3 seconds for certain systems.
+ */
+ rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
+ if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
+ pr_warning("EEH: Permanent failure\n");
+ goto hard_fail;
+ }
+
+ /* Since rtas may enable MMIO when posting the error log,
+ * don't post the error log until after all dev drivers
+ * have been informed.
+ */
+ pr_info("EEH: Collect temporary log\n");
+ eeh_slot_error_detail(pe, EEH_LOG_TEMP);
+
+ /* If all device drivers were EEH-unaware, then shut
+ * down all of the device drivers, and hope they
+ * go down willingly, without panicing the system.
+ */
+ if (result == PCI_ERS_RESULT_NONE) {
+ pr_info("EEH: Reset with hotplug activity\n");
+ rc = eeh_reset_device(pe, frozen_bus);
+ if (rc) {
+ pr_warning("%s: Unable to reset, err=%d\n",
+ __func__, rc);
+ goto hard_fail;
+ }
+ }
+
+ /* If all devices reported they can proceed, then re-enable MMIO */
+ if (result == PCI_ERS_RESULT_CAN_RECOVER) {
+ pr_info("EEH: Enable I/O for affected devices\n");
+ rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+
+ if (rc < 0)
+ goto hard_fail;
+ if (rc) {
+ result = PCI_ERS_RESULT_NEED_RESET;
+ } else {
+ pr_info("EEH: Notify device drivers to resume I/O\n");
+ eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
+ }
+ }
+
+ /* If all devices reported they can proceed, then re-enable DMA */
+ if (result == PCI_ERS_RESULT_CAN_RECOVER) {
+ pr_info("EEH: Enabled DMA for affected devices\n");
+ rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
+
+ if (rc < 0)
+ goto hard_fail;
+ if (rc) {
+ result = PCI_ERS_RESULT_NEED_RESET;
+ } else {
+ /*
+ * We didn't do PE reset for the case. The PE
+ * is still in frozen state. Clear it before
+ * resuming the PE.
+ */
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+ result = PCI_ERS_RESULT_RECOVERED;
+ }
+ }
+
+ /* If any device has a hard failure, then shut off everything. */
+ if (result == PCI_ERS_RESULT_DISCONNECT) {
+ pr_warning("EEH: Device driver gave up\n");
+ goto hard_fail;
+ }
+
+ /* If any device called out for a reset, then reset the slot */
+ if (result == PCI_ERS_RESULT_NEED_RESET) {
+ pr_info("EEH: Reset without hotplug activity\n");
+ rc = eeh_reset_device(pe, NULL);
+ if (rc) {
+ pr_warning("%s: Cannot reset, err=%d\n",
+ __func__, rc);
+ goto hard_fail;
+ }
+
+ pr_info("EEH: Notify device drivers "
+ "the completion of reset\n");
+ result = PCI_ERS_RESULT_NONE;
+ eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
+ }
+
+ /* All devices should claim they have recovered by now. */
+ if ((result != PCI_ERS_RESULT_RECOVERED) &&
+ (result != PCI_ERS_RESULT_NONE)) {
+ pr_warning("EEH: Not recovered\n");
+ goto hard_fail;
+ }
+
+ /* Tell all device drivers that they can resume operations */
+ pr_info("EEH: Notify device driver to resume\n");
+ eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
+
+ return;
+
+excess_failures:
+ /*
+ * About 90% of all real-life EEH failures in the field
+ * are due to poorly seated PCI cards. Only 10% or so are
+ * due to actual, failed cards.
+ */
+ pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n"
+ "last hour and has been permanently disabled.\n"
+ "Please try reseating or replacing it.\n",
+ pe->phb->global_number, pe->addr,
+ pe->freeze_count);
+ goto perm_error;
+
+hard_fail:
+ pr_err("EEH: Unable to recover from failure from PHB#%d-PE#%x.\n"
+ "Please try reseating or replacing it\n",
+ pe->phb->global_number, pe->addr);
+
+perm_error:
+ eeh_slot_error_detail(pe, EEH_LOG_PERM);
+
+ /* Notify all devices that they're about to go down. */
+ eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
+
+ /* Mark the PE to be removed permanently */
+ pe->freeze_count = EEH_MAX_ALLOWED_FREEZES + 1;
+
+ /*
+ * Shut down the device drivers for good. We mark
+ * all removed devices correctly to avoid access
+ * the their PCI config any more.
+ */
+ if (frozen_bus) {
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+
+ pci_lock_rescan_remove();
+ pcibios_remove_pci_devices(frozen_bus);
+ pci_unlock_rescan_remove();
+ }
+}
+
+static void eeh_handle_special_event(void)
+{
+ struct eeh_pe *pe, *phb_pe;
+ struct pci_bus *bus;
+ struct pci_controller *hose;
+ unsigned long flags;
+ int rc;
+
+
+ do {
+ rc = eeh_ops->next_error(&pe);
+
+ switch (rc) {
+ case EEH_NEXT_ERR_DEAD_IOC:
+ /* Mark all PHBs in dead state */
+ eeh_serialize_lock(&flags);
+
+ /* Purge all events */
+ eeh_remove_event(NULL, true);
+
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb_pe = eeh_phb_pe_get(hose);
+ if (!phb_pe) continue;
+
+ eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
+ }
+
+ eeh_serialize_unlock(flags);
+
+ break;
+ case EEH_NEXT_ERR_FROZEN_PE:
+ case EEH_NEXT_ERR_FENCED_PHB:
+ case EEH_NEXT_ERR_DEAD_PHB:
+ /* Mark the PE in fenced state */
+ eeh_serialize_lock(&flags);
+
+ /* Purge all events of the PHB */
+ eeh_remove_event(pe, true);
+
+ if (rc == EEH_NEXT_ERR_DEAD_PHB)
+ eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+ else
+ eeh_pe_state_mark(pe,
+ EEH_PE_ISOLATED | EEH_PE_RECOVERING);
+
+ eeh_serialize_unlock(flags);
+
+ break;
+ case EEH_NEXT_ERR_NONE:
+ return;
+ default:
+ pr_warn("%s: Invalid value %d from next_error()\n",
+ __func__, rc);
+ return;
+ }
+
+ /*
+ * For fenced PHB and frozen PE, it's handled as normal
+ * event. We have to remove the affected PHBs for dead
+ * PHB and IOC
+ */
+ if (rc == EEH_NEXT_ERR_FROZEN_PE ||
+ rc == EEH_NEXT_ERR_FENCED_PHB) {
+ eeh_handle_normal_event(pe);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ } else {
+ pci_lock_rescan_remove();
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb_pe = eeh_phb_pe_get(hose);
+ if (!phb_pe ||
+ !(phb_pe->state & EEH_PE_ISOLATED) ||
+ (phb_pe->state & EEH_PE_RECOVERING))
+ continue;
+
+ /* Notify all devices to be down */
+ bus = eeh_pe_bus_get(phb_pe);
+ eeh_pe_dev_traverse(pe,
+ eeh_report_failure, NULL);
+ pcibios_remove_pci_devices(bus);
+ }
+ pci_unlock_rescan_remove();
+ }
+
+ /*
+ * If we have detected dead IOC, we needn't proceed
+ * any more since all PHBs would have been removed
+ */
+ if (rc == EEH_NEXT_ERR_DEAD_IOC)
+ break;
+ } while (rc != EEH_NEXT_ERR_NONE);
+}
+
+/**
+ * eeh_handle_event - Reset a PCI device after hard lockup.
+ * @pe: EEH PE
+ *
+ * While PHB detects address or data parity errors on particular PCI
+ * slot, the associated PE will be frozen. Besides, DMA's occurring
+ * to wild addresses (which usually happen due to bugs in device
+ * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
+ * #PERR or other misc PCI-related errors also can trigger EEH errors.
+ *
+ * Recovery process consists of unplugging the device driver (which
+ * generated hotplug events to userspace), then issuing a PCI #RST to
+ * the device, then reconfiguring the PCI config space for all bridges
+ * & devices under this slot, and then finally restarting the device
+ * drivers (which cause a second set of hotplug events to go out to
+ * userspace).
+ */
+void eeh_handle_event(struct eeh_pe *pe)
+{
+ if (pe)
+ eeh_handle_normal_event(pe);
+ else
+ eeh_handle_special_event();
+}
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
new file mode 100644
index 00000000000..4eefb6e34db
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -0,0 +1,196 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright (c) 2005 Linas Vepstas <linas@linas.org>
+ */
+
+#include <linux/delay.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/kthread.h>
+#include <asm/eeh_event.h>
+#include <asm/ppc-pci.h>
+
+/** Overview:
+ * EEH error states may be detected within exception handlers;
+ * however, the recovery processing needs to occur asynchronously
+ * in a normal kernel context and not an interrupt context.
+ * This pair of routines creates an event and queues it onto a
+ * work-queue, where a worker thread can drive recovery.
+ */
+
+static DEFINE_SPINLOCK(eeh_eventlist_lock);
+static struct semaphore eeh_eventlist_sem;
+LIST_HEAD(eeh_eventlist);
+
+/**
+ * eeh_event_handler - Dispatch EEH events.
+ * @dummy - unused
+ *
+ * The detection of a frozen slot can occur inside an interrupt,
+ * where it can be hard to do anything about it. The goal of this
+ * routine is to pull these detection events out of the context
+ * of the interrupt handler, and re-dispatch them for processing
+ * at a later time in a normal context.
+ */
+static int eeh_event_handler(void * dummy)
+{
+ unsigned long flags;
+ struct eeh_event *event;
+ struct eeh_pe *pe;
+
+ while (!kthread_should_stop()) {
+ if (down_interruptible(&eeh_eventlist_sem))
+ break;
+
+ /* Fetch EEH event from the queue */
+ spin_lock_irqsave(&eeh_eventlist_lock, flags);
+ event = NULL;
+ if (!list_empty(&eeh_eventlist)) {
+ event = list_entry(eeh_eventlist.next,
+ struct eeh_event, list);
+ list_del(&event->list);
+ }
+ spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
+ if (!event)
+ continue;
+
+ /* We might have event without binding PE */
+ pe = event->pe;
+ if (pe) {
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ if (pe->type & EEH_PE_PHB)
+ pr_info("EEH: Detected error on PHB#%d\n",
+ pe->phb->global_number);
+ else
+ pr_info("EEH: Detected PCI bus error on "
+ "PHB#%d-PE#%x\n",
+ pe->phb->global_number, pe->addr);
+ eeh_handle_event(pe);
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ } else {
+ eeh_handle_event(NULL);
+ }
+
+ kfree(event);
+ }
+
+ return 0;
+}
+
+/**
+ * eeh_event_init - Start kernel thread to handle EEH events
+ *
+ * This routine is called to start the kernel thread for processing
+ * EEH event.
+ */
+int eeh_event_init(void)
+{
+ struct task_struct *t;
+ int ret = 0;
+
+ /* Initialize semaphore */
+ sema_init(&eeh_eventlist_sem, 0);
+
+ t = kthread_run(eeh_event_handler, NULL, "eehd");
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
+ pr_err("%s: Failed to start EEH daemon (%d)\n",
+ __func__, ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * eeh_send_failure_event - Generate a PCI error event
+ * @pe: EEH PE
+ *
+ * This routine can be called within an interrupt context;
+ * the actual event will be delivered in a normal context
+ * (from a workqueue).
+ */
+int eeh_send_failure_event(struct eeh_pe *pe)
+{
+ unsigned long flags;
+ struct eeh_event *event;
+
+ event = kzalloc(sizeof(*event), GFP_ATOMIC);
+ if (!event) {
+ pr_err("EEH: out of memory, event not handled\n");
+ return -ENOMEM;
+ }
+ event->pe = pe;
+
+ /* We may or may not be called in an interrupt context */
+ spin_lock_irqsave(&eeh_eventlist_lock, flags);
+ list_add(&event->list, &eeh_eventlist);
+ spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
+
+ /* For EEH deamon to knick in */
+ up(&eeh_eventlist_sem);
+
+ return 0;
+}
+
+/**
+ * eeh_remove_event - Remove EEH event from the queue
+ * @pe: Event binding to the PE
+ * @force: Event will be removed unconditionally
+ *
+ * On PowerNV platform, we might have subsequent coming events
+ * is part of the former one. For that case, those subsequent
+ * coming events are totally duplicated and unnecessary, thus
+ * they should be removed.
+ */
+void eeh_remove_event(struct eeh_pe *pe, bool force)
+{
+ unsigned long flags;
+ struct eeh_event *event, *tmp;
+
+ /*
+ * If we have NULL PE passed in, we have dead IOC
+ * or we're sure we can report all existing errors
+ * by the caller.
+ *
+ * With "force", the event with associated PE that
+ * have been isolated, the event won't be removed
+ * to avoid event lost.
+ */
+ spin_lock_irqsave(&eeh_eventlist_lock, flags);
+ list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) {
+ if (!force && event->pe &&
+ (event->pe->state & EEH_PE_ISOLATED))
+ continue;
+
+ if (!pe) {
+ list_del(&event->list);
+ kfree(event);
+ } else if (pe->type & EEH_PE_PHB) {
+ if (event->pe && event->pe->phb == pe->phb) {
+ list_del(&event->list);
+ kfree(event);
+ }
+ } else if (event->pe == pe) {
+ list_del(&event->list);
+ kfree(event);
+ }
+ }
+ spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
+}
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
new file mode 100644
index 00000000000..fbd01eba447
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -0,0 +1,887 @@
+/*
+ * The file intends to implement PE based on the information from
+ * platforms. Basically, there have 3 types of PEs: PHB/Bus/Device.
+ * All the PEs should be organized as hierarchy tree. The first level
+ * of the tree will be associated to existing PHBs since the particular
+ * PE is only meaningful in one PHB domain.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+static LIST_HEAD(eeh_phb_pe);
+
+/**
+ * eeh_pe_alloc - Allocate PE
+ * @phb: PCI controller
+ * @type: PE type
+ *
+ * Allocate PE instance dynamically.
+ */
+static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
+{
+ struct eeh_pe *pe;
+
+ /* Allocate PHB PE */
+ pe = kzalloc(sizeof(struct eeh_pe), GFP_KERNEL);
+ if (!pe) return NULL;
+
+ /* Initialize PHB PE */
+ pe->type = type;
+ pe->phb = phb;
+ INIT_LIST_HEAD(&pe->child_list);
+ INIT_LIST_HEAD(&pe->child);
+ INIT_LIST_HEAD(&pe->edevs);
+
+ return pe;
+}
+
+/**
+ * eeh_phb_pe_create - Create PHB PE
+ * @phb: PCI controller
+ *
+ * The function should be called while the PHB is detected during
+ * system boot or PCI hotplug in order to create PHB PE.
+ */
+int eeh_phb_pe_create(struct pci_controller *phb)
+{
+ struct eeh_pe *pe;
+
+ /* Allocate PHB PE */
+ pe = eeh_pe_alloc(phb, EEH_PE_PHB);
+ if (!pe) {
+ pr_err("%s: out of memory!\n", __func__);
+ return -ENOMEM;
+ }
+
+ /* Put it into the list */
+ list_add_tail(&pe->child, &eeh_phb_pe);
+
+ pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number);
+
+ return 0;
+}
+
+/**
+ * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
+ * @phb: PCI controller
+ *
+ * The overall PEs form hierarchy tree. The first layer of the
+ * hierarchy tree is composed of PHB PEs. The function is used
+ * to retrieve the corresponding PHB PE according to the given PHB.
+ */
+struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
+{
+ struct eeh_pe *pe;
+
+ list_for_each_entry(pe, &eeh_phb_pe, child) {
+ /*
+ * Actually, we needn't check the type since
+ * the PE for PHB has been determined when that
+ * was created.
+ */
+ if ((pe->type & EEH_PE_PHB) && pe->phb == phb)
+ return pe;
+ }
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_next - Retrieve the next PE in the tree
+ * @pe: current PE
+ * @root: root PE
+ *
+ * The function is used to retrieve the next PE in the
+ * hierarchy PE tree.
+ */
+static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
+ struct eeh_pe *root)
+{
+ struct list_head *next = pe->child_list.next;
+
+ if (next == &pe->child_list) {
+ while (1) {
+ if (pe == root)
+ return NULL;
+ next = pe->child.next;
+ if (next != &pe->parent->child_list)
+ break;
+ pe = pe->parent;
+ }
+ }
+
+ return list_entry(next, struct eeh_pe, child);
+}
+
+/**
+ * eeh_pe_traverse - Traverse PEs in the specified PHB
+ * @root: root PE
+ * @fn: callback
+ * @flag: extra parameter to callback
+ *
+ * The function is used to traverse the specified PE and its
+ * child PEs. The traversing is to be terminated once the
+ * callback returns something other than NULL, or no more PEs
+ * to be traversed.
+ */
+void *eeh_pe_traverse(struct eeh_pe *root,
+ eeh_traverse_func fn, void *flag)
+{
+ struct eeh_pe *pe;
+ void *ret;
+
+ for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
+ ret = fn(pe, flag);
+ if (ret) return ret;
+ }
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_dev_traverse - Traverse the devices from the PE
+ * @root: EEH PE
+ * @fn: function callback
+ * @flag: extra parameter to callback
+ *
+ * The function is used to traverse the devices of the specified
+ * PE and its child PEs.
+ */
+void *eeh_pe_dev_traverse(struct eeh_pe *root,
+ eeh_traverse_func fn, void *flag)
+{
+ struct eeh_pe *pe;
+ struct eeh_dev *edev, *tmp;
+ void *ret;
+
+ if (!root) {
+ pr_warning("%s: Invalid PE %p\n", __func__, root);
+ return NULL;
+ }
+
+ /* Traverse root PE */
+ for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
+ eeh_pe_for_each_dev(pe, edev, tmp) {
+ ret = fn(edev, flag);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * __eeh_pe_get - Check the PE address
+ * @data: EEH PE
+ * @flag: EEH device
+ *
+ * For one particular PE, it can be identified by PE address
+ * or tranditional BDF address. BDF address is composed of
+ * Bus/Device/Function number. The extra data referred by flag
+ * indicates which type of address should be used.
+ */
+static void *__eeh_pe_get(void *data, void *flag)
+{
+ struct eeh_pe *pe = (struct eeh_pe *)data;
+ struct eeh_dev *edev = (struct eeh_dev *)flag;
+
+ /* Unexpected PHB PE */
+ if (pe->type & EEH_PE_PHB)
+ return NULL;
+
+ /* We prefer PE address */
+ if (edev->pe_config_addr &&
+ (edev->pe_config_addr == pe->addr))
+ return pe;
+
+ /* Try BDF address */
+ if (edev->config_addr &&
+ (edev->config_addr == pe->config_addr))
+ return pe;
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_get - Search PE based on the given address
+ * @edev: EEH device
+ *
+ * Search the corresponding PE based on the specified address which
+ * is included in the eeh device. The function is used to check if
+ * the associated PE has been created against the PE address. It's
+ * notable that the PE address has 2 format: traditional PE address
+ * which is composed of PCI bus/device/function number, or unified
+ * PE address.
+ */
+struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
+{
+ struct eeh_pe *root = eeh_phb_pe_get(edev->phb);
+ struct eeh_pe *pe;
+
+ pe = eeh_pe_traverse(root, __eeh_pe_get, edev);
+
+ return pe;
+}
+
+/**
+ * eeh_pe_get_parent - Retrieve the parent PE
+ * @edev: EEH device
+ *
+ * The whole PEs existing in the system are organized as hierarchy
+ * tree. The function is used to retrieve the parent PE according
+ * to the parent EEH device.
+ */
+static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
+{
+ struct device_node *dn;
+ struct eeh_dev *parent;
+
+ /*
+ * It might have the case for the indirect parent
+ * EEH device already having associated PE, but
+ * the direct parent EEH device doesn't have yet.
+ */
+ dn = edev->dn->parent;
+ while (dn) {
+ /* We're poking out of PCI territory */
+ if (!PCI_DN(dn)) return NULL;
+
+ parent = of_node_to_eeh_dev(dn);
+ /* We're poking out of PCI territory */
+ if (!parent) return NULL;
+
+ if (parent->pe)
+ return parent->pe;
+
+ dn = dn->parent;
+ }
+
+ return NULL;
+}
+
+/**
+ * eeh_add_to_parent_pe - Add EEH device to parent PE
+ * @edev: EEH device
+ *
+ * Add EEH device to the parent PE. If the parent PE already
+ * exists, the PE type will be changed to EEH_PE_BUS. Otherwise,
+ * we have to create new PE to hold the EEH device and the new
+ * PE will be linked to its parent PE as well.
+ */
+int eeh_add_to_parent_pe(struct eeh_dev *edev)
+{
+ struct eeh_pe *pe, *parent;
+
+ /*
+ * Search the PE has been existing or not according
+ * to the PE address. If that has been existing, the
+ * PE should be composed of PCI bus and its subordinate
+ * components.
+ */
+ pe = eeh_pe_get(edev);
+ if (pe && !(pe->type & EEH_PE_INVALID)) {
+ if (!edev->pe_config_addr) {
+ pr_err("%s: PE with addr 0x%x already exists\n",
+ __func__, edev->config_addr);
+ return -EEXIST;
+ }
+
+ /* Mark the PE as type of PCI bus */
+ pe->type = EEH_PE_BUS;
+ edev->pe = pe;
+
+ /* Put the edev to PE */
+ list_add_tail(&edev->list, &pe->edevs);
+ pr_debug("EEH: Add %s to Bus PE#%x\n",
+ edev->dn->full_name, pe->addr);
+
+ return 0;
+ } else if (pe && (pe->type & EEH_PE_INVALID)) {
+ list_add_tail(&edev->list, &pe->edevs);
+ edev->pe = pe;
+ /*
+ * We're running to here because of PCI hotplug caused by
+ * EEH recovery. We need clear EEH_PE_INVALID until the top.
+ */
+ parent = pe;
+ while (parent) {
+ if (!(parent->type & EEH_PE_INVALID))
+ break;
+ parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP);
+ parent = parent->parent;
+ }
+ pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
+ edev->dn->full_name, pe->addr, pe->parent->addr);
+
+ return 0;
+ }
+
+ /* Create a new EEH PE */
+ pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
+ if (!pe) {
+ pr_err("%s: out of memory!\n", __func__);
+ return -ENOMEM;
+ }
+ pe->addr = edev->pe_config_addr;
+ pe->config_addr = edev->config_addr;
+
+ /*
+ * While doing PE reset, we probably hot-reset the
+ * upstream bridge. However, the PCI devices including
+ * the associated EEH devices might be removed when EEH
+ * core is doing recovery. So that won't safe to retrieve
+ * the bridge through downstream EEH device. We have to
+ * trace the parent PCI bus, then the upstream bridge.
+ */
+ if (eeh_probe_mode_dev())
+ pe->bus = eeh_dev_to_pci_dev(edev)->bus;
+
+ /*
+ * Put the new EEH PE into hierarchy tree. If the parent
+ * can't be found, the newly created PE will be attached
+ * to PHB directly. Otherwise, we have to associate the
+ * PE with its parent.
+ */
+ parent = eeh_pe_get_parent(edev);
+ if (!parent) {
+ parent = eeh_phb_pe_get(edev->phb);
+ if (!parent) {
+ pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
+ __func__, edev->phb->global_number);
+ edev->pe = NULL;
+ kfree(pe);
+ return -EEXIST;
+ }
+ }
+ pe->parent = parent;
+
+ /*
+ * Put the newly created PE into the child list and
+ * link the EEH device accordingly.
+ */
+ list_add_tail(&pe->child, &parent->child_list);
+ list_add_tail(&edev->list, &pe->edevs);
+ edev->pe = pe;
+ pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
+ edev->dn->full_name, pe->addr, pe->parent->addr);
+
+ return 0;
+}
+
+/**
+ * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
+ * @edev: EEH device
+ *
+ * The PE hierarchy tree might be changed when doing PCI hotplug.
+ * Also, the PCI devices or buses could be removed from the system
+ * during EEH recovery. So we have to call the function remove the
+ * corresponding PE accordingly if necessary.
+ */
+int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
+{
+ struct eeh_pe *pe, *parent, *child;
+ int cnt;
+
+ if (!edev->pe) {
+ pr_debug("%s: No PE found for EEH device %s\n",
+ __func__, edev->dn->full_name);
+ return -EEXIST;
+ }
+
+ /* Remove the EEH device */
+ pe = edev->pe;
+ edev->pe = NULL;
+ list_del(&edev->list);
+
+ /*
+ * Check if the parent PE includes any EEH devices.
+ * If not, we should delete that. Also, we should
+ * delete the parent PE if it doesn't have associated
+ * child PEs and EEH devices.
+ */
+ while (1) {
+ parent = pe->parent;
+ if (pe->type & EEH_PE_PHB)
+ break;
+
+ if (!(pe->state & EEH_PE_KEEP)) {
+ if (list_empty(&pe->edevs) &&
+ list_empty(&pe->child_list)) {
+ list_del(&pe->child);
+ kfree(pe);
+ } else {
+ break;
+ }
+ } else {
+ if (list_empty(&pe->edevs)) {
+ cnt = 0;
+ list_for_each_entry(child, &pe->child_list, child) {
+ if (!(child->type & EEH_PE_INVALID)) {
+ cnt++;
+ break;
+ }
+ }
+
+ if (!cnt)
+ pe->type |= EEH_PE_INVALID;
+ else
+ break;
+ }
+ }
+
+ pe = parent;
+ }
+
+ return 0;
+}
+
+/**
+ * eeh_pe_update_time_stamp - Update PE's frozen time stamp
+ * @pe: EEH PE
+ *
+ * We have time stamp for each PE to trace its time of getting
+ * frozen in last hour. The function should be called to update
+ * the time stamp on first error of the specific PE. On the other
+ * handle, we needn't account for errors happened in last hour.
+ */
+void eeh_pe_update_time_stamp(struct eeh_pe *pe)
+{
+ struct timeval tstamp;
+
+ if (!pe) return;
+
+ if (pe->freeze_count <= 0) {
+ pe->freeze_count = 0;
+ do_gettimeofday(&pe->tstamp);
+ } else {
+ do_gettimeofday(&tstamp);
+ if (tstamp.tv_sec - pe->tstamp.tv_sec > 3600) {
+ pe->tstamp = tstamp;
+ pe->freeze_count = 0;
+ }
+ }
+}
+
+/**
+ * __eeh_pe_state_mark - Mark the state for the PE
+ * @data: EEH PE
+ * @flag: state
+ *
+ * The function is used to mark the indicated state for the given
+ * PE. Also, the associated PCI devices will be put into IO frozen
+ * state as well.
+ */
+static void *__eeh_pe_state_mark(void *data, void *flag)
+{
+ struct eeh_pe *pe = (struct eeh_pe *)data;
+ int state = *((int *)flag);
+ struct eeh_dev *edev, *tmp;
+ struct pci_dev *pdev;
+
+ /* Keep the state of permanently removed PE intact */
+ if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
+ (state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
+ return NULL;
+
+ pe->state |= state;
+
+ /* Offline PCI devices if applicable */
+ if (state != EEH_PE_ISOLATED)
+ return NULL;
+
+ eeh_pe_for_each_dev(pe, edev, tmp) {
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (pdev)
+ pdev->error_state = pci_channel_io_frozen;
+ }
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_state_mark - Mark specified state for PE and its associated device
+ * @pe: EEH PE
+ *
+ * EEH error affects the current PE and its child PEs. The function
+ * is used to mark appropriate state for the affected PEs and the
+ * associated devices.
+ */
+void eeh_pe_state_mark(struct eeh_pe *pe, int state)
+{
+ eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
+}
+
+static void *__eeh_pe_dev_mode_mark(void *data, void *flag)
+{
+ struct eeh_dev *edev = data;
+ int mode = *((int *)flag);
+
+ edev->mode |= mode;
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_dev_state_mark - Mark state for all device under the PE
+ * @pe: EEH PE
+ *
+ * Mark specific state for all child devices of the PE.
+ */
+void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode)
+{
+ eeh_pe_dev_traverse(pe, __eeh_pe_dev_mode_mark, &mode);
+}
+
+/**
+ * __eeh_pe_state_clear - Clear state for the PE
+ * @data: EEH PE
+ * @flag: state
+ *
+ * The function is used to clear the indicated state from the
+ * given PE. Besides, we also clear the check count of the PE
+ * as well.
+ */
+static void *__eeh_pe_state_clear(void *data, void *flag)
+{
+ struct eeh_pe *pe = (struct eeh_pe *)data;
+ int state = *((int *)flag);
+
+ /* Keep the state of permanently removed PE intact */
+ if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
+ (state & EEH_PE_ISOLATED))
+ return NULL;
+
+ pe->state &= ~state;
+
+ /* Clear check count since last isolation */
+ if (state & EEH_PE_ISOLATED)
+ pe->check_count = 0;
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_state_clear - Clear state for the PE and its children
+ * @pe: PE
+ * @state: state to be cleared
+ *
+ * When the PE and its children has been recovered from error,
+ * we need clear the error state for that. The function is used
+ * for the purpose.
+ */
+void eeh_pe_state_clear(struct eeh_pe *pe, int state)
+{
+ eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
+}
+
+/*
+ * Some PCI bridges (e.g. PLX bridges) have primary/secondary
+ * buses assigned explicitly by firmware, and we probably have
+ * lost that after reset. So we have to delay the check until
+ * the PCI-CFG registers have been restored for the parent
+ * bridge.
+ *
+ * Don't use normal PCI-CFG accessors, which probably has been
+ * blocked on normal path during the stage. So we need utilize
+ * eeh operations, which is always permitted.
+ */
+static void eeh_bridge_check_link(struct eeh_dev *edev,
+ struct device_node *dn)
+{
+ int cap;
+ uint32_t val;
+ int timeout = 0;
+
+ /*
+ * We only check root port and downstream ports of
+ * PCIe switches
+ */
+ if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT)))
+ return;
+
+ pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
+ __func__, edev->phb->global_number,
+ edev->config_addr >> 8,
+ PCI_SLOT(edev->config_addr & 0xFF),
+ PCI_FUNC(edev->config_addr & 0xFF));
+
+ /* Check slot status */
+ cap = edev->pcie_cap;
+ eeh_ops->read_config(dn, cap + PCI_EXP_SLTSTA, 2, &val);
+ if (!(val & PCI_EXP_SLTSTA_PDS)) {
+ pr_debug(" No card in the slot (0x%04x) !\n", val);
+ return;
+ }
+
+ /* Check power status if we have the capability */
+ eeh_ops->read_config(dn, cap + PCI_EXP_SLTCAP, 2, &val);
+ if (val & PCI_EXP_SLTCAP_PCP) {
+ eeh_ops->read_config(dn, cap + PCI_EXP_SLTCTL, 2, &val);
+ if (val & PCI_EXP_SLTCTL_PCC) {
+ pr_debug(" In power-off state, power it on ...\n");
+ val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
+ val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
+ eeh_ops->write_config(dn, cap + PCI_EXP_SLTCTL, 2, val);
+ msleep(2 * 1000);
+ }
+ }
+
+ /* Enable link */
+ eeh_ops->read_config(dn, cap + PCI_EXP_LNKCTL, 2, &val);
+ val &= ~PCI_EXP_LNKCTL_LD;
+ eeh_ops->write_config(dn, cap + PCI_EXP_LNKCTL, 2, val);
+
+ /* Check link */
+ eeh_ops->read_config(dn, cap + PCI_EXP_LNKCAP, 4, &val);
+ if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
+ pr_debug(" No link reporting capability (0x%08x) \n", val);
+ msleep(1000);
+ return;
+ }
+
+ /* Wait the link is up until timeout (5s) */
+ timeout = 0;
+ while (timeout < 5000) {
+ msleep(20);
+ timeout += 20;
+
+ eeh_ops->read_config(dn, cap + PCI_EXP_LNKSTA, 2, &val);
+ if (val & PCI_EXP_LNKSTA_DLLLA)
+ break;
+ }
+
+ if (val & PCI_EXP_LNKSTA_DLLLA)
+ pr_debug(" Link up (%s)\n",
+ (val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB");
+ else
+ pr_debug(" Link not ready (0x%04x)\n", val);
+}
+
+#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
+#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
+
+static void eeh_restore_bridge_bars(struct eeh_dev *edev,
+ struct device_node *dn)
+{
+ int i;
+
+ /*
+ * Device BARs: 0x10 - 0x18
+ * Bus numbers and windows: 0x18 - 0x30
+ */
+ for (i = 4; i < 13; i++)
+ eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
+ /* Rom: 0x38 */
+ eeh_ops->write_config(dn, 14*4, 4, edev->config_space[14]);
+
+ /* Cache line & Latency timer: 0xC 0xD */
+ eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
+ SAVED_BYTE(PCI_CACHE_LINE_SIZE));
+ eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
+ SAVED_BYTE(PCI_LATENCY_TIMER));
+ /* Max latency, min grant, interrupt ping and line: 0x3C */
+ eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
+
+ /* PCI Command: 0x4 */
+ eeh_ops->write_config(dn, PCI_COMMAND, 4, edev->config_space[1]);
+
+ /* Check the PCIe link is ready */
+ eeh_bridge_check_link(edev, dn);
+}
+
+static void eeh_restore_device_bars(struct eeh_dev *edev,
+ struct device_node *dn)
+{
+ int i;
+ u32 cmd;
+
+ for (i = 4; i < 10; i++)
+ eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
+ /* 12 == Expansion ROM Address */
+ eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]);
+
+ eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
+ SAVED_BYTE(PCI_CACHE_LINE_SIZE));
+ eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
+ SAVED_BYTE(PCI_LATENCY_TIMER));
+
+ /* max latency, min grant, interrupt pin and line */
+ eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
+
+ /*
+ * Restore PERR & SERR bits, some devices require it,
+ * don't touch the other command bits
+ */
+ eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd);
+ if (edev->config_space[1] & PCI_COMMAND_PARITY)
+ cmd |= PCI_COMMAND_PARITY;
+ else
+ cmd &= ~PCI_COMMAND_PARITY;
+ if (edev->config_space[1] & PCI_COMMAND_SERR)
+ cmd |= PCI_COMMAND_SERR;
+ else
+ cmd &= ~PCI_COMMAND_SERR;
+ eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd);
+}
+
+/**
+ * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
+ * @data: EEH device
+ * @flag: Unused
+ *
+ * Loads the PCI configuration space base address registers,
+ * the expansion ROM base address, the latency timer, and etc.
+ * from the saved values in the device node.
+ */
+static void *eeh_restore_one_device_bars(void *data, void *flag)
+{
+ struct eeh_dev *edev = (struct eeh_dev *)data;
+ struct device_node *dn = eeh_dev_to_of_node(edev);
+
+ /* Do special restore for bridges */
+ if (edev->mode & EEH_DEV_BRIDGE)
+ eeh_restore_bridge_bars(edev, dn);
+ else
+ eeh_restore_device_bars(edev, dn);
+
+ if (eeh_ops->restore_config)
+ eeh_ops->restore_config(dn);
+
+ return NULL;
+}
+
+/**
+ * eeh_pe_restore_bars - Restore the PCI config space info
+ * @pe: EEH PE
+ *
+ * This routine performs a recursive walk to the children
+ * of this device as well.
+ */
+void eeh_pe_restore_bars(struct eeh_pe *pe)
+{
+ /*
+ * We needn't take the EEH lock since eeh_pe_dev_traverse()
+ * will take that.
+ */
+ eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL);
+}
+
+/**
+ * eeh_pe_loc_get - Retrieve location code binding to the given PE
+ * @pe: EEH PE
+ *
+ * Retrieve the location code of the given PE. If the primary PE bus
+ * is root bus, we will grab location code from PHB device tree node
+ * or root port. Otherwise, the upstream bridge's device tree node
+ * of the primary PE bus will be checked for the location code.
+ */
+const char *eeh_pe_loc_get(struct eeh_pe *pe)
+{
+ struct pci_controller *hose;
+ struct pci_bus *bus = eeh_pe_bus_get(pe);
+ struct pci_dev *pdev;
+ struct device_node *dn;
+ const char *loc;
+
+ if (!bus)
+ return "N/A";
+
+ /* PHB PE or root PE ? */
+ if (pci_is_root_bus(bus)) {
+ hose = pci_bus_to_host(bus);
+ loc = of_get_property(hose->dn,
+ "ibm,loc-code", NULL);
+ if (loc)
+ return loc;
+ loc = of_get_property(hose->dn,
+ "ibm,io-base-loc-code", NULL);
+ if (loc)
+ return loc;
+
+ pdev = pci_get_slot(bus, 0x0);
+ } else {
+ pdev = bus->self;
+ }
+
+ if (!pdev) {
+ loc = "N/A";
+ goto out;
+ }
+
+ dn = pci_device_to_OF_node(pdev);
+ if (!dn) {
+ loc = "N/A";
+ goto out;
+ }
+
+ loc = of_get_property(dn, "ibm,loc-code", NULL);
+ if (!loc)
+ loc = of_get_property(dn, "ibm,slot-location-code", NULL);
+ if (!loc)
+ loc = "N/A";
+
+out:
+ if (pci_is_root_bus(bus) && pdev)
+ pci_dev_put(pdev);
+ return loc;
+}
+
+/**
+ * eeh_pe_bus_get - Retrieve PCI bus according to the given PE
+ * @pe: EEH PE
+ *
+ * Retrieve the PCI bus according to the given PE. Basically,
+ * there're 3 types of PEs: PHB/Bus/Device. For PHB PE, the
+ * primary PCI bus will be retrieved. The parent bus will be
+ * returned for BUS PE. However, we don't have associated PCI
+ * bus for DEVICE PE.
+ */
+struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
+{
+ struct pci_bus *bus = NULL;
+ struct eeh_dev *edev;
+ struct pci_dev *pdev;
+
+ if (pe->type & EEH_PE_PHB) {
+ bus = pe->phb->bus;
+ } else if (pe->type & EEH_PE_BUS ||
+ pe->type & EEH_PE_DEVICE) {
+ if (pe->bus) {
+ bus = pe->bus;
+ goto out;
+ }
+
+ edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (pdev)
+ bus = pdev->bus;
+ }
+
+out:
+ return bus;
+}
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
new file mode 100644
index 00000000000..e2595ba4b72
--- /dev/null
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -0,0 +1,98 @@
+/*
+ * Sysfs entries for PCI Error Recovery for PAPR-compliant platform.
+ * Copyright IBM Corporation 2007
+ * Copyright Linas Vepstas <linas@austin.ibm.com> 2007
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
+ */
+#include <linux/pci.h>
+#include <linux/stat.h>
+#include <asm/ppc-pci.h>
+#include <asm/pci-bridge.h>
+
+/**
+ * EEH_SHOW_ATTR -- Create sysfs entry for eeh statistic
+ * @_name: name of file in sysfs directory
+ * @_memb: name of member in struct pci_dn to access
+ * @_format: printf format for display
+ *
+ * All of the attributes look very similar, so just
+ * auto-gen a cut-n-paste routine to display them.
+ */
+#define EEH_SHOW_ATTR(_name,_memb,_format) \
+static ssize_t eeh_show_##_name(struct device *dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ struct pci_dev *pdev = to_pci_dev(dev); \
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); \
+ \
+ if (!edev) \
+ return 0; \
+ \
+ return sprintf(buf, _format "\n", edev->_memb); \
+} \
+static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
+
+EEH_SHOW_ATTR(eeh_mode, mode, "0x%x");
+EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x");
+EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x");
+
+void eeh_sysfs_add_device(struct pci_dev *pdev)
+{
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+ int rc=0;
+
+ if (!eeh_enabled())
+ return;
+
+ if (edev && (edev->mode & EEH_DEV_SYSFS))
+ return;
+
+ rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
+ rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
+ rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
+
+ if (rc)
+ printk(KERN_WARNING "EEH: Unable to create sysfs entries\n");
+ else if (edev)
+ edev->mode |= EEH_DEV_SYSFS;
+}
+
+void eeh_sysfs_remove_device(struct pci_dev *pdev)
+{
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+
+ /*
+ * The parent directory might have been removed. We needn't
+ * continue for that case.
+ */
+ if (!pdev->dev.kobj.sd) {
+ if (edev)
+ edev->mode &= ~EEH_DEV_SYSFS;
+ return;
+ }
+
+ device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
+ device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
+ device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
+
+ if (edev)
+ edev->mode &= ~EEH_DEV_SYSFS;
+}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index c03e829fee3..22b45a4955c 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -30,6 +30,8 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
+#include <asm/ftrace.h>
+#include <asm/ptrace.h>
#undef SHOW_SYSCALLS
#undef SHOW_SYSCALLS_TASK
@@ -44,29 +46,58 @@
#endif
#ifdef CONFIG_BOOKE
-#include "head_booke.h"
-#define TRANSFER_TO_HANDLER_EXC_LEVEL(exc_level) \
- mtspr exc_level##_SPRG,r8; \
- BOOKE_LOAD_EXC_LEVEL_STACK(exc_level); \
- lwz r0,GPR10-INT_FRAME_SIZE(r8); \
- stw r0,GPR10(r11); \
- lwz r0,GPR11-INT_FRAME_SIZE(r8); \
- stw r0,GPR11(r11); \
- mfspr r8,exc_level##_SPRG
-
.globl mcheck_transfer_to_handler
mcheck_transfer_to_handler:
- TRANSFER_TO_HANDLER_EXC_LEVEL(MCHECK)
- b transfer_to_handler_full
+ mfspr r0,SPRN_DSRR0
+ stw r0,_DSRR0(r11)
+ mfspr r0,SPRN_DSRR1
+ stw r0,_DSRR1(r11)
+ /* fall through */
.globl debug_transfer_to_handler
debug_transfer_to_handler:
- TRANSFER_TO_HANDLER_EXC_LEVEL(DEBUG)
- b transfer_to_handler_full
+ mfspr r0,SPRN_CSRR0
+ stw r0,_CSRR0(r11)
+ mfspr r0,SPRN_CSRR1
+ stw r0,_CSRR1(r11)
+ /* fall through */
.globl crit_transfer_to_handler
crit_transfer_to_handler:
- TRANSFER_TO_HANDLER_EXC_LEVEL(CRIT)
+#ifdef CONFIG_PPC_BOOK3E_MMU
+ mfspr r0,SPRN_MAS0
+ stw r0,MAS0(r11)
+ mfspr r0,SPRN_MAS1
+ stw r0,MAS1(r11)
+ mfspr r0,SPRN_MAS2
+ stw r0,MAS2(r11)
+ mfspr r0,SPRN_MAS3
+ stw r0,MAS3(r11)
+ mfspr r0,SPRN_MAS6
+ stw r0,MAS6(r11)
+#ifdef CONFIG_PHYS_64BIT
+ mfspr r0,SPRN_MAS7
+ stw r0,MAS7(r11)
+#endif /* CONFIG_PHYS_64BIT */
+#endif /* CONFIG_PPC_BOOK3E_MMU */
+#ifdef CONFIG_44x
+ mfspr r0,SPRN_MMUCR
+ stw r0,MMUCR(r11)
+#endif
+ mfspr r0,SPRN_SRR0
+ stw r0,_SRR0(r11)
+ mfspr r0,SPRN_SRR1
+ stw r0,_SRR1(r11)
+
+ /* set the stack limit to the current stack
+ * and set the limit to protect the thread_info
+ * struct
+ */
+ mfspr r8,SPRN_SPRG_THREAD
+ lwz r0,KSP_LIMIT(r8)
+ stw r0,SAVED_KSP_LIMIT(r11)
+ rlwimi r0,r1,0,0,(31-THREAD_SHIFT)
+ stw r0,KSP_LIMIT(r8)
/* fall through */
#endif
@@ -77,6 +108,20 @@ crit_transfer_to_handler:
stw r0,GPR10(r11)
lwz r0,crit_r11@l(0)
stw r0,GPR11(r11)
+ mfspr r0,SPRN_SRR0
+ stw r0,crit_srr0@l(0)
+ mfspr r0,SPRN_SRR1
+ stw r0,crit_srr1@l(0)
+
+ /* set the stack limit to the current stack
+ * and set the limit to protect the thread_info
+ * struct
+ */
+ mfspr r8,SPRN_SPRG_THREAD
+ lwz r0,KSP_LIMIT(r8)
+ stw r0,saved_ksp_limit@l(0)
+ rlwimi r0,r1,0,0,(31-THREAD_SHIFT)
+ stw r0,KSP_LIMIT(r8)
/* fall through */
#endif
@@ -102,7 +147,7 @@ transfer_to_handler:
mfspr r2,SPRN_XER
stw r12,_CTR(r11)
stw r2,_XER(r11)
- mfspr r12,SPRN_SPRG3
+ mfspr r12,SPRN_SPRG_THREAD
addi r2,r12,-THREAD
tovirt(r2,r2) /* set r2 to current */
beq 2f /* if from user, fix up THREAD.regs */
@@ -110,9 +155,9 @@ transfer_to_handler:
stw r11,PT_REGS(r12)
#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
/* Check to see if the dbcr0 register is set up to debug. Use the
- single-step bit to do this. */
+ internal debug mode bit to do this. */
lwz r12,THREAD_DBCR0(r12)
- andis. r12,r12,DBCR0_IC@h
+ andis. r12,r12,DBCR0_IDM@h
beq+ 3f
/* From user and task is ptraced - load up global dbcr0 */
li r12,-1 /* clear all pending debug events */
@@ -120,6 +165,12 @@ transfer_to_handler:
lis r11,global_dbcr0@ha
tophys(r11,r11)
addi r11,r11,global_dbcr0@l
+#ifdef CONFIG_SMP
+ CURRENT_THREAD_INFO(r9, r1)
+ lwz r9,TI_CPU(r9)
+ slwi r9,r9,3
+ add r11,r11,r9
+#endif
lwz r12,0(r11)
mtspr SPRN_DBCR0,r12
lwz r12,4(r11)
@@ -131,32 +182,91 @@ transfer_to_handler:
2: /* if from kernel, check interrupted DOZE/NAP mode and
* check for stack overflow
*/
- lwz r9,THREAD_INFO-THREAD(r12)
- cmplw r1,r9 /* if r1 <= current->thread_info */
+ lwz r9,KSP_LIMIT(r12)
+ cmplw r1,r9 /* if r1 <= ksp_limit */
ble- stack_ovf /* then the kernel stack overflowed */
5:
-#ifdef CONFIG_6xx
+#if defined(CONFIG_6xx) || defined(CONFIG_E500)
+ CURRENT_THREAD_INFO(r9, r1)
tophys(r9,r9) /* check local flags */
lwz r12,TI_LOCAL_FLAGS(r9)
mtcrf 0x01,r12
bt- 31-TLF_NAPPING,4f
-#endif /* CONFIG_6xx */
+ bt- 31-TLF_SLEEPING,7f
+#endif /* CONFIG_6xx || CONFIG_E500 */
.globl transfer_to_handler_cont
transfer_to_handler_cont:
3:
mflr r9
lwz r11,0(r9) /* virtual address of handler */
lwz r9,4(r9) /* where to go when done */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ lis r12,reenable_mmu@h
+ ori r12,r12,reenable_mmu@l
+ mtspr SPRN_SRR0,r12
+ mtspr SPRN_SRR1,r10
+ SYNC
+ RFI
+reenable_mmu: /* re-enable mmu so we can */
+ mfmsr r10
+ lwz r12,_MSR(r1)
+ xor r10,r10,r12
+ andi. r10,r10,MSR_EE /* Did EE change? */
+ beq 1f
+
+ /*
+ * The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1.
+ * If from user mode there is only one stack frame on the stack, and
+ * accessing CALLER_ADDR1 will cause oops. So we need create a dummy
+ * stack frame to make trace_hardirqs_off happy.
+ *
+ * This is handy because we also need to save a bunch of GPRs,
+ * r3 can be different from GPR3(r1) at this point, r9 and r11
+ * contains the old MSR and handler address respectively,
+ * r4 & r5 can contain page fault arguments that need to be passed
+ * along as well. r12, CCR, CTR, XER etc... are left clobbered as
+ * they aren't useful past this point (aren't syscall arguments),
+ * the rest is restored from the exception frame.
+ */
+ stwu r1,-32(r1)
+ stw r9,8(r1)
+ stw r11,12(r1)
+ stw r3,16(r1)
+ stw r4,20(r1)
+ stw r5,24(r1)
+ bl trace_hardirqs_off
+ lwz r5,24(r1)
+ lwz r4,20(r1)
+ lwz r3,16(r1)
+ lwz r11,12(r1)
+ lwz r9,8(r1)
+ addi r1,r1,32
+ lwz r0,GPR0(r1)
+ lwz r6,GPR6(r1)
+ lwz r7,GPR7(r1)
+ lwz r8,GPR8(r1)
+1: mtctr r11
+ mtlr r9
+ bctr /* jump to handler */
+#else /* CONFIG_TRACE_IRQFLAGS */
mtspr SPRN_SRR0,r11
mtspr SPRN_SRR1,r10
mtlr r9
SYNC
RFI /* jump to handler, enable MMU */
+#endif /* CONFIG_TRACE_IRQFLAGS */
-#ifdef CONFIG_6xx
+#if defined (CONFIG_6xx) || defined(CONFIG_E500)
4: rlwinm r12,r12,0,~_TLF_NAPPING
stw r12,TI_LOCAL_FLAGS(r9)
- b power_save_6xx_restore
+ b power_save_ppc32_restore
+
+7: rlwinm r12,r12,0,~_TLF_SLEEPING
+ stw r12,TI_LOCAL_FLAGS(r9)
+ lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */
+ rlwinm r9,r9,0,~MSR_EE
+ lwz r12,_LINK(r11) /* and return to address in LR */
+ b fast_exception_return
#endif
/*
@@ -191,7 +301,6 @@ stack_ovf:
0:
_GLOBAL(DoSyscall)
- stw r0,THREAD+LAST_SYSCALL(r2)
stw r3,ORIG_GPR3(r1)
li r12,0
stw r12,RESULT(r1)
@@ -201,7 +310,32 @@ _GLOBAL(DoSyscall)
#ifdef SHOW_SYSCALLS
bl do_show_syscall
#endif /* SHOW_SYSCALLS */
- rlwinm r10,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* Return from syscalls can (and generally will) hard enable
+ * interrupts. You aren't supposed to call a syscall with
+ * interrupts disabled in the first place. However, to ensure
+ * that we get it right vs. lockdep if it happens, we force
+ * that hard enable here with appropriate tracing if we see
+ * that we have been called with interrupts off
+ */
+ mfmsr r11
+ andi. r12,r11,MSR_EE
+ bne+ 1f
+ /* We came in with interrupts disabled, we enable them now */
+ bl trace_hardirqs_on
+ mfmsr r11
+ lwz r0,GPR0(r1)
+ lwz r3,GPR3(r1)
+ lwz r4,GPR4(r1)
+ ori r11,r11,MSR_EE
+ lwz r5,GPR5(r1)
+ lwz r6,GPR6(r1)
+ lwz r7,GPR7(r1)
+ lwz r8,GPR8(r1)
+ mtmsr r11
+1:
+#endif /* CONFIG_TRACE_IRQFLAGS */
+ CURRENT_THREAD_INFO(r10, r1)
lwz r11,TI_FLAGS(r10)
andi. r11,r11,_TIF_SYSCALL_T_OR_A
bne- syscall_dotrace
@@ -222,9 +356,10 @@ ret_from_syscall:
bl do_show_syscall_exit
#endif
mr r6,r3
- rlwinm r12,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */
+ CURRENT_THREAD_INFO(r12, r1)
/* disable interrupts so current_thread_info()->flags can't change */
LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */
+ /* Note: We don't bother telling lockdep about it */
SYNC
MTMSRD(r10)
lwz r9,TI_FLAGS(r12)
@@ -238,20 +373,44 @@ ret_from_syscall:
oris r11,r11,0x1000 /* Set SO bit in CR */
stw r11,_CCR(r1)
syscall_exit_cont:
+ lwz r8,_MSR(r1)
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* If we are going to return from the syscall with interrupts
+ * off, we trace that here. It shouldn't happen though but we
+ * want to catch the bugger if it does right ?
+ */
+ andi. r10,r8,MSR_EE
+ bne+ 1f
+ stw r3,GPR3(r1)
+ bl trace_hardirqs_off
+ lwz r3,GPR3(r1)
+1:
+#endif /* CONFIG_TRACE_IRQFLAGS */
#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- /* If the process has its own DBCR0 value, load it up. The single
- step bit tells us that dbcr0 should be loaded. */
+ /* If the process has its own DBCR0 value, load it up. The internal
+ debug mode bit tells us that dbcr0 should be loaded. */
lwz r0,THREAD+THREAD_DBCR0(r2)
- andis. r10,r0,DBCR0_IC@h
+ andis. r10,r0,DBCR0_IDM@h
bnel- load_dbcr0
#endif
+#ifdef CONFIG_44x
+BEGIN_MMU_FTR_SECTION
+ lis r4,icache_44x_need_flush@ha
+ lwz r5,icache_44x_need_flush@l(r4)
+ cmplwi cr0,r5,0
+ bne- 2f
+1:
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x)
+#endif /* CONFIG_44x */
+BEGIN_FTR_SECTION
+ lwarx r7,0,r1
+END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
stwcx. r0,0,r1 /* to clear the reservation */
lwz r4,_LINK(r1)
lwz r5,_CCR(r1)
mtlr r4
mtcr r5
lwz r7,_NIP(r1)
- lwz r8,_MSR(r1)
FIX_SRR1(r8, r0)
lwz r2,GPR2(r1)
lwz r1,GPR1(r1)
@@ -259,6 +418,12 @@ syscall_exit_cont:
mtspr SPRN_SRR1,r8
SYNC
RFI
+#ifdef CONFIG_44x
+2: li r7,0
+ iccci r0,r0
+ stw r7,icache_44x_need_flush@l(r4)
+ b 1b
+#endif /* CONFIG_44x */
66: li r3,-ENOSYS
b ret_from_syscall
@@ -270,6 +435,17 @@ ret_from_fork:
li r3,0
b ret_from_syscall
+ .globl ret_from_kernel_thread
+ret_from_kernel_thread:
+ REST_NVGPRS(r1)
+ bl schedule_tail
+ mtlr r14
+ mr r3,r15
+ PPC440EP_ERR42
+ blrl
+ li r3,0
+ b ret_from_syscall
+
/* Traced system call support */
syscall_dotrace:
SAVE_NVGPRS(r1)
@@ -277,7 +453,12 @@ syscall_dotrace:
stw r0,_TRAP(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_syscall_trace_enter
- lwz r0,GPR0(r1) /* Restore original registers */
+ /*
+ * Restore argument registers possibly just changed.
+ * We use the return value of do_syscall_trace_enter
+ * for call number to look up in the table (r0).
+ */
+ mr r0,r3
lwz r3,GPR3(r1)
lwz r4,GPR4(r1)
lwz r5,GPR5(r1)
@@ -323,7 +504,9 @@ syscall_exit_work:
andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP)
beq ret_from_except
- /* Re-enable interrupts */
+ /* Re-enable interrupts. There is no need to trace that with
+ * lockdep as we are supposed to have IRQs on at this point
+ */
ori r10,r10,MSR_EE
SYNC
MTMSRD(r10)
@@ -505,9 +688,11 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_SPE
+BEGIN_FTR_SECTION
oris r0,r0,MSR_SPE@h /* Disable SPE */
mfspr r12,SPRN_SPEFSCR /* save spefscr register value */
stw r12,THREAD+THREAD_SPEFSCR(r2)
+END_FTR_SECTION_IFSET(CPU_FTR_SPE)
#endif /* CONFIG_SPE */
and. r0,r0,r11 /* FP or altivec or SPE enabled? */
beq+ 1f
@@ -529,7 +714,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
tophys(r0,r4)
CLR_TOP32(r0)
- mtspr SPRN_SPRG3,r0 /* Update current THREAD phys addr */
+ mtspr SPRN_SPRG_THREAD,r0 /* Update current THREAD phys addr */
lwz r1,KSP(r4) /* Load new stack pointer */
/* save the old current 'last' for return value */
@@ -543,8 +728,10 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_SPE
+BEGIN_FTR_SECTION
lwz r0,THREAD+THREAD_SPEFSCR(r2)
mtspr SPRN_SPEFSCR,r0 /* restore SPEFSCR reg */
+END_FTR_SECTION_IFSET(CPU_FTR_SPE)
#endif /* CONFIG_SPE */
lwz r0,_CCR(r1)
@@ -597,7 +784,11 @@ fast_exception_return:
mr r12,r4 /* restart at exc_exit_restart */
b 2b
- .comm fee_restarts,4
+ .section .bss
+ .align 2
+fee_restarts:
+ .space 4
+ .previous
/* aargh, a nonrecoverable interrupt, panic */
/* aargh, we don't know which trap this is */
@@ -626,6 +817,7 @@ ret_from_except:
/* Hard-disable interrupts so that current_thread_info()->flags
* can't change between when we test it and when we return
* from the interrupt. */
+ /* Note: We don't bother telling lockdep about it */
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
SYNC /* Some chip revs have problems here... */
MTMSRD(r10) /* disable interrupts */
@@ -636,46 +828,140 @@ ret_from_except:
user_exc_return: /* r10 contains MSR_KERNEL here */
/* Check current_thread_info()->flags */
- rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
+ CURRENT_THREAD_INFO(r9, r1)
lwz r9,TI_FLAGS(r9)
- andi. r0,r9,(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NEED_RESCHED)
+ andi. r0,r9,_TIF_USER_WORK_MASK
bne do_work
restore_user:
#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- /* Check whether this process has its own DBCR0 value. The single
- step bit tells us that dbcr0 should be loaded. */
+ /* Check whether this process has its own DBCR0 value. The internal
+ debug mode bit tells us that dbcr0 should be loaded. */
lwz r0,THREAD+THREAD_DBCR0(r2)
- andis. r10,r0,DBCR0_IC@h
+ andis. r10,r0,DBCR0_IDM@h
bnel- load_dbcr0
#endif
-#ifdef CONFIG_PREEMPT
b restore
/* N.B. the only way to get here is from the beq following ret_from_except. */
resume_kernel:
+ /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
+ CURRENT_THREAD_INFO(r9, r1)
+ lwz r8,TI_FLAGS(r9)
+ andis. r0,r8,_TIF_EMULATE_STACK_STORE@h
+ beq+ 1f
+
+ addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */
+
+ lwz r3,GPR1(r1)
+ subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */
+ mr r4,r1 /* src: current exception frame */
+ mr r1,r3 /* Reroute the trampoline frame to r1 */
+
+ /* Copy from the original to the trampoline. */
+ li r5,INT_FRAME_SIZE/4 /* size: INT_FRAME_SIZE */
+ li r6,0 /* start offset: 0 */
+ mtctr r5
+2: lwzx r0,r6,r4
+ stwx r0,r6,r3
+ addi r6,r6,4
+ bdnz 2b
+
+ /* Do real store operation to complete stwu */
+ lwz r5,GPR1(r1)
+ stw r8,0(r5)
+
+ /* Clear _TIF_EMULATE_STACK_STORE flag */
+ lis r11,_TIF_EMULATE_STACK_STORE@h
+ addi r5,r9,TI_FLAGS
+0: lwarx r8,0,r5
+ andc r8,r8,r11
+#ifdef CONFIG_IBM405_ERR77
+ dcbt 0,r5
+#endif
+ stwcx. r8,0,r5
+ bne- 0b
+1:
+
+#ifdef CONFIG_PREEMPT
/* check current_thread_info->preempt_count */
- rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
lwz r0,TI_PREEMPT(r9)
cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
bne restore
- lwz r0,TI_FLAGS(r9)
- andi. r0,r0,_TIF_NEED_RESCHED
+ andi. r8,r8,_TIF_NEED_RESCHED
beq+ restore
+ lwz r3,_MSR(r1)
andi. r0,r3,MSR_EE /* interrupts off? */
beq restore /* don't schedule if so */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* Lockdep thinks irqs are enabled, we need to call
+ * preempt_schedule_irq with IRQs off, so we inform lockdep
+ * now that we -did- turn them off already
+ */
+ bl trace_hardirqs_off
+#endif
1: bl preempt_schedule_irq
- rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
+ CURRENT_THREAD_INFO(r9, r1)
lwz r3,TI_FLAGS(r9)
andi. r0,r3,_TIF_NEED_RESCHED
bne- 1b
-#else
-resume_kernel:
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* And now, to properly rebalance the above, we tell lockdep they
+ * are being turned back on, which will happen when we return
+ */
+ bl trace_hardirqs_on
+#endif
#endif /* CONFIG_PREEMPT */
/* interrupts are hard-disabled at this point */
restore:
+#ifdef CONFIG_44x
+BEGIN_MMU_FTR_SECTION
+ b 1f
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
+ lis r4,icache_44x_need_flush@ha
+ lwz r5,icache_44x_need_flush@l(r4)
+ cmplwi cr0,r5,0
+ beq+ 1f
+ li r6,0
+ iccci r0,r0
+ stw r6,icache_44x_need_flush@l(r4)
+1:
+#endif /* CONFIG_44x */
+
+ lwz r9,_MSR(r1)
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* Lockdep doesn't know about the fact that IRQs are temporarily turned
+ * off in this assembly code while peeking at TI_FLAGS() and such. However
+ * we need to inform it if the exception turned interrupts off, and we
+ * are about to trun them back on.
+ *
+ * The problem here sadly is that we don't know whether the exceptions was
+ * one that turned interrupts off or not. So we always tell lockdep about
+ * turning them on here when we go back to wherever we came from with EE
+ * on, even if that may meen some redudant calls being tracked. Maybe later
+ * we could encode what the exception did somewhere or test the exception
+ * type in the pt_regs but that sounds overkill
+ */
+ andi. r10,r9,MSR_EE
+ beq 1f
+ /*
+ * Since the ftrace irqsoff latency trace checks CALLER_ADDR1,
+ * which is the stack frame here, we need to force a stack frame
+ * in case we came from user space.
+ */
+ stwu r1,-32(r1)
+ mflr r0
+ stw r0,4(r1)
+ stwu r1,-32(r1)
+ bl trace_hardirqs_on
+ lwz r1,0(r1)
+ lwz r1,0(r1)
+ lwz r9,_MSR(r1)
+1:
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
lwz r0,GPR0(r1)
lwz r2,GPR2(r1)
REST_4GPRS(3, r1)
@@ -687,10 +973,12 @@ restore:
mtctr r11
PPC405_ERR77(0,r1)
+BEGIN_FTR_SECTION
+ lwarx r11,0,r1
+END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
stwcx. r0,0,r1 /* to clear the reservation */
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
- lwz r9,_MSR(r1)
andi. r10,r9,MSR_RI /* check if this exception occurred */
beql nonrecoverable /* at a bad place (MSR:RI = 0) */
@@ -713,7 +1001,6 @@ restore:
MTMSRD(r10) /* clear the RI bit */
.globl exc_exit_restart
exc_exit_restart:
- lwz r9,_MSR(r1)
lwz r12,_NIP(r1)
FIX_SRR1(r9,r10)
mtspr SPRN_SRR0,r12
@@ -816,18 +1103,91 @@ exc_exit_restart_end:
exc_lvl_rfi; \
b .; /* prevent prefetch past exc_lvl_rfi */
+#define RESTORE_xSRR(exc_lvl_srr0, exc_lvl_srr1) \
+ lwz r9,_##exc_lvl_srr0(r1); \
+ lwz r10,_##exc_lvl_srr1(r1); \
+ mtspr SPRN_##exc_lvl_srr0,r9; \
+ mtspr SPRN_##exc_lvl_srr1,r10;
+
+#if defined(CONFIG_PPC_BOOK3E_MMU)
+#ifdef CONFIG_PHYS_64BIT
+#define RESTORE_MAS7 \
+ lwz r11,MAS7(r1); \
+ mtspr SPRN_MAS7,r11;
+#else
+#define RESTORE_MAS7
+#endif /* CONFIG_PHYS_64BIT */
+#define RESTORE_MMU_REGS \
+ lwz r9,MAS0(r1); \
+ lwz r10,MAS1(r1); \
+ lwz r11,MAS2(r1); \
+ mtspr SPRN_MAS0,r9; \
+ lwz r9,MAS3(r1); \
+ mtspr SPRN_MAS1,r10; \
+ lwz r10,MAS6(r1); \
+ mtspr SPRN_MAS2,r11; \
+ mtspr SPRN_MAS3,r9; \
+ mtspr SPRN_MAS6,r10; \
+ RESTORE_MAS7;
+#elif defined(CONFIG_44x)
+#define RESTORE_MMU_REGS \
+ lwz r9,MMUCR(r1); \
+ mtspr SPRN_MMUCR,r9;
+#else
+#define RESTORE_MMU_REGS
+#endif
+
+#ifdef CONFIG_40x
.globl ret_from_crit_exc
ret_from_crit_exc:
- RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, RFCI)
+ mfspr r9,SPRN_SPRG_THREAD
+ lis r10,saved_ksp_limit@ha;
+ lwz r10,saved_ksp_limit@l(r10);
+ tovirt(r9,r9);
+ stw r10,KSP_LIMIT(r9)
+ lis r9,crit_srr0@ha;
+ lwz r9,crit_srr0@l(r9);
+ lis r10,crit_srr1@ha;
+ lwz r10,crit_srr1@l(r10);
+ mtspr SPRN_SRR0,r9;
+ mtspr SPRN_SRR1,r10;
+ RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI)
+#endif /* CONFIG_40x */
#ifdef CONFIG_BOOKE
+ .globl ret_from_crit_exc
+ret_from_crit_exc:
+ mfspr r9,SPRN_SPRG_THREAD
+ lwz r10,SAVED_KSP_LIMIT(r1)
+ stw r10,KSP_LIMIT(r9)
+ RESTORE_xSRR(SRR0,SRR1);
+ RESTORE_MMU_REGS;
+ RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI)
+
.globl ret_from_debug_exc
ret_from_debug_exc:
- RET_FROM_EXC_LEVEL(SPRN_DSRR0, SPRN_DSRR1, RFDI)
+ mfspr r9,SPRN_SPRG_THREAD
+ lwz r10,SAVED_KSP_LIMIT(r1)
+ stw r10,KSP_LIMIT(r9)
+ lwz r9,THREAD_INFO-THREAD(r9)
+ CURRENT_THREAD_INFO(r10, r1)
+ lwz r10,TI_PREEMPT(r10)
+ stw r10,TI_PREEMPT(r9)
+ RESTORE_xSRR(SRR0,SRR1);
+ RESTORE_xSRR(CSRR0,CSRR1);
+ RESTORE_MMU_REGS;
+ RET_FROM_EXC_LEVEL(SPRN_DSRR0, SPRN_DSRR1, PPC_RFDI)
.globl ret_from_mcheck_exc
ret_from_mcheck_exc:
- RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, RFMCI)
+ mfspr r9,SPRN_SPRG_THREAD
+ lwz r10,SAVED_KSP_LIMIT(r1)
+ stw r10,KSP_LIMIT(r9)
+ RESTORE_xSRR(SRR0,SRR1);
+ RESTORE_xSRR(CSRR0,CSRR1);
+ RESTORE_xSRR(DSRR0,DSRR1);
+ RESTORE_MMU_REGS;
+ RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI)
#endif /* CONFIG_BOOKE */
/*
@@ -843,6 +1203,12 @@ load_dbcr0:
mfspr r10,SPRN_DBCR0
lis r11,global_dbcr0@ha
addi r11,r11,global_dbcr0@l
+#ifdef CONFIG_SMP
+ CURRENT_THREAD_INFO(r9, r1)
+ lwz r9,TI_CPU(r9)
+ slwi r9,r9,3
+ add r11,r11,r9
+#endif
stw r10,0(r11)
mtspr SPRN_DBCR0,r0
lwz r10,4(r11)
@@ -852,7 +1218,11 @@ load_dbcr0:
mtspr SPRN_DBSR,r11 /* clear all pending debug events */
blr
- .comm global_dbcr0,8
+ .section .bss
+ .align 4
+global_dbcr0:
+ .space 8*NR_CPUS
+ .previous
#endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
do_work: /* r10 contains MSR_KERNEL here */
@@ -860,19 +1230,26 @@ do_work: /* r10 contains MSR_KERNEL here */
beq do_user_signal
do_resched: /* r10 contains MSR_KERNEL here */
+ /* Note: We don't need to inform lockdep that we are enabling
+ * interrupts here. As far as it knows, they are already enabled
+ */
ori r10,r10,MSR_EE
SYNC
MTMSRD(r10) /* hard-enable interrupts */
bl schedule
recheck:
+ /* Note: And we don't tell it we are disabling them again
+ * neither. Those disable/enable cycles used to peek at
+ * TI_FLAGS aren't advertised.
+ */
LOAD_MSR_KERNEL(r10,MSR_KERNEL)
SYNC
MTMSRD(r10) /* disable interrupts */
- rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
+ CURRENT_THREAD_INFO(r9, r1)
lwz r9,TI_FLAGS(r9)
andi. r0,r9,_TIF_NEED_RESCHED
bne- do_resched
- andi. r0,r9,_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK
+ andi. r0,r9,_TIF_USER_WORK_MASK
beq restore_user
do_user_signal: /* r10 contains MSR_KERNEL here */
ori r10,r10,MSR_EE
@@ -885,9 +1262,9 @@ do_user_signal: /* r10 contains MSR_KERNEL here */
SAVE_NVGPRS(r1)
rlwinm r3,r3,0,0,30
stw r3,_TRAP(r1)
-2: li r3,0
- addi r4,r1,STACK_FRAME_OVERHEAD
- bl do_signal
+2: addi r3,r1,STACK_FRAME_OVERHEAD
+ mr r4,r9
+ bl do_notify_resume
REST_NVGPRS(r1)
b recheck
@@ -927,7 +1304,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_601)
/* shouldn't return */
b 4b
- .comm ee_restarts,4
+ .section .bss
+ .align 2
+ee_restarts:
+ .space 4
+ .previous
/*
* PROM code for specific machines follows. Put it
@@ -957,7 +1338,7 @@ _GLOBAL(enter_rtas)
MTMSRD(r0) /* don't get trashed */
li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
mtlr r6
- mtspr SPRN_SPRG2,r7
+ mtspr SPRN_SPRG_RTAS,r7
mtspr SPRN_SRR0,r8
mtspr SPRN_SRR1,r9
RFI
@@ -967,7 +1348,7 @@ _GLOBAL(enter_rtas)
FIX_SRR1(r9,r0)
addi r1,r1,INT_FRAME_SIZE
li r0,0
- mtspr SPRN_SPRG2,r0
+ mtspr SPRN_SPRG_RTAS,r0
mtspr SPRN_SRR0,r8
mtspr SPRN_SRR1,r9
RFI /* return to caller */
@@ -978,3 +1359,102 @@ machine_check_in_rtas:
/* XXX load up BATs and panic */
#endif /* CONFIG_PPC_RTAS */
+
+#ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+ /*
+ * It is required that _mcount on PPC32 must preserve the
+ * link register. But we have r0 to play with. We use r0
+ * to push the return address back to the caller of mcount
+ * into the ctr register, restore the link register and
+ * then jump back using the ctr register.
+ */
+ mflr r0
+ mtctr r0
+ lwz r0, 4(r1)
+ mtlr r0
+ bctr
+
+_GLOBAL(ftrace_caller)
+ MCOUNT_SAVE_FRAME
+ /* r3 ends up with link register */
+ subi r3, r3, MCOUNT_INSN_SIZE
+.globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ nop
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ b ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+#endif
+ MCOUNT_RESTORE_FRAME
+ /* old link register ends up in ctr reg */
+ bctr
+#else
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+
+ MCOUNT_SAVE_FRAME
+
+ subi r3, r3, MCOUNT_INSN_SIZE
+ LOAD_REG_ADDR(r5, ftrace_trace_function)
+ lwz r5,0(r5)
+
+ mtctr r5
+ bctrl
+ nop
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ b ftrace_graph_caller
+#endif
+ MCOUNT_RESTORE_FRAME
+ bctr
+#endif
+
+_GLOBAL(ftrace_stub)
+ blr
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(ftrace_graph_caller)
+ /* load r4 with local address */
+ lwz r4, 44(r1)
+ subi r4, r4, MCOUNT_INSN_SIZE
+
+ /* get the parent address */
+ addi r3, r1, 52
+
+ bl prepare_ftrace_return
+ nop
+
+ MCOUNT_RESTORE_FRAME
+ /* old link register ends up in ctr reg */
+ bctr
+
+_GLOBAL(return_to_handler)
+ /* need to save return values */
+ stwu r1, -32(r1)
+ stw r3, 20(r1)
+ stw r4, 16(r1)
+ stw r31, 12(r1)
+ mr r31, r1
+
+ bl ftrace_return_to_handler
+ nop
+
+ /* return value has real return address */
+ mtlr r3
+
+ lwz r3, 20(r1)
+ lwz r4, 16(r1)
+ lwz r31,12(r1)
+ lwz r1, 0(r1)
+
+ /* Jump back to real return address */
+ blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#endif /* CONFIG_MCOUNT */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2cd872b5283..6528c5e2cc4 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -27,21 +27,24 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
-
-#ifdef CONFIG_PPC_ISERIES
-#define DO_SOFT_DISABLE
-#endif
+#include <asm/firmware.h>
+#include <asm/bug.h>
+#include <asm/ptrace.h>
+#include <asm/irqflags.h>
+#include <asm/ftrace.h>
+#include <asm/hw_irq.h>
+#include <asm/context_tracking.h>
/*
* System calls.
*/
.section ".toc","aw"
-.SYS_CALL_TABLE:
- .tc .sys_call_table[TC],.sys_call_table
+SYS_CALL_TABLE:
+ .tc sys_call_table[TC],sys_call_table
/* This value is used to mark exception frames on the stack. */
exception_marker:
- .tc ID_72656773_68657265[TC],0x7265677368657265
+ .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
.section ".text"
.align 7
@@ -56,14 +59,15 @@ system_call_common:
beq- 1f
ld r1,PACAKSAVE(r13)
1: std r10,0(r1)
- crclr so
std r11,_NIP(r1)
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
+ beq 2f /* if from kernel mode */
ACCOUNT_CPU_USER_ENTRY(r10, r11)
- std r2,GPR2(r1)
+2: std r2,GPR2(r1)
std r3,GPR3(r1)
+ mfcr r2
std r4,GPR4(r1)
std r5,GPR5(r1)
std r6,GPR6(r1)
@@ -74,47 +78,82 @@ system_call_common:
std r11,GPR10(r1)
std r11,GPR11(r1)
std r11,GPR12(r1)
+ std r11,_XER(r1)
+ std r11,_CTR(r1)
std r9,GPR13(r1)
- mfcr r9
mflr r10
+ /*
+ * This clears CR0.SO (bit 28), which is the error indication on
+ * return from this system call.
+ */
+ rldimi r2,r11,28,(63-28)
li r11,0xc01
- std r9,_CCR(r1)
std r10,_LINK(r1)
std r11,_TRAP(r1)
- mfxer r9
- mfctr r10
- std r9,_XER(r1)
- std r10,_CTR(r1)
std r3,ORIG_GPR3(r1)
+ std r2,_CCR(r1)
ld r2,PACATOC(r13)
addi r9,r1,STACK_FRAME_OVERHEAD
ld r11,exception_marker@toc(r2)
std r11,-16(r9) /* "regshere" marker */
-#ifdef CONFIG_PPC_ISERIES
- /* Hack for handling interrupts when soft-enabling on iSeries */
- cmpdi cr1,r0,0x5555 /* syscall 0x5555 */
- andi. r10,r12,MSR_PR /* from kernel */
- crand 4*cr0+eq,4*cr1+eq,4*cr0+eq
- beq hardware_interrupt_entry
- lbz r10,PACAPROCENABLED(r13)
- std r10,SOFTE(r1)
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
+BEGIN_FW_FTR_SECTION
+ beq 33f
+ /* if from user, see if there are any DTL entries to process */
+ ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
+ ld r11,PACA_DTL_RIDX(r13) /* get log read index */
+ addi r10,r10,LPPACA_DTLIDX
+ LDX_BE r10,0,r10 /* get log write index */
+ cmpd cr1,r11,r10
+ beq+ cr1,33f
+ bl accumulate_stolen_time
+ REST_GPR(0,r1)
+ REST_4GPRS(3,r1)
+ REST_2GPRS(7,r1)
+ addi r9,r1,STACK_FRAME_OVERHEAD
+33:
+END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */
+
+ /*
+ * A syscall should always be called with interrupts enabled
+ * so we just unconditionally hard-enable here. When some kind
+ * of irq tracing is used, we additionally check that condition
+ * is correct
+ */
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG)
+ lbz r10,PACASOFTIRQEN(r13)
+ xori r10,r10,1
+1: tdnei r10,0
+ EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
#endif
- mfmsr r11
+
+#ifdef CONFIG_PPC_BOOK3E
+ wrteei 1
+#else
+ ld r11,PACAKMSR(r13)
ori r11,r11,MSR_EE
mtmsrd r11,1
+#endif /* CONFIG_PPC_BOOK3E */
+
+ /* We do need to set SOFTE in the stack frame or the return
+ * from interrupt will be painful
+ */
+ li r10,1
+ std r10,SOFTE(r1)
#ifdef SHOW_SYSCALLS
- bl .do_show_syscall
+ bl do_show_syscall
REST_GPR(0,r1)
REST_4GPRS(3,r1)
REST_2GPRS(7,r1)
addi r9,r1,STACK_FRAME_OVERHEAD
#endif
- clrrdi r11,r1,THREAD_SHIFT
+ CURRENT_THREAD_INFO(r11, r1)
ld r10,TI_FLAGS(r11)
andi. r11,r10,_TIF_SYSCALL_T_OR_A
- bne- syscall_dotrace
-syscall_dotrace_cont:
+ bne syscall_dotrace
+.Lsyscall_dotrace_cont:
cmpldi 0,r0,NR_syscalls
bge- syscall_enosys
@@ -123,7 +162,7 @@ system_call: /* label this so stack traces look sane */
* Need to vector to 32 Bit or default sys_call_table here,
* based on caller's run-mode / personality.
*/
- ld r11,.SYS_CALL_TABLE@toc(2)
+ ld r11,SYS_CALL_TABLE@toc(2)
andi. r10,r10,_TIF_32BIT
beq 15f
addi r11,r11,8 /* use 32-bit syscall entries */
@@ -135,27 +174,44 @@ system_call: /* label this so stack traces look sane */
clrldi r8,r8,32
15:
slwi r0,r0,4
- ldx r10,r11,r0 /* Fetch system call handler [ptr] */
- mtctr r10
+ ldx r12,r11,r0 /* Fetch system call handler [ptr] */
+ mtctr r12
bctrl /* Call handler */
syscall_exit:
std r3,RESULT(r1)
#ifdef SHOW_SYSCALLS
- bl .do_show_syscall_exit
+ bl do_show_syscall_exit
ld r3,RESULT(r1)
#endif
- clrrdi r12,r1,THREAD_SHIFT
+ CURRENT_THREAD_INFO(r12, r1)
- /* disable interrupts so current_thread_info()->flags can't change,
- and so that we don't get interrupted after loading SRR0/1. */
ld r8,_MSR(r1)
+#ifdef CONFIG_PPC_BOOK3S
+ /* No MSR:RI on BookE */
andi. r10,r8,MSR_RI
beq- unrecov_restore
- mfmsr r10
- rldicl r10,r10,48,1
- rotldi r10,r10,16
- mtmsrd r10,1
+#endif
+ /*
+ * Disable interrupts so current_thread_info()->flags can't change,
+ * and so that we don't get interrupted after loading SRR0/1.
+ */
+#ifdef CONFIG_PPC_BOOK3E
+ wrteei 0
+#else
+ ld r10,PACAKMSR(r13)
+ /*
+ * For performance reasons we clear RI the same time that we
+ * clear EE. We only need to clear RI just before we restore r13
+ * below, but batching it with EE saves us one expensive mtmsrd call.
+ * We have to be careful to restore RI if we branch anywhere from
+ * here (eg syscall_exit_work).
+ */
+ li r9,MSR_RI
+ andc r11,r10,r9
+ mtmsrd r11,1
+#endif /* CONFIG_PPC_BOOK3E */
+
ld r9,TI_FLAGS(r12)
li r11,-_LAST_ERRNO
andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
@@ -163,38 +219,44 @@ syscall_exit:
cmpld r3,r11
ld r5,_CCR(r1)
bge- syscall_error
-syscall_error_cont:
+.Lsyscall_error_cont:
ld r7,_NIP(r1)
+BEGIN_FTR_SECTION
stdcx. r0,0,r1 /* to clear the reservation */
+END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
andi. r6,r8,MSR_PR
ld r4,_LINK(r1)
+
beq- 1f
ACCOUNT_CPU_USER_EXIT(r11, r12)
+ HMT_MEDIUM_LOW_HAS_PPR
ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
1: ld r2,GPR2(r1)
- li r12,MSR_RI
- andc r11,r10,r12
- mtmsrd r11,1 /* clear MSR.RI */
ld r1,GPR1(r1)
mtlr r4
mtcr r5
mtspr SPRN_SRR0,r7
mtspr SPRN_SRR1,r8
- rfid
+ RFI
b . /* prevent speculative execution */
syscall_error:
oris r5,r5,0x1000 /* Set SO bit in CR */
neg r3,r3
std r5,_CCR(r1)
- b syscall_error_cont
+ b .Lsyscall_error_cont
/* Traced system call support */
syscall_dotrace:
- bl .save_nvgprs
+ bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .do_syscall_trace_enter
- ld r0,GPR0(r1) /* Restore original registers */
+ bl do_syscall_trace_enter
+ /*
+ * Restore argument registers possibly just changed.
+ * We use the return value of do_syscall_trace_enter
+ * for the call number to look up in the table (r0).
+ */
+ mr r0,r3
ld r3,GPR3(r1)
ld r4,GPR4(r1)
ld r5,GPR5(r1)
@@ -202,15 +264,18 @@ syscall_dotrace:
ld r7,GPR7(r1)
ld r8,GPR8(r1)
addi r9,r1,STACK_FRAME_OVERHEAD
- clrrdi r10,r1,THREAD_SHIFT
+ CURRENT_THREAD_INFO(r10, r1)
ld r10,TI_FLAGS(r10)
- b syscall_dotrace_cont
+ b .Lsyscall_dotrace_cont
syscall_enosys:
li r3,-ENOSYS
b syscall_exit
syscall_exit_work:
+#ifdef CONFIG_PPC_BOOK3S
+ mtmsrd r10,1 /* Restore RI */
+#endif
/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
If TIF_NOERROR is set, just save r3 as it is. */
@@ -241,18 +306,23 @@ syscall_exit_work:
subi r12,r12,TI_FLAGS
4: /* Anything else left to do? */
+ SET_DEFAULT_THREAD_PPR(r3, r10) /* Set thread.ppr = 3 */
andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP)
- beq .ret_from_except_lite
+ beq ret_from_except_lite
/* Re-enable interrupts */
- mfmsr r10
+#ifdef CONFIG_PPC_BOOK3E
+ wrteei 1
+#else
+ ld r10,PACAKMSR(r13)
ori r10,r10,MSR_EE
mtmsrd r10,1
+#endif /* CONFIG_PPC_BOOK3E */
- bl .save_nvgprs
+ bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .do_syscall_trace_leave
- b .ret_from_except
+ bl do_syscall_trace_leave
+ b ret_from_except
/* Save non-volatile GPRs, if not already saved. */
_GLOBAL(save_nvgprs)
@@ -275,36 +345,48 @@ _GLOBAL(save_nvgprs)
*/
_GLOBAL(ppc_fork)
- bl .save_nvgprs
- bl .sys_fork
+ bl save_nvgprs
+ bl sys_fork
b syscall_exit
_GLOBAL(ppc_vfork)
- bl .save_nvgprs
- bl .sys_vfork
+ bl save_nvgprs
+ bl sys_vfork
b syscall_exit
_GLOBAL(ppc_clone)
- bl .save_nvgprs
- bl .sys_clone
+ bl save_nvgprs
+ bl sys_clone
b syscall_exit
_GLOBAL(ppc32_swapcontext)
- bl .save_nvgprs
- bl .compat_sys_swapcontext
+ bl save_nvgprs
+ bl compat_sys_swapcontext
b syscall_exit
_GLOBAL(ppc64_swapcontext)
- bl .save_nvgprs
- bl .sys_swapcontext
+ bl save_nvgprs
+ bl sys_swapcontext
b syscall_exit
_GLOBAL(ret_from_fork)
- bl .schedule_tail
+ bl schedule_tail
REST_NVGPRS(r1)
li r3,0
b syscall_exit
+_GLOBAL(ret_from_kernel_thread)
+ bl schedule_tail
+ REST_NVGPRS(r1)
+ mtlr r14
+ mr r3,r15
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+ mr r12,r14
+#endif
+ blrl
+ li r3,0
+ b syscall_exit
+
/*
* This routine switches between two different tasks. The process
* state of one is saved on its kernel stack. Then the state
@@ -334,6 +416,11 @@ _GLOBAL(_switch)
mflr r20 /* Return to switch caller */
mfmsr r22
li r0, MSR_FP
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r0,r0,MSR_VSX@h /* Disable VSX */
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif /* CONFIG_VSX */
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
oris r0,r0,MSR_VEC@h /* Disable altivec */
@@ -344,13 +431,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
and. r0,r0,r22
beq+ 1f
andc r22,r22,r0
- mtmsrd r22
+ MTMSRD(r22)
isync
1: std r20,_NIP(r1)
mfcr r23
std r23,_CCR(r1)
std r1,KSP(r3) /* Set old stack pointer */
+#ifdef CONFIG_PPC_BOOK3S_64
+BEGIN_FTR_SECTION
+ /* Event based branch registers */
+ mfspr r0, SPRN_BESCR
+ std r0, THREAD_BESCR(r3)
+ mfspr r0, SPRN_EBBHR
+ std r0, THREAD_EBBHR(r3)
+ mfspr r0, SPRN_EBBRR
+ std r0, THREAD_EBBRR(r3)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+#endif
+
#ifdef CONFIG_SMP
/* We need a sync somewhere here to make sure that if the
* previous task gets rescheduled on another CPU, it sees all
@@ -359,13 +458,40 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
sync
#endif /* CONFIG_SMP */
+ /*
+ * If we optimise away the clear of the reservation in system
+ * calls because we know the CPU tracks the address of the
+ * reservation, then we need to clear it here to cover the
+ * case that the kernel context switch path has no larx
+ * instructions.
+ */
+BEGIN_FTR_SECTION
+ ldarx r6,0,r1
+END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+#ifdef CONFIG_PPC_BOOK3S
+/* Cancel all explict user streams as they will have no use after context
+ * switch and will stop the HW from creating streams itself
+ */
+ DCBT_STOP_ALL_STREAM_IDS(r6)
+#endif
+
addi r6,r4,-THREAD /* Convert THREAD to 'current' */
std r6,PACACURRENT(r13) /* Set new 'current' */
ld r8,KSP(r4) /* new stack pointer */
+#ifdef CONFIG_PPC_BOOK3S
BEGIN_FTR_SECTION
+ BEGIN_FTR_SECTION_NESTED(95)
clrrdi r6,r8,28 /* get its ESID */
clrrdi r9,r1,28 /* get current sp ESID */
+ FTR_SECTION_ELSE_NESTED(95)
+ clrrdi r6,r8,40 /* get its 1T ESID */
+ clrrdi r9,r1,40 /* get current sp 1T ESID */
+ ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_1T_SEGMENT, 95)
+FTR_SECTION_ELSE
+ b 2f
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_SLB)
clrldi. r0,r6,2 /* is new ESID c00000000? */
cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */
cror eq,4*cr1+eq,eq
@@ -375,22 +501,38 @@ BEGIN_FTR_SECTION
ld r7,KSP_VSID(r4) /* Get new stack's VSID */
oris r0,r6,(SLB_ESID_V)@h
ori r0,r0,(SLB_NUM_BOLTED-1)@l
-
- /* Update the last bolted SLB */
+BEGIN_FTR_SECTION
+ li r9,MMU_SEGSIZE_1T /* insert B field */
+ oris r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h
+ rldimi r7,r9,SLB_VSID_SSIZE_SHIFT,0
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
+
+ /* Update the last bolted SLB. No write barriers are needed
+ * here, provided we only update the current CPU's SLB shadow
+ * buffer.
+ */
ld r9,PACA_SLBSHADOWPTR(r13)
li r12,0
- std r12,SLBSHADOW_STACKESID(r9) /* Clear ESID */
- std r7,SLBSHADOW_STACKVSID(r9) /* Save VSID */
- std r0,SLBSHADOW_STACKESID(r9) /* Save ESID */
+ std r12,SLBSHADOW_STACKESID(r9) /* Clear ESID */
+ li r12,SLBSHADOW_STACKVSID
+ STDX_BE r7,r12,r9 /* Save VSID */
+ li r12,SLBSHADOW_STACKESID
+ STDX_BE r0,r12,r9 /* Save ESID */
+
+ /* No need to check for MMU_FTR_NO_SLBIE_B here, since when
+ * we have 1TB segments, the only CPUs known to have the errata
+ * only support less than 1TB of system memory and we'll never
+ * actually hit this code path.
+ */
slbie r6
slbie r6 /* Workaround POWER5 < DD2.1 issue */
slbmte r7,r0
isync
-
2:
-END_FTR_SECTION_IFSET(CPU_FTR_SLB)
- clrrdi r7,r8,THREAD_SHIFT /* base of new stack */
+#endif /* !CONFIG_PPC_BOOK3S */
+
+ CURRENT_THREAD_INFO(r7, r8) /* base of new stack */
/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
because we don't need to leave the 288-byte ABI gap at the
top of the kernel stack. */
@@ -399,8 +541,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_SLB)
mr r1,r8 /* start using new stack pointer */
std r7,PACAKSAVE(r13)
- ld r6,_CCR(r1)
- mtcrf 0xFF,r6
+#ifdef CONFIG_PPC_BOOK3S_64
+BEGIN_FTR_SECTION
+ /* Event based branch registers */
+ ld r0, THREAD_BESCR(r4)
+ mtspr SPRN_BESCR, r0
+ ld r0, THREAD_EBBHR(r4)
+ mtspr SPRN_EBBHR, r0
+ ld r0, THREAD_EBBRR(r4)
+ mtspr SPRN_EBBRR, r0
+
+ ld r0,THREAD_TAR(r4)
+ mtspr SPRN_TAR,r0
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+#endif
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
@@ -408,6 +562,28 @@ BEGIN_FTR_SECTION
mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_PPC64
+BEGIN_FTR_SECTION
+ lwz r6,THREAD_DSCR_INHERIT(r4)
+ ld r0,THREAD_DSCR(r4)
+ cmpwi r6,0
+ bne 1f
+ ld r0,PACA_DSCR(r13)
+1:
+BEGIN_FTR_SECTION_NESTED(70)
+ mfspr r8, SPRN_FSCR
+ rldimi r8, r6, FSCR_DSCR_LG, (63 - FSCR_DSCR_LG)
+ mtspr SPRN_FSCR, r8
+END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70)
+ cmpd r0,r25
+ beq 2f
+ mtspr SPRN_DSCR,r0
+2:
+END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
+#endif
+
+ ld r6,_CCR(r1)
+ mtcrf 0xFF,r6
/* r3-r13 are destroyed -- Cort */
REST_8GPRS(14, r1)
@@ -424,7 +600,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
_GLOBAL(ret_from_except)
ld r11,_TRAP(r1)
andi. r0,r11,1
- bne .ret_from_except_lite
+ bne ret_from_except_lite
REST_NVGPRS(r1)
_GLOBAL(ret_from_except_lite)
@@ -433,87 +609,245 @@ _GLOBAL(ret_from_except_lite)
* can't change between when we test it and when we return
* from the interrupt.
*/
- mfmsr r10 /* Get current interrupt state */
- rldicl r9,r10,48,1 /* clear MSR_EE */
- rotldi r9,r9,16
- mtmsrd r9,1 /* Update machine state */
+#ifdef CONFIG_PPC_BOOK3E
+ wrteei 0
+#else
+ ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */
+ mtmsrd r10,1 /* Update machine state */
+#endif /* CONFIG_PPC_BOOK3E */
-#ifdef CONFIG_PREEMPT
- clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */
- li r0,_TIF_NEED_RESCHED /* bits to check */
+ CURRENT_THREAD_INFO(r9, r1)
ld r3,_MSR(r1)
+#ifdef CONFIG_PPC_BOOK3E
+ ld r10,PACACURRENT(r13)
+#endif /* CONFIG_PPC_BOOK3E */
ld r4,TI_FLAGS(r9)
- /* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */
- rlwimi r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING
- and. r0,r4,r0 /* check NEED_RESCHED and maybe SIGPENDING */
- bne do_work
-
-#else /* !CONFIG_PREEMPT */
- ld r3,_MSR(r1) /* Returning to user mode? */
andi. r3,r3,MSR_PR
- beq restore /* if not, just restore regs and return */
+ beq resume_kernel
+#ifdef CONFIG_PPC_BOOK3E
+ lwz r3,(THREAD+THREAD_DBCR0)(r10)
+#endif /* CONFIG_PPC_BOOK3E */
/* Check current_thread_info()->flags */
- clrrdi r9,r1,THREAD_SHIFT
- ld r4,TI_FLAGS(r9)
andi. r0,r4,_TIF_USER_WORK_MASK
- bne do_work
+#ifdef CONFIG_PPC_BOOK3E
+ bne 1f
+ /*
+ * Check to see if the dbcr0 register is set up to debug.
+ * Use the internal debug mode bit to do this.
+ */
+ andis. r0,r3,DBCR0_IDM@h
+ beq restore
+ mfmsr r0
+ rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */
+ mtmsr r0
+ mtspr SPRN_DBCR0,r3
+ li r10, -1
+ mtspr SPRN_DBSR,r10
+ b restore
+#else
+ beq restore
+#endif
+1: andi. r0,r4,_TIF_NEED_RESCHED
+ beq 2f
+ bl restore_interrupts
+ SCHEDULE_USER
+ b ret_from_except_lite
+2:
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ andi. r0,r4,_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM
+ bne 3f /* only restore TM if nothing else to do */
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl restore_tm_state
+ b restore
+3:
#endif
+ bl save_nvgprs
+ bl restore_interrupts
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_notify_resume
+ b ret_from_except
+
+resume_kernel:
+ /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
+ andis. r8,r4,_TIF_EMULATE_STACK_STORE@h
+ beq+ 1f
+
+ addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */
+
+ lwz r3,GPR1(r1)
+ subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */
+ mr r4,r1 /* src: current exception frame */
+ mr r1,r3 /* Reroute the trampoline frame to r1 */
+
+ /* Copy from the original to the trampoline. */
+ li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */
+ li r6,0 /* start offset: 0 */
+ mtctr r5
+2: ldx r0,r6,r4
+ stdx r0,r6,r3
+ addi r6,r6,8
+ bdnz 2b
+
+ /* Do real store operation to complete stwu */
+ lwz r5,GPR1(r1)
+ std r8,0(r5)
+
+ /* Clear _TIF_EMULATE_STACK_STORE flag */
+ lis r11,_TIF_EMULATE_STACK_STORE@h
+ addi r5,r9,TI_FLAGS
+0: ldarx r4,0,r5
+ andc r4,r4,r11
+ stdcx. r4,0,r5
+ bne- 0b
+1:
+
+#ifdef CONFIG_PREEMPT
+ /* Check if we need to preempt */
+ andi. r0,r4,_TIF_NEED_RESCHED
+ beq+ restore
+ /* Check that preempt_count() == 0 and interrupts are enabled */
+ lwz r8,TI_PREEMPT(r9)
+ cmpwi cr1,r8,0
+ ld r0,SOFTE(r1)
+ cmpdi r0,0
+ crandc eq,cr1*4+eq,eq
+ bne restore
+ /*
+ * Here we are preempting the current task. We want to make
+ * sure we are soft-disabled first and reconcile irq state.
+ */
+ RECONCILE_IRQ_STATE(r3,r4)
+1: bl preempt_schedule_irq
+
+ /* Re-test flags and eventually loop */
+ CURRENT_THREAD_INFO(r9, r1)
+ ld r4,TI_FLAGS(r9)
+ andi. r0,r4,_TIF_NEED_RESCHED
+ bne 1b
+
+ /*
+ * arch_local_irq_restore() from preempt_schedule_irq above may
+ * enable hard interrupt but we really should disable interrupts
+ * when we return from the interrupt, and so that we don't get
+ * interrupted after loading SRR0/1.
+ */
+#ifdef CONFIG_PPC_BOOK3E
+ wrteei 0
+#else
+ ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */
+ mtmsrd r10,1 /* Update machine state */
+#endif /* CONFIG_PPC_BOOK3E */
+#endif /* CONFIG_PREEMPT */
+
+ .globl fast_exc_return_irq
+fast_exc_return_irq:
restore:
-#ifdef CONFIG_PPC_ISERIES
+ /*
+ * This is the main kernel exit path. First we check if we
+ * are about to re-enable interrupts
+ */
ld r5,SOFTE(r1)
- cmpdi 0,r5,0
- beq 4f
- /* Check for pending interrupts (iSeries) */
- ld r3,PACALPPACAPTR(r13)
- ld r3,LPPACAANYINT(r3)
- cmpdi r3,0
- beq+ 4f /* skip do_IRQ if no interrupts */
+ lbz r6,PACASOFTIRQEN(r13)
+ cmpwi cr0,r5,0
+ beq restore_irq_off
- li r3,0
- stb r3,PACAPROCENABLED(r13) /* ensure we are soft-disabled */
- ori r10,r10,MSR_EE
- mtmsrd r10 /* hard-enable again */
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl .do_IRQ
- b .ret_from_except_lite /* loop back and handle more */
+ /* We are enabling, were we already enabled ? Yes, just return */
+ cmpwi cr0,r6,1
+ beq cr0,do_restore
-4: stb r5,PACAPROCENABLED(r13)
-#endif
+ /*
+ * We are about to soft-enable interrupts (we are hard disabled
+ * at this point). We check if there's anything that needs to
+ * be replayed first.
+ */
+ lbz r0,PACAIRQHAPPENED(r13)
+ cmpwi cr0,r0,0
+ bne- restore_check_irq_replay
- ld r3,_MSR(r1)
- andi. r0,r3,MSR_RI
- beq- unrecov_restore
+ /*
+ * Get here when nothing happened while soft-disabled, just
+ * soft-enable and move-on. We will hard-enable as a side
+ * effect of rfi
+ */
+restore_no_replay:
+ TRACE_ENABLE_INTS
+ li r0,1
+ stb r0,PACASOFTIRQEN(r13);
- andi. r0,r3,MSR_PR
+ /*
+ * Final return path. BookE is handled in a different file
+ */
+do_restore:
+#ifdef CONFIG_PPC_BOOK3E
+ b exception_return_book3e
+#else
+ /*
+ * Clear the reservation. If we know the CPU tracks the address of
+ * the reservation then we can potentially save some cycles and use
+ * a larx. On POWER6 and POWER7 this is significantly faster.
+ */
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+FTR_SECTION_ELSE
+ ldarx r4,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
/*
- * r13 is our per cpu area, only restore it if we are returning to
- * userspace
+ * Some code path such as load_up_fpu or altivec return directly
+ * here. They run entirely hard disabled and do not alter the
+ * interrupt state. They also don't use lwarx/stwcx. and thus
+ * are known not to leave dangling reservations.
*/
- beq 1f
- ACCOUNT_CPU_USER_EXIT(r3, r4)
- REST_GPR(13, r1)
-1:
- ld r3,_CTR(r1)
+ .globl fast_exception_return
+fast_exception_return:
+ ld r3,_MSR(r1)
+ ld r4,_CTR(r1)
ld r0,_LINK(r1)
- mtctr r3
+ mtctr r4
mtlr r0
- ld r3,_XER(r1)
- mtspr SPRN_XER,r3
+ ld r4,_XER(r1)
+ mtspr SPRN_XER,r4
REST_8GPRS(5, r1)
- stdcx. r0,0,r1 /* to clear the reservation */
+ andi. r0,r3,MSR_RI
+ beq- unrecov_restore
- mfmsr r0
- li r2, MSR_RI
- andc r0,r0,r2
- mtmsrd r0,1
+ /* Load PPR from thread struct before we clear MSR:RI */
+BEGIN_FTR_SECTION
+ ld r2,PACACURRENT(r13)
+ ld r2,TASKTHREADPPR(r2)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
- ld r0,_MSR(r1)
- mtspr SPRN_SRR1,r0
+ /*
+ * Clear RI before restoring r13. If we are returning to
+ * userspace and we take an exception after restoring r13,
+ * we end up corrupting the userspace r13 value.
+ */
+ ld r4,PACAKMSR(r13) /* Get kernel MSR without EE */
+ andc r4,r4,r0 /* r0 contains MSR_RI here */
+ mtmsrd r4,1
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /* TM debug */
+ std r3, PACATMSCRATCH(r13) /* Stash returned-to MSR */
+#endif
+ /*
+ * r13 is our per cpu area, only restore it if we are returning to
+ * userspace the value stored in the stack frame may belong to
+ * another CPU.
+ */
+ andi. r0,r3,MSR_PR
+ beq 1f
+BEGIN_FTR_SECTION
+ mtspr SPRN_PPR,r2 /* Restore PPR */
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ ACCOUNT_CPU_USER_EXIT(r2, r4)
+ REST_GPR(13, r1)
+1:
+ mtspr SPRN_SRR1,r3
ld r2,_CCR(r1)
mtcrf 0xFF,r2
@@ -529,61 +863,88 @@ restore:
rfid
b . /* prevent speculative execution */
-/* Note: this must change if we start using the TIF_NOTIFY_RESUME bit */
-do_work:
-#ifdef CONFIG_PREEMPT
- andi. r0,r3,MSR_PR /* Returning to user mode? */
- bne user_work
- /* Check that preempt_count() == 0 and interrupts are enabled */
- lwz r8,TI_PREEMPT(r9)
- cmpwi cr1,r8,0
-#ifdef CONFIG_PPC_ISERIES
- ld r0,SOFTE(r1)
- cmpdi r0,0
-#else
- andi. r0,r3,MSR_EE
-#endif
- crandc eq,cr1*4+eq,eq
- bne restore
- /* here we are preempting the current task */
-1:
-#ifdef CONFIG_PPC_ISERIES
- li r0,1
- stb r0,PACAPROCENABLED(r13)
-#endif
- ori r10,r10,MSR_EE
- mtmsrd r10,1 /* reenable interrupts */
- bl .preempt_schedule
- mfmsr r10
- clrrdi r9,r1,THREAD_SHIFT
- rldicl r10,r10,48,1 /* disable interrupts again */
- rotldi r10,r10,16
- mtmsrd r10,1
- ld r4,TI_FLAGS(r9)
- andi. r0,r4,_TIF_NEED_RESCHED
- bne 1b
- b restore
-
-user_work:
-#endif
- /* Enable interrupts */
- ori r10,r10,MSR_EE
- mtmsrd r10,1
+#endif /* CONFIG_PPC_BOOK3E */
- andi. r0,r4,_TIF_NEED_RESCHED
+ /*
+ * We are returning to a context with interrupts soft disabled.
+ *
+ * However, we may also about to hard enable, so we need to
+ * make sure that in this case, we also clear PACA_IRQ_HARD_DIS
+ * or that bit can get out of sync and bad things will happen
+ */
+restore_irq_off:
+ ld r3,_MSR(r1)
+ lbz r7,PACAIRQHAPPENED(r13)
+ andi. r0,r3,MSR_EE
beq 1f
- bl .schedule
- b .ret_from_except_lite
+ rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS
+ stb r7,PACAIRQHAPPENED(r13)
+1: li r0,0
+ stb r0,PACASOFTIRQEN(r13);
+ TRACE_DISABLE_INTS
+ b do_restore
-1: bl .save_nvgprs
- li r3,0
- addi r4,r1,STACK_FRAME_OVERHEAD
- bl .do_signal
- b .ret_from_except
+ /*
+ * Something did happen, check if a re-emit is needed
+ * (this also clears paca->irq_happened)
+ */
+restore_check_irq_replay:
+ /* XXX: We could implement a fast path here where we check
+ * for irq_happened being just 0x01, in which case we can
+ * clear it and return. That means that we would potentially
+ * miss a decrementer having wrapped all the way around.
+ *
+ * Still, this might be useful for things like hash_page
+ */
+ bl __check_irq_replay
+ cmpwi cr0,r3,0
+ beq restore_no_replay
+
+ /*
+ * We need to re-emit an interrupt. We do so by re-using our
+ * existing exception frame. We first change the trap value,
+ * but we need to ensure we preserve the low nibble of it
+ */
+ ld r4,_TRAP(r1)
+ clrldi r4,r4,60
+ or r4,r4,r3
+ std r4,_TRAP(r1)
+ /*
+ * Then find the right handler and call it. Interrupts are
+ * still soft-disabled and we keep them that way.
+ */
+ cmpwi cr0,r3,0x500
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl do_IRQ
+ b ret_from_except
+1: cmpwi cr0,r3,0x900
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl timer_interrupt
+ b ret_from_except
+#ifdef CONFIG_PPC_DOORBELL
+1:
+#ifdef CONFIG_PPC_BOOK3E
+ cmpwi cr0,r3,0x280
+#else
+ BEGIN_FTR_SECTION
+ cmpwi cr0,r3,0xe80
+ FTR_SECTION_ELSE
+ cmpwi cr0,r3,0xa00
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+#endif /* CONFIG_PPC_BOOK3E */
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl doorbell_exception
+ b ret_from_except
+#endif /* CONFIG_PPC_DOORBELL */
+1: b ret_from_except /* What else to do here ? */
+
unrecov_restore:
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .unrecoverable_exception
+ bl unrecoverable_exception
b unrecov_restore
#ifdef CONFIG_PPC_RTAS
@@ -619,10 +980,6 @@ _GLOBAL(enter_rtas)
std r7,_DAR(r1)
mfdsisr r8
std r8,_DSISR(r1)
- mfsrr0 r9
- std r9,_SRR0(r1)
- mfsrr1 r10
- std r10,_SRR1(r1)
/* Temporary workaround to clear CR until RTAS can be modified to
* ignore all bits.
@@ -630,20 +987,21 @@ _GLOBAL(enter_rtas)
li r0,0
mtcr r0
+#ifdef CONFIG_BUG
/* There is no way it is acceptable to get here with interrupts enabled,
* check it with the asm equivalent of WARN_ON
*/
- mfmsr r6
- andi. r0,r6,MSR_EE
+ lbz r0,PACASOFTIRQEN(r13)
1: tdnei r0,0
-.section __bug_table,"a"
- .llong 1b,__LINE__ + 0x1000000, 1f, 2f
-.previous
-.section .rodata,"a"
-1: .asciz __FILE__
-2: .asciz "enter_rtas"
-.previous
+ EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
+#endif
+ /* Hard-disable interrupts */
+ mfmsr r6
+ rldicl r7,r6,48,1
+ rotldi r7,r7,16
+ mtmsrd r7,1
+
/* Unfortunately, the stack pointer and the MSR are also clobbered,
* so they are saved in the PACA which allows us to restore
* our original state after RTAS returns.
@@ -652,7 +1010,7 @@ _GLOBAL(enter_rtas)
std r6,PACASAVEDMSR(r13)
/* Setup our real return addr */
- LOAD_REG_ADDR(r4,.rtas_return_loc)
+ LOAD_REG_ADDR(r4,rtas_return_loc)
clrldi r4,r4,2 /* convert to realmode address */
mtlr r4
@@ -662,9 +1020,8 @@ _GLOBAL(enter_rtas)
li r9,1
rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG)
- ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP
+ ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI|MSR_LE
andc r6,r0,r9
- ori r6,r6,MSR_RI
sync /* disable interrupts so SRR0/1 */
mtmsrd r0 /* don't get trashed */
@@ -677,11 +1034,17 @@ _GLOBAL(enter_rtas)
rfid
b . /* prevent speculative execution */
-_STATIC(rtas_return_loc)
+rtas_return_loc:
+ FIXUP_ENDIAN
+
/* relocation is off at this point */
- mfspr r4,SPRN_SPRG3 /* Get PACA */
+ GET_PACA(r4)
clrldi r4,r4,2 /* convert to realmode address */
+ bcl 20,31,$+4
+0: mflr r3
+ ld r3,(1f-0b)(r3) /* get &rtas_restore_regs */
+
mfmsr r6
li r0,MSR_RI
andc r6,r6,r0
@@ -689,7 +1052,6 @@ _STATIC(rtas_return_loc)
mtmsrd r6
ld r1,PACAR1(r4) /* Restore our SP */
- LOAD_REG_IMMEDIATE(r3,.rtas_restore_regs)
ld r4,PACASAVEDMSR(r4) /* Restore our MSR */
mtspr SPRN_SRR0,r3
@@ -697,14 +1059,17 @@ _STATIC(rtas_return_loc)
rfid
b . /* prevent speculative execution */
-_STATIC(rtas_restore_regs)
+ .align 3
+1: .llong rtas_restore_regs
+
+rtas_restore_regs:
/* relocation is on at this point */
REST_GPR(2, r1) /* Restore the TOC */
REST_GPR(13, r1) /* Restore paca */
REST_8GPRS(14, r1) /* Restore the non-volatiles */
REST_10GPRS(22, r1) /* ditto */
- mfspr r13,SPRN_SPRG3
+ GET_PACA(r13)
ld r4,_CCR(r1)
mtcr r4
@@ -716,10 +1081,6 @@ _STATIC(rtas_restore_regs)
mtdar r7
ld r8,_DSISR(r1)
mtdsisr r8
- ld r9,_SRR0(r1)
- mtsrr0 r9
- ld r10,_SRR1(r1)
- mtsrr1 r10
addi r1,r1,RTAS_FRAME_SIZE /* Unstack our frame */
ld r0,16(r1) /* get return address */
@@ -729,8 +1090,6 @@ _STATIC(rtas_restore_regs)
#endif /* CONFIG_PPC_RTAS */
-#ifdef CONFIG_PPC_MULTIPLATFORM
-
_GLOBAL(enter_prom)
mflr r0
std r0,16(r1)
@@ -740,46 +1099,39 @@ _GLOBAL(enter_prom)
* of all registers that it saves. We therefore save those registers
* PROM might touch to the stack. (r0, r3-r13 are caller saved)
*/
- SAVE_8GPRS(2, r1)
+ SAVE_GPR(2, r1)
SAVE_GPR(13, r1)
SAVE_8GPRS(14, r1)
SAVE_10GPRS(22, r1)
- mfcr r4
- std r4,_CCR(r1)
- mfctr r5
- std r5,_CTR(r1)
- mfspr r6,SPRN_XER
- std r6,_XER(r1)
- mfdar r7
- std r7,_DAR(r1)
- mfdsisr r8
- std r8,_DSISR(r1)
- mfsrr0 r9
- std r9,_SRR0(r1)
- mfsrr1 r10
- std r10,_SRR1(r1)
+ mfcr r10
mfmsr r11
+ std r10,_CCR(r1)
std r11,_MSR(r1)
- /* Get the PROM entrypoint */
- ld r0,GPR4(r1)
- mtlr r0
+ /* Put PROM address in SRR0 */
+ mtsrr0 r4
- /* Switch MSR to 32 bits mode
+ /* Setup our trampoline return addr in LR */
+ bcl 20,31,$+4
+0: mflr r4
+ addi r4,r4,(1f - 0b)
+ mtlr r4
+
+ /* Prepare a 32-bit mode big endian MSR
*/
- mfmsr r11
- li r12,1
- rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
- andc r11,r11,r12
- li r12,1
- rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
- andc r11,r11,r12
- mtmsrd r11
- isync
+#ifdef CONFIG_PPC_BOOK3E
+ rlwinm r11,r11,0,1,31
+ mtsrr1 r11
+ rfi
+#else /* CONFIG_PPC_BOOK3E */
+ LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
+ andc r11,r11,r12
+ mtsrr1 r11
+ rfid
+#endif /* CONFIG_PPC_BOOK3E */
- /* Restore arguments & enter PROM here... */
- ld r3,GPR3(r1)
- blrl
+1: /* Return from OF */
+ FIXUP_ENDIAN
/* Just make sure that r1 top 32 bits didn't get
* corrupt by OF
@@ -788,7 +1140,7 @@ _GLOBAL(enter_prom)
/* Restore the MSR (back to 64 bits) */
ld r0,_MSR(r1)
- mtmsrd r0
+ MTMSRD(r0)
isync
/* Restore other registers */
@@ -798,22 +1150,139 @@ _GLOBAL(enter_prom)
REST_10GPRS(22, r1)
ld r4,_CCR(r1)
mtcr r4
- ld r5,_CTR(r1)
- mtctr r5
- ld r6,_XER(r1)
- mtspr SPRN_XER,r6
- ld r7,_DAR(r1)
- mtdar r7
- ld r8,_DSISR(r1)
- mtdsisr r8
- ld r9,_SRR0(r1)
- mtsrr0 r9
- ld r10,_SRR1(r1)
- mtsrr1 r10
addi r1,r1,PROM_FRAME_SIZE
ld r0,16(r1)
mtlr r0
blr
-
-#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+#ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+ blr
+
+_GLOBAL_TOC(ftrace_caller)
+ /* Taken from output of objdump from lib64/glibc */
+ mflr r3
+ ld r11, 0(r1)
+ stdu r1, -112(r1)
+ std r3, 128(r1)
+ ld r4, 16(r11)
+ subi r3, r3, MCOUNT_INSN_SIZE
+.globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ nop
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ b ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+#endif
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+_GLOBAL(ftrace_stub)
+ blr
+#else
+_GLOBAL_TOC(_mcount)
+ /* Taken from output of objdump from lib64/glibc */
+ mflr r3
+ ld r11, 0(r1)
+ stdu r1, -112(r1)
+ std r3, 128(r1)
+ ld r4, 16(r11)
+
+ subi r3, r3, MCOUNT_INSN_SIZE
+ LOAD_REG_ADDR(r5,ftrace_trace_function)
+ ld r5,0(r5)
+ ld r5,0(r5)
+ mtctr r5
+ bctrl
+ nop
+
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ b ftrace_graph_caller
+#endif
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+_GLOBAL(ftrace_stub)
+ blr
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(ftrace_graph_caller)
+ /* load r4 with local address */
+ ld r4, 128(r1)
+ subi r4, r4, MCOUNT_INSN_SIZE
+
+ /* get the parent address */
+ ld r11, 112(r1)
+ addi r3, r11, 16
+
+ bl prepare_ftrace_return
+ nop
+
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+ blr
+
+_GLOBAL(return_to_handler)
+ /* need to save return values */
+ std r4, -24(r1)
+ std r3, -16(r1)
+ std r31, -8(r1)
+ mr r31, r1
+ stdu r1, -112(r1)
+
+ bl ftrace_return_to_handler
+ nop
+
+ /* return value has real return address */
+ mtlr r3
+
+ ld r1, 0(r1)
+ ld r4, -24(r1)
+ ld r3, -16(r1)
+ ld r31, -8(r1)
+
+ /* Jump back to real return address */
+ blr
+
+_GLOBAL(mod_return_to_handler)
+ /* need to save return values */
+ std r4, -32(r1)
+ std r3, -24(r1)
+ /* save TOC */
+ std r2, -16(r1)
+ std r31, -8(r1)
+ mr r31, r1
+ stdu r1, -112(r1)
+
+ /*
+ * We are in a module using the module's TOC.
+ * Switch to our TOC to run inside the core kernel.
+ */
+ ld r2, PACATOC(r13)
+
+ bl ftrace_return_to_handler
+ nop
+
+ /* return value has real return address */
+ mtlr r3
+
+ ld r1, 0(r1)
+ ld r4, -32(r1)
+ ld r3, -24(r1)
+ ld r2, -16(r1)
+ ld r31, -8(r1)
+
+ /* Jump back to real return address */
+ blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S
new file mode 100644
index 00000000000..9f1ebf7338f
--- /dev/null
+++ b/arch/powerpc/kernel/epapr_hcalls.S
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/epapr_hcalls.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-compat.h>
+#include <asm/asm-offsets.h>
+
+#ifndef CONFIG_PPC64
+/* epapr_ev_idle() was derived from e500_idle() */
+_GLOBAL(epapr_ev_idle)
+ CURRENT_THREAD_INFO(r3, r1)
+ PPC_LL r4, TI_LOCAL_FLAGS(r3) /* set napping bit */
+ ori r4, r4,_TLF_NAPPING /* so when we take an exception */
+ PPC_STL r4, TI_LOCAL_FLAGS(r3) /* it will return to our caller */
+
+ wrteei 1
+
+idle_loop:
+ LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE))
+
+.global epapr_ev_idle_start
+epapr_ev_idle_start:
+ li r3, -1
+ nop
+ nop
+ nop
+
+ /*
+ * Guard against spurious wakeups from a hypervisor --
+ * only interrupt will cause us to return to LR due to
+ * _TLF_NAPPING.
+ */
+ b idle_loop
+#endif
+
+/* Hypercall entry point. Will be patched with device tree instructions. */
+.global epapr_hypercall_start
+epapr_hypercall_start:
+ li r3, -1
+ nop
+ nop
+ nop
+ blr
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
new file mode 100644
index 00000000000..59e4ba74975
--- /dev/null
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -0,0 +1,85 @@
+/*
+ * ePAPR para-virtualization support.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <asm/epapr_hcalls.h>
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+#include <asm/machdep.h>
+
+#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
+extern void epapr_ev_idle(void);
+extern u32 epapr_ev_idle_start[];
+#endif
+
+bool epapr_paravirt_enabled;
+static bool __maybe_unused epapr_has_idle;
+
+static int __init early_init_dt_scan_epapr(unsigned long node,
+ const char *uname,
+ int depth, void *data)
+{
+ const u32 *insts;
+ int len;
+ int i;
+
+ insts = of_get_flat_dt_prop(node, "hcall-instructions", &len);
+ if (!insts)
+ return 0;
+
+ if (len % 4 || len > (4 * 4))
+ return -1;
+
+ for (i = 0; i < (len / 4); i++) {
+ u32 inst = be32_to_cpu(insts[i]);
+ patch_instruction(epapr_hypercall_start + i, inst);
+#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
+ patch_instruction(epapr_ev_idle_start + i, inst);
+#endif
+ }
+
+#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
+ if (of_get_flat_dt_prop(node, "has-idle", NULL))
+ epapr_has_idle = true;
+#endif
+
+ epapr_paravirt_enabled = true;
+
+ return 1;
+}
+
+int __init epapr_paravirt_early_init(void)
+{
+ of_scan_flat_dt(early_init_dt_scan_epapr, NULL);
+
+ return 0;
+}
+
+static int __init epapr_idle_init(void)
+{
+#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
+ if (epapr_has_idle)
+ ppc_md.power_save = epapr_ev_idle;
+#endif
+
+ return 0;
+}
+
+postcore_initcall(epapr_idle_init);
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
new file mode 100644
index 00000000000..bb9cac6c805
--- /dev/null
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -0,0 +1,1650 @@
+/*
+ * Boot code and exception vectors for Book3E processors
+ *
+ * Copyright (C) 2007 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cputable.h>
+#include <asm/setup.h>
+#include <asm/thread_info.h>
+#include <asm/reg_a2.h>
+#include <asm/exception-64e.h>
+#include <asm/bug.h>
+#include <asm/irqflags.h>
+#include <asm/ptrace.h>
+#include <asm/ppc-opcode.h>
+#include <asm/mmu.h>
+#include <asm/hw_irq.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_booke_hv_asm.h>
+
+/* XXX This will ultimately add space for a special exception save
+ * structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...
+ * when taking special interrupts. For now we don't support that,
+ * special interrupts from within a non-standard level will probably
+ * blow you up
+ */
+#define SPECIAL_EXC_SRR0 0
+#define SPECIAL_EXC_SRR1 1
+#define SPECIAL_EXC_SPRG_GEN 2
+#define SPECIAL_EXC_SPRG_TLB 3
+#define SPECIAL_EXC_MAS0 4
+#define SPECIAL_EXC_MAS1 5
+#define SPECIAL_EXC_MAS2 6
+#define SPECIAL_EXC_MAS3 7
+#define SPECIAL_EXC_MAS6 8
+#define SPECIAL_EXC_MAS7 9
+#define SPECIAL_EXC_MAS5 10 /* E.HV only */
+#define SPECIAL_EXC_MAS8 11 /* E.HV only */
+#define SPECIAL_EXC_IRQHAPPENED 12
+#define SPECIAL_EXC_DEAR 13
+#define SPECIAL_EXC_ESR 14
+#define SPECIAL_EXC_SOFTE 15
+#define SPECIAL_EXC_CSRR0 16
+#define SPECIAL_EXC_CSRR1 17
+/* must be even to keep 16-byte stack alignment */
+#define SPECIAL_EXC_END 18
+
+#define SPECIAL_EXC_FRAME_SIZE (INT_FRAME_SIZE + SPECIAL_EXC_END * 8)
+#define SPECIAL_EXC_FRAME_OFFS (INT_FRAME_SIZE - 288)
+
+#define SPECIAL_EXC_STORE(reg, name) \
+ std reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1)
+
+#define SPECIAL_EXC_LOAD(reg, name) \
+ ld reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1)
+
+special_reg_save:
+ lbz r9,PACAIRQHAPPENED(r13)
+ RECONCILE_IRQ_STATE(r3,r4)
+
+ /*
+ * We only need (or have stack space) to save this stuff if
+ * we interrupted the kernel.
+ */
+ ld r3,_MSR(r1)
+ andi. r3,r3,MSR_PR
+ bnelr
+
+ /* Copy info into temporary exception thread info */
+ ld r11,PACAKSAVE(r13)
+ CURRENT_THREAD_INFO(r11, r11)
+ CURRENT_THREAD_INFO(r12, r1)
+ ld r10,TI_FLAGS(r11)
+ std r10,TI_FLAGS(r12)
+ ld r10,TI_PREEMPT(r11)
+ std r10,TI_PREEMPT(r12)
+ ld r10,TI_TASK(r11)
+ std r10,TI_TASK(r12)
+
+ /*
+ * Advance to the next TLB exception frame for handler
+ * types that don't do it automatically.
+ */
+ LOAD_REG_ADDR(r11,extlb_level_exc)
+ lwz r12,0(r11)
+ mfspr r10,SPRN_SPRG_TLB_EXFRAME
+ add r10,r10,r12
+ mtspr SPRN_SPRG_TLB_EXFRAME,r10
+
+ /*
+ * Save registers needed to allow nesting of certain exceptions
+ * (such as TLB misses) inside special exception levels
+ */
+ mfspr r10,SPRN_SRR0
+ SPECIAL_EXC_STORE(r10,SRR0)
+ mfspr r10,SPRN_SRR1
+ SPECIAL_EXC_STORE(r10,SRR1)
+ mfspr r10,SPRN_SPRG_GEN_SCRATCH
+ SPECIAL_EXC_STORE(r10,SPRG_GEN)
+ mfspr r10,SPRN_SPRG_TLB_SCRATCH
+ SPECIAL_EXC_STORE(r10,SPRG_TLB)
+ mfspr r10,SPRN_MAS0
+ SPECIAL_EXC_STORE(r10,MAS0)
+ mfspr r10,SPRN_MAS1
+ SPECIAL_EXC_STORE(r10,MAS1)
+ mfspr r10,SPRN_MAS2
+ SPECIAL_EXC_STORE(r10,MAS2)
+ mfspr r10,SPRN_MAS3
+ SPECIAL_EXC_STORE(r10,MAS3)
+ mfspr r10,SPRN_MAS6
+ SPECIAL_EXC_STORE(r10,MAS6)
+ mfspr r10,SPRN_MAS7
+ SPECIAL_EXC_STORE(r10,MAS7)
+BEGIN_FTR_SECTION
+ mfspr r10,SPRN_MAS5
+ SPECIAL_EXC_STORE(r10,MAS5)
+ mfspr r10,SPRN_MAS8
+ SPECIAL_EXC_STORE(r10,MAS8)
+
+ /* MAS5/8 could have inappropriate values if we interrupted KVM code */
+ li r10,0
+ mtspr SPRN_MAS5,r10
+ mtspr SPRN_MAS8,r10
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+ SPECIAL_EXC_STORE(r9,IRQHAPPENED)
+
+ mfspr r10,SPRN_DEAR
+ SPECIAL_EXC_STORE(r10,DEAR)
+ mfspr r10,SPRN_ESR
+ SPECIAL_EXC_STORE(r10,ESR)
+
+ lbz r10,PACASOFTIRQEN(r13)
+ SPECIAL_EXC_STORE(r10,SOFTE)
+ ld r10,_NIP(r1)
+ SPECIAL_EXC_STORE(r10,CSRR0)
+ ld r10,_MSR(r1)
+ SPECIAL_EXC_STORE(r10,CSRR1)
+
+ blr
+
+ret_from_level_except:
+ ld r3,_MSR(r1)
+ andi. r3,r3,MSR_PR
+ beq 1f
+ b ret_from_except
+1:
+
+ LOAD_REG_ADDR(r11,extlb_level_exc)
+ lwz r12,0(r11)
+ mfspr r10,SPRN_SPRG_TLB_EXFRAME
+ sub r10,r10,r12
+ mtspr SPRN_SPRG_TLB_EXFRAME,r10
+
+ /*
+ * It's possible that the special level exception interrupted a
+ * TLB miss handler, and inserted the same entry that the
+ * interrupted handler was about to insert. On CPUs without TLB
+ * write conditional, this can result in a duplicate TLB entry.
+ * Wipe all non-bolted entries to be safe.
+ *
+ * Note that this doesn't protect against any TLB misses
+ * we may take accessing the stack from here to the end of
+ * the special level exception. It's not clear how we can
+ * reasonably protect against that, but only CPUs with
+ * neither TLB write conditional nor bolted kernel memory
+ * are affected. Do any such CPUs even exist?
+ */
+ PPC_TLBILX_ALL(0,R0)
+
+ REST_NVGPRS(r1)
+
+ SPECIAL_EXC_LOAD(r10,SRR0)
+ mtspr SPRN_SRR0,r10
+ SPECIAL_EXC_LOAD(r10,SRR1)
+ mtspr SPRN_SRR1,r10
+ SPECIAL_EXC_LOAD(r10,SPRG_GEN)
+ mtspr SPRN_SPRG_GEN_SCRATCH,r10
+ SPECIAL_EXC_LOAD(r10,SPRG_TLB)
+ mtspr SPRN_SPRG_TLB_SCRATCH,r10
+ SPECIAL_EXC_LOAD(r10,MAS0)
+ mtspr SPRN_MAS0,r10
+ SPECIAL_EXC_LOAD(r10,MAS1)
+ mtspr SPRN_MAS1,r10
+ SPECIAL_EXC_LOAD(r10,MAS2)
+ mtspr SPRN_MAS2,r10
+ SPECIAL_EXC_LOAD(r10,MAS3)
+ mtspr SPRN_MAS3,r10
+ SPECIAL_EXC_LOAD(r10,MAS6)
+ mtspr SPRN_MAS6,r10
+ SPECIAL_EXC_LOAD(r10,MAS7)
+ mtspr SPRN_MAS7,r10
+BEGIN_FTR_SECTION
+ SPECIAL_EXC_LOAD(r10,MAS5)
+ mtspr SPRN_MAS5,r10
+ SPECIAL_EXC_LOAD(r10,MAS8)
+ mtspr SPRN_MAS8,r10
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+
+ lbz r6,PACASOFTIRQEN(r13)
+ ld r5,SOFTE(r1)
+
+ /* Interrupts had better not already be enabled... */
+ twnei r6,0
+
+ cmpwi cr0,r5,0
+ beq 1f
+
+ TRACE_ENABLE_INTS
+ stb r5,PACASOFTIRQEN(r13)
+1:
+ /*
+ * Restore PACAIRQHAPPENED rather than setting it based on
+ * the return MSR[EE], since we could have interrupted
+ * __check_irq_replay() or other inconsistent transitory
+ * states that must remain that way.
+ */
+ SPECIAL_EXC_LOAD(r10,IRQHAPPENED)
+ stb r10,PACAIRQHAPPENED(r13)
+
+ SPECIAL_EXC_LOAD(r10,DEAR)
+ mtspr SPRN_DEAR,r10
+ SPECIAL_EXC_LOAD(r10,ESR)
+ mtspr SPRN_ESR,r10
+
+ stdcx. r0,0,r1 /* to clear the reservation */
+
+ REST_4GPRS(2, r1)
+ REST_4GPRS(6, r1)
+
+ ld r10,_CTR(r1)
+ ld r11,_XER(r1)
+ mtctr r10
+ mtxer r11
+
+ blr
+
+.macro ret_from_level srr0 srr1 paca_ex scratch
+ bl ret_from_level_except
+
+ ld r10,_LINK(r1)
+ ld r11,_CCR(r1)
+ ld r0,GPR13(r1)
+ mtlr r10
+ mtcr r11
+
+ ld r10,GPR10(r1)
+ ld r11,GPR11(r1)
+ ld r12,GPR12(r1)
+ mtspr \scratch,r0
+
+ std r10,\paca_ex+EX_R10(r13);
+ std r11,\paca_ex+EX_R11(r13);
+ ld r10,_NIP(r1)
+ ld r11,_MSR(r1)
+ ld r0,GPR0(r1)
+ ld r1,GPR1(r1)
+ mtspr \srr0,r10
+ mtspr \srr1,r11
+ ld r10,\paca_ex+EX_R10(r13)
+ ld r11,\paca_ex+EX_R11(r13)
+ mfspr r13,\scratch
+.endm
+
+ret_from_crit_except:
+ ret_from_level SPRN_CSRR0 SPRN_CSRR1 PACA_EXCRIT SPRN_SPRG_CRIT_SCRATCH
+ rfci
+
+ret_from_mc_except:
+ ret_from_level SPRN_MCSRR0 SPRN_MCSRR1 PACA_EXMC SPRN_SPRG_MC_SCRATCH
+ rfmci
+
+/* Exception prolog code for all exceptions */
+#define EXCEPTION_PROLOG(n, intnum, type, addition) \
+ mtspr SPRN_SPRG_##type##_SCRATCH,r13; /* get spare registers */ \
+ mfspr r13,SPRN_SPRG_PACA; /* get PACA */ \
+ std r10,PACA_EX##type+EX_R10(r13); \
+ std r11,PACA_EX##type+EX_R11(r13); \
+ mfcr r10; /* save CR */ \
+ mfspr r11,SPRN_##type##_SRR1;/* what are we coming from */ \
+ DO_KVM intnum,SPRN_##type##_SRR1; /* KVM hook */ \
+ stw r10,PACA_EX##type+EX_CR(r13); /* save old CR in the PACA */ \
+ addition; /* additional code for that exc. */ \
+ std r1,PACA_EX##type+EX_R1(r13); /* save old r1 in the PACA */ \
+ type##_SET_KSTACK; /* get special stack if necessary */\
+ andi. r10,r11,MSR_PR; /* save stack pointer */ \
+ beq 1f; /* branch around if supervisor */ \
+ ld r1,PACAKSAVE(r13); /* get kernel stack coming from usr */\
+1: cmpdi cr1,r1,0; /* check if SP makes sense */ \
+ bge- cr1,exc_##n##_bad_stack;/* bad stack (TODO: out of line) */ \
+ mfspr r10,SPRN_##type##_SRR0; /* read SRR0 before touching stack */
+
+/* Exception type-specific macros */
+#define GEN_SET_KSTACK \
+ subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */
+#define SPRN_GEN_SRR0 SPRN_SRR0
+#define SPRN_GEN_SRR1 SPRN_SRR1
+
+#define GDBELL_SET_KSTACK GEN_SET_KSTACK
+#define SPRN_GDBELL_SRR0 SPRN_GSRR0
+#define SPRN_GDBELL_SRR1 SPRN_GSRR1
+
+#define CRIT_SET_KSTACK \
+ ld r1,PACA_CRIT_STACK(r13); \
+ subi r1,r1,SPECIAL_EXC_FRAME_SIZE
+#define SPRN_CRIT_SRR0 SPRN_CSRR0
+#define SPRN_CRIT_SRR1 SPRN_CSRR1
+
+#define DBG_SET_KSTACK \
+ ld r1,PACA_DBG_STACK(r13); \
+ subi r1,r1,SPECIAL_EXC_FRAME_SIZE
+#define SPRN_DBG_SRR0 SPRN_DSRR0
+#define SPRN_DBG_SRR1 SPRN_DSRR1
+
+#define MC_SET_KSTACK \
+ ld r1,PACA_MC_STACK(r13); \
+ subi r1,r1,SPECIAL_EXC_FRAME_SIZE
+#define SPRN_MC_SRR0 SPRN_MCSRR0
+#define SPRN_MC_SRR1 SPRN_MCSRR1
+
+#define NORMAL_EXCEPTION_PROLOG(n, intnum, addition) \
+ EXCEPTION_PROLOG(n, intnum, GEN, addition##_GEN(n))
+
+#define CRIT_EXCEPTION_PROLOG(n, intnum, addition) \
+ EXCEPTION_PROLOG(n, intnum, CRIT, addition##_CRIT(n))
+
+#define DBG_EXCEPTION_PROLOG(n, intnum, addition) \
+ EXCEPTION_PROLOG(n, intnum, DBG, addition##_DBG(n))
+
+#define MC_EXCEPTION_PROLOG(n, intnum, addition) \
+ EXCEPTION_PROLOG(n, intnum, MC, addition##_MC(n))
+
+#define GDBELL_EXCEPTION_PROLOG(n, intnum, addition) \
+ EXCEPTION_PROLOG(n, intnum, GDBELL, addition##_GDBELL(n))
+
+/* Variants of the "addition" argument for the prolog
+ */
+#define PROLOG_ADDITION_NONE_GEN(n)
+#define PROLOG_ADDITION_NONE_GDBELL(n)
+#define PROLOG_ADDITION_NONE_CRIT(n)
+#define PROLOG_ADDITION_NONE_DBG(n)
+#define PROLOG_ADDITION_NONE_MC(n)
+
+#define PROLOG_ADDITION_MASKABLE_GEN(n) \
+ lbz r10,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \
+ cmpwi cr0,r10,0; /* yes -> go out of line */ \
+ beq masked_interrupt_book3e_##n
+
+#define PROLOG_ADDITION_2REGS_GEN(n) \
+ std r14,PACA_EXGEN+EX_R14(r13); \
+ std r15,PACA_EXGEN+EX_R15(r13)
+
+#define PROLOG_ADDITION_1REG_GEN(n) \
+ std r14,PACA_EXGEN+EX_R14(r13);
+
+#define PROLOG_ADDITION_2REGS_CRIT(n) \
+ std r14,PACA_EXCRIT+EX_R14(r13); \
+ std r15,PACA_EXCRIT+EX_R15(r13)
+
+#define PROLOG_ADDITION_2REGS_DBG(n) \
+ std r14,PACA_EXDBG+EX_R14(r13); \
+ std r15,PACA_EXDBG+EX_R15(r13)
+
+#define PROLOG_ADDITION_2REGS_MC(n) \
+ std r14,PACA_EXMC+EX_R14(r13); \
+ std r15,PACA_EXMC+EX_R15(r13)
+
+
+/* Core exception code for all exceptions except TLB misses. */
+#define EXCEPTION_COMMON_LVL(n, scratch, excf) \
+exc_##n##_common: \
+ std r0,GPR0(r1); /* save r0 in stackframe */ \
+ std r2,GPR2(r1); /* save r2 in stackframe */ \
+ SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
+ SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
+ std r9,GPR9(r1); /* save r9 in stackframe */ \
+ std r10,_NIP(r1); /* save SRR0 to stackframe */ \
+ std r11,_MSR(r1); /* save SRR1 to stackframe */ \
+ beq 2f; /* if from kernel mode */ \
+ ACCOUNT_CPU_USER_ENTRY(r10,r11);/* accounting (uses cr0+eq) */ \
+2: ld r3,excf+EX_R10(r13); /* get back r10 */ \
+ ld r4,excf+EX_R11(r13); /* get back r11 */ \
+ mfspr r5,scratch; /* get back r13 */ \
+ std r12,GPR12(r1); /* save r12 in stackframe */ \
+ ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
+ mflr r6; /* save LR in stackframe */ \
+ mfctr r7; /* save CTR in stackframe */ \
+ mfspr r8,SPRN_XER; /* save XER in stackframe */ \
+ ld r9,excf+EX_R1(r13); /* load orig r1 back from PACA */ \
+ lwz r10,excf+EX_CR(r13); /* load orig CR back from PACA */ \
+ lbz r11,PACASOFTIRQEN(r13); /* get current IRQ softe */ \
+ ld r12,exception_marker@toc(r2); \
+ li r0,0; \
+ std r3,GPR10(r1); /* save r10 to stackframe */ \
+ std r4,GPR11(r1); /* save r11 to stackframe */ \
+ std r5,GPR13(r1); /* save it to stackframe */ \
+ std r6,_LINK(r1); \
+ std r7,_CTR(r1); \
+ std r8,_XER(r1); \
+ li r3,(n)+1; /* indicate partial regs in trap */ \
+ std r9,0(r1); /* store stack frame back link */ \
+ std r10,_CCR(r1); /* store orig CR in stackframe */ \
+ std r9,GPR1(r1); /* store stack frame back link */ \
+ std r11,SOFTE(r1); /* and save it to stackframe */ \
+ std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \
+ std r3,_TRAP(r1); /* set trap number */ \
+ std r0,RESULT(r1); /* clear regs->result */
+
+#define EXCEPTION_COMMON(n) \
+ EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN)
+#define EXCEPTION_COMMON_CRIT(n) \
+ EXCEPTION_COMMON_LVL(n, SPRN_SPRG_CRIT_SCRATCH, PACA_EXCRIT)
+#define EXCEPTION_COMMON_MC(n) \
+ EXCEPTION_COMMON_LVL(n, SPRN_SPRG_MC_SCRATCH, PACA_EXMC)
+#define EXCEPTION_COMMON_DBG(n) \
+ EXCEPTION_COMMON_LVL(n, SPRN_SPRG_DBG_SCRATCH, PACA_EXDBG)
+
+/*
+ * This is meant for exceptions that don't immediately hard-enable. We
+ * set a bit in paca->irq_happened to ensure that a subsequent call to
+ * arch_local_irq_restore() will properly hard-enable and avoid the
+ * fast-path, and then reconcile irq state.
+ */
+#define INTS_DISABLE RECONCILE_IRQ_STATE(r3,r4)
+
+/*
+ * This is called by exceptions that don't use INTS_DISABLE (that did not
+ * touch irq indicators in the PACA). This will restore MSR:EE to it's
+ * previous value
+ *
+ * XXX In the long run, we may want to open-code it in order to separate the
+ * load from the wrtee, thus limiting the latency caused by the dependency
+ * but at this point, I'll favor code clarity until we have a near to final
+ * implementation
+ */
+#define INTS_RESTORE_HARD \
+ ld r11,_MSR(r1); \
+ wrtee r11;
+
+/* XXX FIXME: Restore r14/r15 when necessary */
+#define BAD_STACK_TRAMPOLINE(n) \
+exc_##n##_bad_stack: \
+ li r1,(n); /* get exception number */ \
+ sth r1,PACA_TRAP_SAVE(r13); /* store trap */ \
+ b bad_stack_book3e; /* bad stack error */
+
+/* WARNING: If you change the layout of this stub, make sure you chcek
+ * the debug exception handler which handles single stepping
+ * into exceptions from userspace, and the MM code in
+ * arch/powerpc/mm/tlb_nohash.c which patches the branch here
+ * and would need to be updated if that branch is moved
+ */
+#define EXCEPTION_STUB(loc, label) \
+ . = interrupt_base_book3e + loc; \
+ nop; /* To make debug interrupts happy */ \
+ b exc_##label##_book3e;
+
+#define ACK_NONE(r)
+#define ACK_DEC(r) \
+ lis r,TSR_DIS@h; \
+ mtspr SPRN_TSR,r
+#define ACK_FIT(r) \
+ lis r,TSR_FIS@h; \
+ mtspr SPRN_TSR,r
+
+/* Used by asynchronous interrupt that may happen in the idle loop.
+ *
+ * This check if the thread was in the idle loop, and if yes, returns
+ * to the caller rather than the PC. This is to avoid a race if
+ * interrupts happen before the wait instruction.
+ */
+#define CHECK_NAPPING() \
+ CURRENT_THREAD_INFO(r11, r1); \
+ ld r10,TI_LOCAL_FLAGS(r11); \
+ andi. r9,r10,_TLF_NAPPING; \
+ beq+ 1f; \
+ ld r8,_LINK(r1); \
+ rlwinm r7,r10,0,~_TLF_NAPPING; \
+ std r8,_NIP(r1); \
+ std r7,TI_LOCAL_FLAGS(r11); \
+1:
+
+
+#define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack) \
+ START_EXCEPTION(label); \
+ NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\
+ EXCEPTION_COMMON(trapnum) \
+ INTS_DISABLE; \
+ ack(r8); \
+ CHECK_NAPPING(); \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ bl hdlr; \
+ b ret_from_except_lite;
+
+/* This value is used to mark exception frames on the stack. */
+ .section ".toc","aw"
+exception_marker:
+ .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
+
+
+/*
+ * And here we have the exception vectors !
+ */
+
+ .text
+ .balign 0x1000
+ .globl interrupt_base_book3e
+interrupt_base_book3e: /* fake trap */
+ EXCEPTION_STUB(0x000, machine_check)
+ EXCEPTION_STUB(0x020, critical_input) /* 0x0100 */
+ EXCEPTION_STUB(0x040, debug_crit) /* 0x0d00 */
+ EXCEPTION_STUB(0x060, data_storage) /* 0x0300 */
+ EXCEPTION_STUB(0x080, instruction_storage) /* 0x0400 */
+ EXCEPTION_STUB(0x0a0, external_input) /* 0x0500 */
+ EXCEPTION_STUB(0x0c0, alignment) /* 0x0600 */
+ EXCEPTION_STUB(0x0e0, program) /* 0x0700 */
+ EXCEPTION_STUB(0x100, fp_unavailable) /* 0x0800 */
+ EXCEPTION_STUB(0x120, system_call) /* 0x0c00 */
+ EXCEPTION_STUB(0x140, ap_unavailable) /* 0x0f20 */
+ EXCEPTION_STUB(0x160, decrementer) /* 0x0900 */
+ EXCEPTION_STUB(0x180, fixed_interval) /* 0x0980 */
+ EXCEPTION_STUB(0x1a0, watchdog) /* 0x09f0 */
+ EXCEPTION_STUB(0x1c0, data_tlb_miss)
+ EXCEPTION_STUB(0x1e0, instruction_tlb_miss)
+ EXCEPTION_STUB(0x200, altivec_unavailable)
+ EXCEPTION_STUB(0x220, altivec_assist)
+ EXCEPTION_STUB(0x260, perfmon)
+ EXCEPTION_STUB(0x280, doorbell)
+ EXCEPTION_STUB(0x2a0, doorbell_crit)
+ EXCEPTION_STUB(0x2c0, guest_doorbell)
+ EXCEPTION_STUB(0x2e0, guest_doorbell_crit)
+ EXCEPTION_STUB(0x300, hypercall)
+ EXCEPTION_STUB(0x320, ehpriv)
+ EXCEPTION_STUB(0x340, lrat_error)
+
+ .globl interrupt_end_book3e
+interrupt_end_book3e:
+
+/* Critical Input Interrupt */
+ START_EXCEPTION(critical_input);
+ CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL,
+ PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON_CRIT(0x100)
+ bl save_nvgprs
+ bl special_reg_save
+ CHECK_NAPPING();
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unknown_exception
+ b ret_from_crit_except
+
+/* Machine Check Interrupt */
+ START_EXCEPTION(machine_check);
+ MC_EXCEPTION_PROLOG(0x000, BOOKE_INTERRUPT_MACHINE_CHECK,
+ PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON_MC(0x000)
+ bl save_nvgprs
+ bl special_reg_save
+ CHECK_NAPPING();
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_exception
+ b ret_from_mc_except
+
+/* Data Storage Interrupt */
+ START_EXCEPTION(data_storage)
+ NORMAL_EXCEPTION_PROLOG(0x300, BOOKE_INTERRUPT_DATA_STORAGE,
+ PROLOG_ADDITION_2REGS)
+ mfspr r14,SPRN_DEAR
+ mfspr r15,SPRN_ESR
+ EXCEPTION_COMMON(0x300)
+ INTS_DISABLE
+ b storage_fault_common
+
+/* Instruction Storage Interrupt */
+ START_EXCEPTION(instruction_storage);
+ NORMAL_EXCEPTION_PROLOG(0x400, BOOKE_INTERRUPT_INST_STORAGE,
+ PROLOG_ADDITION_2REGS)
+ li r15,0
+ mr r14,r10
+ EXCEPTION_COMMON(0x400)
+ INTS_DISABLE
+ b storage_fault_common
+
+/* External Input Interrupt */
+ MASKABLE_EXCEPTION(0x500, BOOKE_INTERRUPT_EXTERNAL,
+ external_input, do_IRQ, ACK_NONE)
+
+/* Alignment */
+ START_EXCEPTION(alignment);
+ NORMAL_EXCEPTION_PROLOG(0x600, BOOKE_INTERRUPT_ALIGNMENT,
+ PROLOG_ADDITION_2REGS)
+ mfspr r14,SPRN_DEAR
+ mfspr r15,SPRN_ESR
+ EXCEPTION_COMMON(0x600)
+ b alignment_more /* no room, go out of line */
+
+/* Program Interrupt */
+ START_EXCEPTION(program);
+ NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM,
+ PROLOG_ADDITION_1REG)
+ mfspr r14,SPRN_ESR
+ EXCEPTION_COMMON(0x700)
+ INTS_DISABLE
+ std r14,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ bl save_nvgprs
+ bl program_check_exception
+ b ret_from_except
+
+/* Floating Point Unavailable Interrupt */
+ START_EXCEPTION(fp_unavailable);
+ NORMAL_EXCEPTION_PROLOG(0x800, BOOKE_INTERRUPT_FP_UNAVAIL,
+ PROLOG_ADDITION_NONE)
+ /* we can probably do a shorter exception entry for that one... */
+ EXCEPTION_COMMON(0x800)
+ ld r12,_MSR(r1)
+ andi. r0,r12,MSR_PR;
+ beq- 1f
+ bl load_up_fpu
+ b fast_exception_return
+1: INTS_DISABLE
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl kernel_fp_unavailable_exception
+ b ret_from_except
+
+/* Altivec Unavailable Interrupt */
+ START_EXCEPTION(altivec_unavailable);
+ NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL,
+ PROLOG_ADDITION_NONE)
+ /* we can probably do a shorter exception entry for that one... */
+ EXCEPTION_COMMON(0x200)
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ ld r12,_MSR(r1)
+ andi. r0,r12,MSR_PR;
+ beq- 1f
+ bl load_up_altivec
+ b fast_exception_return
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+ INTS_DISABLE
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl altivec_unavailable_exception
+ b ret_from_except
+
+/* AltiVec Assist */
+ START_EXCEPTION(altivec_assist);
+ NORMAL_EXCEPTION_PROLOG(0x220,
+ BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST,
+ PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON(0x220)
+ INTS_DISABLE
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ bl altivec_assist_exception
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#else
+ bl unknown_exception
+#endif
+ b ret_from_except
+
+
+/* Decrementer Interrupt */
+ MASKABLE_EXCEPTION(0x900, BOOKE_INTERRUPT_DECREMENTER,
+ decrementer, timer_interrupt, ACK_DEC)
+
+/* Fixed Interval Timer Interrupt */
+ MASKABLE_EXCEPTION(0x980, BOOKE_INTERRUPT_FIT,
+ fixed_interval, unknown_exception, ACK_FIT)
+
+/* Watchdog Timer Interrupt */
+ START_EXCEPTION(watchdog);
+ CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG,
+ PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON_CRIT(0x9f0)
+ bl save_nvgprs
+ bl special_reg_save
+ CHECK_NAPPING();
+ addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_BOOKE_WDT
+ bl WatchdogException
+#else
+ bl unknown_exception
+#endif
+ b ret_from_crit_except
+
+/* System Call Interrupt */
+ START_EXCEPTION(system_call)
+ mr r9,r13 /* keep a copy of userland r13 */
+ mfspr r11,SPRN_SRR0 /* get return address */
+ mfspr r12,SPRN_SRR1 /* get previous MSR */
+ mfspr r13,SPRN_SPRG_PACA /* get our PACA */
+ b system_call_common
+
+/* Auxiliary Processor Unavailable Interrupt */
+ START_EXCEPTION(ap_unavailable);
+ NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL,
+ PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON(0xf20)
+ INTS_DISABLE
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unknown_exception
+ b ret_from_except
+
+/* Debug exception as a critical interrupt*/
+ START_EXCEPTION(debug_crit);
+ CRIT_EXCEPTION_PROLOG(0xd00, BOOKE_INTERRUPT_DEBUG,
+ PROLOG_ADDITION_2REGS)
+
+ /*
+ * If there is a single step or branch-taken exception in an
+ * exception entry sequence, it was probably meant to apply to
+ * the code where the exception occurred (since exception entry
+ * doesn't turn off DE automatically). We simulate the effect
+ * of turning off DE on entry to an exception handler by turning
+ * off DE in the CSRR1 value and clearing the debug status.
+ */
+
+ mfspr r14,SPRN_DBSR /* check single-step/branch taken */
+ andis. r15,r14,(DBSR_IC|DBSR_BT)@h
+ beq+ 1f
+
+ LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
+ LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e)
+ cmpld cr0,r10,r14
+ cmpld cr1,r10,r15
+ blt+ cr0,1f
+ bge+ cr1,1f
+
+ /* here it looks like we got an inappropriate debug exception. */
+ lis r14,(DBSR_IC|DBSR_BT)@h /* clear the event */
+ rlwinm r11,r11,0,~MSR_DE /* clear DE in the CSRR1 value */
+ mtspr SPRN_DBSR,r14
+ mtspr SPRN_CSRR1,r11
+ lwz r10,PACA_EXCRIT+EX_CR(r13) /* restore registers */
+ ld r1,PACA_EXCRIT+EX_R1(r13)
+ ld r14,PACA_EXCRIT+EX_R14(r13)
+ ld r15,PACA_EXCRIT+EX_R15(r13)
+ mtcr r10
+ ld r10,PACA_EXCRIT+EX_R10(r13) /* restore registers */
+ ld r11,PACA_EXCRIT+EX_R11(r13)
+ mfspr r13,SPRN_SPRG_CRIT_SCRATCH
+ rfci
+
+ /* Normal debug exception */
+ /* XXX We only handle coming from userspace for now since we can't
+ * quite save properly an interrupted kernel state yet
+ */
+1: andi. r14,r11,MSR_PR; /* check for userspace again */
+ beq kernel_dbg_exc; /* if from kernel mode */
+
+ /* Now we mash up things to make it look like we are coming on a
+ * normal exception
+ */
+ mfspr r14,SPRN_DBSR
+ EXCEPTION_COMMON_CRIT(0xd00)
+ std r14,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ mr r4,r14
+ ld r14,PACA_EXCRIT+EX_R14(r13)
+ ld r15,PACA_EXCRIT+EX_R15(r13)
+ bl save_nvgprs
+ bl DebugException
+ b ret_from_except
+
+kernel_dbg_exc:
+ b . /* NYI */
+
+/* Debug exception as a debug interrupt*/
+ START_EXCEPTION(debug_debug);
+ DBG_EXCEPTION_PROLOG(0xd00, BOOKE_INTERRUPT_DEBUG,
+ PROLOG_ADDITION_2REGS)
+
+ /*
+ * If there is a single step or branch-taken exception in an
+ * exception entry sequence, it was probably meant to apply to
+ * the code where the exception occurred (since exception entry
+ * doesn't turn off DE automatically). We simulate the effect
+ * of turning off DE on entry to an exception handler by turning
+ * off DE in the DSRR1 value and clearing the debug status.
+ */
+
+ mfspr r14,SPRN_DBSR /* check single-step/branch taken */
+ andis. r15,r14,(DBSR_IC|DBSR_BT)@h
+ beq+ 1f
+
+ LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
+ LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e)
+ cmpld cr0,r10,r14
+ cmpld cr1,r10,r15
+ blt+ cr0,1f
+ bge+ cr1,1f
+
+ /* here it looks like we got an inappropriate debug exception. */
+ lis r14,(DBSR_IC|DBSR_BT)@h /* clear the event */
+ rlwinm r11,r11,0,~MSR_DE /* clear DE in the DSRR1 value */
+ mtspr SPRN_DBSR,r14
+ mtspr SPRN_DSRR1,r11
+ lwz r10,PACA_EXDBG+EX_CR(r13) /* restore registers */
+ ld r1,PACA_EXDBG+EX_R1(r13)
+ ld r14,PACA_EXDBG+EX_R14(r13)
+ ld r15,PACA_EXDBG+EX_R15(r13)
+ mtcr r10
+ ld r10,PACA_EXDBG+EX_R10(r13) /* restore registers */
+ ld r11,PACA_EXDBG+EX_R11(r13)
+ mfspr r13,SPRN_SPRG_DBG_SCRATCH
+ rfdi
+
+ /* Normal debug exception */
+ /* XXX We only handle coming from userspace for now since we can't
+ * quite save properly an interrupted kernel state yet
+ */
+1: andi. r14,r11,MSR_PR; /* check for userspace again */
+ beq kernel_dbg_exc; /* if from kernel mode */
+
+ /* Now we mash up things to make it look like we are coming on a
+ * normal exception
+ */
+ mfspr r14,SPRN_DBSR
+ EXCEPTION_COMMON_DBG(0xd08)
+ INTS_DISABLE
+ std r14,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ mr r4,r14
+ ld r14,PACA_EXDBG+EX_R14(r13)
+ ld r15,PACA_EXDBG+EX_R15(r13)
+ bl save_nvgprs
+ bl DebugException
+ b ret_from_except
+
+ START_EXCEPTION(perfmon);
+ NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR,
+ PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON(0x260)
+ INTS_DISABLE
+ CHECK_NAPPING()
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl performance_monitor_exception
+ b ret_from_except_lite
+
+/* Doorbell interrupt */
+ MASKABLE_EXCEPTION(0x280, BOOKE_INTERRUPT_DOORBELL,
+ doorbell, doorbell_exception, ACK_NONE)
+
+/* Doorbell critical Interrupt */
+ START_EXCEPTION(doorbell_crit);
+ CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL,
+ PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON_CRIT(0x2a0)
+ bl save_nvgprs
+ bl special_reg_save
+ CHECK_NAPPING();
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unknown_exception
+ b ret_from_crit_except
+
+/*
+ * Guest doorbell interrupt
+ * This general exception use GSRRx save/restore registers
+ */
+ START_EXCEPTION(guest_doorbell);
+ GDBELL_EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL,
+ PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON(0x2c0)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl save_nvgprs
+ INTS_RESTORE_HARD
+ bl unknown_exception
+ b ret_from_except
+
+/* Guest Doorbell critical Interrupt */
+ START_EXCEPTION(guest_doorbell_crit);
+ CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT,
+ PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON_CRIT(0x2e0)
+ bl save_nvgprs
+ bl special_reg_save
+ CHECK_NAPPING();
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unknown_exception
+ b ret_from_crit_except
+
+/* Hypervisor call */
+ START_EXCEPTION(hypercall);
+ NORMAL_EXCEPTION_PROLOG(0x310, BOOKE_INTERRUPT_HV_SYSCALL,
+ PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON(0x310)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl save_nvgprs
+ INTS_RESTORE_HARD
+ bl unknown_exception
+ b ret_from_except
+
+/* Embedded Hypervisor priviledged */
+ START_EXCEPTION(ehpriv);
+ NORMAL_EXCEPTION_PROLOG(0x320, BOOKE_INTERRUPT_HV_PRIV,
+ PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON(0x320)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl save_nvgprs
+ INTS_RESTORE_HARD
+ bl unknown_exception
+ b ret_from_except
+
+/* LRAT Error interrupt */
+ START_EXCEPTION(lrat_error);
+ NORMAL_EXCEPTION_PROLOG(0x340, BOOKE_INTERRUPT_LRAT_ERROR,
+ PROLOG_ADDITION_NONE)
+ EXCEPTION_COMMON(0x340)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .save_nvgprs
+ INTS_RESTORE_HARD
+ bl .unknown_exception
+ b .ret_from_except
+
+/*
+ * An interrupt came in while soft-disabled; We mark paca->irq_happened
+ * accordingly and if the interrupt is level sensitive, we hard disable
+ */
+
+.macro masked_interrupt_book3e paca_irq full_mask
+ lbz r10,PACAIRQHAPPENED(r13)
+ ori r10,r10,\paca_irq
+ stb r10,PACAIRQHAPPENED(r13)
+
+ .if \full_mask == 1
+ rldicl r10,r11,48,1 /* clear MSR_EE */
+ rotldi r11,r10,16
+ mtspr SPRN_SRR1,r11
+ .endif
+
+ lwz r11,PACA_EXGEN+EX_CR(r13)
+ mtcr r11
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ mfspr r13,SPRN_SPRG_GEN_SCRATCH
+ rfi
+ b .
+.endm
+
+masked_interrupt_book3e_0x500:
+ // XXX When adding support for EPR, use PACA_IRQ_EE_EDGE
+ masked_interrupt_book3e PACA_IRQ_EE 1
+
+masked_interrupt_book3e_0x900:
+ ACK_DEC(r10);
+ masked_interrupt_book3e PACA_IRQ_DEC 0
+
+masked_interrupt_book3e_0x980:
+ ACK_FIT(r10);
+ masked_interrupt_book3e PACA_IRQ_DEC 0
+
+masked_interrupt_book3e_0x280:
+masked_interrupt_book3e_0x2c0:
+ masked_interrupt_book3e PACA_IRQ_DBELL 0
+
+/*
+ * Called from arch_local_irq_enable when an interrupt needs
+ * to be resent. r3 contains either 0x500,0x900,0x260 or 0x280
+ * to indicate the kind of interrupt. MSR:EE is already off.
+ * We generate a stackframe like if a real interrupt had happened.
+ *
+ * Note: While MSR:EE is off, we need to make sure that _MSR
+ * in the generated frame has EE set to 1 or the exception
+ * handler will not properly re-enable them.
+ */
+_GLOBAL(__replay_interrupt)
+ /* We are going to jump to the exception common code which
+ * will retrieve various register values from the PACA which
+ * we don't give a damn about.
+ */
+ mflr r10
+ mfmsr r11
+ mfcr r4
+ mtspr SPRN_SPRG_GEN_SCRATCH,r13;
+ std r1,PACA_EXGEN+EX_R1(r13);
+ stw r4,PACA_EXGEN+EX_CR(r13);
+ ori r11,r11,MSR_EE
+ subi r1,r1,INT_FRAME_SIZE;
+ cmpwi cr0,r3,0x500
+ beq exc_0x500_common
+ cmpwi cr0,r3,0x900
+ beq exc_0x900_common
+ cmpwi cr0,r3,0x280
+ beq exc_0x280_common
+ blr
+
+
+/*
+ * This is called from 0x300 and 0x400 handlers after the prologs with
+ * r14 and r15 containing the fault address and error code, with the
+ * original values stashed away in the PACA
+ */
+storage_fault_common:
+ std r14,_DAR(r1)
+ std r15,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ mr r4,r14
+ mr r5,r15
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
+ bl do_page_fault
+ cmpdi r3,0
+ bne- 1f
+ b ret_from_except_lite
+1: bl save_nvgprs
+ mr r5,r3
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ ld r4,_DAR(r1)
+ bl bad_page_fault
+ b ret_from_except
+
+/*
+ * Alignment exception doesn't fit entirely in the 0x100 bytes so it
+ * continues here.
+ */
+alignment_more:
+ std r14,_DAR(r1)
+ std r15,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
+ bl save_nvgprs
+ INTS_RESTORE_HARD
+ bl alignment_exception
+ b ret_from_except
+
+/*
+ * We branch here from entry_64.S for the last stage of the exception
+ * return code path. MSR:EE is expected to be off at that point
+ */
+_GLOBAL(exception_return_book3e)
+ b 1f
+
+/* This is the return from load_up_fpu fast path which could do with
+ * less GPR restores in fact, but for now we have a single return path
+ */
+ .globl fast_exception_return
+fast_exception_return:
+ wrteei 0
+1: mr r0,r13
+ ld r10,_MSR(r1)
+ REST_4GPRS(2, r1)
+ andi. r6,r10,MSR_PR
+ REST_2GPRS(6, r1)
+ beq 1f
+ ACCOUNT_CPU_USER_EXIT(r10, r11)
+ ld r0,GPR13(r1)
+
+1: stdcx. r0,0,r1 /* to clear the reservation */
+
+ ld r8,_CCR(r1)
+ ld r9,_LINK(r1)
+ ld r10,_CTR(r1)
+ ld r11,_XER(r1)
+ mtcr r8
+ mtlr r9
+ mtctr r10
+ mtxer r11
+ REST_2GPRS(8, r1)
+ ld r10,GPR10(r1)
+ ld r11,GPR11(r1)
+ ld r12,GPR12(r1)
+ mtspr SPRN_SPRG_GEN_SCRATCH,r0
+
+ std r10,PACA_EXGEN+EX_R10(r13);
+ std r11,PACA_EXGEN+EX_R11(r13);
+ ld r10,_NIP(r1)
+ ld r11,_MSR(r1)
+ ld r0,GPR0(r1)
+ ld r1,GPR1(r1)
+ mtspr SPRN_SRR0,r10
+ mtspr SPRN_SRR1,r11
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ mfspr r13,SPRN_SPRG_GEN_SCRATCH
+ rfi
+
+/*
+ * Trampolines used when spotting a bad kernel stack pointer in
+ * the exception entry code.
+ *
+ * TODO: move some bits like SRR0 read to trampoline, pass PACA
+ * index around, etc... to handle crit & mcheck
+ */
+BAD_STACK_TRAMPOLINE(0x000)
+BAD_STACK_TRAMPOLINE(0x100)
+BAD_STACK_TRAMPOLINE(0x200)
+BAD_STACK_TRAMPOLINE(0x220)
+BAD_STACK_TRAMPOLINE(0x260)
+BAD_STACK_TRAMPOLINE(0x280)
+BAD_STACK_TRAMPOLINE(0x2a0)
+BAD_STACK_TRAMPOLINE(0x2c0)
+BAD_STACK_TRAMPOLINE(0x2e0)
+BAD_STACK_TRAMPOLINE(0x300)
+BAD_STACK_TRAMPOLINE(0x310)
+BAD_STACK_TRAMPOLINE(0x320)
+BAD_STACK_TRAMPOLINE(0x340)
+BAD_STACK_TRAMPOLINE(0x400)
+BAD_STACK_TRAMPOLINE(0x500)
+BAD_STACK_TRAMPOLINE(0x600)
+BAD_STACK_TRAMPOLINE(0x700)
+BAD_STACK_TRAMPOLINE(0x800)
+BAD_STACK_TRAMPOLINE(0x900)
+BAD_STACK_TRAMPOLINE(0x980)
+BAD_STACK_TRAMPOLINE(0x9f0)
+BAD_STACK_TRAMPOLINE(0xa00)
+BAD_STACK_TRAMPOLINE(0xb00)
+BAD_STACK_TRAMPOLINE(0xc00)
+BAD_STACK_TRAMPOLINE(0xd00)
+BAD_STACK_TRAMPOLINE(0xd08)
+BAD_STACK_TRAMPOLINE(0xe00)
+BAD_STACK_TRAMPOLINE(0xf00)
+BAD_STACK_TRAMPOLINE(0xf20)
+
+ .globl bad_stack_book3e
+bad_stack_book3e:
+ /* XXX: Needs to make SPRN_SPRG_GEN depend on exception type */
+ mfspr r10,SPRN_SRR0; /* read SRR0 before touching stack */
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,64+INT_FRAME_SIZE
+ std r10,_NIP(r1)
+ std r11,_MSR(r1)
+ ld r10,PACA_EXGEN+EX_R1(r13) /* FIXME for crit & mcheck */
+ lwz r11,PACA_EXGEN+EX_CR(r13) /* FIXME for crit & mcheck */
+ std r10,GPR1(r1)
+ std r11,_CCR(r1)
+ mfspr r10,SPRN_DEAR
+ mfspr r11,SPRN_ESR
+ std r10,_DAR(r1)
+ std r11,_DSISR(r1)
+ std r0,GPR0(r1); /* save r0 in stackframe */ \
+ std r2,GPR2(r1); /* save r2 in stackframe */ \
+ SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
+ SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
+ std r9,GPR9(r1); /* save r9 in stackframe */ \
+ ld r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */ \
+ ld r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */ \
+ mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \
+ std r3,GPR10(r1); /* save r10 to stackframe */ \
+ std r4,GPR11(r1); /* save r11 to stackframe */ \
+ std r12,GPR12(r1); /* save r12 in stackframe */ \
+ std r5,GPR13(r1); /* save it to stackframe */ \
+ mflr r10
+ mfctr r11
+ mfxer r12
+ std r10,_LINK(r1)
+ std r11,_CTR(r1)
+ std r12,_XER(r1)
+ SAVE_10GPRS(14,r1)
+ SAVE_8GPRS(24,r1)
+ lhz r12,PACA_TRAP_SAVE(r13)
+ std r12,_TRAP(r1)
+ addi r11,r1,INT_FRAME_SIZE
+ std r11,0(r1)
+ li r12,0
+ std r12,0(r11)
+ ld r2,PACATOC(r13)
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl kernel_bad_stack
+ b 1b
+
+/*
+ * Setup the initial TLB for a core. This current implementation
+ * assume that whatever we are running off will not conflict with
+ * the new mapping at PAGE_OFFSET.
+ */
+_GLOBAL(initial_tlb_book3e)
+
+ /* Look for the first TLB with IPROT set */
+ mfspr r4,SPRN_TLB0CFG
+ andi. r3,r4,TLBnCFG_IPROT
+ lis r3,MAS0_TLBSEL(0)@h
+ bne found_iprot
+
+ mfspr r4,SPRN_TLB1CFG
+ andi. r3,r4,TLBnCFG_IPROT
+ lis r3,MAS0_TLBSEL(1)@h
+ bne found_iprot
+
+ mfspr r4,SPRN_TLB2CFG
+ andi. r3,r4,TLBnCFG_IPROT
+ lis r3,MAS0_TLBSEL(2)@h
+ bne found_iprot
+
+ lis r3,MAS0_TLBSEL(3)@h
+ mfspr r4,SPRN_TLB3CFG
+ /* fall through */
+
+found_iprot:
+ andi. r5,r4,TLBnCFG_HES
+ bne have_hes
+
+ mflr r8 /* save LR */
+/* 1. Find the index of the entry we're executing in
+ *
+ * r3 = MAS0_TLBSEL (for the iprot array)
+ * r4 = SPRN_TLBnCFG
+ */
+ bl invstr /* Find our address */
+invstr: mflr r6 /* Make it accessible */
+ mfmsr r7
+ rlwinm r5,r7,27,31,31 /* extract MSR[IS] */
+ mfspr r7,SPRN_PID
+ slwi r7,r7,16
+ or r7,r7,r5
+ mtspr SPRN_MAS6,r7
+ tlbsx 0,r6 /* search MSR[IS], SPID=PID */
+
+ mfspr r3,SPRN_MAS0
+ rlwinm r5,r3,16,20,31 /* Extract MAS0(Entry) */
+
+ mfspr r7,SPRN_MAS1 /* Insure IPROT set */
+ oris r7,r7,MAS1_IPROT@h
+ mtspr SPRN_MAS1,r7
+ tlbwe
+
+/* 2. Invalidate all entries except the entry we're executing in
+ *
+ * r3 = MAS0 w/TLBSEL & ESEL for the entry we are running in
+ * r4 = SPRN_TLBnCFG
+ * r5 = ESEL of entry we are running in
+ */
+ andi. r4,r4,TLBnCFG_N_ENTRY /* Extract # entries */
+ li r6,0 /* Set Entry counter to 0 */
+1: mr r7,r3 /* Set MAS0(TLBSEL) */
+ rlwimi r7,r6,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r6) */
+ mtspr SPRN_MAS0,r7
+ tlbre
+ mfspr r7,SPRN_MAS1
+ rlwinm r7,r7,0,2,31 /* Clear MAS1 Valid and IPROT */
+ cmpw r5,r6
+ beq skpinv /* Dont update the current execution TLB */
+ mtspr SPRN_MAS1,r7
+ tlbwe
+ isync
+skpinv: addi r6,r6,1 /* Increment */
+ cmpw r6,r4 /* Are we done? */
+ bne 1b /* If not, repeat */
+
+ /* Invalidate all TLBs */
+ PPC_TLBILX_ALL(0,R0)
+ sync
+ isync
+
+/* 3. Setup a temp mapping and jump to it
+ *
+ * r3 = MAS0 w/TLBSEL & ESEL for the entry we are running in
+ * r5 = ESEL of entry we are running in
+ */
+ andi. r7,r5,0x1 /* Find an entry not used and is non-zero */
+ addi r7,r7,0x1
+ mr r4,r3 /* Set MAS0(TLBSEL) = 1 */
+ mtspr SPRN_MAS0,r4
+ tlbre
+
+ rlwimi r4,r7,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r7) */
+ mtspr SPRN_MAS0,r4
+
+ mfspr r7,SPRN_MAS1
+ xori r6,r7,MAS1_TS /* Setup TMP mapping in the other Address space */
+ mtspr SPRN_MAS1,r6
+
+ tlbwe
+
+ mfmsr r6
+ xori r6,r6,MSR_IS
+ mtspr SPRN_SRR1,r6
+ bl 1f /* Find our address */
+1: mflr r6
+ addi r6,r6,(2f - 1b)
+ mtspr SPRN_SRR0,r6
+ rfi
+2:
+
+/* 4. Clear out PIDs & Search info
+ *
+ * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in
+ * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
+ * r5 = MAS3
+ */
+ li r6,0
+ mtspr SPRN_MAS6,r6
+ mtspr SPRN_PID,r6
+
+/* 5. Invalidate mapping we started in
+ *
+ * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in
+ * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
+ * r5 = MAS3
+ */
+ mtspr SPRN_MAS0,r3
+ tlbre
+ mfspr r6,SPRN_MAS1
+ rlwinm r6,r6,0,2,31 /* clear IPROT and VALID */
+ mtspr SPRN_MAS1,r6
+ tlbwe
+ sync
+ isync
+
+/* The mapping only needs to be cache-coherent on SMP */
+#ifdef CONFIG_SMP
+#define M_IF_SMP MAS2_M
+#else
+#define M_IF_SMP 0
+#endif
+
+/* 6. Setup KERNELBASE mapping in TLB[0]
+ *
+ * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in
+ * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
+ * r5 = MAS3
+ */
+ rlwinm r3,r3,0,16,3 /* clear ESEL */
+ mtspr SPRN_MAS0,r3
+ lis r6,(MAS1_VALID|MAS1_IPROT)@h
+ ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l
+ mtspr SPRN_MAS1,r6
+
+ LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | M_IF_SMP)
+ mtspr SPRN_MAS2,r6
+
+ rlwinm r5,r5,0,0,25
+ ori r5,r5,MAS3_SR | MAS3_SW | MAS3_SX
+ mtspr SPRN_MAS3,r5
+ li r5,-1
+ rlwinm r5,r5,0,0,25
+
+ tlbwe
+
+/* 7. Jump to KERNELBASE mapping
+ *
+ * r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
+ */
+ /* Now we branch the new virtual address mapped by this entry */
+ LOAD_REG_IMMEDIATE(r6,2f)
+ lis r7,MSR_KERNEL@h
+ ori r7,r7,MSR_KERNEL@l
+ mtspr SPRN_SRR0,r6
+ mtspr SPRN_SRR1,r7
+ rfi /* start execution out of TLB1[0] entry */
+2:
+
+/* 8. Clear out the temp mapping
+ *
+ * r4 = MAS0 w/TLBSEL & ESEL for the entry we are running in
+ */
+ mtspr SPRN_MAS0,r4
+ tlbre
+ mfspr r5,SPRN_MAS1
+ rlwinm r5,r5,0,2,31 /* clear IPROT and VALID */
+ mtspr SPRN_MAS1,r5
+ tlbwe
+ sync
+ isync
+
+ /* We translate LR and return */
+ tovirt(r8,r8)
+ mtlr r8
+ blr
+
+have_hes:
+ /* Setup MAS 0,1,2,3 and 7 for tlbwe of a 1G entry that maps the
+ * kernel linear mapping. We also set MAS8 once for all here though
+ * that will have to be made dependent on whether we are running under
+ * a hypervisor I suppose.
+ */
+
+ /* BEWARE, MAGIC
+ * This code is called as an ordinary function on the boot CPU. But to
+ * avoid duplication, this code is also used in SCOM bringup of
+ * secondary CPUs. We read the code between the initial_tlb_code_start
+ * and initial_tlb_code_end labels one instruction at a time and RAM it
+ * into the new core via SCOM. That doesn't process branches, so there
+ * must be none between those two labels. It also means if this code
+ * ever takes any parameters, the SCOM code must also be updated to
+ * provide them.
+ */
+ .globl a2_tlbinit_code_start
+a2_tlbinit_code_start:
+
+ ori r11,r3,MAS0_WQ_ALLWAYS
+ oris r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */
+ mtspr SPRN_MAS0,r11
+ lis r3,(MAS1_VALID | MAS1_IPROT)@h
+ ori r3,r3,BOOK3E_PAGESZ_1GB << MAS1_TSIZE_SHIFT
+ mtspr SPRN_MAS1,r3
+ LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET | MAS2_M)
+ mtspr SPRN_MAS2,r3
+ li r3,MAS3_SR | MAS3_SW | MAS3_SX
+ mtspr SPRN_MAS7_MAS3,r3
+ li r3,0
+ mtspr SPRN_MAS8,r3
+
+ /* Write the TLB entry */
+ tlbwe
+
+ .globl a2_tlbinit_after_linear_map
+a2_tlbinit_after_linear_map:
+
+ /* Now we branch the new virtual address mapped by this entry */
+ LOAD_REG_IMMEDIATE(r3,1f)
+ mtctr r3
+ bctr
+
+1: /* We are now running at PAGE_OFFSET, clean the TLB of everything
+ * else (including IPROTed things left by firmware)
+ * r4 = TLBnCFG
+ * r3 = current address (more or less)
+ */
+
+ li r5,0
+ mtspr SPRN_MAS6,r5
+ tlbsx 0,r3
+
+ rlwinm r9,r4,0,TLBnCFG_N_ENTRY
+ rlwinm r10,r4,8,0xff
+ addi r10,r10,-1 /* Get inner loop mask */
+
+ li r3,1
+
+ mfspr r5,SPRN_MAS1
+ rlwinm r5,r5,0,(~(MAS1_VALID|MAS1_IPROT))
+
+ mfspr r6,SPRN_MAS2
+ rldicr r6,r6,0,51 /* Extract EPN */
+
+ mfspr r7,SPRN_MAS0
+ rlwinm r7,r7,0,0xffff0fff /* Clear HES and WQ */
+
+ rlwinm r8,r7,16,0xfff /* Extract ESEL */
+
+2: add r4,r3,r8
+ and r4,r4,r10
+
+ rlwimi r7,r4,16,MAS0_ESEL_MASK
+
+ mtspr SPRN_MAS0,r7
+ mtspr SPRN_MAS1,r5
+ mtspr SPRN_MAS2,r6
+ tlbwe
+
+ addi r3,r3,1
+ and. r4,r3,r10
+
+ bne 3f
+ addis r6,r6,(1<<30)@h
+3:
+ cmpw r3,r9
+ blt 2b
+
+ .globl a2_tlbinit_after_iprot_flush
+a2_tlbinit_after_iprot_flush:
+
+ PPC_TLBILX(0,0,R0)
+ sync
+ isync
+
+ .globl a2_tlbinit_code_end
+a2_tlbinit_code_end:
+
+ /* We translate LR and return */
+ mflr r3
+ tovirt(r3,r3)
+ mtlr r3
+ blr
+
+/*
+ * Main entry (boot CPU, thread 0)
+ *
+ * We enter here from head_64.S, possibly after the prom_init trampoline
+ * with r3 and r4 already saved to r31 and 30 respectively and in 64 bits
+ * mode. Anything else is as it was left by the bootloader
+ *
+ * Initial requirements of this port:
+ *
+ * - Kernel loaded at 0 physical
+ * - A good lump of memory mapped 0:0 by UTLB entry 0
+ * - MSR:IS & MSR:DS set to 0
+ *
+ * Note that some of the above requirements will be relaxed in the future
+ * as the kernel becomes smarter at dealing with different initial conditions
+ * but for now you have to be careful
+ */
+_GLOBAL(start_initialization_book3e)
+ mflr r28
+
+ /* First, we need to setup some initial TLBs to map the kernel
+ * text, data and bss at PAGE_OFFSET. We don't have a real mode
+ * and always use AS 0, so we just set it up to match our link
+ * address and never use 0 based addresses.
+ */
+ bl initial_tlb_book3e
+
+ /* Init global core bits */
+ bl init_core_book3e
+
+ /* Init per-thread bits */
+ bl init_thread_book3e
+
+ /* Return to common init code */
+ tovirt(r28,r28)
+ mtlr r28
+ blr
+
+
+/*
+ * Secondary core/processor entry
+ *
+ * This is entered for thread 0 of a secondary core, all other threads
+ * are expected to be stopped. It's similar to start_initialization_book3e
+ * except that it's generally entered from the holding loop in head_64.S
+ * after CPUs have been gathered by Open Firmware.
+ *
+ * We assume we are in 32 bits mode running with whatever TLB entry was
+ * set for us by the firmware or POR engine.
+ */
+_GLOBAL(book3e_secondary_core_init_tlb_set)
+ li r4,1
+ b generic_secondary_smp_init
+
+_GLOBAL(book3e_secondary_core_init)
+ mflr r28
+
+ /* Do we need to setup initial TLB entry ? */
+ cmplwi r4,0
+ bne 2f
+
+ /* Setup TLB for this core */
+ bl initial_tlb_book3e
+
+ /* We can return from the above running at a different
+ * address, so recalculate r2 (TOC)
+ */
+ bl relative_toc
+
+ /* Init global core bits */
+2: bl init_core_book3e
+
+ /* Init per-thread bits */
+3: bl init_thread_book3e
+
+ /* Return to common init code at proper virtual address.
+ *
+ * Due to various previous assumptions, we know we entered this
+ * function at either the final PAGE_OFFSET mapping or using a
+ * 1:1 mapping at 0, so we don't bother doing a complicated check
+ * here, we just ensure the return address has the right top bits.
+ *
+ * Note that if we ever want to be smarter about where we can be
+ * started from, we have to be careful that by the time we reach
+ * the code below we may already be running at a different location
+ * than the one we were called from since initial_tlb_book3e can
+ * have moved us already.
+ */
+ cmpdi cr0,r28,0
+ blt 1f
+ lis r3,PAGE_OFFSET@highest
+ sldi r3,r3,32
+ or r28,r28,r3
+1: mtlr r28
+ blr
+
+_GLOBAL(book3e_secondary_thread_init)
+ mflr r28
+ b 3b
+
+init_core_book3e:
+ /* Establish the interrupt vector base */
+ LOAD_REG_IMMEDIATE(r3, interrupt_base_book3e)
+ mtspr SPRN_IVPR,r3
+ sync
+ blr
+
+init_thread_book3e:
+ lis r3,(SPRN_EPCR_ICM | SPRN_EPCR_GICM)@h
+ mtspr SPRN_EPCR,r3
+
+ /* Make sure interrupts are off */
+ wrteei 0
+
+ /* disable all timers and clear out status */
+ li r3,0
+ mtspr SPRN_TCR,r3
+ mfspr r3,SPRN_TSR
+ mtspr SPRN_TSR,r3
+
+ blr
+
+_GLOBAL(__setup_base_ivors)
+ SET_IVOR(0, 0x020) /* Critical Input */
+ SET_IVOR(1, 0x000) /* Machine Check */
+ SET_IVOR(2, 0x060) /* Data Storage */
+ SET_IVOR(3, 0x080) /* Instruction Storage */
+ SET_IVOR(4, 0x0a0) /* External Input */
+ SET_IVOR(5, 0x0c0) /* Alignment */
+ SET_IVOR(6, 0x0e0) /* Program */
+ SET_IVOR(7, 0x100) /* FP Unavailable */
+ SET_IVOR(8, 0x120) /* System Call */
+ SET_IVOR(9, 0x140) /* Auxiliary Processor Unavailable */
+ SET_IVOR(10, 0x160) /* Decrementer */
+ SET_IVOR(11, 0x180) /* Fixed Interval Timer */
+ SET_IVOR(12, 0x1a0) /* Watchdog Timer */
+ SET_IVOR(13, 0x1c0) /* Data TLB Error */
+ SET_IVOR(14, 0x1e0) /* Instruction TLB Error */
+ SET_IVOR(15, 0x040) /* Debug */
+
+ sync
+
+ blr
+
+_GLOBAL(setup_altivec_ivors)
+ SET_IVOR(32, 0x200) /* AltiVec Unavailable */
+ SET_IVOR(33, 0x220) /* AltiVec Assist */
+ blr
+
+_GLOBAL(setup_perfmon_ivor)
+ SET_IVOR(35, 0x260) /* Performance Monitor */
+ blr
+
+_GLOBAL(setup_doorbell_ivors)
+ SET_IVOR(36, 0x280) /* Processor Doorbell */
+ SET_IVOR(37, 0x2a0) /* Processor Doorbell Crit */
+ blr
+
+_GLOBAL(setup_ehv_ivors)
+ SET_IVOR(40, 0x300) /* Embedded Hypervisor System Call */
+ SET_IVOR(41, 0x320) /* Embedded Hypervisor Privilege */
+ SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */
+ SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */
+ blr
+
+_GLOBAL(setup_lrat_ivor)
+ SET_IVOR(42, 0x340) /* LRAT Error */
+ blr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
new file mode 100644
index 00000000000..a7d36b19221
--- /dev/null
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -0,0 +1,1793 @@
+/*
+ * This file contains the 64-bit "server" PowerPC variant
+ * of the low level exception handling including exception
+ * vectors, exception return, part of the slb and stab
+ * handling and other fixed offset specific things.
+ *
+ * This file is meant to be #included from head_64.S due to
+ * position dependent assembly.
+ *
+ * Most of this originates from head_64.S and thus has the same
+ * copyright history.
+ *
+ */
+
+#include <asm/hw_irq.h>
+#include <asm/exception-64s.h>
+#include <asm/ptrace.h>
+
+/*
+ * We layout physical memory as follows:
+ * 0x0000 - 0x00ff : Secondary processor spin code
+ * 0x0100 - 0x17ff : pSeries Interrupt prologs
+ * 0x1800 - 0x4000 : interrupt support common interrupt prologs
+ * 0x4000 - 0x5fff : pSeries interrupts with IR=1,DR=1
+ * 0x6000 - 0x6fff : more interrupt support including for IR=1,DR=1
+ * 0x7000 - 0x7fff : FWNMI data area
+ * 0x8000 - 0x8fff : Initial (CPU0) segment table
+ * 0x9000 - : Early init and support code
+ */
+ /* Syscall routine is used twice, in reloc-off and reloc-on paths */
+#define SYSCALL_PSERIES_1 \
+BEGIN_FTR_SECTION \
+ cmpdi r0,0x1ebe ; \
+ beq- 1f ; \
+END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
+ mr r9,r13 ; \
+ GET_PACA(r13) ; \
+ mfspr r11,SPRN_SRR0 ; \
+0:
+
+#define SYSCALL_PSERIES_2_RFID \
+ mfspr r12,SPRN_SRR1 ; \
+ ld r10,PACAKBASE(r13) ; \
+ LOAD_HANDLER(r10, system_call_entry) ; \
+ mtspr SPRN_SRR0,r10 ; \
+ ld r10,PACAKMSR(r13) ; \
+ mtspr SPRN_SRR1,r10 ; \
+ rfid ; \
+ b . ; /* prevent speculative execution */
+
+#define SYSCALL_PSERIES_3 \
+ /* Fast LE/BE switch system call */ \
+1: mfspr r12,SPRN_SRR1 ; \
+ xori r12,r12,MSR_LE ; \
+ mtspr SPRN_SRR1,r12 ; \
+ rfid ; /* return to userspace */ \
+ b . ; /* prevent speculative execution */
+
+#if defined(CONFIG_RELOCATABLE)
+ /*
+ * We can't branch directly; in the direct case we use LR
+ * and system_call_entry restores LR. (We thus need to move
+ * LR to r10 in the RFID case too.)
+ */
+#define SYSCALL_PSERIES_2_DIRECT \
+ mflr r10 ; \
+ ld r12,PACAKBASE(r13) ; \
+ LOAD_HANDLER(r12, system_call_entry_direct) ; \
+ mtctr r12 ; \
+ mfspr r12,SPRN_SRR1 ; \
+ /* Re-use of r13... No spare regs to do this */ \
+ li r13,MSR_RI ; \
+ mtmsrd r13,1 ; \
+ GET_PACA(r13) ; /* get r13 back */ \
+ bctr ;
+#else
+ /* We can branch directly */
+#define SYSCALL_PSERIES_2_DIRECT \
+ mfspr r12,SPRN_SRR1 ; \
+ li r10,MSR_RI ; \
+ mtmsrd r10,1 ; /* Set RI (EE=0) */ \
+ b system_call_entry_direct ;
+#endif
+
+/*
+ * This is the start of the interrupt handlers for pSeries
+ * This code runs with relocation off.
+ * Code from here to __end_interrupts gets copied down to real
+ * address 0x100 when we are running a relocatable kernel.
+ * Therefore any relative branches in this section must only
+ * branch to labels in this section.
+ */
+ . = 0x100
+ .globl __start_interrupts
+__start_interrupts:
+
+ .globl system_reset_pSeries;
+system_reset_pSeries:
+ HMT_MEDIUM_PPR_DISCARD
+ SET_SCRATCH0(r13)
+#ifdef CONFIG_PPC_P7_NAP
+BEGIN_FTR_SECTION
+ /* Running native on arch 2.06 or later, check if we are
+ * waking up from nap. We only handle no state loss and
+ * supervisor state loss. We do -not- handle hypervisor
+ * state loss at this time.
+ */
+ mfspr r13,SPRN_SRR1
+ rlwinm. r13,r13,47-31,30,31
+ beq 9f
+
+ /* waking up from powersave (nap) state */
+ cmpwi cr1,r13,2
+ /* Total loss of HV state is fatal, we could try to use the
+ * PIR to locate a PACA, then use an emergency stack etc...
+ * OPAL v3 based powernv platforms have new idle states
+ * which fall in this catagory.
+ */
+ bgt cr1,8f
+ GET_PACA(r13)
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ li r0,KVM_HWTHREAD_IN_KERNEL
+ stb r0,HSTATE_HWTHREAD_STATE(r13)
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ sync
+ lbz r0,HSTATE_HWTHREAD_REQ(r13)
+ cmpwi r0,0
+ beq 1f
+ b kvm_start_guest
+1:
+#endif
+
+ beq cr1,2f
+ b power7_wakeup_noloss
+2: b power7_wakeup_loss
+
+ /* Fast Sleep wakeup on PowerNV */
+8: GET_PACA(r13)
+ b power7_wakeup_tb_loss
+
+9:
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#endif /* CONFIG_PPC_P7_NAP */
+ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
+ NOTEST, 0x100)
+
+ . = 0x200
+machine_check_pSeries_1:
+ /* This is moved out of line as it can be patched by FW, but
+ * some code path might still want to branch into the original
+ * vector
+ */
+ HMT_MEDIUM_PPR_DISCARD
+ SET_SCRATCH0(r13) /* save r13 */
+#ifdef CONFIG_PPC_P7_NAP
+BEGIN_FTR_SECTION
+ /* Running native on arch 2.06 or later, check if we are
+ * waking up from nap. We only handle no state loss and
+ * supervisor state loss. We do -not- handle hypervisor
+ * state loss at this time.
+ */
+ mfspr r13,SPRN_SRR1
+ rlwinm. r13,r13,47-31,30,31
+ OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
+ beq 9f
+
+ mfspr r13,SPRN_SRR1
+ rlwinm. r13,r13,47-31,30,31
+ /* waking up from powersave (nap) state */
+ cmpwi cr1,r13,2
+ /* Total loss of HV state is fatal. let's just stay stuck here */
+ OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
+ bgt cr1,.
+9:
+ OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#endif /* CONFIG_PPC_P7_NAP */
+ EXCEPTION_PROLOG_0(PACA_EXMC)
+BEGIN_FTR_SECTION
+ b machine_check_pSeries_early
+FTR_SECTION_ELSE
+ b machine_check_pSeries_0
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+
+ . = 0x300
+ .globl data_access_pSeries
+data_access_pSeries:
+ HMT_MEDIUM_PPR_DISCARD
+ SET_SCRATCH0(r13)
+BEGIN_FTR_SECTION
+ b data_access_check_stab
+data_access_not_stab:
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
+ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD,
+ KVMTEST, 0x300)
+
+ . = 0x380
+ .globl data_access_slb_pSeries
+data_access_slb_pSeries:
+ HMT_MEDIUM_PPR_DISCARD
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXSLB)
+ EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380)
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_DAR
+#ifdef __DISABLED__
+ /* Keep that around for when we re-implement dynamic VSIDs */
+ cmpdi r3,0
+ bge slb_miss_user_pseries
+#endif /* __DISABLED__ */
+ mfspr r12,SPRN_SRR1
+#ifndef CONFIG_RELOCATABLE
+ b slb_miss_realmode
+#else
+ /*
+ * We can't just use a direct branch to slb_miss_realmode
+ * because the distance from here to there depends on where
+ * the kernel ends up being put.
+ */
+ mfctr r11
+ ld r10,PACAKBASE(r13)
+ LOAD_HANDLER(r10, slb_miss_realmode)
+ mtctr r10
+ bctr
+#endif
+
+ STD_EXCEPTION_PSERIES(0x400, 0x400, instruction_access)
+
+ . = 0x480
+ .globl instruction_access_slb_pSeries
+instruction_access_slb_pSeries:
+ HMT_MEDIUM_PPR_DISCARD
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXSLB)
+ EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
+#ifdef __DISABLED__
+ /* Keep that around for when we re-implement dynamic VSIDs */
+ cmpdi r3,0
+ bge slb_miss_user_pseries
+#endif /* __DISABLED__ */
+ mfspr r12,SPRN_SRR1
+#ifndef CONFIG_RELOCATABLE
+ b slb_miss_realmode
+#else
+ mfctr r11
+ ld r10,PACAKBASE(r13)
+ LOAD_HANDLER(r10, slb_miss_realmode)
+ mtctr r10
+ bctr
+#endif
+
+ /* We open code these as we can't have a ". = x" (even with
+ * x = "." within a feature section
+ */
+ . = 0x500;
+ .globl hardware_interrupt_pSeries;
+ .globl hardware_interrupt_hv;
+hardware_interrupt_pSeries:
+hardware_interrupt_hv:
+ HMT_MEDIUM_PPR_DISCARD
+ BEGIN_FTR_SECTION
+ _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt,
+ EXC_HV, SOFTEN_TEST_HV)
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502)
+ FTR_SECTION_ELSE
+ _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt,
+ EXC_STD, SOFTEN_TEST_HV_201)
+ KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500)
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+
+ STD_EXCEPTION_PSERIES(0x600, 0x600, alignment)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x600)
+
+ STD_EXCEPTION_PSERIES(0x700, 0x700, program_check)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x700)
+
+ STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x800)
+
+ . = 0x900
+ .globl decrementer_pSeries
+decrementer_pSeries:
+ _MASKABLE_EXCEPTION_PSERIES(0x900, decrementer, EXC_STD, SOFTEN_TEST_PR)
+
+ STD_EXCEPTION_HV(0x980, 0x982, hdecrementer)
+
+ MASKABLE_EXCEPTION_PSERIES(0xa00, 0xa00, doorbell_super)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xa00)
+
+ STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xb00)
+
+ . = 0xc00
+ .globl system_call_pSeries
+system_call_pSeries:
+ HMT_MEDIUM
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+ SET_SCRATCH0(r13)
+ GET_PACA(r13)
+ std r9,PACA_EXGEN+EX_R9(r13)
+ std r10,PACA_EXGEN+EX_R10(r13)
+ mfcr r9
+ KVMTEST(0xc00)
+ GET_SCRATCH0(r13)
+#endif
+ SYSCALL_PSERIES_1
+ SYSCALL_PSERIES_2_RFID
+ SYSCALL_PSERIES_3
+ KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
+
+ STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xd00)
+
+ /* At 0xe??? we have a bunch of hypervisor exceptions, we branch
+ * out of line to handle them
+ */
+ . = 0xe00
+hv_data_storage_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b h_data_storage_hv
+
+ . = 0xe20
+hv_instr_storage_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b h_instr_storage_hv
+
+ . = 0xe40
+emulation_assist_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b emulation_assist_hv
+
+ . = 0xe60
+hv_exception_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b hmi_exception_hv
+
+ . = 0xe80
+hv_doorbell_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b h_doorbell_hv
+
+ /* We need to deal with the Altivec unavailable exception
+ * here which is at 0xf20, thus in the middle of the
+ * prolog code of the PerformanceMonitor one. A little
+ * trickery is thus necessary
+ */
+ . = 0xf00
+performance_monitor_pseries_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b performance_monitor_pSeries
+
+ . = 0xf20
+altivec_unavailable_pseries_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b altivec_unavailable_pSeries
+
+ . = 0xf40
+vsx_unavailable_pseries_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b vsx_unavailable_pSeries
+
+ . = 0xf60
+facility_unavailable_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b facility_unavailable_pSeries
+
+ . = 0xf80
+hv_facility_unavailable_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b facility_unavailable_hv
+
+#ifdef CONFIG_CBE_RAS
+ STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error)
+ KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202)
+#endif /* CONFIG_CBE_RAS */
+
+ STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint)
+ KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x1300)
+
+ . = 0x1500
+ .global denorm_exception_hv
+denorm_exception_hv:
+ HMT_MEDIUM_PPR_DISCARD
+ mtspr SPRN_SPRG_HSCRATCH0,r13
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500)
+
+#ifdef CONFIG_PPC_DENORMALISATION
+ mfspr r10,SPRN_HSRR1
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
+ addi r11,r11,-4 /* HSRR0 is next instruction */
+ bne+ denorm_assist
+#endif
+
+ KVMTEST(0x1500)
+ EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV)
+ KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1500)
+
+#ifdef CONFIG_CBE_RAS
+ STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance)
+ KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602)
+#endif /* CONFIG_CBE_RAS */
+
+ STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x1700)
+
+#ifdef CONFIG_CBE_RAS
+ STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal)
+ KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1802)
+#else
+ . = 0x1800
+#endif /* CONFIG_CBE_RAS */
+
+
+/*** Out of line interrupts support ***/
+
+ .align 7
+ /* moved from 0x200 */
+machine_check_pSeries_early:
+BEGIN_FTR_SECTION
+ EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
+ /*
+ * Register contents:
+ * R13 = PACA
+ * R9 = CR
+ * Original R9 to R13 is saved on PACA_EXMC
+ *
+ * Switch to mc_emergency stack and handle re-entrancy (we limit
+ * the nested MCE upto level 4 to avoid stack overflow).
+ * Save MCE registers srr1, srr0, dar and dsisr and then set ME=1
+ *
+ * We use paca->in_mce to check whether this is the first entry or
+ * nested machine check. We increment paca->in_mce to track nested
+ * machine checks.
+ *
+ * If this is the first entry then set stack pointer to
+ * paca->mc_emergency_sp, otherwise r1 is already pointing to
+ * stack frame on mc_emergency stack.
+ *
+ * NOTE: We are here with MSR_ME=0 (off), which means we risk a
+ * checkstop if we get another machine check exception before we do
+ * rfid with MSR_ME=1.
+ */
+ mr r11,r1 /* Save r1 */
+ lhz r10,PACA_IN_MCE(r13)
+ cmpwi r10,0 /* Are we in nested machine check */
+ bne 0f /* Yes, we are. */
+ /* First machine check entry */
+ ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
+0: subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+ addi r10,r10,1 /* increment paca->in_mce */
+ sth r10,PACA_IN_MCE(r13)
+ /* Limit nested MCE to level 4 to avoid stack overflow */
+ cmpwi r10,4
+ bgt 2f /* Check if we hit limit of 4 */
+ std r11,GPR1(r1) /* Save r1 on the stack. */
+ std r11,0(r1) /* make stack chain pointer */
+ mfspr r11,SPRN_SRR0 /* Save SRR0 */
+ std r11,_NIP(r1)
+ mfspr r11,SPRN_SRR1 /* Save SRR1 */
+ std r11,_MSR(r1)
+ mfspr r11,SPRN_DAR /* Save DAR */
+ std r11,_DAR(r1)
+ mfspr r11,SPRN_DSISR /* Save DSISR */
+ std r11,_DSISR(r1)
+ std r9,_CCR(r1) /* Save CR in stackframe */
+ /* Save r9 through r13 from EXMC save area to stack frame. */
+ EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
+ mfmsr r11 /* get MSR value */
+ ori r11,r11,MSR_ME /* turn on ME bit */
+ ori r11,r11,MSR_RI /* turn on RI bit */
+ ld r12,PACAKBASE(r13) /* get high part of &label */
+ LOAD_HANDLER(r12, machine_check_handle_early)
+1: mtspr SPRN_SRR0,r12
+ mtspr SPRN_SRR1,r11
+ rfid
+ b . /* prevent speculative execution */
+2:
+ /* Stack overflow. Stay on emergency stack and panic.
+ * Keep the ME bit off while panic-ing, so that if we hit
+ * another machine check we checkstop.
+ */
+ addi r1,r1,INT_FRAME_SIZE /* go back to previous stack frame */
+ ld r11,PACAKMSR(r13)
+ ld r12,PACAKBASE(r13)
+ LOAD_HANDLER(r12, unrecover_mce)
+ li r10,MSR_ME
+ andc r11,r11,r10 /* Turn off MSR_ME */
+ b 1b
+ b . /* prevent speculative execution */
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+
+machine_check_pSeries:
+ .globl machine_check_fwnmi
+machine_check_fwnmi:
+ HMT_MEDIUM_PPR_DISCARD
+ SET_SCRATCH0(r13) /* save r13 */
+ EXCEPTION_PROLOG_0(PACA_EXMC)
+machine_check_pSeries_0:
+ EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST, 0x200)
+ EXCEPTION_PROLOG_PSERIES_1(machine_check_common, EXC_STD)
+ KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200)
+
+ /* moved from 0x300 */
+data_access_check_stab:
+ GET_PACA(r13)
+ std r9,PACA_EXSLB+EX_R9(r13)
+ std r10,PACA_EXSLB+EX_R10(r13)
+ mfspr r10,SPRN_DAR
+ mfspr r9,SPRN_DSISR
+ srdi r10,r10,60
+ rlwimi r10,r9,16,0x20
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ lbz r9,HSTATE_IN_GUEST(r13)
+ rlwimi r10,r9,8,0x300
+#endif
+ mfcr r9
+ cmpwi r10,0x2c
+ beq do_stab_bolted_pSeries
+ mtcrf 0x80,r9
+ ld r9,PACA_EXSLB+EX_R9(r13)
+ ld r10,PACA_EXSLB+EX_R10(r13)
+ b data_access_not_stab
+do_stab_bolted_pSeries:
+ std r11,PACA_EXSLB+EX_R11(r13)
+ std r12,PACA_EXSLB+EX_R12(r13)
+ GET_SCRATCH0(r10)
+ std r10,PACA_EXSLB+EX_R13(r13)
+ EXCEPTION_PROLOG_PSERIES_1(do_stab_bolted, EXC_STD)
+
+ KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
+ KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400)
+ KVM_HANDLER_PR(PACA_EXSLB, EXC_STD, 0x480)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900)
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982)
+
+#ifdef CONFIG_PPC_DENORMALISATION
+denorm_assist:
+BEGIN_FTR_SECTION
+/*
+ * To denormalise we need to move a copy of the register to itself.
+ * For POWER6 do that here for all FP regs.
+ */
+ mfmsr r10
+ ori r10,r10,(MSR_FP|MSR_FE0|MSR_FE1)
+ xori r10,r10,(MSR_FE0|MSR_FE1)
+ mtmsrd r10
+ sync
+
+#define FMR2(n) fmr (n), (n) ; fmr n+1, n+1
+#define FMR4(n) FMR2(n) ; FMR2(n+2)
+#define FMR8(n) FMR4(n) ; FMR4(n+4)
+#define FMR16(n) FMR8(n) ; FMR8(n+8)
+#define FMR32(n) FMR16(n) ; FMR16(n+16)
+ FMR32(0)
+
+FTR_SECTION_ELSE
+/*
+ * To denormalise we need to move a copy of the register to itself.
+ * For POWER7 do that here for the first 32 VSX registers only.
+ */
+ mfmsr r10
+ oris r10,r10,MSR_VSX@h
+ mtmsrd r10
+ sync
+
+#define XVCPSGNDP2(n) XVCPSGNDP(n,n,n) ; XVCPSGNDP(n+1,n+1,n+1)
+#define XVCPSGNDP4(n) XVCPSGNDP2(n) ; XVCPSGNDP2(n+2)
+#define XVCPSGNDP8(n) XVCPSGNDP4(n) ; XVCPSGNDP4(n+4)
+#define XVCPSGNDP16(n) XVCPSGNDP8(n) ; XVCPSGNDP8(n+8)
+#define XVCPSGNDP32(n) XVCPSGNDP16(n) ; XVCPSGNDP16(n+16)
+ XVCPSGNDP32(0)
+
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206)
+
+BEGIN_FTR_SECTION
+ b denorm_done
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+/*
+ * To denormalise we need to move a copy of the register to itself.
+ * For POWER8 we need to do that for all 64 VSX registers
+ */
+ XVCPSGNDP32(32)
+denorm_done:
+ mtspr SPRN_HSRR0,r11
+ mtcrf 0x80,r9
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ RESTORE_PPR_PACA(PACA_EXGEN, r10)
+BEGIN_FTR_SECTION
+ ld r10,PACA_EXGEN+EX_CFAR(r13)
+ mtspr SPRN_CFAR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ HRFID
+ b .
+#endif
+
+ .align 7
+ /* moved from 0xe00 */
+ STD_EXCEPTION_HV_OOL(0xe02, h_data_storage)
+ KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0xe02)
+ STD_EXCEPTION_HV_OOL(0xe22, h_instr_storage)
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe22)
+ STD_EXCEPTION_HV_OOL(0xe42, emulation_assist)
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe42)
+ STD_EXCEPTION_HV_OOL(0xe62, hmi_exception) /* need to flush cache ? */
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62)
+ MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell)
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82)
+
+ /* moved from 0xf00 */
+ STD_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf00)
+ STD_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20)
+ STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40)
+ STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
+ KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60)
+ STD_EXCEPTION_HV_OOL(0xf82, facility_unavailable)
+ KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xf82)
+
+/*
+ * An interrupt came in while soft-disabled. We set paca->irq_happened, then:
+ * - If it was a decrementer interrupt, we bump the dec to max and and return.
+ * - If it was a doorbell we return immediately since doorbells are edge
+ * triggered and won't automatically refire.
+ * - else we hard disable and return.
+ * This is called with r10 containing the value to OR to the paca field.
+ */
+#define MASKED_INTERRUPT(_H) \
+masked_##_H##interrupt: \
+ std r11,PACA_EXGEN+EX_R11(r13); \
+ lbz r11,PACAIRQHAPPENED(r13); \
+ or r11,r11,r10; \
+ stb r11,PACAIRQHAPPENED(r13); \
+ cmpwi r10,PACA_IRQ_DEC; \
+ bne 1f; \
+ lis r10,0x7fff; \
+ ori r10,r10,0xffff; \
+ mtspr SPRN_DEC,r10; \
+ b 2f; \
+1: cmpwi r10,PACA_IRQ_DBELL; \
+ beq 2f; \
+ mfspr r10,SPRN_##_H##SRR1; \
+ rldicl r10,r10,48,1; /* clear MSR_EE */ \
+ rotldi r10,r10,16; \
+ mtspr SPRN_##_H##SRR1,r10; \
+2: mtcrf 0x80,r9; \
+ ld r9,PACA_EXGEN+EX_R9(r13); \
+ ld r10,PACA_EXGEN+EX_R10(r13); \
+ ld r11,PACA_EXGEN+EX_R11(r13); \
+ GET_SCRATCH0(r13); \
+ ##_H##rfid; \
+ b .
+
+ MASKED_INTERRUPT()
+ MASKED_INTERRUPT(H)
+
+/*
+ * Called from arch_local_irq_enable when an interrupt needs
+ * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate
+ * which kind of interrupt. MSR:EE is already off. We generate a
+ * stackframe like if a real interrupt had happened.
+ *
+ * Note: While MSR:EE is off, we need to make sure that _MSR
+ * in the generated frame has EE set to 1 or the exception
+ * handler will not properly re-enable them.
+ */
+_GLOBAL(__replay_interrupt)
+ /* We are going to jump to the exception common code which
+ * will retrieve various register values from the PACA which
+ * we don't give a damn about, so we don't bother storing them.
+ */
+ mfmsr r12
+ mflr r11
+ mfcr r9
+ ori r12,r12,MSR_EE
+ cmpwi r3,0x900
+ beq decrementer_common
+ cmpwi r3,0x500
+ beq hardware_interrupt_common
+BEGIN_FTR_SECTION
+ cmpwi r3,0xe80
+ beq h_doorbell_common
+FTR_SECTION_ELSE
+ cmpwi r3,0xa00
+ beq doorbell_super_common
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+ blr
+
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Vectors for the FWNMI option. Share common code.
+ */
+ .globl system_reset_fwnmi
+ .align 7
+system_reset_fwnmi:
+ HMT_MEDIUM_PPR_DISCARD
+ SET_SCRATCH0(r13) /* save r13 */
+ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
+ NOTEST, 0x100)
+
+#endif /* CONFIG_PPC_PSERIES */
+
+#ifdef __DISABLED__
+/*
+ * This is used for when the SLB miss handler has to go virtual,
+ * which doesn't happen for now anymore but will once we re-implement
+ * dynamic VSIDs for shared page tables
+ */
+slb_miss_user_pseries:
+ std r10,PACA_EXGEN+EX_R10(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ GET_SCRATCH0(r10)
+ ld r11,PACA_EXSLB+EX_R9(r13)
+ ld r12,PACA_EXSLB+EX_R3(r13)
+ std r10,PACA_EXGEN+EX_R13(r13)
+ std r11,PACA_EXGEN+EX_R9(r13)
+ std r12,PACA_EXGEN+EX_R3(r13)
+ clrrdi r12,r13,32
+ mfmsr r10
+ mfspr r11,SRR0 /* save SRR0 */
+ ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
+ ori r10,r10,MSR_IR|MSR_DR|MSR_RI
+ mtspr SRR0,r12
+ mfspr r12,SRR1 /* and SRR1 */
+ mtspr SRR1,r10
+ rfid
+ b . /* prevent spec. execution */
+#endif /* __DISABLED__ */
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+kvmppc_skip_interrupt:
+ /*
+ * Here all GPRs are unchanged from when the interrupt happened
+ * except for r13, which is saved in SPRG_SCRATCH0.
+ */
+ mfspr r13, SPRN_SRR0
+ addi r13, r13, 4
+ mtspr SPRN_SRR0, r13
+ GET_SCRATCH0(r13)
+ rfid
+ b .
+
+kvmppc_skip_Hinterrupt:
+ /*
+ * Here all GPRs are unchanged from when the interrupt happened
+ * except for r13, which is saved in SPRG_SCRATCH0.
+ */
+ mfspr r13, SPRN_HSRR0
+ addi r13, r13, 4
+ mtspr SPRN_HSRR0, r13
+ GET_SCRATCH0(r13)
+ hrfid
+ b .
+#endif
+
+/*
+ * Code from here down to __end_handlers is invoked from the
+ * exception prologs above. Because the prologs assemble the
+ * addresses of these handlers using the LOAD_HANDLER macro,
+ * which uses an ori instruction, these handlers must be in
+ * the first 64k of the kernel image.
+ */
+
+/*** Common interrupt handlers ***/
+
+ STD_EXCEPTION_COMMON(0x100, system_reset, system_reset_exception)
+
+ STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ)
+ STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, timer_interrupt)
+ STD_EXCEPTION_COMMON(0x980, hdecrementer, hdec_interrupt)
+#ifdef CONFIG_PPC_DOORBELL
+ STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, doorbell_exception)
+#else
+ STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, unknown_exception)
+#endif
+ STD_EXCEPTION_COMMON(0xb00, trap_0b, unknown_exception)
+ STD_EXCEPTION_COMMON(0xd00, single_step, single_step_exception)
+ STD_EXCEPTION_COMMON(0xe00, trap_0e, unknown_exception)
+ STD_EXCEPTION_COMMON(0xe40, emulation_assist, emulation_assist_interrupt)
+ STD_EXCEPTION_COMMON(0xe60, hmi_exception, unknown_exception)
+#ifdef CONFIG_PPC_DOORBELL
+ STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, doorbell_exception)
+#else
+ STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, unknown_exception)
+#endif
+ STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, performance_monitor_exception)
+ STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, instruction_breakpoint_exception)
+ STD_EXCEPTION_COMMON(0x1502, denorm, unknown_exception)
+#ifdef CONFIG_ALTIVEC
+ STD_EXCEPTION_COMMON(0x1700, altivec_assist, altivec_assist_exception)
+#else
+ STD_EXCEPTION_COMMON(0x1700, altivec_assist, unknown_exception)
+#endif
+#ifdef CONFIG_CBE_RAS
+ STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception)
+ STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception)
+ STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception)
+#endif /* CONFIG_CBE_RAS */
+
+ /*
+ * Relocation-on interrupts: A subset of the interrupts can be delivered
+ * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering
+ * it. Addresses are the same as the original interrupt addresses, but
+ * offset by 0xc000000000004000.
+ * It's impossible to receive interrupts below 0x300 via this mechanism.
+ * KVM: None of these traps are from the guest ; anything that escalated
+ * to HV=1 from HV=0 is delivered via real mode handlers.
+ */
+
+ /*
+ * This uses the standard macro, since the original 0x300 vector
+ * only has extra guff for STAB-based processors -- which never
+ * come here.
+ */
+ STD_RELON_EXCEPTION_PSERIES(0x4300, 0x300, data_access)
+ . = 0x4380
+ .globl data_access_slb_relon_pSeries
+data_access_slb_relon_pSeries:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXSLB)
+ EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_DAR
+ mfspr r12,SPRN_SRR1
+#ifndef CONFIG_RELOCATABLE
+ b slb_miss_realmode
+#else
+ /*
+ * We can't just use a direct branch to slb_miss_realmode
+ * because the distance from here to there depends on where
+ * the kernel ends up being put.
+ */
+ mfctr r11
+ ld r10,PACAKBASE(r13)
+ LOAD_HANDLER(r10, slb_miss_realmode)
+ mtctr r10
+ bctr
+#endif
+
+ STD_RELON_EXCEPTION_PSERIES(0x4400, 0x400, instruction_access)
+ . = 0x4480
+ .globl instruction_access_slb_relon_pSeries
+instruction_access_slb_relon_pSeries:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXSLB)
+ EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
+ std r3,PACA_EXSLB+EX_R3(r13)
+ mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
+ mfspr r12,SPRN_SRR1
+#ifndef CONFIG_RELOCATABLE
+ b slb_miss_realmode
+#else
+ mfctr r11
+ ld r10,PACAKBASE(r13)
+ LOAD_HANDLER(r10, slb_miss_realmode)
+ mtctr r10
+ bctr
+#endif
+
+ . = 0x4500
+ .globl hardware_interrupt_relon_pSeries;
+ .globl hardware_interrupt_relon_hv;
+hardware_interrupt_relon_pSeries:
+hardware_interrupt_relon_hv:
+ BEGIN_FTR_SECTION
+ _MASKABLE_RELON_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV, SOFTEN_TEST_HV)
+ FTR_SECTION_ELSE
+ _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD, SOFTEN_TEST_PR)
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+ STD_RELON_EXCEPTION_PSERIES(0x4600, 0x600, alignment)
+ STD_RELON_EXCEPTION_PSERIES(0x4700, 0x700, program_check)
+ STD_RELON_EXCEPTION_PSERIES(0x4800, 0x800, fp_unavailable)
+ MASKABLE_RELON_EXCEPTION_PSERIES(0x4900, 0x900, decrementer)
+ STD_RELON_EXCEPTION_HV(0x4980, 0x982, hdecrementer)
+ MASKABLE_RELON_EXCEPTION_PSERIES(0x4a00, 0xa00, doorbell_super)
+ STD_RELON_EXCEPTION_PSERIES(0x4b00, 0xb00, trap_0b)
+
+ . = 0x4c00
+ .globl system_call_relon_pSeries
+system_call_relon_pSeries:
+ HMT_MEDIUM
+ SYSCALL_PSERIES_1
+ SYSCALL_PSERIES_2_DIRECT
+ SYSCALL_PSERIES_3
+
+ STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step)
+
+ . = 0x4e00
+ b . /* Can't happen, see v2.07 Book III-S section 6.5 */
+
+ . = 0x4e20
+ b . /* Can't happen, see v2.07 Book III-S section 6.5 */
+
+ . = 0x4e40
+emulation_assist_relon_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b emulation_assist_relon_hv
+
+ . = 0x4e60
+ b . /* Can't happen, see v2.07 Book III-S section 6.5 */
+
+ . = 0x4e80
+h_doorbell_relon_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b h_doorbell_relon_hv
+
+ . = 0x4f00
+performance_monitor_relon_pseries_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b performance_monitor_relon_pSeries
+
+ . = 0x4f20
+altivec_unavailable_relon_pseries_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b altivec_unavailable_relon_pSeries
+
+ . = 0x4f40
+vsx_unavailable_relon_pseries_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b vsx_unavailable_relon_pSeries
+
+ . = 0x4f60
+facility_unavailable_relon_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b facility_unavailable_relon_pSeries
+
+ . = 0x4f80
+hv_facility_unavailable_relon_trampoline:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b hv_facility_unavailable_relon_hv
+
+ STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint)
+#ifdef CONFIG_PPC_DENORMALISATION
+ . = 0x5500
+ b denorm_exception_hv
+#endif
+ STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist)
+
+ /* Other future vectors */
+ .align 7
+ .globl __end_interrupts
+__end_interrupts:
+
+ .align 7
+system_call_entry_direct:
+#if defined(CONFIG_RELOCATABLE)
+ /* The first level prologue may have used LR to get here, saving
+ * orig in r10. To save hacking/ifdeffing common code, restore here.
+ */
+ mtlr r10
+#endif
+system_call_entry:
+ b system_call_common
+
+ppc64_runlatch_on_trampoline:
+ b __ppc64_runlatch_on
+
+/*
+ * Here we have detected that the kernel stack pointer is bad.
+ * R9 contains the saved CR, r13 points to the paca,
+ * r10 contains the (bad) kernel stack pointer,
+ * r11 and r12 contain the saved SRR0 and SRR1.
+ * We switch to using an emergency stack, save the registers there,
+ * and call kernel_bad_stack(), which panics.
+ */
+bad_stack:
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,64+INT_FRAME_SIZE
+ std r9,_CCR(r1)
+ std r10,GPR1(r1)
+ std r11,_NIP(r1)
+ std r12,_MSR(r1)
+ mfspr r11,SPRN_DAR
+ mfspr r12,SPRN_DSISR
+ std r11,_DAR(r1)
+ std r12,_DSISR(r1)
+ mflr r10
+ mfctr r11
+ mfxer r12
+ std r10,_LINK(r1)
+ std r11,_CTR(r1)
+ std r12,_XER(r1)
+ SAVE_GPR(0,r1)
+ SAVE_GPR(2,r1)
+ ld r10,EX_R3(r3)
+ std r10,GPR3(r1)
+ SAVE_GPR(4,r1)
+ SAVE_4GPRS(5,r1)
+ ld r9,EX_R9(r3)
+ ld r10,EX_R10(r3)
+ SAVE_2GPRS(9,r1)
+ ld r9,EX_R11(r3)
+ ld r10,EX_R12(r3)
+ ld r11,EX_R13(r3)
+ std r9,GPR11(r1)
+ std r10,GPR12(r1)
+ std r11,GPR13(r1)
+BEGIN_FTR_SECTION
+ ld r10,EX_CFAR(r3)
+ std r10,ORIG_GPR3(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ SAVE_8GPRS(14,r1)
+ SAVE_10GPRS(22,r1)
+ lhz r12,PACA_TRAP_SAVE(r13)
+ std r12,_TRAP(r1)
+ addi r11,r1,INT_FRAME_SIZE
+ std r11,0(r1)
+ li r12,0
+ std r12,0(r11)
+ ld r2,PACATOC(r13)
+ ld r11,exception_marker@toc(r2)
+ std r12,RESULT(r1)
+ std r11,STACK_FRAME_OVERHEAD-16(r1)
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl kernel_bad_stack
+ b 1b
+
+/*
+ * Here r13 points to the paca, r9 contains the saved CR,
+ * SRR0 and SRR1 are saved in r11 and r12,
+ * r9 - r13 are saved in paca->exgen.
+ */
+ .align 7
+ .globl data_access_common
+data_access_common:
+ mfspr r10,SPRN_DAR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ mfspr r10,SPRN_DSISR
+ stw r10,PACA_EXGEN+EX_DSISR(r13)
+ EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
+ DISABLE_INTS
+ ld r12,_MSR(r1)
+ ld r3,PACA_EXGEN+EX_DAR(r13)
+ lwz r4,PACA_EXGEN+EX_DSISR(r13)
+ li r5,0x300
+ b do_hash_page /* Try to handle as hpte fault */
+
+ .align 7
+ .globl h_data_storage_common
+h_data_storage_common:
+ mfspr r10,SPRN_HDAR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ mfspr r10,SPRN_HDSISR
+ stw r10,PACA_EXGEN+EX_DSISR(r13)
+ EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
+ bl save_nvgprs
+ DISABLE_INTS
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unknown_exception
+ b ret_from_except
+
+ .align 7
+ .globl instruction_access_common
+instruction_access_common:
+ EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
+ DISABLE_INTS
+ ld r12,_MSR(r1)
+ ld r3,_NIP(r1)
+ andis. r4,r12,0x5820
+ li r5,0x400
+ b do_hash_page /* Try to handle as hpte fault */
+
+ STD_EXCEPTION_COMMON(0xe20, h_instr_storage, unknown_exception)
+
+/*
+ * Here is the common SLB miss user that is used when going to virtual
+ * mode for SLB misses, that is currently not used
+ */
+#ifdef __DISABLED__
+ .align 7
+ .globl slb_miss_user_common
+slb_miss_user_common:
+ mflr r10
+ std r3,PACA_EXGEN+EX_DAR(r13)
+ stw r9,PACA_EXGEN+EX_CCR(r13)
+ std r10,PACA_EXGEN+EX_LR(r13)
+ std r11,PACA_EXGEN+EX_SRR0(r13)
+ bl slb_allocate_user
+
+ ld r10,PACA_EXGEN+EX_LR(r13)
+ ld r3,PACA_EXGEN+EX_R3(r13)
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
+ ld r11,PACA_EXGEN+EX_SRR0(r13)
+ mtlr r10
+ beq- slb_miss_fault
+
+ andi. r10,r12,MSR_RI /* check for unrecoverable exception */
+ beq- unrecov_user_slb
+ mfmsr r10
+
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+.machine pop
+
+ clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */
+ mtmsrd r10,1
+
+ mtspr SRR0,r11
+ mtspr SRR1,r12
+
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ rfid
+ b .
+
+slb_miss_fault:
+ EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
+ ld r4,PACA_EXGEN+EX_DAR(r13)
+ li r5,0
+ std r4,_DAR(r1)
+ std r5,_DSISR(r1)
+ b handle_page_fault
+
+unrecov_user_slb:
+ EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
+ DISABLE_INTS
+ bl save_nvgprs
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unrecoverable_exception
+ b 1b
+
+#endif /* __DISABLED__ */
+
+
+ /*
+ * Machine check is different because we use a different
+ * save area: PACA_EXMC instead of PACA_EXGEN.
+ */
+ .align 7
+ .globl machine_check_common
+machine_check_common:
+
+ mfspr r10,SPRN_DAR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ mfspr r10,SPRN_DSISR
+ stw r10,PACA_EXGEN+EX_DSISR(r13)
+ EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
+ FINISH_NAP
+ DISABLE_INTS
+ ld r3,PACA_EXGEN+EX_DAR(r13)
+ lwz r4,PACA_EXGEN+EX_DSISR(r13)
+ std r3,_DAR(r1)
+ std r4,_DSISR(r1)
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_exception
+ b ret_from_except
+
+ .align 7
+ .globl alignment_common
+alignment_common:
+ mfspr r10,SPRN_DAR
+ std r10,PACA_EXGEN+EX_DAR(r13)
+ mfspr r10,SPRN_DSISR
+ stw r10,PACA_EXGEN+EX_DSISR(r13)
+ EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
+ ld r3,PACA_EXGEN+EX_DAR(r13)
+ lwz r4,PACA_EXGEN+EX_DSISR(r13)
+ std r3,_DAR(r1)
+ std r4,_DSISR(r1)
+ bl save_nvgprs
+ DISABLE_INTS
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl alignment_exception
+ b ret_from_except
+
+ .align 7
+ .globl program_check_common
+program_check_common:
+ EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
+ bl save_nvgprs
+ DISABLE_INTS
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl program_check_exception
+ b ret_from_except
+
+ .align 7
+ .globl fp_unavailable_common
+fp_unavailable_common:
+ EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
+ bne 1f /* if from user, just load it up */
+ bl save_nvgprs
+ DISABLE_INTS
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl kernel_fp_unavailable_exception
+ BUG_OPCODE
+1:
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+ /* Test if 2 TM state bits are zero. If non-zero (ie. userspace was in
+ * transaction), go do TM stuff
+ */
+ rldicl. r0, r12, (64-MSR_TS_LG), (64-2)
+ bne- 2f
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif
+ bl load_up_fpu
+ b fast_exception_return
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+2: /* User process was in a transaction */
+ bl save_nvgprs
+ DISABLE_INTS
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl fp_unavailable_tm
+ b ret_from_except
+#endif
+ .align 7
+ .globl altivec_unavailable_common
+altivec_unavailable_common:
+ EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ beq 1f
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ BEGIN_FTR_SECTION_NESTED(69)
+ /* Test if 2 TM state bits are zero. If non-zero (ie. userspace was in
+ * transaction), go do TM stuff
+ */
+ rldicl. r0, r12, (64-MSR_TS_LG), (64-2)
+ bne- 2f
+ END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
+#endif
+ bl load_up_altivec
+ b fast_exception_return
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+2: /* User process was in a transaction */
+ bl save_nvgprs
+ DISABLE_INTS
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl altivec_unavailable_tm
+ b ret_from_except
+#endif
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+ bl save_nvgprs
+ DISABLE_INTS
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl altivec_unavailable_exception
+ b ret_from_except
+
+ .align 7
+ .globl vsx_unavailable_common
+vsx_unavailable_common:
+ EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN)
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ beq 1f
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ BEGIN_FTR_SECTION_NESTED(69)
+ /* Test if 2 TM state bits are zero. If non-zero (ie. userspace was in
+ * transaction), go do TM stuff
+ */
+ rldicl. r0, r12, (64-MSR_TS_LG), (64-2)
+ bne- 2f
+ END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
+#endif
+ b load_up_vsx
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+2: /* User process was in a transaction */
+ bl save_nvgprs
+ DISABLE_INTS
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl vsx_unavailable_tm
+ b ret_from_except
+#endif
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+ bl save_nvgprs
+ DISABLE_INTS
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl vsx_unavailable_exception
+ b ret_from_except
+
+ STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception)
+ STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception)
+
+ .align 7
+ .globl __end_handlers
+__end_handlers:
+
+ /* Equivalents to the above handlers for relocation-on interrupt vectors */
+ STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist)
+ MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell)
+
+ STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor)
+ STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable)
+ STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable)
+ STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
+ STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable)
+
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+/*
+ * Data area reserved for FWNMI option.
+ * This address (0x7000) is fixed by the RPA.
+ */
+ .= 0x7000
+ .globl fwnmi_data_area
+fwnmi_data_area:
+
+ /* pseries and powernv need to keep the whole page from
+ * 0x7000 to 0x8000 free for use by the firmware
+ */
+ . = 0x8000
+#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
+
+/* Space for CPU0's segment table */
+ .balign 4096
+ .globl initial_stab
+initial_stab:
+ .space 4096
+
+#ifdef CONFIG_PPC_POWERNV
+_GLOBAL(opal_mc_secondary_handler)
+ HMT_MEDIUM_PPR_DISCARD
+ SET_SCRATCH0(r13)
+ GET_PACA(r13)
+ clrldi r3,r3,2
+ tovirt(r3,r3)
+ std r3,PACA_OPAL_MC_EVT(r13)
+ ld r13,OPAL_MC_SRR0(r3)
+ mtspr SPRN_SRR0,r13
+ ld r13,OPAL_MC_SRR1(r3)
+ mtspr SPRN_SRR1,r13
+ ld r3,OPAL_MC_GPR3(r3)
+ GET_SCRATCH0(r13)
+ b machine_check_pSeries
+#endif /* CONFIG_PPC_POWERNV */
+
+
+#define MACHINE_CHECK_HANDLER_WINDUP \
+ /* Clear MSR_RI before setting SRR0 and SRR1. */\
+ li r0,MSR_RI; \
+ mfmsr r9; /* get MSR value */ \
+ andc r9,r9,r0; \
+ mtmsrd r9,1; /* Clear MSR_RI */ \
+ /* Move original SRR0 and SRR1 into the respective regs */ \
+ ld r9,_MSR(r1); \
+ mtspr SPRN_SRR1,r9; \
+ ld r3,_NIP(r1); \
+ mtspr SPRN_SRR0,r3; \
+ ld r9,_CTR(r1); \
+ mtctr r9; \
+ ld r9,_XER(r1); \
+ mtxer r9; \
+ ld r9,_LINK(r1); \
+ mtlr r9; \
+ REST_GPR(0, r1); \
+ REST_8GPRS(2, r1); \
+ REST_GPR(10, r1); \
+ ld r11,_CCR(r1); \
+ mtcr r11; \
+ /* Decrement paca->in_mce. */ \
+ lhz r12,PACA_IN_MCE(r13); \
+ subi r12,r12,1; \
+ sth r12,PACA_IN_MCE(r13); \
+ REST_GPR(11, r1); \
+ REST_2GPRS(12, r1); \
+ /* restore original r1. */ \
+ ld r1,GPR1(r1)
+
+ /*
+ * Handle machine check early in real mode. We come here with
+ * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
+ */
+ .align 7
+ .globl machine_check_handle_early
+machine_check_handle_early:
+ std r0,GPR0(r1) /* Save r0 */
+ EXCEPTION_PROLOG_COMMON_3(0x200)
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_early
+ std r3,RESULT(r1) /* Save result */
+ ld r12,_MSR(r1)
+#ifdef CONFIG_PPC_P7_NAP
+ /*
+ * Check if thread was in power saving mode. We come here when any
+ * of the following is true:
+ * a. thread wasn't in power saving mode
+ * b. thread was in power saving mode with no state loss or
+ * supervisor state loss
+ *
+ * Go back to nap again if (b) is true.
+ */
+ rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */
+ beq 4f /* No, it wasn;t */
+ /* Thread was in power saving mode. Go back to nap again. */
+ cmpwi r11,2
+ bne 3f
+ /* Supervisor state loss */
+ li r0,1
+ stb r0,PACA_NAPSTATELOST(r13)
+3: bl machine_check_queue_event
+ MACHINE_CHECK_HANDLER_WINDUP
+ GET_PACA(r13)
+ ld r1,PACAR1(r13)
+ b power7_enter_nap_mode
+4:
+#endif
+ /*
+ * Check if we are coming from hypervisor userspace. If yes then we
+ * continue in host kernel in V mode to deliver the MC event.
+ */
+ rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */
+ beq 5f
+ andi. r11,r12,MSR_PR /* See if coming from user. */
+ bne 9f /* continue in V mode if we are. */
+
+5:
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ /*
+ * We are coming from kernel context. Check if we are coming from
+ * guest. if yes, then we can continue. We will fall through
+ * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest.
+ */
+ lbz r11,HSTATE_IN_GUEST(r13)
+ cmpwi r11,0 /* Check if coming from guest */
+ bne 9f /* continue if we are. */
+#endif
+ /*
+ * At this point we are not sure about what context we come from.
+ * Queue up the MCE event and return from the interrupt.
+ * But before that, check if this is an un-recoverable exception.
+ * If yes, then stay on emergency stack and panic.
+ */
+ andi. r11,r12,MSR_RI
+ bne 2f
+1: mfspr r11,SPRN_SRR0
+ ld r10,PACAKBASE(r13)
+ LOAD_HANDLER(r10,unrecover_mce)
+ mtspr SPRN_SRR0,r10
+ ld r10,PACAKMSR(r13)
+ /*
+ * We are going down. But there are chances that we might get hit by
+ * another MCE during panic path and we may run into unstable state
+ * with no way out. Hence, turn ME bit off while going down, so that
+ * when another MCE is hit during panic path, system will checkstop
+ * and hypervisor will get restarted cleanly by SP.
+ */
+ li r3,MSR_ME
+ andc r10,r10,r3 /* Turn off MSR_ME */
+ mtspr SPRN_SRR1,r10
+ rfid
+ b .
+2:
+ /*
+ * Check if we have successfully handled/recovered from error, if not
+ * then stay on emergency stack and panic.
+ */
+ ld r3,RESULT(r1) /* Load result */
+ cmpdi r3,0 /* see if we handled MCE successfully */
+
+ beq 1b /* if !handled then panic */
+ /*
+ * Return from MC interrupt.
+ * Queue up the MCE event so that we can log it later, while
+ * returning from kernel or opal call.
+ */
+ bl machine_check_queue_event
+ MACHINE_CHECK_HANDLER_WINDUP
+ rfid
+9:
+ /* Deliver the machine check to host kernel in V mode. */
+ MACHINE_CHECK_HANDLER_WINDUP
+ b machine_check_pSeries
+
+unrecover_mce:
+ /* Invoke machine_check_exception to print MCE event and panic. */
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_exception
+ /*
+ * We will not reach here. Even if we did, there is no way out. Call
+ * unrecoverable_exception and die.
+ */
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unrecoverable_exception
+ b 1b
+/*
+ * r13 points to the PACA, r9 contains the saved CR,
+ * r12 contain the saved SRR1, SRR0 is still ready for return
+ * r3 has the faulting address
+ * r9 - r13 are saved in paca->exslb.
+ * r3 is saved in paca->slb_r3
+ * We assume we aren't going to take any exceptions during this procedure.
+ */
+slb_miss_realmode:
+ mflr r10
+#ifdef CONFIG_RELOCATABLE
+ mtctr r11
+#endif
+
+ stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
+ std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
+
+ bl slb_allocate_realmode
+
+ /* All done -- return from exception. */
+
+ ld r10,PACA_EXSLB+EX_LR(r13)
+ ld r3,PACA_EXSLB+EX_R3(r13)
+ lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
+
+ mtlr r10
+
+ andi. r10,r12,MSR_RI /* check for unrecoverable exception */
+ beq- 2f
+
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
+.machine pop
+
+ RESTORE_PPR_PACA(PACA_EXSLB, r9)
+ ld r9,PACA_EXSLB+EX_R9(r13)
+ ld r10,PACA_EXSLB+EX_R10(r13)
+ ld r11,PACA_EXSLB+EX_R11(r13)
+ ld r12,PACA_EXSLB+EX_R12(r13)
+ ld r13,PACA_EXSLB+EX_R13(r13)
+ rfid
+ b . /* prevent speculative execution */
+
+2: mfspr r11,SPRN_SRR0
+ ld r10,PACAKBASE(r13)
+ LOAD_HANDLER(r10,unrecov_slb)
+ mtspr SPRN_SRR0,r10
+ ld r10,PACAKMSR(r13)
+ mtspr SPRN_SRR1,r10
+ rfid
+ b .
+
+unrecov_slb:
+ EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
+ DISABLE_INTS
+ bl save_nvgprs
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+ bl unrecoverable_exception
+ b 1b
+
+
+#ifdef CONFIG_PPC_970_NAP
+power4_fixup_nap:
+ andc r9,r9,r10
+ std r9,TI_LOCAL_FLAGS(r11)
+ ld r10,_LINK(r1) /* make idle task do the */
+ std r10,_NIP(r1) /* equivalent of a blr */
+ blr
+#endif
+
+/*
+ * Hash table stuff
+ */
+ .align 7
+do_hash_page:
+ std r3,_DAR(r1)
+ std r4,_DSISR(r1)
+
+ andis. r0,r4,0xa410 /* weird error? */
+ bne- handle_page_fault /* if not, try to insert a HPTE */
+ andis. r0,r4,DSISR_DABRMATCH@h
+ bne- handle_dabr_fault
+
+BEGIN_FTR_SECTION
+ andis. r0,r4,0x0020 /* Is it a segment table fault? */
+ bne- do_ste_alloc /* If so handle it */
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
+
+ CURRENT_THREAD_INFO(r11, r1)
+ lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
+ andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
+ bne 77f /* then don't call hash_page now */
+ /*
+ * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
+ * accessing a userspace segment (even from the kernel). We assume
+ * kernel addresses always have the high bit set.
+ */
+ rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */
+ rotldi r0,r3,15 /* Move high bit into MSR_PR posn */
+ orc r0,r12,r0 /* MSR_PR | ~high_bit */
+ rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */
+ ori r4,r4,1 /* add _PAGE_PRESENT */
+ rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */
+
+ /*
+ * r3 contains the faulting address
+ * r4 contains the required access permissions
+ * r5 contains the trap number
+ *
+ * at return r3 = 0 for success, 1 for page fault, negative for error
+ */
+ bl hash_page /* build HPTE if possible */
+ cmpdi r3,0 /* see if hash_page succeeded */
+
+ /* Success */
+ beq fast_exc_return_irq /* Return from exception on success */
+
+ /* Error */
+ blt- 13f
+
+/* Here we have a page fault that hash_page can't handle. */
+handle_page_fault:
+11: ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_page_fault
+ cmpdi r3,0
+ beq+ 12f
+ bl save_nvgprs
+ mr r5,r3
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ lwz r4,_DAR(r1)
+ bl bad_page_fault
+ b ret_from_except
+
+/* We have a data breakpoint exception - handle it */
+handle_dabr_fault:
+ bl save_nvgprs
+ ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_break
+12: b ret_from_except_lite
+
+
+/* We have a page fault that hash_page could handle but HV refused
+ * the PTE insertion
+ */
+13: bl save_nvgprs
+ mr r5,r3
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ ld r4,_DAR(r1)
+ bl low_hash_fault
+ b ret_from_except
+
+/*
+ * We come here as a result of a DSI at a point where we don't want
+ * to call hash_page, such as when we are accessing memory (possibly
+ * user memory) inside a PMU interrupt that occurred while interrupts
+ * were soft-disabled. We want to invoke the exception handler for
+ * the access, or panic if there isn't a handler.
+ */
+77: bl save_nvgprs
+ mr r4,r3
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ li r5,SIGSEGV
+ bl bad_page_fault
+ b ret_from_except
+
+ /* here we have a segment miss */
+do_ste_alloc:
+ bl ste_allocate /* try to insert stab entry */
+ cmpdi r3,0
+ bne- handle_page_fault
+ b fast_exception_return
+
+/*
+ * r13 points to the PACA, r9 contains the saved CR,
+ * r11 and r12 contain the saved SRR0 and SRR1.
+ * r9 - r13 are saved in paca->exslb.
+ * We assume we aren't going to take any exceptions during this procedure.
+ * We assume (DAR >> 60) == 0xc.
+ */
+ .align 7
+do_stab_bolted:
+ stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
+ std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */
+ mfspr r11,SPRN_DAR /* ea */
+
+ /*
+ * check for bad kernel/user address
+ * (ea & ~REGION_MASK) >= PGTABLE_RANGE
+ */
+ rldicr. r9,r11,4,(63 - 46 - 4)
+ li r9,0 /* VSID = 0 for bad address */
+ bne- 0f
+
+ /*
+ * Calculate VSID:
+ * This is the kernel vsid, we take the top for context from
+ * the range. context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1
+ * Here we know that (ea >> 60) == 0xc
+ */
+ lis r9,(MAX_USER_CONTEXT + 1)@ha
+ addi r9,r9,(MAX_USER_CONTEXT + 1)@l
+
+ srdi r10,r11,SID_SHIFT
+ rldimi r10,r9,ESID_BITS,0 /* proto vsid */
+ ASM_VSID_SCRAMBLE(r10, r9, 256M)
+ rldic r9,r10,12,16 /* r9 = vsid << 12 */
+
+0:
+ /* Hash to the primary group */
+ ld r10,PACASTABVIRT(r13)
+ srdi r11,r11,SID_SHIFT
+ rldimi r10,r11,7,52 /* r10 = first ste of the group */
+
+ /* Search the primary group for a free entry */
+1: ld r11,0(r10) /* Test valid bit of the current ste */
+ andi. r11,r11,0x80
+ beq 2f
+ addi r10,r10,16
+ andi. r11,r10,0x70
+ bne 1b
+
+ /* Stick for only searching the primary group for now. */
+ /* At least for now, we use a very simple random castout scheme */
+ /* Use the TB as a random number ; OR in 1 to avoid entry 0 */
+ mftb r11
+ rldic r11,r11,4,57 /* r11 = (r11 << 4) & 0x70 */
+ ori r11,r11,0x10
+
+ /* r10 currently points to an ste one past the group of interest */
+ /* make it point to the randomly selected entry */
+ subi r10,r10,128
+ or r10,r10,r11 /* r10 is the entry to invalidate */
+
+ isync /* mark the entry invalid */
+ ld r11,0(r10)
+ rldicl r11,r11,56,1 /* clear the valid bit */
+ rotldi r11,r11,8
+ std r11,0(r10)
+ sync
+
+ clrrdi r11,r11,28 /* Get the esid part of the ste */
+ slbie r11
+
+2: std r9,8(r10) /* Store the vsid part of the ste */
+ eieio
+
+ mfspr r11,SPRN_DAR /* Get the new esid */
+ clrrdi r11,r11,28 /* Permits a full 32b of ESID */
+ ori r11,r11,0x90 /* Turn on valid and kp */
+ std r11,0(r10) /* Put new entry back into the stab */
+
+ sync
+
+ /* All done -- return from exception. */
+ lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
+ ld r11,PACA_EXSLB+EX_SRR0(r13) /* get saved SRR0 */
+
+ andi. r10,r12,MSR_RI
+ beq- unrecov_slb
+
+ mtcrf 0x80,r9 /* restore CR */
+
+ mfmsr r10
+ clrrdi r10,r10,2
+ mtmsrd r10,1
+
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
+ ld r9,PACA_EXSLB+EX_R9(r13)
+ ld r10,PACA_EXSLB+EX_R10(r13)
+ ld r11,PACA_EXSLB+EX_R11(r13)
+ ld r12,PACA_EXSLB+EX_R12(r13)
+ ld r13,PACA_EXSLB+EX_R13(r13)
+ rfid
+ b . /* prevent speculative execution */
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
new file mode 100644
index 00000000000..742694c1d85
--- /dev/null
+++ b/arch/powerpc/kernel/fadump.c
@@ -0,0 +1,1316 @@
+/*
+ * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
+ * dump with assistance from firmware. This approach does not use kexec,
+ * instead firmware assists in booting the kdump kernel while preserving
+ * memory contents. The most of the code implementation has been adapted
+ * from phyp assisted dump implementation written by Linas Vepstas and
+ * Manish Ahuja
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2011 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/delay.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/crash_dump.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/fadump.h>
+#include <asm/debug.h>
+#include <asm/setup.h>
+
+static struct fw_dump fw_dump;
+static struct fadump_mem_struct fdm;
+static const struct fadump_mem_struct *fdm_active;
+
+static DEFINE_MUTEX(fadump_mutex);
+struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES];
+int crash_mem_ranges;
+
+/* Scan the Firmware Assisted dump configuration details. */
+int __init early_init_dt_scan_fw_dump(unsigned long node,
+ const char *uname, int depth, void *data)
+{
+ const __be32 *sections;
+ int i, num_sections;
+ int size;
+ const int *token;
+
+ if (depth != 1 || strcmp(uname, "rtas") != 0)
+ return 0;
+
+ /*
+ * Check if Firmware Assisted dump is supported. if yes, check
+ * if dump has been initiated on last reboot.
+ */
+ token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
+ if (!token)
+ return 1;
+
+ fw_dump.fadump_supported = 1;
+ fw_dump.ibm_configure_kernel_dump = *token;
+
+ /*
+ * The 'ibm,kernel-dump' rtas node is present only if there is
+ * dump data waiting for us.
+ */
+ fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
+ if (fdm_active)
+ fw_dump.dump_active = 1;
+
+ /* Get the sizes required to store dump data for the firmware provided
+ * dump sections.
+ * For each dump section type supported, a 32bit cell which defines
+ * the ID of a supported section followed by two 32 bit cells which
+ * gives teh size of the section in bytes.
+ */
+ sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
+ &size);
+
+ if (!sections)
+ return 1;
+
+ num_sections = size / (3 * sizeof(u32));
+
+ for (i = 0; i < num_sections; i++, sections += 3) {
+ u32 type = (u32)of_read_number(sections, 1);
+
+ switch (type) {
+ case FADUMP_CPU_STATE_DATA:
+ fw_dump.cpu_state_data_size =
+ of_read_ulong(&sections[1], 2);
+ break;
+ case FADUMP_HPTE_REGION:
+ fw_dump.hpte_region_size =
+ of_read_ulong(&sections[1], 2);
+ break;
+ }
+ }
+
+ return 1;
+}
+
+int is_fadump_active(void)
+{
+ return fw_dump.dump_active;
+}
+
+/* Print firmware assisted dump configurations for debugging purpose. */
+static void fadump_show_config(void)
+{
+ pr_debug("Support for firmware-assisted dump (fadump): %s\n",
+ (fw_dump.fadump_supported ? "present" : "no support"));
+
+ if (!fw_dump.fadump_supported)
+ return;
+
+ pr_debug("Fadump enabled : %s\n",
+ (fw_dump.fadump_enabled ? "yes" : "no"));
+ pr_debug("Dump Active : %s\n",
+ (fw_dump.dump_active ? "yes" : "no"));
+ pr_debug("Dump section sizes:\n");
+ pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
+ pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size);
+ pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size);
+}
+
+static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
+ unsigned long addr)
+{
+ if (!fdm)
+ return 0;
+
+ memset(fdm, 0, sizeof(struct fadump_mem_struct));
+ addr = addr & PAGE_MASK;
+
+ fdm->header.dump_format_version = 0x00000001;
+ fdm->header.dump_num_sections = 3;
+ fdm->header.dump_status_flag = 0;
+ fdm->header.offset_first_dump_section =
+ (u32)offsetof(struct fadump_mem_struct, cpu_state_data);
+
+ /*
+ * Fields for disk dump option.
+ * We are not using disk dump option, hence set these fields to 0.
+ */
+ fdm->header.dd_block_size = 0;
+ fdm->header.dd_block_offset = 0;
+ fdm->header.dd_num_blocks = 0;
+ fdm->header.dd_offset_disk_path = 0;
+
+ /* set 0 to disable an automatic dump-reboot. */
+ fdm->header.max_time_auto = 0;
+
+ /* Kernel dump sections */
+ /* cpu state data section. */
+ fdm->cpu_state_data.request_flag = FADUMP_REQUEST_FLAG;
+ fdm->cpu_state_data.source_data_type = FADUMP_CPU_STATE_DATA;
+ fdm->cpu_state_data.source_address = 0;
+ fdm->cpu_state_data.source_len = fw_dump.cpu_state_data_size;
+ fdm->cpu_state_data.destination_address = addr;
+ addr += fw_dump.cpu_state_data_size;
+
+ /* hpte region section */
+ fdm->hpte_region.request_flag = FADUMP_REQUEST_FLAG;
+ fdm->hpte_region.source_data_type = FADUMP_HPTE_REGION;
+ fdm->hpte_region.source_address = 0;
+ fdm->hpte_region.source_len = fw_dump.hpte_region_size;
+ fdm->hpte_region.destination_address = addr;
+ addr += fw_dump.hpte_region_size;
+
+ /* RMA region section */
+ fdm->rmr_region.request_flag = FADUMP_REQUEST_FLAG;
+ fdm->rmr_region.source_data_type = FADUMP_REAL_MODE_REGION;
+ fdm->rmr_region.source_address = RMA_START;
+ fdm->rmr_region.source_len = fw_dump.boot_memory_size;
+ fdm->rmr_region.destination_address = addr;
+ addr += fw_dump.boot_memory_size;
+
+ return addr;
+}
+
+/**
+ * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
+ *
+ * Function to find the largest memory size we need to reserve during early
+ * boot process. This will be the size of the memory that is required for a
+ * kernel to boot successfully.
+ *
+ * This function has been taken from phyp-assisted dump feature implementation.
+ *
+ * returns larger of 256MB or 5% rounded down to multiples of 256MB.
+ *
+ * TODO: Come up with better approach to find out more accurate memory size
+ * that is required for a kernel to boot successfully.
+ *
+ */
+static inline unsigned long fadump_calculate_reserve_size(void)
+{
+ unsigned long size;
+
+ /*
+ * Check if the size is specified through fadump_reserve_mem= cmdline
+ * option. If yes, then use that.
+ */
+ if (fw_dump.reserve_bootvar)
+ return fw_dump.reserve_bootvar;
+
+ /* divide by 20 to get 5% of value */
+ size = memblock_end_of_DRAM() / 20;
+
+ /* round it down in multiples of 256 */
+ size = size & ~0x0FFFFFFFUL;
+
+ /* Truncate to memory_limit. We don't want to over reserve the memory.*/
+ if (memory_limit && size > memory_limit)
+ size = memory_limit;
+
+ return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
+}
+
+/*
+ * Calculate the total memory size required to be reserved for
+ * firmware-assisted dump registration.
+ */
+static unsigned long get_fadump_area_size(void)
+{
+ unsigned long size = 0;
+
+ size += fw_dump.cpu_state_data_size;
+ size += fw_dump.hpte_region_size;
+ size += fw_dump.boot_memory_size;
+ size += sizeof(struct fadump_crash_info_header);
+ size += sizeof(struct elfhdr); /* ELF core header.*/
+ size += sizeof(struct elf_phdr); /* place holder for cpu notes */
+ /* Program headers for crash memory regions. */
+ size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
+
+ size = PAGE_ALIGN(size);
+ return size;
+}
+
+int __init fadump_reserve_mem(void)
+{
+ unsigned long base, size, memory_boundary;
+
+ if (!fw_dump.fadump_enabled)
+ return 0;
+
+ if (!fw_dump.fadump_supported) {
+ printk(KERN_INFO "Firmware-assisted dump is not supported on"
+ " this hardware\n");
+ fw_dump.fadump_enabled = 0;
+ return 0;
+ }
+ /*
+ * Initialize boot memory size
+ * If dump is active then we have already calculated the size during
+ * first kernel.
+ */
+ if (fdm_active)
+ fw_dump.boot_memory_size = fdm_active->rmr_region.source_len;
+ else
+ fw_dump.boot_memory_size = fadump_calculate_reserve_size();
+
+ /*
+ * Calculate the memory boundary.
+ * If memory_limit is less than actual memory boundary then reserve
+ * the memory for fadump beyond the memory_limit and adjust the
+ * memory_limit accordingly, so that the running kernel can run with
+ * specified memory_limit.
+ */
+ if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
+ size = get_fadump_area_size();
+ if ((memory_limit + size) < memblock_end_of_DRAM())
+ memory_limit += size;
+ else
+ memory_limit = memblock_end_of_DRAM();
+ printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
+ " dump, now %#016llx\n", memory_limit);
+ }
+ if (memory_limit)
+ memory_boundary = memory_limit;
+ else
+ memory_boundary = memblock_end_of_DRAM();
+
+ if (fw_dump.dump_active) {
+ printk(KERN_INFO "Firmware-assisted dump is active.\n");
+ /*
+ * If last boot has crashed then reserve all the memory
+ * above boot_memory_size so that we don't touch it until
+ * dump is written to disk by userspace tool. This memory
+ * will be released for general use once the dump is saved.
+ */
+ base = fw_dump.boot_memory_size;
+ size = memory_boundary - base;
+ memblock_reserve(base, size);
+ printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
+ "for saving crash dump\n",
+ (unsigned long)(size >> 20),
+ (unsigned long)(base >> 20));
+
+ fw_dump.fadumphdr_addr =
+ fdm_active->rmr_region.destination_address +
+ fdm_active->rmr_region.source_len;
+ pr_debug("fadumphdr_addr = %p\n",
+ (void *) fw_dump.fadumphdr_addr);
+ } else {
+ /* Reserve the memory at the top of memory. */
+ size = get_fadump_area_size();
+ base = memory_boundary - size;
+ memblock_reserve(base, size);
+ printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
+ "for firmware-assisted dump\n",
+ (unsigned long)(size >> 20),
+ (unsigned long)(base >> 20));
+ }
+ fw_dump.reserve_dump_area_start = base;
+ fw_dump.reserve_dump_area_size = size;
+ return 1;
+}
+
+/* Look for fadump= cmdline option. */
+static int __init early_fadump_param(char *p)
+{
+ if (!p)
+ return 1;
+
+ if (strncmp(p, "on", 2) == 0)
+ fw_dump.fadump_enabled = 1;
+ else if (strncmp(p, "off", 3) == 0)
+ fw_dump.fadump_enabled = 0;
+
+ return 0;
+}
+early_param("fadump", early_fadump_param);
+
+/* Look for fadump_reserve_mem= cmdline option */
+static int __init early_fadump_reserve_mem(char *p)
+{
+ if (p)
+ fw_dump.reserve_bootvar = memparse(p, &p);
+ return 0;
+}
+early_param("fadump_reserve_mem", early_fadump_reserve_mem);
+
+static void register_fw_dump(struct fadump_mem_struct *fdm)
+{
+ int rc;
+ unsigned int wait_time;
+
+ pr_debug("Registering for firmware-assisted kernel dump...\n");
+
+ /* TODO: Add upper time limit for the delay */
+ do {
+ rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
+ FADUMP_REGISTER, fdm,
+ sizeof(struct fadump_mem_struct));
+
+ wait_time = rtas_busy_delay_time(rc);
+ if (wait_time)
+ mdelay(wait_time);
+
+ } while (wait_time);
+
+ switch (rc) {
+ case -1:
+ printk(KERN_ERR "Failed to register firmware-assisted kernel"
+ " dump. Hardware Error(%d).\n", rc);
+ break;
+ case -3:
+ printk(KERN_ERR "Failed to register firmware-assisted kernel"
+ " dump. Parameter Error(%d).\n", rc);
+ break;
+ case -9:
+ printk(KERN_ERR "firmware-assisted kernel dump is already "
+ " registered.");
+ fw_dump.dump_registered = 1;
+ break;
+ case 0:
+ printk(KERN_INFO "firmware-assisted kernel dump registration"
+ " is successful\n");
+ fw_dump.dump_registered = 1;
+ break;
+ }
+}
+
+void crash_fadump(struct pt_regs *regs, const char *str)
+{
+ struct fadump_crash_info_header *fdh = NULL;
+
+ if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
+ return;
+
+ fdh = __va(fw_dump.fadumphdr_addr);
+ crashing_cpu = smp_processor_id();
+ fdh->crashing_cpu = crashing_cpu;
+ crash_save_vmcoreinfo();
+
+ if (regs)
+ fdh->regs = *regs;
+ else
+ ppc_save_regs(&fdh->regs);
+
+ fdh->cpu_online_mask = *cpu_online_mask;
+
+ /* Call ibm,os-term rtas call to trigger firmware assisted dump */
+ rtas_os_term((char *)str);
+}
+
+#define GPR_MASK 0xffffff0000000000
+static inline int fadump_gpr_index(u64 id)
+{
+ int i = -1;
+ char str[3];
+
+ if ((id & GPR_MASK) == REG_ID("GPR")) {
+ /* get the digits at the end */
+ id &= ~GPR_MASK;
+ id >>= 24;
+ str[2] = '\0';
+ str[1] = id & 0xff;
+ str[0] = (id >> 8) & 0xff;
+ sscanf(str, "%d", &i);
+ if (i > 31)
+ i = -1;
+ }
+ return i;
+}
+
+static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id,
+ u64 reg_val)
+{
+ int i;
+
+ i = fadump_gpr_index(reg_id);
+ if (i >= 0)
+ regs->gpr[i] = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("NIA"))
+ regs->nip = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("MSR"))
+ regs->msr = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("CTR"))
+ regs->ctr = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("LR"))
+ regs->link = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("XER"))
+ regs->xer = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("CR"))
+ regs->ccr = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("DAR"))
+ regs->dar = (unsigned long)reg_val;
+ else if (reg_id == REG_ID("DSISR"))
+ regs->dsisr = (unsigned long)reg_val;
+}
+
+static struct fadump_reg_entry*
+fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
+{
+ memset(regs, 0, sizeof(struct pt_regs));
+
+ while (reg_entry->reg_id != REG_ID("CPUEND")) {
+ fadump_set_regval(regs, reg_entry->reg_id,
+ reg_entry->reg_value);
+ reg_entry++;
+ }
+ reg_entry++;
+ return reg_entry;
+}
+
+static u32 *fadump_append_elf_note(u32 *buf, char *name, unsigned type,
+ void *data, size_t data_len)
+{
+ struct elf_note note;
+
+ note.n_namesz = strlen(name) + 1;
+ note.n_descsz = data_len;
+ note.n_type = type;
+ memcpy(buf, &note, sizeof(note));
+ buf += (sizeof(note) + 3)/4;
+ memcpy(buf, name, note.n_namesz);
+ buf += (note.n_namesz + 3)/4;
+ memcpy(buf, data, note.n_descsz);
+ buf += (note.n_descsz + 3)/4;
+
+ return buf;
+}
+
+static void fadump_final_note(u32 *buf)
+{
+ struct elf_note note;
+
+ note.n_namesz = 0;
+ note.n_descsz = 0;
+ note.n_type = 0;
+ memcpy(buf, &note, sizeof(note));
+}
+
+static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
+{
+ struct elf_prstatus prstatus;
+
+ memset(&prstatus, 0, sizeof(prstatus));
+ /*
+ * FIXME: How do i get PID? Do I really need it?
+ * prstatus.pr_pid = ????
+ */
+ elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
+ buf = fadump_append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
+ &prstatus, sizeof(prstatus));
+ return buf;
+}
+
+static void fadump_update_elfcore_header(char *bufp)
+{
+ struct elfhdr *elf;
+ struct elf_phdr *phdr;
+
+ elf = (struct elfhdr *)bufp;
+ bufp += sizeof(struct elfhdr);
+
+ /* First note is a place holder for cpu notes info. */
+ phdr = (struct elf_phdr *)bufp;
+
+ if (phdr->p_type == PT_NOTE) {
+ phdr->p_paddr = fw_dump.cpu_notes_buf;
+ phdr->p_offset = phdr->p_paddr;
+ phdr->p_filesz = fw_dump.cpu_notes_buf_size;
+ phdr->p_memsz = fw_dump.cpu_notes_buf_size;
+ }
+ return;
+}
+
+static void *fadump_cpu_notes_buf_alloc(unsigned long size)
+{
+ void *vaddr;
+ struct page *page;
+ unsigned long order, count, i;
+
+ order = get_order(size);
+ vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+ if (!vaddr)
+ return NULL;
+
+ count = 1 << order;
+ page = virt_to_page(vaddr);
+ for (i = 0; i < count; i++)
+ SetPageReserved(page + i);
+ return vaddr;
+}
+
+static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
+{
+ struct page *page;
+ unsigned long order, count, i;
+
+ order = get_order(size);
+ count = 1 << order;
+ page = virt_to_page(vaddr);
+ for (i = 0; i < count; i++)
+ ClearPageReserved(page + i);
+ __free_pages(page, order);
+}
+
+/*
+ * Read CPU state dump data and convert it into ELF notes.
+ * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
+ * used to access the data to allow for additional fields to be added without
+ * affecting compatibility. Each list of registers for a CPU starts with
+ * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
+ * 8 Byte ASCII identifier and 8 Byte register value. The register entry
+ * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
+ * of register value. For more details refer to PAPR document.
+ *
+ * Only for the crashing cpu we ignore the CPU dump data and get exact
+ * state from fadump crash info structure populated by first kernel at the
+ * time of crash.
+ */
+static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
+{
+ struct fadump_reg_save_area_header *reg_header;
+ struct fadump_reg_entry *reg_entry;
+ struct fadump_crash_info_header *fdh = NULL;
+ void *vaddr;
+ unsigned long addr;
+ u32 num_cpus, *note_buf;
+ struct pt_regs regs;
+ int i, rc = 0, cpu = 0;
+
+ if (!fdm->cpu_state_data.bytes_dumped)
+ return -EINVAL;
+
+ addr = fdm->cpu_state_data.destination_address;
+ vaddr = __va(addr);
+
+ reg_header = vaddr;
+ if (reg_header->magic_number != REGSAVE_AREA_MAGIC) {
+ printk(KERN_ERR "Unable to read register save area.\n");
+ return -ENOENT;
+ }
+ pr_debug("--------CPU State Data------------\n");
+ pr_debug("Magic Number: %llx\n", reg_header->magic_number);
+ pr_debug("NumCpuOffset: %x\n", reg_header->num_cpu_offset);
+
+ vaddr += reg_header->num_cpu_offset;
+ num_cpus = *((u32 *)(vaddr));
+ pr_debug("NumCpus : %u\n", num_cpus);
+ vaddr += sizeof(u32);
+ reg_entry = (struct fadump_reg_entry *)vaddr;
+
+ /* Allocate buffer to hold cpu crash notes. */
+ fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
+ fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
+ note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
+ if (!note_buf) {
+ printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
+ "cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
+ return -ENOMEM;
+ }
+ fw_dump.cpu_notes_buf = __pa(note_buf);
+
+ pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
+ (num_cpus * sizeof(note_buf_t)), note_buf);
+
+ if (fw_dump.fadumphdr_addr)
+ fdh = __va(fw_dump.fadumphdr_addr);
+
+ for (i = 0; i < num_cpus; i++) {
+ if (reg_entry->reg_id != REG_ID("CPUSTRT")) {
+ printk(KERN_ERR "Unable to read CPU state data\n");
+ rc = -ENOENT;
+ goto error_out;
+ }
+ /* Lower 4 bytes of reg_value contains logical cpu id */
+ cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK;
+ if (fdh && !cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) {
+ SKIP_TO_NEXT_CPU(reg_entry);
+ continue;
+ }
+ pr_debug("Reading register data for cpu %d...\n", cpu);
+ if (fdh && fdh->crashing_cpu == cpu) {
+ regs = fdh->regs;
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ SKIP_TO_NEXT_CPU(reg_entry);
+ } else {
+ reg_entry++;
+ reg_entry = fadump_read_registers(reg_entry, &regs);
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ }
+ }
+ fadump_final_note(note_buf);
+
+ if (fdh) {
+ pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+ fdh->elfcorehdr_addr);
+ fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
+ }
+ return 0;
+
+error_out:
+ fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
+ fw_dump.cpu_notes_buf_size);
+ fw_dump.cpu_notes_buf = 0;
+ fw_dump.cpu_notes_buf_size = 0;
+ return rc;
+
+}
+
+/*
+ * Validate and process the dump data stored by firmware before exporting
+ * it through '/proc/vmcore'.
+ */
+static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
+{
+ struct fadump_crash_info_header *fdh;
+ int rc = 0;
+
+ if (!fdm_active || !fw_dump.fadumphdr_addr)
+ return -EINVAL;
+
+ /* Check if the dump data is valid. */
+ if ((fdm_active->header.dump_status_flag == FADUMP_ERROR_FLAG) ||
+ (fdm_active->cpu_state_data.error_flags != 0) ||
+ (fdm_active->rmr_region.error_flags != 0)) {
+ printk(KERN_ERR "Dump taken by platform is not valid\n");
+ return -EINVAL;
+ }
+ if ((fdm_active->rmr_region.bytes_dumped !=
+ fdm_active->rmr_region.source_len) ||
+ !fdm_active->cpu_state_data.bytes_dumped) {
+ printk(KERN_ERR "Dump taken by platform is incomplete\n");
+ return -EINVAL;
+ }
+
+ /* Validate the fadump crash info header */
+ fdh = __va(fw_dump.fadumphdr_addr);
+ if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+ printk(KERN_ERR "Crash info header is not valid.\n");
+ return -EINVAL;
+ }
+
+ rc = fadump_build_cpu_notes(fdm_active);
+ if (rc)
+ return rc;
+
+ /*
+ * We are done validating dump info and elfcore header is now ready
+ * to be exported. set elfcorehdr_addr so that vmcore module will
+ * export the elfcore header through '/proc/vmcore'.
+ */
+ elfcorehdr_addr = fdh->elfcorehdr_addr;
+
+ return 0;
+}
+
+static inline void fadump_add_crash_memory(unsigned long long base,
+ unsigned long long end)
+{
+ if (base == end)
+ return;
+
+ pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
+ crash_mem_ranges, base, end - 1, (end - base));
+ crash_memory_ranges[crash_mem_ranges].base = base;
+ crash_memory_ranges[crash_mem_ranges].size = end - base;
+ crash_mem_ranges++;
+}
+
+static void fadump_exclude_reserved_area(unsigned long long start,
+ unsigned long long end)
+{
+ unsigned long long ra_start, ra_end;
+
+ ra_start = fw_dump.reserve_dump_area_start;
+ ra_end = ra_start + fw_dump.reserve_dump_area_size;
+
+ if ((ra_start < end) && (ra_end > start)) {
+ if ((start < ra_start) && (end > ra_end)) {
+ fadump_add_crash_memory(start, ra_start);
+ fadump_add_crash_memory(ra_end, end);
+ } else if (start < ra_start) {
+ fadump_add_crash_memory(start, ra_start);
+ } else if (ra_end < end) {
+ fadump_add_crash_memory(ra_end, end);
+ }
+ } else
+ fadump_add_crash_memory(start, end);
+}
+
+static int fadump_init_elfcore_header(char *bufp)
+{
+ struct elfhdr *elf;
+
+ elf = (struct elfhdr *) bufp;
+ bufp += sizeof(struct elfhdr);
+ memcpy(elf->e_ident, ELFMAG, SELFMAG);
+ elf->e_ident[EI_CLASS] = ELF_CLASS;
+ elf->e_ident[EI_DATA] = ELF_DATA;
+ elf->e_ident[EI_VERSION] = EV_CURRENT;
+ elf->e_ident[EI_OSABI] = ELF_OSABI;
+ memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
+ elf->e_type = ET_CORE;
+ elf->e_machine = ELF_ARCH;
+ elf->e_version = EV_CURRENT;
+ elf->e_entry = 0;
+ elf->e_phoff = sizeof(struct elfhdr);
+ elf->e_shoff = 0;
+ elf->e_flags = ELF_CORE_EFLAGS;
+ elf->e_ehsize = sizeof(struct elfhdr);
+ elf->e_phentsize = sizeof(struct elf_phdr);
+ elf->e_phnum = 0;
+ elf->e_shentsize = 0;
+ elf->e_shnum = 0;
+ elf->e_shstrndx = 0;
+
+ return 0;
+}
+
+/*
+ * Traverse through memblock structure and setup crash memory ranges. These
+ * ranges will be used create PT_LOAD program headers in elfcore header.
+ */
+static void fadump_setup_crash_memory_ranges(void)
+{
+ struct memblock_region *reg;
+ unsigned long long start, end;
+
+ pr_debug("Setup crash memory ranges.\n");
+ crash_mem_ranges = 0;
+ /*
+ * add the first memory chunk (RMA_START through boot_memory_size) as
+ * a separate memory chunk. The reason is, at the time crash firmware
+ * will move the content of this memory chunk to different location
+ * specified during fadump registration. We need to create a separate
+ * program header for this chunk with the correct offset.
+ */
+ fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
+
+ for_each_memblock(memory, reg) {
+ start = (unsigned long long)reg->base;
+ end = start + (unsigned long long)reg->size;
+ if (start == RMA_START && end >= fw_dump.boot_memory_size)
+ start = fw_dump.boot_memory_size;
+
+ /* add this range excluding the reserved dump area. */
+ fadump_exclude_reserved_area(start, end);
+ }
+}
+
+/*
+ * If the given physical address falls within the boot memory region then
+ * return the relocated address that points to the dump region reserved
+ * for saving initial boot memory contents.
+ */
+static inline unsigned long fadump_relocate(unsigned long paddr)
+{
+ if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
+ return fdm.rmr_region.destination_address + paddr;
+ else
+ return paddr;
+}
+
+static int fadump_create_elfcore_headers(char *bufp)
+{
+ struct elfhdr *elf;
+ struct elf_phdr *phdr;
+ int i;
+
+ fadump_init_elfcore_header(bufp);
+ elf = (struct elfhdr *)bufp;
+ bufp += sizeof(struct elfhdr);
+
+ /*
+ * setup ELF PT_NOTE, place holder for cpu notes info. The notes info
+ * will be populated during second kernel boot after crash. Hence
+ * this PT_NOTE will always be the first elf note.
+ *
+ * NOTE: Any new ELF note addition should be placed after this note.
+ */
+ phdr = (struct elf_phdr *)bufp;
+ bufp += sizeof(struct elf_phdr);
+ phdr->p_type = PT_NOTE;
+ phdr->p_flags = 0;
+ phdr->p_vaddr = 0;
+ phdr->p_align = 0;
+
+ phdr->p_offset = 0;
+ phdr->p_paddr = 0;
+ phdr->p_filesz = 0;
+ phdr->p_memsz = 0;
+
+ (elf->e_phnum)++;
+
+ /* setup ELF PT_NOTE for vmcoreinfo */
+ phdr = (struct elf_phdr *)bufp;
+ bufp += sizeof(struct elf_phdr);
+ phdr->p_type = PT_NOTE;
+ phdr->p_flags = 0;
+ phdr->p_vaddr = 0;
+ phdr->p_align = 0;
+
+ phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note());
+ phdr->p_offset = phdr->p_paddr;
+ phdr->p_memsz = vmcoreinfo_max_size;
+ phdr->p_filesz = vmcoreinfo_max_size;
+
+ /* Increment number of program headers. */
+ (elf->e_phnum)++;
+
+ /* setup PT_LOAD sections. */
+
+ for (i = 0; i < crash_mem_ranges; i++) {
+ unsigned long long mbase, msize;
+ mbase = crash_memory_ranges[i].base;
+ msize = crash_memory_ranges[i].size;
+
+ if (!msize)
+ continue;
+
+ phdr = (struct elf_phdr *)bufp;
+ bufp += sizeof(struct elf_phdr);
+ phdr->p_type = PT_LOAD;
+ phdr->p_flags = PF_R|PF_W|PF_X;
+ phdr->p_offset = mbase;
+
+ if (mbase == RMA_START) {
+ /*
+ * The entire RMA region will be moved by firmware
+ * to the specified destination_address. Hence set
+ * the correct offset.
+ */
+ phdr->p_offset = fdm.rmr_region.destination_address;
+ }
+
+ phdr->p_paddr = mbase;
+ phdr->p_vaddr = (unsigned long)__va(mbase);
+ phdr->p_filesz = msize;
+ phdr->p_memsz = msize;
+ phdr->p_align = 0;
+
+ /* Increment number of program headers. */
+ (elf->e_phnum)++;
+ }
+ return 0;
+}
+
+static unsigned long init_fadump_header(unsigned long addr)
+{
+ struct fadump_crash_info_header *fdh;
+
+ if (!addr)
+ return 0;
+
+ fw_dump.fadumphdr_addr = addr;
+ fdh = __va(addr);
+ addr += sizeof(struct fadump_crash_info_header);
+
+ memset(fdh, 0, sizeof(struct fadump_crash_info_header));
+ fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
+ fdh->elfcorehdr_addr = addr;
+ /* We will set the crashing cpu id in crash_fadump() during crash. */
+ fdh->crashing_cpu = CPU_UNKNOWN;
+
+ return addr;
+}
+
+static void register_fadump(void)
+{
+ unsigned long addr;
+ void *vaddr;
+
+ /*
+ * If no memory is reserved then we can not register for firmware-
+ * assisted dump.
+ */
+ if (!fw_dump.reserve_dump_area_size)
+ return;
+
+ fadump_setup_crash_memory_ranges();
+
+ addr = fdm.rmr_region.destination_address + fdm.rmr_region.source_len;
+ /* Initialize fadump crash info header. */
+ addr = init_fadump_header(addr);
+ vaddr = __va(addr);
+
+ pr_debug("Creating ELF core headers at %#016lx\n", addr);
+ fadump_create_elfcore_headers(vaddr);
+
+ /* register the future kernel dump with firmware. */
+ register_fw_dump(&fdm);
+}
+
+static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
+{
+ int rc = 0;
+ unsigned int wait_time;
+
+ pr_debug("Un-register firmware-assisted dump\n");
+
+ /* TODO: Add upper time limit for the delay */
+ do {
+ rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
+ FADUMP_UNREGISTER, fdm,
+ sizeof(struct fadump_mem_struct));
+
+ wait_time = rtas_busy_delay_time(rc);
+ if (wait_time)
+ mdelay(wait_time);
+ } while (wait_time);
+
+ if (rc) {
+ printk(KERN_ERR "Failed to un-register firmware-assisted dump."
+ " unexpected error(%d).\n", rc);
+ return rc;
+ }
+ fw_dump.dump_registered = 0;
+ return 0;
+}
+
+static int fadump_invalidate_dump(struct fadump_mem_struct *fdm)
+{
+ int rc = 0;
+ unsigned int wait_time;
+
+ pr_debug("Invalidating firmware-assisted dump registration\n");
+
+ /* TODO: Add upper time limit for the delay */
+ do {
+ rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
+ FADUMP_INVALIDATE, fdm,
+ sizeof(struct fadump_mem_struct));
+
+ wait_time = rtas_busy_delay_time(rc);
+ if (wait_time)
+ mdelay(wait_time);
+ } while (wait_time);
+
+ if (rc) {
+ printk(KERN_ERR "Failed to invalidate firmware-assisted dump "
+ "rgistration. unexpected error(%d).\n", rc);
+ return rc;
+ }
+ fw_dump.dump_active = 0;
+ fdm_active = NULL;
+ return 0;
+}
+
+void fadump_cleanup(void)
+{
+ /* Invalidate the registration only if dump is active. */
+ if (fw_dump.dump_active) {
+ init_fadump_mem_struct(&fdm,
+ fdm_active->cpu_state_data.destination_address);
+ fadump_invalidate_dump(&fdm);
+ }
+}
+
+/*
+ * Release the memory that was reserved in early boot to preserve the memory
+ * contents. The released memory will be available for general use.
+ */
+static void fadump_release_memory(unsigned long begin, unsigned long end)
+{
+ unsigned long addr;
+ unsigned long ra_start, ra_end;
+
+ ra_start = fw_dump.reserve_dump_area_start;
+ ra_end = ra_start + fw_dump.reserve_dump_area_size;
+
+ for (addr = begin; addr < end; addr += PAGE_SIZE) {
+ /*
+ * exclude the dump reserve area. Will reuse it for next
+ * fadump registration.
+ */
+ if (addr <= ra_end && ((addr + PAGE_SIZE) > ra_start))
+ continue;
+
+ free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
+ }
+}
+
+static void fadump_invalidate_release_mem(void)
+{
+ unsigned long reserved_area_start, reserved_area_end;
+ unsigned long destination_address;
+
+ mutex_lock(&fadump_mutex);
+ if (!fw_dump.dump_active) {
+ mutex_unlock(&fadump_mutex);
+ return;
+ }
+
+ destination_address = fdm_active->cpu_state_data.destination_address;
+ fadump_cleanup();
+ mutex_unlock(&fadump_mutex);
+
+ /*
+ * Save the current reserved memory bounds we will require them
+ * later for releasing the memory for general use.
+ */
+ reserved_area_start = fw_dump.reserve_dump_area_start;
+ reserved_area_end = reserved_area_start +
+ fw_dump.reserve_dump_area_size;
+ /*
+ * Setup reserve_dump_area_start and its size so that we can
+ * reuse this reserved memory for Re-registration.
+ */
+ fw_dump.reserve_dump_area_start = destination_address;
+ fw_dump.reserve_dump_area_size = get_fadump_area_size();
+
+ fadump_release_memory(reserved_area_start, reserved_area_end);
+ if (fw_dump.cpu_notes_buf) {
+ fadump_cpu_notes_buf_free(
+ (unsigned long)__va(fw_dump.cpu_notes_buf),
+ fw_dump.cpu_notes_buf_size);
+ fw_dump.cpu_notes_buf = 0;
+ fw_dump.cpu_notes_buf_size = 0;
+ }
+ /* Initialize the kernel dump memory structure for FAD registration. */
+ init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
+}
+
+static ssize_t fadump_release_memory_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ if (!fw_dump.dump_active)
+ return -EPERM;
+
+ if (buf[0] == '1') {
+ /*
+ * Take away the '/proc/vmcore'. We are releasing the dump
+ * memory, hence it will not be valid anymore.
+ */
+ vmcore_cleanup();
+ fadump_invalidate_release_mem();
+
+ } else
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t fadump_enabled_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
+}
+
+static ssize_t fadump_register_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", fw_dump.dump_registered);
+}
+
+static ssize_t fadump_register_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ int ret = 0;
+
+ if (!fw_dump.fadump_enabled || fdm_active)
+ return -EPERM;
+
+ mutex_lock(&fadump_mutex);
+
+ switch (buf[0]) {
+ case '0':
+ if (fw_dump.dump_registered == 0) {
+ ret = -EINVAL;
+ goto unlock_out;
+ }
+ /* Un-register Firmware-assisted dump */
+ fadump_unregister_dump(&fdm);
+ break;
+ case '1':
+ if (fw_dump.dump_registered == 1) {
+ ret = -EINVAL;
+ goto unlock_out;
+ }
+ /* Register Firmware-assisted dump */
+ register_fadump();
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+unlock_out:
+ mutex_unlock(&fadump_mutex);
+ return ret < 0 ? ret : count;
+}
+
+static int fadump_region_show(struct seq_file *m, void *private)
+{
+ const struct fadump_mem_struct *fdm_ptr;
+
+ if (!fw_dump.fadump_enabled)
+ return 0;
+
+ mutex_lock(&fadump_mutex);
+ if (fdm_active)
+ fdm_ptr = fdm_active;
+ else {
+ mutex_unlock(&fadump_mutex);
+ fdm_ptr = &fdm;
+ }
+
+ seq_printf(m,
+ "CPU : [%#016llx-%#016llx] %#llx bytes, "
+ "Dumped: %#llx\n",
+ fdm_ptr->cpu_state_data.destination_address,
+ fdm_ptr->cpu_state_data.destination_address +
+ fdm_ptr->cpu_state_data.source_len - 1,
+ fdm_ptr->cpu_state_data.source_len,
+ fdm_ptr->cpu_state_data.bytes_dumped);
+ seq_printf(m,
+ "HPTE: [%#016llx-%#016llx] %#llx bytes, "
+ "Dumped: %#llx\n",
+ fdm_ptr->hpte_region.destination_address,
+ fdm_ptr->hpte_region.destination_address +
+ fdm_ptr->hpte_region.source_len - 1,
+ fdm_ptr->hpte_region.source_len,
+ fdm_ptr->hpte_region.bytes_dumped);
+ seq_printf(m,
+ "DUMP: [%#016llx-%#016llx] %#llx bytes, "
+ "Dumped: %#llx\n",
+ fdm_ptr->rmr_region.destination_address,
+ fdm_ptr->rmr_region.destination_address +
+ fdm_ptr->rmr_region.source_len - 1,
+ fdm_ptr->rmr_region.source_len,
+ fdm_ptr->rmr_region.bytes_dumped);
+
+ if (!fdm_active ||
+ (fw_dump.reserve_dump_area_start ==
+ fdm_ptr->cpu_state_data.destination_address))
+ goto out;
+
+ /* Dump is active. Show reserved memory region. */
+ seq_printf(m,
+ " : [%#016llx-%#016llx] %#llx bytes, "
+ "Dumped: %#llx\n",
+ (unsigned long long)fw_dump.reserve_dump_area_start,
+ fdm_ptr->cpu_state_data.destination_address - 1,
+ fdm_ptr->cpu_state_data.destination_address -
+ fw_dump.reserve_dump_area_start,
+ fdm_ptr->cpu_state_data.destination_address -
+ fw_dump.reserve_dump_area_start);
+out:
+ if (fdm_active)
+ mutex_unlock(&fadump_mutex);
+ return 0;
+}
+
+static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem,
+ 0200, NULL,
+ fadump_release_memory_store);
+static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
+ 0444, fadump_enabled_show,
+ NULL);
+static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
+ 0644, fadump_register_show,
+ fadump_register_store);
+
+static int fadump_region_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, fadump_region_show, inode->i_private);
+}
+
+static const struct file_operations fadump_region_fops = {
+ .open = fadump_region_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void fadump_init_files(void)
+{
+ struct dentry *debugfs_file;
+ int rc = 0;
+
+ rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
+ if (rc)
+ printk(KERN_ERR "fadump: unable to create sysfs file"
+ " fadump_enabled (%d)\n", rc);
+
+ rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
+ if (rc)
+ printk(KERN_ERR "fadump: unable to create sysfs file"
+ " fadump_registered (%d)\n", rc);
+
+ debugfs_file = debugfs_create_file("fadump_region", 0444,
+ powerpc_debugfs_root, NULL,
+ &fadump_region_fops);
+ if (!debugfs_file)
+ printk(KERN_ERR "fadump: unable to create debugfs file"
+ " fadump_region\n");
+
+ if (fw_dump.dump_active) {
+ rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr);
+ if (rc)
+ printk(KERN_ERR "fadump: unable to create sysfs file"
+ " fadump_release_mem (%d)\n", rc);
+ }
+ return;
+}
+
+/*
+ * Prepare for firmware-assisted dump.
+ */
+int __init setup_fadump(void)
+{
+ if (!fw_dump.fadump_enabled)
+ return 0;
+
+ if (!fw_dump.fadump_supported) {
+ printk(KERN_ERR "Firmware-assisted dump is not supported on"
+ " this hardware\n");
+ return 0;
+ }
+
+ fadump_show_config();
+ /*
+ * If dump data is available then see if it is valid and prepare for
+ * saving it to the disk.
+ */
+ if (fw_dump.dump_active) {
+ /*
+ * if dump process fails then invalidate the registration
+ * and release memory before proceeding for re-registration.
+ */
+ if (process_fadump(fdm_active) < 0)
+ fadump_invalidate_release_mem();
+ }
+ /* Initialize the kernel dump memory structure for FAD registration. */
+ else if (fw_dump.reserve_dump_area_size)
+ init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
+ fadump_init_files();
+
+ return 1;
+}
+subsys_initcall(setup_fadump);
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
index 1679a70bbca..2eae4478f7a 100644
--- a/arch/powerpc/kernel/firmware.c
+++ b/arch/powerpc/kernel/firmware.c
@@ -13,9 +13,10 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/cache.h>
#include <asm/firmware.h>
-unsigned long powerpc_firmware_features;
+unsigned long powerpc_firmware_features __read_mostly;
EXPORT_SYMBOL_GPL(powerpc_firmware_features);
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 821e152e093..9ad236e5d2c 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -23,6 +23,98 @@
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_VSX
+#define __REST_32FPVSRS(n,c,base) \
+BEGIN_FTR_SECTION \
+ b 2f; \
+END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
+ REST_32FPRS(n,base); \
+ b 3f; \
+2: REST_32VSRS(n,c,base); \
+3:
+
+#define __SAVE_32FPVSRS(n,c,base) \
+BEGIN_FTR_SECTION \
+ b 2f; \
+END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
+ SAVE_32FPRS(n,base); \
+ b 3f; \
+2: SAVE_32VSRS(n,c,base); \
+3:
+#else
+#define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base)
+#define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base)
+#endif
+#define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base)
+#define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/* void do_load_up_transact_fpu(struct thread_struct *thread)
+ *
+ * This is similar to load_up_fpu but for the transactional version of the FP
+ * register set. It doesn't mess with the task MSR or valid flags.
+ * Furthermore, we don't do lazy FP with TM currently.
+ */
+_GLOBAL(do_load_up_transact_fpu)
+ mfmsr r6
+ ori r5,r6,MSR_FP
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r5,r5,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+ SYNC
+ MTMSRD(r5)
+
+ addi r7,r3,THREAD_TRANSACT_FPSTATE
+ lfd fr0,FPSTATE_FPSCR(r7)
+ MTFSF_L(fr0)
+ REST_32FPVSRS(0, R4, R7)
+
+ /* FP/VSX off again */
+ MTMSRD(r6)
+ SYNC
+
+ blr
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+/*
+ * Enable use of the FPU, and VSX if possible, for the caller.
+ */
+_GLOBAL(fp_enable)
+ mfmsr r3
+ ori r3,r3,MSR_FP
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r3,r3,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+ SYNC
+ MTMSRD(r3)
+ isync /* (not necessary for arch 2.02 and later) */
+ blr
+
+/*
+ * Load state from memory into FP registers including FPSCR.
+ * Assumes the caller has enabled FP in the MSR.
+ */
+_GLOBAL(load_fp_state)
+ lfd fr0,FPSTATE_FPSCR(r3)
+ MTFSF_L(fr0)
+ REST_32FPVSRS(0, R4, R3)
+ blr
+
+/*
+ * Store FP state into memory, including FPSCR
+ * Assumes the caller has enabled FP in the MSR.
+ */
+_GLOBAL(store_fp_state)
+ SAVE_32FPVSRS(0, R4, R3)
+ mffs fr0
+ stfd fr0,FPSTATE_FPSCR(r3)
+ blr
/*
* This task wants to use the FPU now.
@@ -30,10 +122,17 @@
* and save its floating-point registers in its thread_struct.
* Load up this task's FP registers from its thread_struct,
* enable the FPU for the current task and return to the task.
+ * Note that on 32-bit this can only use registers that will be
+ * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
*/
_GLOBAL(load_up_fpu)
mfmsr r5
ori r5,r5,MSR_FP
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r5,r5,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
SYNC
MTMSRD(r5) /* enable use of fpu now */
isync
@@ -50,9 +149,10 @@ _GLOBAL(load_up_fpu)
beq 1f
toreal(r4)
addi r4,r4,THREAD /* want last_task_used_math->thread */
- SAVE_32FPRS(0, r4)
+ addi r10,r4,THREAD_FPSTATE
+ SAVE_32FPVSRS(0, R5, R10)
mffs fr0
- stfd fr0,THREAD_FPSCR(r4)
+ stfd fr0,FPSTATE_FPSCR(r10)
PPC_LL r5,PT_REGS(r4)
toreal(r5)
PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
@@ -63,7 +163,7 @@ _GLOBAL(load_up_fpu)
#endif /* CONFIG_SMP */
/* enable use of FP after return */
#ifdef CONFIG_PPC32
- mfspr r5,SPRN_SPRG3 /* current task's THREAD (phys) */
+ mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
lwz r4,THREAD_FPEXC_MODE(r5)
ori r9,r9,MSR_FP /* enable FP for current */
or r9,r9,r4
@@ -75,9 +175,10 @@ _GLOBAL(load_up_fpu)
or r12,r12,r4
std r12,_MSR(r1)
#endif
- lfd fr0,THREAD_FPSCR(r5)
+ addi r10,r5,THREAD_FPSTATE
+ lfd fr0,FPSTATE_FPSCR(r10)
MTFSF_L(fr0)
- REST_32FPRS(0, r5)
+ REST_32FPVSRS(0, R4, R10)
#ifndef CONFIG_SMP
subi r4,r5,THREAD
fromreal(r4)
@@ -85,7 +186,7 @@ _GLOBAL(load_up_fpu)
#endif /* CONFIG_SMP */
/* restore registers and return */
/* we haven't used ctr or xer or lr */
- b fast_exception_return
+ blr
/*
* giveup_fpu(tsk)
@@ -96,6 +197,11 @@ _GLOBAL(load_up_fpu)
_GLOBAL(giveup_fpu)
mfmsr r5
ori r5,r5,MSR_FP
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r5,r5,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
SYNC_601
ISYNC_601
MTMSRD(r5) /* enable use of fpu now */
@@ -104,14 +210,23 @@ _GLOBAL(giveup_fpu)
PPC_LCMPI 0,r3,0
beqlr- /* if no previous owner, done */
addi r3,r3,THREAD /* want THREAD of task */
+ PPC_LL r6,THREAD_FPSAVEAREA(r3)
PPC_LL r5,PT_REGS(r3)
- PPC_LCMPI 0,r5,0
- SAVE_32FPRS(0, r3)
+ PPC_LCMPI 0,r6,0
+ bne 2f
+ addi r6,r3,THREAD_FPSTATE
+2: PPC_LCMPI 0,r5,0
+ SAVE_32FPVSRS(0, R4, R6)
mffs fr0
- stfd fr0,THREAD_FPSCR(r3)
+ stfd fr0,FPSTATE_FPSCR(r6)
beq 1f
PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
li r3,MSR_FP|MSR_FE0|MSR_FE1
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r3,r3,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
andc r4,r4,r3 /* disable FP for previous task */
PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
@@ -125,24 +240,14 @@ _GLOBAL(giveup_fpu)
/*
* These are used in the alignment trap handler when emulating
* single-precision loads and stores.
- * We restore and save the fpscr so the task gets the same result
- * and exceptions as if the cpu had performed the load or store.
*/
_GLOBAL(cvt_fd)
- lfd 0,THREAD_FPSCR(r5) /* load up fpscr value */
- MTFSF_L(0)
lfs 0,0(r3)
stfd 0,0(r4)
- mffs 0
- stfd 0,THREAD_FPSCR(r5) /* save new fpscr value */
blr
_GLOBAL(cvt_df)
- lfd 0,THREAD_FPSCR(r5) /* load up fpscr value */
- MTFSF_L(0)
lfd 0,0(r3)
stfs 0,0(r4)
- mffs 0
- stfd 0,THREAD_FPSCR(r5) /* save new fpscr value */
blr
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
new file mode 100644
index 00000000000..f22e7e44fbf
--- /dev/null
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -0,0 +1,237 @@
+
+/* 1. Find the index of the entry we're executing in */
+ bl invstr /* Find our address */
+invstr: mflr r6 /* Make it accessible */
+ mfmsr r7
+ rlwinm r4,r7,27,31,31 /* extract MSR[IS] */
+ mfspr r7, SPRN_PID0
+ slwi r7,r7,16
+ or r7,r7,r4
+ mtspr SPRN_MAS6,r7
+ tlbsx 0,r6 /* search MSR[IS], SPID=PID0 */
+ mfspr r7,SPRN_MAS1
+ andis. r7,r7,MAS1_VALID@h
+ bne match_TLB
+
+ mfspr r7,SPRN_MMUCFG
+ rlwinm r7,r7,21,28,31 /* extract MMUCFG[NPIDS] */
+ cmpwi r7,3
+ bne match_TLB /* skip if NPIDS != 3 */
+
+ mfspr r7,SPRN_PID1
+ slwi r7,r7,16
+ or r7,r7,r4
+ mtspr SPRN_MAS6,r7
+ tlbsx 0,r6 /* search MSR[IS], SPID=PID1 */
+ mfspr r7,SPRN_MAS1
+ andis. r7,r7,MAS1_VALID@h
+ bne match_TLB
+ mfspr r7, SPRN_PID2
+ slwi r7,r7,16
+ or r7,r7,r4
+ mtspr SPRN_MAS6,r7
+ tlbsx 0,r6 /* Fall through, we had to match */
+
+match_TLB:
+ mfspr r7,SPRN_MAS0
+ rlwinm r3,r7,16,20,31 /* Extract MAS0(Entry) */
+
+ mfspr r7,SPRN_MAS1 /* Insure IPROT set */
+ oris r7,r7,MAS1_IPROT@h
+ mtspr SPRN_MAS1,r7
+ tlbwe
+
+/* 2. Invalidate all entries except the entry we're executing in */
+ mfspr r9,SPRN_TLB1CFG
+ andi. r9,r9,0xfff
+ li r6,0 /* Set Entry counter to 0 */
+1: lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */
+ rlwimi r7,r6,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r6) */
+ mtspr SPRN_MAS0,r7
+ tlbre
+ mfspr r7,SPRN_MAS1
+ rlwinm r7,r7,0,2,31 /* Clear MAS1 Valid and IPROT */
+ cmpw r3,r6
+ beq skpinv /* Dont update the current execution TLB */
+ mtspr SPRN_MAS1,r7
+ tlbwe
+ isync
+skpinv: addi r6,r6,1 /* Increment */
+ cmpw r6,r9 /* Are we done? */
+ bne 1b /* If not, repeat */
+
+ /* Invalidate TLB0 */
+ li r6,0x04
+ tlbivax 0,r6
+ TLBSYNC
+ /* Invalidate TLB1 */
+ li r6,0x0c
+ tlbivax 0,r6
+ TLBSYNC
+
+/* 3. Setup a temp mapping and jump to it */
+ andi. r5, r3, 0x1 /* Find an entry not used and is non-zero */
+ addi r5, r5, 0x1
+ lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */
+ rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */
+ mtspr SPRN_MAS0,r7
+ tlbre
+
+ /* grab and fixup the RPN */
+ mfspr r6,SPRN_MAS1 /* extract MAS1[SIZE] */
+ rlwinm r6,r6,25,27,31
+ li r8,-1
+ addi r6,r6,10
+ slw r6,r8,r6 /* convert to mask */
+
+ bl 1f /* Find our address */
+1: mflr r7
+
+ mfspr r8,SPRN_MAS3
+#ifdef CONFIG_PHYS_64BIT
+ mfspr r23,SPRN_MAS7
+#endif
+ and r8,r6,r8
+ subfic r9,r6,-4096
+ and r9,r9,r7
+
+ or r25,r8,r9
+ ori r8,r25,(MAS3_SX|MAS3_SW|MAS3_SR)
+
+ /* Just modify the entry ID and EPN for the temp mapping */
+ lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */
+ rlwimi r7,r5,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r5) */
+ mtspr SPRN_MAS0,r7
+ xori r6,r4,1 /* Setup TMP mapping in the other Address space */
+ slwi r6,r6,12
+ oris r6,r6,(MAS1_VALID|MAS1_IPROT)@h
+ ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_4K))@l
+ mtspr SPRN_MAS1,r6
+ mfspr r6,SPRN_MAS2
+ li r7,0 /* temp EPN = 0 */
+ rlwimi r7,r6,0,20,31
+ mtspr SPRN_MAS2,r7
+ mtspr SPRN_MAS3,r8
+ tlbwe
+
+ xori r6,r4,1
+ slwi r6,r6,5 /* setup new context with other address space */
+ bl 1f /* Find our address */
+1: mflr r9
+ rlwimi r7,r9,0,20,31
+ addi r7,r7,(2f - 1b)
+ mtspr SPRN_SRR0,r7
+ mtspr SPRN_SRR1,r6
+ rfi
+2:
+/* 4. Clear out PIDs & Search info */
+ li r6,0
+ mtspr SPRN_MAS6,r6
+ mtspr SPRN_PID0,r6
+
+ mfspr r7,SPRN_MMUCFG
+ rlwinm r7,r7,21,28,31 /* extract MMUCFG[NPIDS] */
+ cmpwi r7,3
+ bne 2f /* skip if NPIDS != 3 */
+
+ mtspr SPRN_PID1,r6
+ mtspr SPRN_PID2,r6
+
+/* 5. Invalidate mapping we started in */
+2:
+ lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */
+ rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */
+ mtspr SPRN_MAS0,r7
+ tlbre
+ mfspr r6,SPRN_MAS1
+ rlwinm r6,r6,0,2,0 /* clear IPROT */
+ mtspr SPRN_MAS1,r6
+ tlbwe
+ /* Invalidate TLB1 */
+ li r9,0x0c
+ tlbivax 0,r9
+ TLBSYNC
+
+/* The mapping only needs to be cache-coherent on SMP */
+#ifdef CONFIG_SMP
+#define M_IF_SMP MAS2_M
+#else
+#define M_IF_SMP 0
+#endif
+
+#if defined(ENTRY_MAPPING_BOOT_SETUP)
+
+/* 6. Setup KERNELBASE mapping in TLB1[0] */
+ lis r6,0x1000 /* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */
+ mtspr SPRN_MAS0,r6
+ lis r6,(MAS1_VALID|MAS1_IPROT)@h
+ ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
+ mtspr SPRN_MAS1,r6
+ lis r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_SMP)@h
+ ori r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_SMP)@l
+ mtspr SPRN_MAS2,r6
+ mtspr SPRN_MAS3,r8
+ tlbwe
+
+/* 7. Jump to KERNELBASE mapping */
+ lis r6,(KERNELBASE & ~0xfff)@h
+ ori r6,r6,(KERNELBASE & ~0xfff)@l
+ rlwinm r7,r25,0,0x03ffffff
+ add r6,r7,r6
+
+#elif defined(ENTRY_MAPPING_KEXEC_SETUP)
+/*
+ * 6. Setup a 1:1 mapping in TLB1. Esel 0 is unsued, 1 or 2 contains the tmp
+ * mapping so we start at 3. We setup 8 mappings, each 256MiB in size. This
+ * will cover the first 2GiB of memory.
+ */
+
+ lis r10, (MAS1_VALID|MAS1_IPROT)@h
+ ori r10,r10, (MAS1_TSIZE(BOOK3E_PAGESZ_256M))@l
+ li r11, 0
+ li r0, 8
+ mtctr r0
+
+next_tlb_setup:
+ addi r0, r11, 3
+ rlwinm r0, r0, 16, 4, 15 // Compute esel
+ rlwinm r9, r11, 28, 0, 3 // Compute [ER]PN
+ oris r0, r0, (MAS0_TLBSEL(1))@h
+ mtspr SPRN_MAS0,r0
+ mtspr SPRN_MAS1,r10
+ mtspr SPRN_MAS2,r9
+ ori r9, r9, (MAS3_SX|MAS3_SW|MAS3_SR)
+ mtspr SPRN_MAS3,r9
+ tlbwe
+ addi r11, r11, 1
+ bdnz+ next_tlb_setup
+
+/* 7. Jump to our 1:1 mapping */
+ mr r6, r25
+#else
+ #error You need to specify the mapping or not use this at all.
+#endif
+
+ lis r7,MSR_KERNEL@h
+ ori r7,r7,MSR_KERNEL@l
+ bl 1f /* Find our address */
+1: mflr r9
+ rlwimi r6,r9,0,20,31
+ addi r6,r6,(2f - 1b)
+ mtspr SPRN_SRR0,r6
+ mtspr SPRN_SRR1,r7
+ rfi /* start execution out of TLB1[0] entry */
+
+/* 8. Clear out the temp mapping */
+2: lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */
+ rlwimi r7,r5,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r5) */
+ mtspr SPRN_MAS0,r7
+ tlbre
+ mfspr r8,SPRN_MAS1
+ rlwinm r8,r8,0,2,0 /* clear IPROT */
+ mtspr SPRN_MAS1,r8
+ tlbwe
+ /* Invalidate TLB1 */
+ li r9,0x0c
+ tlbivax 0,r9
+ TLBSYNC
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
new file mode 100644
index 00000000000..d178834fe50
--- /dev/null
+++ b/arch/powerpc/kernel/ftrace.c
@@ -0,0 +1,591 @@
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box.
+ *
+ * Added function graph tracer code, taken from x86 that was written
+ * by Frederic Weisbecker, and ported to PPC by Steven Rostedt.
+ *
+ */
+
+#define pr_fmt(fmt) "ftrace-powerpc: " fmt
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+#include <asm/ftrace.h>
+#include <asm/syscall.h>
+
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+static unsigned int
+ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
+{
+ unsigned int op;
+
+ addr = ppc_function_entry((void *)addr);
+
+ /* if (link) set op to 'bl' else 'b' */
+ op = create_branch((unsigned int *)ip, addr, link ? 1 : 0);
+
+ return op;
+}
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
+{
+ unsigned int replaced;
+
+ /*
+ * Note: Due to modules and __init, code can
+ * disappear and change, we need to protect against faulting
+ * as well as code changing. We do this by using the
+ * probe_kernel_* functions.
+ *
+ * No real locking needed, this code is run through
+ * kstop_machine, or before SMP starts.
+ */
+
+ /* read the text we want to modify */
+ if (probe_kernel_read(&replaced, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ /* Make sure it is what we expect it to be */
+ if (replaced != old)
+ return -EINVAL;
+
+ /* replace the text with the new text */
+ if (patch_instruction((unsigned int *)ip, new))
+ return -EPERM;
+
+ return 0;
+}
+
+/*
+ * Helper functions that are the same for both PPC64 and PPC32.
+ */
+static int test_24bit_addr(unsigned long ip, unsigned long addr)
+{
+ addr = ppc_function_entry((void *)addr);
+
+ /* use the create_branch to verify that this offset can be branched */
+ return create_branch((unsigned int *)ip, addr, 0);
+}
+
+#ifdef CONFIG_MODULES
+
+static int is_bl_op(unsigned int op)
+{
+ return (op & 0xfc000003) == 0x48000001;
+}
+
+static unsigned long find_bl_target(unsigned long ip, unsigned int op)
+{
+ static int offset;
+
+ offset = (op & 0x03fffffc);
+ /* make it signed */
+ if (offset & 0x02000000)
+ offset |= 0xfe000000;
+
+ return ip + (long)offset;
+}
+
+#ifdef CONFIG_PPC64
+static int
+__ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned int op;
+ unsigned long entry, ptr;
+ unsigned long ip = rec->ip;
+ void *tramp;
+
+ /* read where this goes */
+ if (probe_kernel_read(&op, (void *)ip, sizeof(int)))
+ return -EFAULT;
+
+ /* Make sure that that this is still a 24bit jump */
+ if (!is_bl_op(op)) {
+ pr_err("Not expected bl: opcode is %x\n", op);
+ return -EINVAL;
+ }
+
+ /* lets find where the pointer goes */
+ tramp = (void *)find_bl_target(ip, op);
+
+ pr_devel("ip:%lx jumps to %p", ip, tramp);
+
+ if (!is_module_trampoline(tramp)) {
+ pr_err("Not a trampoline\n");
+ return -EINVAL;
+ }
+
+ if (module_trampoline_target(mod, tramp, &ptr)) {
+ pr_err("Failed to get trampoline target\n");
+ return -EFAULT;
+ }
+
+ pr_devel("trampoline target %lx", ptr);
+
+ entry = ppc_global_function_entry((void *)addr);
+ /* This should match what was called */
+ if (ptr != entry) {
+ pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+ return -EINVAL;
+ }
+
+ /*
+ * Our original call site looks like:
+ *
+ * bl <tramp>
+ * ld r2,XX(r1)
+ *
+ * Milton Miller pointed out that we can not simply nop the branch.
+ * If a task was preempted when calling a trace function, the nops
+ * will remove the way to restore the TOC in r2 and the r2 TOC will
+ * get corrupted.
+ *
+ * Use a b +8 to jump over the load.
+ */
+ op = 0x48000008; /* b +8 */
+
+ if (patch_instruction((unsigned int *)ip, op))
+ return -EPERM;
+
+ return 0;
+}
+
+#else /* !PPC64 */
+static int
+__ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned int op;
+ unsigned int jmp[4];
+ unsigned long ip = rec->ip;
+ unsigned long tramp;
+
+ if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ /* Make sure that that this is still a 24bit jump */
+ if (!is_bl_op(op)) {
+ pr_err("Not expected bl: opcode is %x\n", op);
+ return -EINVAL;
+ }
+
+ /* lets find where the pointer goes */
+ tramp = find_bl_target(ip, op);
+
+ /*
+ * On PPC32 the trampoline looks like:
+ * 0x3d, 0x80, 0x00, 0x00 lis r12,sym@ha
+ * 0x39, 0x8c, 0x00, 0x00 addi r12,r12,sym@l
+ * 0x7d, 0x89, 0x03, 0xa6 mtctr r12
+ * 0x4e, 0x80, 0x04, 0x20 bctr
+ */
+
+ pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+ /* Find where the trampoline jumps to */
+ if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
+ pr_err("Failed to read %lx\n", tramp);
+ return -EFAULT;
+ }
+
+ pr_devel(" %08x %08x ", jmp[0], jmp[1]);
+
+ /* verify that this is what we expect it to be */
+ if (((jmp[0] & 0xffff0000) != 0x3d800000) ||
+ ((jmp[1] & 0xffff0000) != 0x398c0000) ||
+ (jmp[2] != 0x7d8903a6) ||
+ (jmp[3] != 0x4e800420)) {
+ pr_err("Not a trampoline\n");
+ return -EINVAL;
+ }
+
+ tramp = (jmp[1] & 0xffff) |
+ ((jmp[0] & 0xffff) << 16);
+ if (tramp & 0x8000)
+ tramp -= 0x10000;
+
+ pr_devel(" %lx ", tramp);
+
+ if (tramp != addr) {
+ pr_err("Trampoline location %08lx does not match addr\n",
+ tramp);
+ return -EINVAL;
+ }
+
+ op = PPC_INST_NOP;
+
+ if (patch_instruction((unsigned int *)ip, op))
+ return -EPERM;
+
+ return 0;
+}
+#endif /* PPC64 */
+#endif /* CONFIG_MODULES */
+
+int ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long ip = rec->ip;
+ unsigned int old, new;
+
+ /*
+ * If the calling address is more that 24 bits away,
+ * then we had to use a trampoline to make the call.
+ * Otherwise just update the call site.
+ */
+ if (test_24bit_addr(ip, addr)) {
+ /* within range */
+ old = ftrace_call_replace(ip, addr, 1);
+ new = PPC_INST_NOP;
+ return ftrace_modify_code(ip, old, new);
+ }
+
+#ifdef CONFIG_MODULES
+ /*
+ * Out of range jumps are called from modules.
+ * We should either already have a pointer to the module
+ * or it has been passed in.
+ */
+ if (!rec->arch.mod) {
+ if (!mod) {
+ pr_err("No module loaded addr=%lx\n", addr);
+ return -EFAULT;
+ }
+ rec->arch.mod = mod;
+ } else if (mod) {
+ if (mod != rec->arch.mod) {
+ pr_err("Record mod %p not equal to passed in mod %p\n",
+ rec->arch.mod, mod);
+ return -EINVAL;
+ }
+ /* nothing to do if mod == rec->arch.mod */
+ } else
+ mod = rec->arch.mod;
+
+ return __ftrace_make_nop(mod, rec, addr);
+#else
+ /* We should not get here without modules */
+ return -EINVAL;
+#endif /* CONFIG_MODULES */
+}
+
+#ifdef CONFIG_MODULES
+#ifdef CONFIG_PPC64
+static int
+__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned int op[2];
+ void *ip = (void *)rec->ip;
+
+ /* read where this goes */
+ if (probe_kernel_read(op, ip, sizeof(op)))
+ return -EFAULT;
+
+ /*
+ * We expect to see:
+ *
+ * b +8
+ * ld r2,XX(r1)
+ *
+ * The load offset is different depending on the ABI. For simplicity
+ * just mask it out when doing the compare.
+ */
+ if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
+ pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);
+ return -EINVAL;
+ }
+
+ /* If we never set up a trampoline to ftrace_caller, then bail */
+ if (!rec->arch.mod->arch.tramp) {
+ pr_err("No ftrace trampoline\n");
+ return -EINVAL;
+ }
+
+ /* Ensure branch is within 24 bits */
+ if (!create_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) {
+ pr_err("Branch out of range\n");
+ return -EINVAL;
+ }
+
+ if (patch_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) {
+ pr_err("REL24 out of range!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+#else
+static int
+__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned int op;
+ unsigned long ip = rec->ip;
+
+ /* read where this goes */
+ if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ /* It should be pointing to a nop */
+ if (op != PPC_INST_NOP) {
+ pr_err("Expected NOP but have %x\n", op);
+ return -EINVAL;
+ }
+
+ /* If we never set up a trampoline to ftrace_caller, then bail */
+ if (!rec->arch.mod->arch.tramp) {
+ pr_err("No ftrace trampoline\n");
+ return -EINVAL;
+ }
+
+ /* create the branch to the trampoline */
+ op = create_branch((unsigned int *)ip,
+ rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
+ if (!op) {
+ pr_err("REL24 out of range!\n");
+ return -EINVAL;
+ }
+
+ pr_devel("write to %lx\n", rec->ip);
+
+ if (patch_instruction((unsigned int *)ip, op))
+ return -EPERM;
+
+ return 0;
+}
+#endif /* CONFIG_PPC64 */
+#endif /* CONFIG_MODULES */
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long ip = rec->ip;
+ unsigned int old, new;
+
+ /*
+ * If the calling address is more that 24 bits away,
+ * then we had to use a trampoline to make the call.
+ * Otherwise just update the call site.
+ */
+ if (test_24bit_addr(ip, addr)) {
+ /* within range */
+ old = PPC_INST_NOP;
+ new = ftrace_call_replace(ip, addr, 1);
+ return ftrace_modify_code(ip, old, new);
+ }
+
+#ifdef CONFIG_MODULES
+ /*
+ * Out of range jumps are called from modules.
+ * Being that we are converting from nop, it had better
+ * already have a module defined.
+ */
+ if (!rec->arch.mod) {
+ pr_err("No module loaded\n");
+ return -EINVAL;
+ }
+
+ return __ftrace_make_call(rec, addr);
+#else
+ /* We should not get here without modules */
+ return -EINVAL;
+#endif /* CONFIG_MODULES */
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long ip = (unsigned long)(&ftrace_call);
+ unsigned int old, new;
+ int ret;
+
+ old = *(unsigned int *)&ftrace_call;
+ new = ftrace_call_replace(ip, (unsigned long)func, 1);
+ ret = ftrace_modify_code(ip, old, new);
+
+ return ret;
+}
+
+static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+{
+ unsigned long ftrace_addr = (unsigned long)FTRACE_ADDR;
+ int ret;
+
+ ret = ftrace_update_record(rec, enable);
+
+ switch (ret) {
+ case FTRACE_UPDATE_IGNORE:
+ return 0;
+ case FTRACE_UPDATE_MAKE_CALL:
+ return ftrace_make_call(rec, ftrace_addr);
+ case FTRACE_UPDATE_MAKE_NOP:
+ return ftrace_make_nop(NULL, rec, ftrace_addr);
+ }
+
+ return 0;
+}
+
+void ftrace_replace_code(int enable)
+{
+ struct ftrace_rec_iter *iter;
+ struct dyn_ftrace *rec;
+ int ret;
+
+ for (iter = ftrace_rec_iter_start(); iter;
+ iter = ftrace_rec_iter_next(iter)) {
+ rec = ftrace_rec_iter_record(iter);
+ ret = __ftrace_replace_code(rec, enable);
+ if (ret) {
+ ftrace_bug(ret, rec->ip);
+ return;
+ }
+ }
+}
+
+void arch_ftrace_update_code(int command)
+{
+ if (command & FTRACE_UPDATE_CALLS)
+ ftrace_replace_code(1);
+ else if (command & FTRACE_DISABLE_CALLS)
+ ftrace_replace_code(0);
+
+ if (command & FTRACE_UPDATE_TRACE_FUNC)
+ ftrace_update_ftrace_func(ftrace_trace_function);
+
+ if (command & FTRACE_START_FUNC_RET)
+ ftrace_enable_ftrace_graph_caller();
+ else if (command & FTRACE_STOP_FUNC_RET)
+ ftrace_disable_ftrace_graph_caller();
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+ return 0;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern void ftrace_graph_call(void);
+extern void ftrace_graph_stub(void);
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ unsigned long ip = (unsigned long)(&ftrace_graph_call);
+ unsigned long addr = (unsigned long)(&ftrace_graph_caller);
+ unsigned long stub = (unsigned long)(&ftrace_graph_stub);
+ unsigned int old, new;
+
+ old = ftrace_call_replace(ip, stub, 0);
+ new = ftrace_call_replace(ip, addr, 0);
+
+ return ftrace_modify_code(ip, old, new);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ unsigned long ip = (unsigned long)(&ftrace_graph_call);
+ unsigned long addr = (unsigned long)(&ftrace_graph_caller);
+ unsigned long stub = (unsigned long)(&ftrace_graph_stub);
+ unsigned int old, new;
+
+ old = ftrace_call_replace(ip, addr, 0);
+ new = ftrace_call_replace(ip, stub, 0);
+
+ return ftrace_modify_code(ip, old, new);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_PPC64
+extern void mod_return_to_handler(void);
+#endif
+
+/*
+ * Hook the return address and push it in the stack of return addrs
+ * in current thread info.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+{
+ unsigned long old;
+ int faulted;
+ struct ftrace_graph_ent trace;
+ unsigned long return_hooker = (unsigned long)&return_to_handler;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+
+#ifdef CONFIG_PPC64
+ /* non core kernel code needs to save and restore the TOC */
+ if (REGION_ID(self_addr) != KERNEL_REGION_ID)
+ return_hooker = (unsigned long)&mod_return_to_handler;
+#endif
+
+ return_hooker = ppc_function_entry((void *)return_hooker);
+
+ /*
+ * Protect against fault, even if it shouldn't
+ * happen. This tool is too much intrusive to
+ * ignore such a protection.
+ */
+ asm volatile(
+ "1: " PPC_LL "%[old], 0(%[parent])\n"
+ "2: " PPC_STL "%[return_hooker], 0(%[parent])\n"
+ " li %[faulted], 0\n"
+ "3:\n"
+
+ ".section .fixup, \"ax\"\n"
+ "4: li %[faulted], 1\n"
+ " b 3b\n"
+ ".previous\n"
+
+ ".section __ex_table,\"a\"\n"
+ PPC_LONG_ALIGN "\n"
+ PPC_LONG "1b,4b\n"
+ PPC_LONG "2b,4b\n"
+ ".previous"
+
+ : [old] "=&r" (old), [faulted] "=r" (faulted)
+ : [parent] "r" (parent), [return_hooker] "r" (return_hooker)
+ : "memory"
+ );
+
+ if (unlikely(faulted)) {
+ ftrace_graph_stop();
+ WARN_ON(1);
+ return;
+ }
+
+ trace.func = self_addr;
+ trace.depth = current->curr_ret_stack + 1;
+
+ /* Only trace if the calling function expects to */
+ if (!ftrace_graph_entry(&trace)) {
+ *parent = old;
+ return;
+ }
+
+ if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY)
+ *parent = old;
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64)
+unsigned long __init arch_syscall_addr(int nr)
+{
+ return sys_call_table[nr*2];
+}
+#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index d88e182e40b..dc0488b6f6e 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -9,7 +9,6 @@
* rewritten by Paul Mackerras.
* Copyright (C) 1996 Paul Mackerras.
* MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
*
* This file contains the low-level support and setup for the
* PowerPC platform, including trap and interrupt dispatch.
@@ -22,6 +21,7 @@
*
*/
+#include <linux/init.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -31,10 +31,9 @@
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
-
-#ifdef CONFIG_APUS
-#include <asm/amigappc.h>
-#endif
+#include <asm/ptrace.h>
+#include <asm/bug.h>
+#include <asm/kvm_book3s_asm.h>
/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
#define LOAD_BAT(n, reg, RA, RB) \
@@ -53,20 +52,17 @@
mtspr SPRN_DBAT##n##L,RB; \
1:
- .text
+ __HEAD
.stabs "arch/powerpc/kernel/",N_SO,0,0,0f
.stabs "head_32.S",N_SO,0,0,0f
0:
- .globl _stext
-_stext:
+_ENTRY(_stext);
/*
* _start is defined this way because the XCOFF loader in the OpenFirmware
* on the powermac expects the entry point to be a procedure descriptor.
*/
- .text
- .globl _start
-_start:
+_ENTRY(_start);
/*
* These are here for legacy reasons, the kernel used to
* need to look like a coff function entry for the pmac
@@ -92,11 +88,6 @@ _start:
* r4: virtual address of boot_infos_t
* r5: 0
*
- * APUS
- * r3: 'APUS'
- * r4: physical address of memory base
- * Linux/m68k style BootInfo structure at &_end.
- *
* PREP
* This is jumped to on prep systems right after the kernel is relocated
* to its proper place in memory by the boot loader. The expected layout
@@ -119,12 +110,21 @@ __start:
* because OF may have I/O devices mapped into that area
* (particularly on CHRP).
*/
-#ifdef CONFIG_PPC_MULTIPLATFORM
cmpwi 0,r5,0
beq 1f
+
+#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
+ /* find out where we are now */
+ bcl 20,31,$+4
+0: mflr r8 /* r8 = runtime addr here */
+ addis r8,r8,(_stext - 0b)@ha
+ addi r8,r8,(_stext - 0b)@l /* current runtime base addr */
bl prom_init
+#endif /* CONFIG_PPC_OF_BOOT_TRAMPOLINE */
+
+ /* We never return. We also hit that trap if trying to boot
+ * from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */
trap
-#endif
/*
* Check for BootX signature when supporting PowerMac and branch to
@@ -139,8 +139,7 @@ __start:
trap
#endif /* CONFIG_PPC_PMAC */
-1: mr r31,r3 /* save parameters */
- mr r30,r4
+1: mr r31,r3 /* save device tree ptr */
li r24,0 /* cpu # */
/*
@@ -150,14 +149,6 @@ __start:
*/
bl early_init
-#ifdef CONFIG_APUS
-/* On APUS the __va/__pa constants need to be set to the correct
- * values before continuing.
- */
- mr r4,r30
- bl fix_mem_constants
-#endif /* CONFIG_APUS */
-
/* Switch MMU off, clear BATs and flush TLB. At this point, r3 contains
* the physical address we are running at, returned by early_init()
*/
@@ -167,9 +158,15 @@ __after_mmu_off:
bl flush_tlbs
bl initial_bats
-#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT)
+#if defined(CONFIG_BOOTX_TEXT)
bl setup_disp_bat
#endif
+#ifdef CONFIG_PPC_EARLY_DEBUG_CPM
+ bl setup_cpm_bat
+#endif
+#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO
+ bl setup_usbgecko_bat
+#endif
/*
* Call setup_cpu for CPU 0 and initialize 6xx Idle
@@ -183,7 +180,6 @@ __after_mmu_off:
#endif /* CONFIG_6xx */
-#ifndef CONFIG_APUS
/*
* We need to run with _start at physical address 0.
* On CHRP, we are loaded at 0x10000 since OF on CHRP uses
@@ -194,9 +190,9 @@ __after_mmu_off:
bl reloc_offset
mr r26,r3
addis r4,r3,KERNELBASE@h /* current address of _start */
- cmpwi 0,r4,0 /* are we already running at 0? */
+ lis r5,PHYSICAL_START@h
+ cmplw 0,r4,r5 /* already running at PHYSICAL_START? */
bne relocate_kernel
-#endif /* CONFIG_APUS */
/*
* we now have the 1st 16M of ram mapped with the bats.
* prep needs the mmu to be turned on here, but pmac already has it on.
@@ -251,8 +247,8 @@ __secondary_hold_acknowledge:
* task's thread_struct.
*/
#define EXCEPTION_PROLOG \
- mtspr SPRN_SPRG0,r10; \
- mtspr SPRN_SPRG1,r11; \
+ mtspr SPRN_SPRG_SCRATCH0,r10; \
+ mtspr SPRN_SPRG_SCRATCH1,r11; \
mfcr r10; \
EXCEPTION_PROLOG_1; \
EXCEPTION_PROLOG_2
@@ -262,7 +258,7 @@ __secondary_hold_acknowledge:
andi. r11,r11,MSR_PR; \
tophys(r11,r1); /* use tophys(r1) if kernel */ \
beq 1f; \
- mfspr r11,SPRN_SPRG3; \
+ mfspr r11,SPRN_SPRG_THREAD; \
lwz r11,THREAD_INFO-THREAD(r11); \
addi r11,r11,THREAD_SIZE; \
tophys(r11,r11); \
@@ -274,9 +270,9 @@ __secondary_hold_acknowledge:
stw r10,_CCR(r11); /* save registers */ \
stw r12,GPR12(r11); \
stw r9,GPR9(r11); \
- mfspr r10,SPRN_SPRG0; \
+ mfspr r10,SPRN_SPRG_SCRATCH0; \
stw r10,GPR10(r11); \
- mfspr r12,SPRN_SPRG1; \
+ mfspr r12,SPRN_SPRG_SCRATCH1; \
stw r12,GPR11(r11); \
mflr r10; \
stw r10,_LINK(r11); \
@@ -288,8 +284,8 @@ __secondary_hold_acknowledge:
li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
MTMSRD(r10); /* (except for mach check in rtas) */ \
stw r0,GPR0(r11); \
- lis r10,0x7265; /* put exception frame marker */ \
- addi r10,r10,0x6773; \
+ lis r10,STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \
+ addi r10,r10,STACK_FRAME_REGS_MARKER@l; \
stw r10,8(r11); \
SAVE_4GPRS(3, r11); \
SAVE_2GPRS(7, r11)
@@ -307,6 +303,7 @@ __secondary_hold_acknowledge:
*/
#define EXCEPTION(n, label, hdlr, xfer) \
. = n; \
+ DO_KVM n; \
label: \
EXCEPTION_PROLOG; \
addi r3,r1,STACK_FRAME_OVERHEAD; \
@@ -344,12 +341,7 @@ i##n: \
/* System reset */
/* core99 pmac starts the seconary here by changing the vector, and
putting it back to what it was (unknown_exception) when done. */
-#if defined(CONFIG_GEMINI) && defined(CONFIG_SMP)
- . = 0x100
- b __secondary_start_gemini
-#else
EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD)
-#endif
/* Machine check */
/*
@@ -367,11 +359,12 @@ i##n: \
* -- paulus.
*/
. = 0x200
- mtspr SPRN_SPRG0,r10
- mtspr SPRN_SPRG1,r11
+ DO_KVM 0x200
+ mtspr SPRN_SPRG_SCRATCH0,r10
+ mtspr SPRN_SPRG_SCRATCH1,r11
mfcr r10
#ifdef CONFIG_PPC_CHRP
- mfspr r11,SPRN_SPRG2
+ mfspr r11,SPRN_SPRG_RTAS
cmpwi 0,r11,0
bne 7f
#endif /* CONFIG_PPC_CHRP */
@@ -379,7 +372,7 @@ i##n: \
7: EXCEPTION_PROLOG_2
addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_CHRP
- mfspr r4,SPRN_SPRG2
+ mfspr r4,SPRN_SPRG_RTAS
cmpwi cr1,r4,0
bne cr1,1f
#endif
@@ -390,22 +383,24 @@ i##n: \
/* Data access exception. */
. = 0x300
+ DO_KVM 0x300
DataAccess:
EXCEPTION_PROLOG
mfspr r10,SPRN_DSISR
+ stw r10,_DSISR(r11)
andis. r0,r10,0xa470 /* weird error? */
bne 1f /* if not, try to put a PTE */
mfspr r4,SPRN_DAR /* into the hash table */
rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */
bl hash_page
-1: stw r10,_DSISR(r11)
- mr r5,r10
+1: lwz r5,_DSISR(r11) /* get DSISR value */
mfspr r4,SPRN_DAR
- EXC_XFER_EE_LITE(0x300, handle_page_fault)
+ EXC_XFER_LITE(0x300, handle_page_fault)
/* Instruction access exception. */
. = 0x400
+ DO_KVM 0x400
InstructionAccess:
EXCEPTION_PROLOG
andis. r0,r9,0x4000 /* no pte found? */
@@ -415,13 +410,14 @@ InstructionAccess:
bl hash_page
1: mr r4,r12
mr r5,r9
- EXC_XFER_EE_LITE(0x400, handle_page_fault)
+ EXC_XFER_LITE(0x400, handle_page_fault)
/* External interrupt */
EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
/* Alignment exception */
. = 0x600
+ DO_KVM 0x600
Alignment:
EXCEPTION_PROLOG
mfspr r4,SPRN_DAR
@@ -436,10 +432,20 @@ Alignment:
/* Floating-point unavailable */
. = 0x800
+ DO_KVM 0x800
FPUnavailable:
+BEGIN_FTR_SECTION
+/*
+ * Certain Freescale cores don't have a FPU and treat fp instructions
+ * as a FP Unavailable exception. Redirect to illegal/emulation handling.
+ */
+ b ProgramCheck
+END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
EXCEPTION_PROLOG
- bne load_up_fpu /* if from user, just load it up */
- addi r3,r1,STACK_FRAME_OVERHEAD
+ beq 1f
+ bl load_up_fpu /* if from user, just load it up */
+ b fast_exception_return
+1: addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception)
/* Decrementer */
@@ -450,6 +456,7 @@ FPUnavailable:
/* System call */
. = 0xc00
+ DO_KVM 0xc00
SystemCall:
EXCEPTION_PROLOG
EXC_XFER_EE_LITE(0xc00, DoSyscall)
@@ -467,9 +474,11 @@ SystemCall:
* by executing an altivec instruction.
*/
. = 0xf00
+ DO_KVM 0xf00
b PerformanceMonitor
. = 0xf20
+ DO_KVM 0xf20
b AltiVecUnavailable
/*
@@ -479,49 +488,50 @@ SystemCall:
. = 0x1000
InstructionTLBMiss:
/*
- * r0: stored ctr
+ * r0: scratch
* r1: linux style pte ( later becomes ppc hardware pte )
* r2: ptr to linux-style pte
* r3: scratch
*/
- mfctr r0
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_IMISS
- lis r1,KERNELBASE@h /* check if kernel address */
- cmplw 0,r3,r1
- mfspr r2,SPRN_SPRG3
+ lis r1,PAGE_OFFSET@h /* check if kernel address */
+ cmplw 0,r1,r3
+ mfspr r2,SPRN_SPRG_THREAD
li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */
lwz r2,PGDIR(r2)
- blt+ 112f
+ bge- 112f
+ mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
+ rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
lis r2,swapper_pg_dir@ha /* if kernel address, use */
addi r2,r2,swapper_pg_dir@l /* kernel page table */
- mfspr r1,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
- rlwinm r1,r1,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
112: tophys(r2,r2)
rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- InstructionAddressInvalid /* return if no mapping */
rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */
- lwz r3,0(r2) /* get linux-style pte */
- andc. r1,r1,r3 /* check access & ~permission */
+ lwz r0,0(r2) /* get linux-style pte */
+ andc. r1,r1,r0 /* check access & ~permission */
bne- InstructionAddressInvalid /* return if access not permitted */
- ori r3,r3,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */
+ ori r0,r0,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */
/*
* NOTE! We are assuming this is not an SMP system, otherwise
* we would need to update the pte atomically with lwarx/stwcx.
*/
- stw r3,0(r2) /* update PTE (accessed bit) */
+ stw r0,0(r2) /* update PTE (accessed bit) */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwinm r1,r3,32-10,31,31 /* _PAGE_RW -> PP lsb */
- rlwinm r2,r3,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
+ rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */
+ rlwinm r2,r0,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
and r1,r1,r2 /* writable if _RW and _DIRTY */
- rlwimi r3,r3,32-1,30,30 /* _PAGE_USER -> PP msb */
- rlwimi r3,r3,32-1,31,31 /* _PAGE_USER -> PP lsb */
- ori r1,r1,0xe14 /* clear out reserved bits and M */
- andc r1,r3,r1 /* PP = user? (rw&dirty? 2: 3): 0 */
+ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
+ rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
+ ori r1,r1,0xe04 /* clear out reserved bits */
+ andc r1,r0,r1 /* PP = user? (rw&dirty? 2: 3): 0 */
+BEGIN_FTR_SECTION
+ rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
+END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
mtspr SPRN_RPA,r1
- mfspr r3,SPRN_IMISS
tlbli r3
mfspr r3,SPRN_SRR1 /* Need to restore CR0 */
mtcrf 0x80,r3
@@ -532,7 +542,6 @@ InstructionAddressInvalid:
addis r1,r1,0x2000
mtspr SPRN_DSISR,r1 /* (shouldn't be needed) */
- mtctr r0 /* Restore CTR */
andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */
or r2,r2,r1
mtspr SPRN_SRR1,r2
@@ -553,59 +562,71 @@ InstructionAddressInvalid:
. = 0x1100
DataLoadTLBMiss:
/*
- * r0: stored ctr
+ * r0: scratch
* r1: linux style pte ( later becomes ppc hardware pte )
* r2: ptr to linux-style pte
* r3: scratch
*/
- mfctr r0
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_DMISS
- lis r1,KERNELBASE@h /* check if kernel address */
- cmplw 0,r3,r1
- mfspr r2,SPRN_SPRG3
+ lis r1,PAGE_OFFSET@h /* check if kernel address */
+ cmplw 0,r1,r3
+ mfspr r2,SPRN_SPRG_THREAD
li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */
lwz r2,PGDIR(r2)
- blt+ 112f
+ bge- 112f
+ mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
+ rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
lis r2,swapper_pg_dir@ha /* if kernel address, use */
addi r2,r2,swapper_pg_dir@l /* kernel page table */
- mfspr r1,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
- rlwinm r1,r1,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
112: tophys(r2,r2)
rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- DataAddressInvalid /* return if no mapping */
rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */
- lwz r3,0(r2) /* get linux-style pte */
- andc. r1,r1,r3 /* check access & ~permission */
+ lwz r0,0(r2) /* get linux-style pte */
+ andc. r1,r1,r0 /* check access & ~permission */
bne- DataAddressInvalid /* return if access not permitted */
- ori r3,r3,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */
+ ori r0,r0,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */
/*
* NOTE! We are assuming this is not an SMP system, otherwise
* we would need to update the pte atomically with lwarx/stwcx.
*/
- stw r3,0(r2) /* update PTE (accessed bit) */
+ stw r0,0(r2) /* update PTE (accessed bit) */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwinm r1,r3,32-10,31,31 /* _PAGE_RW -> PP lsb */
- rlwinm r2,r3,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
+ rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */
+ rlwinm r2,r0,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
and r1,r1,r2 /* writable if _RW and _DIRTY */
- rlwimi r3,r3,32-1,30,30 /* _PAGE_USER -> PP msb */
- rlwimi r3,r3,32-1,31,31 /* _PAGE_USER -> PP lsb */
- ori r1,r1,0xe14 /* clear out reserved bits and M */
- andc r1,r3,r1 /* PP = user? (rw&dirty? 2: 3): 0 */
+ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
+ rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
+ ori r1,r1,0xe04 /* clear out reserved bits */
+ andc r1,r0,r1 /* PP = user? (rw&dirty? 2: 3): 0 */
+BEGIN_FTR_SECTION
+ rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
+END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
mtspr SPRN_RPA,r1
- mfspr r3,SPRN_DMISS
+ mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
+ mtcrf 0x80,r2
+BEGIN_MMU_FTR_SECTION
+ li r0,1
+ mfspr r1,SPRN_SPRG_603_LRU
+ rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */
+ slw r0,r0,r2
+ xor r1,r0,r1
+ srw r0,r1,r2
+ mtspr SPRN_SPRG_603_LRU,r1
+ mfspr r2,SPRN_SRR1
+ rlwimi r2,r0,31-14,14,14
+ mtspr SPRN_SRR1,r2
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
tlbld r3
- mfspr r3,SPRN_SRR1 /* Need to restore CR0 */
- mtcrf 0x80,r3
rfi
DataAddressInvalid:
mfspr r3,SPRN_SRR1
rlwinm r1,r3,9,6,6 /* Get load/store bit */
addis r1,r1,0x2000
mtspr SPRN_DSISR,r1
- mtctr r0 /* Restore CTR */
andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */
mtspr SPRN_SRR1,r2
mfspr r1,SPRN_DMISS /* Get failing address */
@@ -625,48 +646,61 @@ DataAddressInvalid:
. = 0x1200
DataStoreTLBMiss:
/*
- * r0: stored ctr
+ * r0: scratch
* r1: linux style pte ( later becomes ppc hardware pte )
* r2: ptr to linux-style pte
* r3: scratch
*/
- mfctr r0
/* Get PTE (linux-style) and check access */
mfspr r3,SPRN_DMISS
- lis r1,KERNELBASE@h /* check if kernel address */
- cmplw 0,r3,r1
- mfspr r2,SPRN_SPRG3
+ lis r1,PAGE_OFFSET@h /* check if kernel address */
+ cmplw 0,r1,r3
+ mfspr r2,SPRN_SPRG_THREAD
li r1,_PAGE_RW|_PAGE_USER|_PAGE_PRESENT /* access flags */
lwz r2,PGDIR(r2)
- blt+ 112f
+ bge- 112f
+ mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
+ rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
lis r2,swapper_pg_dir@ha /* if kernel address, use */
addi r2,r2,swapper_pg_dir@l /* kernel page table */
- mfspr r1,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
- rlwinm r1,r1,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
112: tophys(r2,r2)
rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- DataAddressInvalid /* return if no mapping */
rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */
- lwz r3,0(r2) /* get linux-style pte */
- andc. r1,r1,r3 /* check access & ~permission */
+ lwz r0,0(r2) /* get linux-style pte */
+ andc. r1,r1,r0 /* check access & ~permission */
bne- DataAddressInvalid /* return if access not permitted */
- ori r3,r3,_PAGE_ACCESSED|_PAGE_DIRTY
+ ori r0,r0,_PAGE_ACCESSED|_PAGE_DIRTY
/*
* NOTE! We are assuming this is not an SMP system, otherwise
* we would need to update the pte atomically with lwarx/stwcx.
*/
- stw r3,0(r2) /* update PTE (accessed/dirty bits) */
+ stw r0,0(r2) /* update PTE (accessed/dirty bits) */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwimi r3,r3,32-1,30,30 /* _PAGE_USER -> PP msb */
- li r1,0xe15 /* clear out reserved bits and M */
- andc r1,r3,r1 /* PP = user? 2: 0 */
+ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
+ li r1,0xe05 /* clear out reserved bits & PP lsb */
+ andc r1,r0,r1 /* PP = user? 2: 0 */
+BEGIN_FTR_SECTION
+ rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
+END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
mtspr SPRN_RPA,r1
- mfspr r3,SPRN_DMISS
+ mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
+ mtcrf 0x80,r2
+BEGIN_MMU_FTR_SECTION
+ li r0,1
+ mfspr r1,SPRN_SPRG_603_LRU
+ rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */
+ slw r0,r0,r2
+ xor r1,r0,r1
+ srw r0,r1,r2
+ mtspr SPRN_SPRG_603_LRU,r1
+ mfspr r2,SPRN_SRR1
+ rlwimi r2,r0,31-14,14,14
+ mtspr SPRN_SRR1,r2
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
tlbld r3
- mfspr r3,SPRN_SRR1 /* Need to restore CR0 */
- mtcrf 0x80,r3
rfi
#ifndef CONFIG_ALTIVEC
@@ -711,9 +745,11 @@ DataStoreTLBMiss:
AltiVecUnavailable:
EXCEPTION_PROLOG
#ifdef CONFIG_ALTIVEC
- bne load_up_altivec /* if from user, just load it up */
+ beq 1f
+ bl load_up_altivec /* if from user, just load it up */
+ b fast_exception_return
#endif /* CONFIG_ALTIVEC */
- addi r3,r1,STACK_FRAME_OVERHEAD
+1: addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception)
PerformanceMonitor:
@@ -721,128 +757,16 @@ PerformanceMonitor:
addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_STD(0xf00, performance_monitor_exception)
-#ifdef CONFIG_ALTIVEC
-/* Note that the AltiVec support is closely modeled after the FP
- * support. Changes to one are likely to be applicable to the
- * other! */
-load_up_altivec:
-/*
- * Disable AltiVec for the task which had AltiVec previously,
- * and save its AltiVec registers in its thread_struct.
- * Enables AltiVec for use in the kernel on return.
- * On SMP we know the AltiVec units are free, since we give it up every
- * switch. -- Kumar
- */
- mfmsr r5
- oris r5,r5,MSR_VEC@h
- MTMSRD(r5) /* enable use of AltiVec now */
- isync
-/*
- * For SMP, we don't do lazy AltiVec switching because it just gets too
- * horrendously complex, especially when a task switches from one CPU
- * to another. Instead we call giveup_altivec in switch_to.
- */
-#ifndef CONFIG_SMP
- tophys(r6,0)
- addis r3,r6,last_task_used_altivec@ha
- lwz r4,last_task_used_altivec@l(r3)
- cmpwi 0,r4,0
- beq 1f
- add r4,r4,r6
- addi r4,r4,THREAD /* want THREAD of last_task_used_altivec */
- SAVE_32VRS(0,r10,r4)
- mfvscr vr0
- li r10,THREAD_VSCR
- stvx vr0,r10,r4
- lwz r5,PT_REGS(r4)
- add r5,r5,r6
- lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5)
- lis r10,MSR_VEC@h
- andc r4,r4,r10 /* disable altivec for previous task */
- stw r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#endif /* CONFIG_SMP */
- /* enable use of AltiVec after return */
- oris r9,r9,MSR_VEC@h
- mfspr r5,SPRN_SPRG3 /* current task's THREAD (phys) */
- li r4,1
- li r10,THREAD_VSCR
- stw r4,THREAD_USED_VR(r5)
- lvx vr0,r10,r5
- mtvscr vr0
- REST_32VRS(0,r10,r5)
-#ifndef CONFIG_SMP
- subi r4,r5,THREAD
- sub r4,r4,r6
- stw r4,last_task_used_altivec@l(r3)
-#endif /* CONFIG_SMP */
- /* restore registers and return */
- /* we haven't used ctr or xer or lr */
- b fast_exception_return
-
-/*
- * AltiVec unavailable trap from kernel - print a message, but let
- * the task use AltiVec in the kernel until it returns to user mode.
- */
-KernelAltiVec:
- lwz r3,_MSR(r1)
- oris r3,r3,MSR_VEC@h
- stw r3,_MSR(r1) /* enable use of AltiVec after return */
- lis r3,87f@h
- ori r3,r3,87f@l
- mr r4,r2 /* current */
- lwz r5,_NIP(r1)
- bl printk
- b ret_from_except
-87: .string "AltiVec used in kernel (task=%p, pc=%x) \n"
- .align 4,0
-
-/*
- * giveup_altivec(tsk)
- * Disable AltiVec for the task given as the argument,
- * and save the AltiVec registers in its thread_struct.
- * Enables AltiVec for use in the kernel on return.
- */
-
- .globl giveup_altivec
-giveup_altivec:
- mfmsr r5
- oris r5,r5,MSR_VEC@h
- SYNC
- MTMSRD(r5) /* enable use of AltiVec now */
- isync
- cmpwi 0,r3,0
- beqlr- /* if no previous owner, done */
- addi r3,r3,THREAD /* want THREAD of task */
- lwz r5,PT_REGS(r3)
- cmpwi 0,r5,0
- SAVE_32VRS(0, r4, r3)
- mfvscr vr0
- li r4,THREAD_VSCR
- stvx vr0,r4,r3
- beq 1f
- lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5)
- lis r3,MSR_VEC@h
- andc r4,r4,r3 /* disable AltiVec for previous task */
- stw r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#ifndef CONFIG_SMP
- li r5,0
- lis r4,last_task_used_altivec@ha
- stw r5,last_task_used_altivec@l(r4)
-#endif /* CONFIG_SMP */
- blr
-#endif /* CONFIG_ALTIVEC */
/*
* This code is jumped to from the startup code to copy
- * the kernel image to physical address 0.
+ * the kernel image to physical address PHYSICAL_START.
*/
relocate_kernel:
addis r9,r26,klimit@ha /* fetch klimit */
lwz r25,klimit@l(r9)
addis r25,r25,-KERNELBASE@h
- li r3,0 /* Destination base address */
+ lis r3,PHYSICAL_START@h /* Destination base address */
li r6,0 /* Destination offset */
li r5,0x4000 /* # bytes of memory to copy */
bl copy_and_flush /* copy the first 0x4000 bytes */
@@ -859,7 +783,7 @@ relocate_kernel:
* r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset
* on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5.
*/
-_GLOBAL(copy_and_flush)
+_ENTRY(copy_and_flush)
addi r5,r5,-4
addi r6,r6,-4
4: li r0,L1_CACHE_BYTES/4
@@ -879,99 +803,7 @@ _GLOBAL(copy_and_flush)
addi r6,r6,4
blr
-#ifdef CONFIG_APUS
-/*
- * On APUS the physical base address of the kernel is not known at compile
- * time, which means the __pa/__va constants used are incorrect. In the
- * __init section is recorded the virtual addresses of instructions using
- * these constants, so all that has to be done is fix these before
- * continuing the kernel boot.
- *
- * r4 = The physical address of the kernel base.
- */
-fix_mem_constants:
- mr r10,r4
- addis r10,r10,-KERNELBASE@h /* virt_to_phys constant */
- neg r11,r10 /* phys_to_virt constant */
-
- lis r12,__vtop_table_begin@h
- ori r12,r12,__vtop_table_begin@l
- add r12,r12,r10 /* table begin phys address */
- lis r13,__vtop_table_end@h
- ori r13,r13,__vtop_table_end@l
- add r13,r13,r10 /* table end phys address */
- subi r12,r12,4
- subi r13,r13,4
-1: lwzu r14,4(r12) /* virt address of instruction */
- add r14,r14,r10 /* phys address of instruction */
- lwz r15,0(r14) /* instruction, now insert top */
- rlwimi r15,r10,16,16,31 /* half of vp const in low half */
- stw r15,0(r14) /* of instruction and restore. */
- dcbst r0,r14 /* write it to memory */
- sync
- icbi r0,r14 /* flush the icache line */
- cmpw r12,r13
- bne 1b
- sync /* additional sync needed on g4 */
- isync
-
-/*
- * Map the memory where the exception handlers will
- * be copied to when hash constants have been patched.
- */
-#ifdef CONFIG_APUS_FAST_EXCEPT
- lis r8,0xfff0
-#else
- lis r8,0
-#endif
- ori r8,r8,0x2 /* 128KB, supervisor */
- mtspr SPRN_DBAT3U,r8
- mtspr SPRN_DBAT3L,r8
-
- lis r12,__ptov_table_begin@h
- ori r12,r12,__ptov_table_begin@l
- add r12,r12,r10 /* table begin phys address */
- lis r13,__ptov_table_end@h
- ori r13,r13,__ptov_table_end@l
- add r13,r13,r10 /* table end phys address */
- subi r12,r12,4
- subi r13,r13,4
-1: lwzu r14,4(r12) /* virt address of instruction */
- add r14,r14,r10 /* phys address of instruction */
- lwz r15,0(r14) /* instruction, now insert top */
- rlwimi r15,r11,16,16,31 /* half of pv const in low half*/
- stw r15,0(r14) /* of instruction and restore. */
- dcbst r0,r14 /* write it to memory */
- sync
- icbi r0,r14 /* flush the icache line */
- cmpw r12,r13
- bne 1b
-
- sync /* additional sync needed on g4 */
- isync /* No speculative loading until now */
- blr
-
-/***********************************************************************
- * Please note that on APUS the exception handlers are located at the
- * physical address 0xfff0000. For this reason, the exception handlers
- * cannot use relative branches to access the code below.
- ***********************************************************************/
-#endif /* CONFIG_APUS */
-
#ifdef CONFIG_SMP
-#ifdef CONFIG_GEMINI
- .globl __secondary_start_gemini
-__secondary_start_gemini:
- mfspr r4,SPRN_HID0
- ori r4,r4,HID0_ICFI
- li r3,0
- ori r3,r3,HID0_ICE
- andc r4,r4,r3
- mtspr SPRN_HID0,r4
- sync
- b __secondary_start
-#endif /* CONFIG_GEMINI */
-
.globl __secondary_start_mpc86xx
__secondary_start_mpc86xx:
mfspr r3, SPRN_PIR
@@ -1031,9 +863,9 @@ __secondary_start:
tophys(r4,r2)
addi r4,r4,THREAD /* phys address of our thread_struct */
CLR_TOP32(r4)
- mtspr SPRN_SPRG3,r4
+ mtspr SPRN_SPRG_THREAD,r4
li r3,0
- mtspr SPRN_SPRG2,r3 /* 0 => not in RTAS */
+ mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */
/* enable MMU and jump to start_secondary */
li r4,MSR_KERNEL
@@ -1046,14 +878,18 @@ __secondary_start:
RFI
#endif /* CONFIG_SMP */
+#ifdef CONFIG_KVM_BOOK3S_HANDLER
+#include "../kvm/book3s_rmhandlers.S"
+#endif
+
/*
* Those generic dummy functions are kept for CPUs not
* included in CONFIG_6xx
*/
#if !defined(CONFIG_6xx)
-_GLOBAL(__save_cpu_setup)
+_ENTRY(__save_cpu_setup)
blr
-_GLOBAL(__restore_cpu_setup)
+_ENTRY(__restore_cpu_setup)
blr
#endif /* !defined(CONFIG_6xx) */
@@ -1094,12 +930,12 @@ load_up_mmu:
LOAD_BAT(1,r3,r4,r5)
LOAD_BAT(2,r3,r4,r5)
LOAD_BAT(3,r3,r4,r5)
-BEGIN_FTR_SECTION
+BEGIN_MMU_FTR_SECTION
LOAD_BAT(4,r3,r4,r5)
LOAD_BAT(5,r3,r4,r5)
LOAD_BAT(6,r3,r4,r5)
LOAD_BAT(7,r3,r4,r5)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_HIGH_BATS)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
blr
/*
@@ -1114,9 +950,9 @@ start_here:
tophys(r4,r2)
addi r4,r4,THREAD /* init task's THREAD */
CLR_TOP32(r4)
- mtspr SPRN_SPRG3,r4
+ mtspr SPRN_SPRG_THREAD,r4
li r3,0
- mtspr SPRN_SPRG2,r3 /* 0 => not in RTAS */
+ mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */
/* stack */
lis r1,init_thread_union@ha
@@ -1127,25 +963,12 @@ start_here:
* Do early platform-specific initialization,
* and set up the MMU.
*/
- mr r3,r31
- mr r4,r30
+ li r3,0
+ mr r4,r31
bl machine_init
bl __save_cpu_setup
bl MMU_init
-#ifdef CONFIG_APUS
- /* Copy exception code to exception vector base on APUS. */
- lis r4,KERNELBASE@h
-#ifdef CONFIG_APUS_FAST_EXCEPT
- lis r3,0xfff0 /* Copy to 0xfff00000 */
-#else
- lis r3,0 /* Copy to 0x00000000 */
-#endif
- li r5,0x4000 /* # bytes of memory to copy */
- li r6,0
- bl copy_and_flush /* copy the first 0x4000 bytes */
-#endif /* CONFIG_APUS */
-
/*
* Go back to running unmapped so we can load up new values
* for SDR1 (hash table pointer) and the segment registers
@@ -1188,9 +1011,14 @@ start_here:
RFI
/*
+ * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next);
+ *
* Set up the segment registers for a new context.
*/
-_GLOBAL(set_context)
+_ENTRY(switch_mmu_context)
+ lwz r3,MMCONTEXTID(r4)
+ cmpwi cr0,r3,0
+ blt- 4f
mulli r3,r3,897 /* multiply context by skew factor */
rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */
addis r3,r3,0x6000 /* Set Ks, Ku bits */
@@ -1201,6 +1029,7 @@ _GLOBAL(set_context)
/* Context switch the PTE pointer for the Abatron BDI2000.
* The PGDIR is passed as second argument.
*/
+ lwz r4,MM_PGD(r4)
lis r5, KERNELBASE@h
lwz r5, 0xf0(r5)
stw r4, 0x4(r5)
@@ -1216,6 +1045,9 @@ _GLOBAL(set_context)
sync
isync
blr
+4: trap
+ EMIT_BUG_ENTRY 4b,__FILE__,__LINE__,0
+ blr
/*
* An undocumented "feature" of 604e requires that the v bit
@@ -1249,7 +1081,7 @@ clear_bats:
mtspr SPRN_IBAT2L,r10
mtspr SPRN_IBAT3U,r10
mtspr SPRN_IBAT3L,r10
-BEGIN_FTR_SECTION
+BEGIN_MMU_FTR_SECTION
/* Here's a tweak: at this point, CPU setup have
* not been called yet, so HIGH_BAT_EN may not be
* set in HID0 for the 745x processors. However, it
@@ -1272,14 +1104,14 @@ BEGIN_FTR_SECTION
mtspr SPRN_IBAT6L,r10
mtspr SPRN_IBAT7U,r10
mtspr SPRN_IBAT7L,r10
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_HIGH_BATS)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
blr
flush_tlbs:
lis r10, 0x40
1: addic. r10, r10, -0x1000
tlbie r10
- blt 1b
+ bgt 1b
sync
blr
@@ -1295,24 +1127,27 @@ mmu_off:
RFI
/*
- * Use the first pair of BAT registers to map the 1st 16MB
- * of RAM to KERNELBASE. From this point on we can't safely
- * call OF any more.
+ * On 601, we use 3 BATs to map up to 24M of RAM at _PAGE_OFFSET
+ * (we keep one for debugging) and on others, we use one 256M BAT.
*/
initial_bats:
- lis r11,KERNELBASE@h
+ lis r11,PAGE_OFFSET@h
mfspr r9,SPRN_PVR
rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
cmpwi 0,r9,1
bne 4f
ori r11,r11,4 /* set up BAT registers for 601 */
li r8,0x7f /* valid, block length = 8MB */
- oris r9,r11,0x800000@h /* set up BAT reg for 2nd 8M */
- oris r10,r8,0x800000@h /* set up BAT reg for 2nd 8M */
mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */
mtspr SPRN_IBAT0L,r8 /* lower BAT register */
- mtspr SPRN_IBAT1U,r9
- mtspr SPRN_IBAT1L,r10
+ addis r11,r11,0x800000@h
+ addis r8,r8,0x800000@h
+ mtspr SPRN_IBAT1U,r11
+ mtspr SPRN_IBAT1L,r8
+ addis r11,r11,0x800000@h
+ addis r8,r8,0x800000@h
+ mtspr SPRN_IBAT2U,r11
+ mtspr SPRN_IBAT2L,r8
isync
blr
@@ -1322,11 +1157,7 @@ initial_bats:
#else
ori r8,r8,2 /* R/W access */
#endif /* CONFIG_SMP */
-#ifdef CONFIG_APUS
- ori r11,r11,BL_8M<<2|0x2 /* set up 8MB BAT registers for 604 */
-#else
ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */
-#endif /* CONFIG_APUS */
mtspr SPRN_DBAT0L,r8 /* N.B. 6xx (not 601) have valid */
mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */
@@ -1336,7 +1167,7 @@ initial_bats:
blr
-#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT)
+#ifdef CONFIG_BOOTX_TEXT
setup_disp_bat:
/*
* setup the display bat prepared for us in prom.c
@@ -1360,7 +1191,42 @@ setup_disp_bat:
1: mtspr SPRN_IBAT3L,r8
mtspr SPRN_IBAT3U,r11
blr
-#endif /* !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) */
+#endif /* CONFIG_BOOTX_TEXT */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_CPM
+setup_cpm_bat:
+ lis r8, 0xf000
+ ori r8, r8, 0x002a
+ mtspr SPRN_DBAT1L, r8
+
+ lis r11, 0xf000
+ ori r11, r11, (BL_1M << 2) | 2
+ mtspr SPRN_DBAT1U, r11
+
+ blr
+#endif
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO
+setup_usbgecko_bat:
+ /* prepare a BAT for early io */
+#if defined(CONFIG_GAMECUBE)
+ lis r8, 0x0c00
+#elif defined(CONFIG_WII)
+ lis r8, 0x0d00
+#else
+#error Invalid platform for USB Gecko based early debugging.
+#endif
+ /*
+ * The virtual address used must match the virtual address
+ * associated to the fixmap entry FIX_EARLY_DEBUG_BASE.
+ */
+ lis r11, 0xfffe /* top 128K */
+ ori r8, r8, 0x002a /* uncached, guarded ,rw */
+ ori r11, r11, 0x2 /* 128K, Vs=1, Vp=0 */
+ mtspr SPRN_DBAT1L, r8
+ mtspr SPRN_DBAT1U, r11
+ blr
+#endif
#ifdef CONFIG_8260
/* Jump into the system reset for the rom.
@@ -1412,15 +1278,7 @@ empty_zero_page:
.globl swapper_pg_dir
swapper_pg_dir:
- .space 4096
-
-/*
- * This space gets a copy of optional info passed to us by the bootstrap
- * Used to pass parameters into the kernel like root=/dev/sda1, etc.
- */
- .globl cmd_line
-cmd_line:
- .space 512
+ .space PGD_TABLE_SIZE
.globl intercept_table
intercept_table:
diff --git a/arch/powerpc/kernel/head_4xx.S b/arch/powerpc/kernel/head_40x.S
index adc7f8097cd..7d7d8635227 100644
--- a/arch/powerpc/kernel/head_4xx.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -31,15 +31,16 @@
*
*/
+#include <linux/init.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/pgtable.h>
-#include <asm/ibm4xx.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
/* As with the other PowerPC ports, it is expected that when code
* execution begins here, the following registers contain valid, yet
@@ -53,17 +54,11 @@
*
* This is all going to change RSN when we add bi_recs....... -- Dan
*/
- .text
-_GLOBAL(_stext)
-_GLOBAL(_start)
+ __HEAD
+_ENTRY(_stext);
+_ENTRY(_start);
- /* Save parameters we are passed.
- */
- mr r31,r3
- mr r30,r4
- mr r29,r5
- mr r28,r6
- mr r27,r7
+ mr r31,r3 /* save device tree ptr */
/* We have to turn on the MMU right away so we get cache modes
* set correctly.
@@ -90,28 +85,34 @@ turn_on_mmu:
*/
. = 0xc0
crit_save:
-_GLOBAL(crit_r10)
+_ENTRY(crit_r10)
+ .space 4
+_ENTRY(crit_r11)
+ .space 4
+_ENTRY(crit_srr0)
.space 4
-_GLOBAL(crit_r11)
+_ENTRY(crit_srr1)
+ .space 4
+_ENTRY(saved_ksp_limit)
.space 4
/*
* Exception vector entry code. This code runs with address translation
- * turned off (i.e. using physical addresses). We assume SPRG3 has the
- * physical address of the current task thread_struct.
+ * turned off (i.e. using physical addresses). We assume SPRG_THREAD has
+ * the physical address of the current task thread_struct.
* Note that we have to have decremented r1 before we write to any fields
* of the exception frame, since a critical interrupt could occur at any
* time, and it will write to the area immediately below the current r1.
*/
#define NORMAL_EXCEPTION_PROLOG \
- mtspr SPRN_SPRG0,r10; /* save two registers to work with */\
- mtspr SPRN_SPRG1,r11; \
- mtspr SPRN_SPRG2,r1; \
+ mtspr SPRN_SPRG_SCRATCH0,r10; /* save two registers to work with */\
+ mtspr SPRN_SPRG_SCRATCH1,r11; \
+ mtspr SPRN_SPRG_SCRATCH2,r1; \
mfcr r10; /* save CR in r10 for now */\
mfspr r11,SPRN_SRR1; /* check whether user or kernel */\
andi. r11,r11,MSR_PR; \
beq 1f; \
- mfspr r1,SPRN_SPRG3; /* if from user, start at top of */\
+ mfspr r1,SPRN_SPRG_THREAD; /* if from user, start at top of */\
lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack */\
addi r1,r1,THREAD_SIZE; \
1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\
@@ -119,13 +120,13 @@ _GLOBAL(crit_r11)
stw r10,_CCR(r11); /* save various registers */\
stw r12,GPR12(r11); \
stw r9,GPR9(r11); \
- mfspr r10,SPRN_SPRG0; \
+ mfspr r10,SPRN_SPRG_SCRATCH0; \
stw r10,GPR10(r11); \
- mfspr r12,SPRN_SPRG1; \
+ mfspr r12,SPRN_SPRG_SCRATCH1; \
stw r12,GPR11(r11); \
mflr r10; \
stw r10,_LINK(r11); \
- mfspr r10,SPRN_SPRG2; \
+ mfspr r10,SPRN_SPRG_SCRATCH2; \
mfspr r12,SPRN_SRR0; \
stw r10,GPR1(r11); \
mfspr r9,SPRN_SRR1; \
@@ -149,14 +150,14 @@ _GLOBAL(crit_r11)
mfcr r10; /* save CR in r10 for now */\
mfspr r11,SPRN_SRR3; /* check whether user or kernel */\
andi. r11,r11,MSR_PR; \
- lis r11,critical_stack_top@h; \
- ori r11,r11,critical_stack_top@l; \
+ lis r11,critirq_ctx@ha; \
+ tophys(r11,r11); \
+ lwz r11,critirq_ctx@l(r11); \
beq 1f; \
/* COMING FROM USER MODE */ \
- mfspr r11,SPRN_SPRG3; /* if from user, start at top of */\
+ mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
- addi r11,r11,THREAD_SIZE; \
-1: subi r11,r11,INT_FRAME_SIZE; /* Allocate an exception frame */\
+1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm */\
tophys(r11,r11); \
stw r10,_CCR(r11); /* save various registers */\
stw r12,GPR12(r11); \
@@ -259,8 +260,8 @@ label:
* and exit. Otherwise, we call heavywight functions to do the work.
*/
START_EXCEPTION(0x0300, DataStorage)
- mtspr SPRN_SPRG0, r10 /* Save some working registers */
- mtspr SPRN_SPRG1, r11
+ mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */
+ mtspr SPRN_SPRG_SCRATCH1, r11
#ifdef CONFIG_403GCX
stw r12, 0(r0)
stw r9, 4(r0)
@@ -269,12 +270,12 @@ label:
stw r11, 8(r0)
stw r12, 12(r0)
#else
- mtspr SPRN_SPRG4, r12
- mtspr SPRN_SPRG5, r9
+ mtspr SPRN_SPRG_SCRATCH3, r12
+ mtspr SPRN_SPRG_SCRATCH4, r9
mfcr r11
mfspr r12, SPRN_PID
- mtspr SPRN_SPRG7, r11
- mtspr SPRN_SPRG6, r12
+ mtspr SPRN_SPRG_SCRATCH6, r11
+ mtspr SPRN_SPRG_SCRATCH5, r12
#endif
/* First, check if it was a zone fault (which means a user
@@ -290,7 +291,7 @@ label:
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
*/
- lis r11, TASK_SIZE@h
+ lis r11, PAGE_OFFSET@h
cmplw r10, r11
blt+ 3f
lis r11, swapper_pg_dir@h
@@ -302,7 +303,7 @@ label:
/* Get the PGD for the current thread.
*/
3:
- mfspr r11,SPRN_SPRG3
+ mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
4:
tophys(r11, r11)
@@ -349,15 +350,15 @@ label:
lwz r9, 4(r0)
lwz r12, 0(r0)
#else
- mfspr r12, SPRN_SPRG6
- mfspr r11, SPRN_SPRG7
+ mfspr r12, SPRN_SPRG_SCRATCH5
+ mfspr r11, SPRN_SPRG_SCRATCH6
mtspr SPRN_PID, r12
mtcr r11
- mfspr r9, SPRN_SPRG5
- mfspr r12, SPRN_SPRG4
+ mfspr r9, SPRN_SPRG_SCRATCH4
+ mfspr r12, SPRN_SPRG_SCRATCH3
#endif
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r10, SPRN_SPRG_SCRATCH0
PPC405_ERR77_SYNC
rfi /* Should sync shadow TLBs */
b . /* prevent prefetch past rfi */
@@ -374,15 +375,15 @@ label:
lwz r9, 4(r0)
lwz r12, 0(r0)
#else
- mfspr r12, SPRN_SPRG6
- mfspr r11, SPRN_SPRG7
+ mfspr r12, SPRN_SPRG_SCRATCH5
+ mfspr r11, SPRN_SPRG_SCRATCH6
mtspr SPRN_PID, r12
mtcr r11
- mfspr r9, SPRN_SPRG5
- mfspr r12, SPRN_SPRG4
+ mfspr r9, SPRN_SPRG_SCRATCH4
+ mfspr r12, SPRN_SPRG_SCRATCH3
#endif
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r10, SPRN_SPRG_SCRATCH0
b DataAccess
/*
@@ -393,7 +394,7 @@ label:
NORMAL_EXCEPTION_PROLOG
mr r4,r12 /* Pass SRR0 as arg2 */
li r5,0 /* Pass zero as arg3 */
- EXC_XFER_EE_LITE(0x400, handle_page_fault)
+ EXC_XFER_LITE(0x400, handle_page_fault)
/* 0x0500 - External Interrupt Exception */
EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
@@ -429,30 +430,18 @@ label:
EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE)
/* 0x1000 - Programmable Interval Timer (PIT) Exception */
- START_EXCEPTION(0x1000, Decrementer)
- NORMAL_EXCEPTION_PROLOG
- lis r0,TSR_PIS@h
- mtspr SPRN_TSR,r0 /* Clear the PIT exception */
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_LITE(0x1000, timer_interrupt)
-
-#if 0
-/* NOTE:
- * FIT and WDT handlers are not implemented yet.
- */
+ . = 0x1000
+ b Decrementer
/* 0x1010 - Fixed Interval Timer (FIT) Exception
*/
- STND_EXCEPTION(0x1010, FITException, unknown_exception)
+ . = 0x1010
+ b FITException
/* 0x1020 - Watchdog Timer (WDT) Exception
*/
-#ifdef CONFIG_BOOKE_WDT
- CRITICAL_EXCEPTION(0x1020, WDTException, WatchdogException)
-#else
- CRITICAL_EXCEPTION(0x1020, WDTException, unknown_exception)
-#endif
-#endif
+ . = 0x1020
+ b WDTException
/* 0x1100 - Data TLB Miss Exception
* As the name implies, translation is not in the MMU, so search the
@@ -460,8 +449,8 @@ label:
* load TLB entries from the page table if they exist.
*/
START_EXCEPTION(0x1100, DTLBMiss)
- mtspr SPRN_SPRG0, r10 /* Save some working registers */
- mtspr SPRN_SPRG1, r11
+ mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */
+ mtspr SPRN_SPRG_SCRATCH1, r11
#ifdef CONFIG_403GCX
stw r12, 0(r0)
stw r9, 4(r0)
@@ -470,19 +459,19 @@ label:
stw r11, 8(r0)
stw r12, 12(r0)
#else
- mtspr SPRN_SPRG4, r12
- mtspr SPRN_SPRG5, r9
+ mtspr SPRN_SPRG_SCRATCH3, r12
+ mtspr SPRN_SPRG_SCRATCH4, r9
mfcr r11
mfspr r12, SPRN_PID
- mtspr SPRN_SPRG7, r11
- mtspr SPRN_SPRG6, r12
+ mtspr SPRN_SPRG_SCRATCH6, r11
+ mtspr SPRN_SPRG_SCRATCH5, r12
#endif
mfspr r10, SPRN_DEAR /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
*/
- lis r11, TASK_SIZE@h
+ lis r11, PAGE_OFFSET@h
cmplw r10, r11
blt+ 3f
lis r11, swapper_pg_dir@h
@@ -494,7 +483,7 @@ label:
/* Get the PGD for the current thread.
*/
3:
- mfspr r11,SPRN_SPRG3
+ mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
4:
tophys(r11, r11)
@@ -544,15 +533,15 @@ label:
lwz r9, 4(r0)
lwz r12, 0(r0)
#else
- mfspr r12, SPRN_SPRG6
- mfspr r11, SPRN_SPRG7
+ mfspr r12, SPRN_SPRG_SCRATCH5
+ mfspr r11, SPRN_SPRG_SCRATCH6
mtspr SPRN_PID, r12
mtcr r11
- mfspr r9, SPRN_SPRG5
- mfspr r12, SPRN_SPRG4
+ mfspr r9, SPRN_SPRG_SCRATCH4
+ mfspr r12, SPRN_SPRG_SCRATCH3
#endif
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r10, SPRN_SPRG_SCRATCH0
b DataAccess
/* 0x1200 - Instruction TLB Miss Exception
@@ -560,8 +549,8 @@ label:
* registers and bailout to a different point.
*/
START_EXCEPTION(0x1200, ITLBMiss)
- mtspr SPRN_SPRG0, r10 /* Save some working registers */
- mtspr SPRN_SPRG1, r11
+ mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */
+ mtspr SPRN_SPRG_SCRATCH1, r11
#ifdef CONFIG_403GCX
stw r12, 0(r0)
stw r9, 4(r0)
@@ -570,19 +559,19 @@ label:
stw r11, 8(r0)
stw r12, 12(r0)
#else
- mtspr SPRN_SPRG4, r12
- mtspr SPRN_SPRG5, r9
+ mtspr SPRN_SPRG_SCRATCH3, r12
+ mtspr SPRN_SPRG_SCRATCH4, r9
mfcr r11
mfspr r12, SPRN_PID
- mtspr SPRN_SPRG7, r11
- mtspr SPRN_SPRG6, r12
+ mtspr SPRN_SPRG_SCRATCH6, r11
+ mtspr SPRN_SPRG_SCRATCH5, r12
#endif
mfspr r10, SPRN_SRR0 /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
*/
- lis r11, TASK_SIZE@h
+ lis r11, PAGE_OFFSET@h
cmplw r10, r11
blt+ 3f
lis r11, swapper_pg_dir@h
@@ -594,7 +583,7 @@ label:
/* Get the PGD for the current thread.
*/
3:
- mfspr r11,SPRN_SPRG3
+ mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
4:
tophys(r11, r11)
@@ -644,15 +633,15 @@ label:
lwz r9, 4(r0)
lwz r12, 0(r0)
#else
- mfspr r12, SPRN_SPRG6
- mfspr r11, SPRN_SPRG7
+ mfspr r12, SPRN_SPRG_SCRATCH5
+ mfspr r11, SPRN_SPRG_SCRATCH6
mtspr SPRN_PID, r12
mtcr r11
- mfspr r9, SPRN_SPRG5
- mfspr r12, SPRN_SPRG4
+ mfspr r9, SPRN_SPRG_SCRATCH4
+ mfspr r12, SPRN_SPRG_SCRATCH3
#endif
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r10, SPRN_SPRG_SCRATCH0
b InstructionAccess
EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE)
@@ -737,6 +726,29 @@ label:
(MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
+ /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */
+Decrementer:
+ NORMAL_EXCEPTION_PROLOG
+ lis r0,TSR_PIS@h
+ mtspr SPRN_TSR,r0 /* Clear the PIT exception */
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_LITE(0x1000, timer_interrupt)
+
+ /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */
+FITException:
+ NORMAL_EXCEPTION_PROLOG
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ EXC_XFER_EE(0x1010, unknown_exception)
+
+ /* Watchdog Timer (WDT) Exception. (from 0x1020) */
+WDTException:
+ CRITICAL_EXCEPTION_PROLOG;
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2,
+ (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)),
+ NOCOPY, crit_transfer_to_handler,
+ ret_from_crit_exc)
+
/*
* The other Data TLB exceptions bail out to this point
* if they can't resolve the lightweight TLB fault.
@@ -746,7 +758,7 @@ DataAccess:
mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
stw r5,_ESR(r11)
mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
- EXC_XFER_EE_LITE(0x300, handle_page_fault)
+ EXC_XFER_LITE(0x300, handle_page_fault)
/* Other PowerPC processors, namely those derived from the 6xx-series
* have vectors from 0x2100 through 0x2F00 defined, but marked as reserved.
@@ -759,7 +771,7 @@ DataAccess:
* miss get to this point to load the TLB.
* r10 - TLB_TAG value
* r11 - Linux PTE
- * r12, r9 - avilable to use
+ * r12, r9 - available to use
* PID - loaded with proper value when we get here
* Upon exit, we reload everything and RFI.
* Actually, it will fit now, but oh well.....a common place
@@ -772,7 +784,7 @@ finish_tlb_load:
*/
lwz r9, tlb_4xx_index@l(0)
addi r9, r9, 1
- andi. r9, r9, (PPC4XX_TLB_SIZE-1)
+ andi. r9, r9, (PPC40X_TLB_SIZE-1)
stw r9, tlb_4xx_index@l(0)
6:
@@ -797,27 +809,19 @@ finish_tlb_load:
lwz r9, 4(r0)
lwz r12, 0(r0)
#else
- mfspr r12, SPRN_SPRG6
- mfspr r11, SPRN_SPRG7
+ mfspr r12, SPRN_SPRG_SCRATCH5
+ mfspr r11, SPRN_SPRG_SCRATCH6
mtspr SPRN_PID, r12
mtcr r11
- mfspr r9, SPRN_SPRG5
- mfspr r12, SPRN_SPRG4
+ mfspr r9, SPRN_SPRG_SCRATCH4
+ mfspr r12, SPRN_SPRG_SCRATCH3
#endif
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r10, SPRN_SPRG_SCRATCH0
PPC405_ERR77_SYNC
rfi /* Should sync shadow TLBs */
b . /* prevent prefetch past rfi */
-/* extern void giveup_fpu(struct task_struct *prev)
- *
- * The PowerPC 4xx family of processors do not have an FPU, so this just
- * returns.
- */
-_GLOBAL(giveup_fpu)
- blr
-
/* This is where the main kernel code starts.
*/
start_here:
@@ -829,7 +833,7 @@ start_here:
/* ptr to phys current thread */
tophys(r4,r2)
addi r4,r4,THREAD /* init task's THREAD */
- mtspr SPRN_SPRG3,r4
+ mtspr SPRN_SPRG_THREAD,r4
/* stack */
lis r1,init_thread_union@ha
@@ -842,11 +846,8 @@ start_here:
/*
* Decide what sort of machine this is and initialize the MMU.
*/
- mr r3,r31
- mr r4,r30
- mr r5,r29
- mr r6,r28
- mr r7,r27
+ li r3,0
+ mr r4,r31
bl machine_init
bl MMU_init
@@ -917,11 +918,7 @@ initial_mmu:
mtspr SPRN_PID,r0
sync
- /* Configure and load two entries into TLB slots 62 and 63.
- * In case we are pinning TLBs, these are reserved in by the
- * other TLB functions. If not reserving, then it doesn't
- * matter where they are loaded.
- */
+ /* Configure and load one entry into TLB slots 63 */
clrrwi r4,r4,10 /* Mask off the real page number */
ori r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */
@@ -933,25 +930,6 @@ initial_mmu:
tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */
tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */
-#if defined(CONFIG_SERIAL_TEXT_DEBUG) && defined(SERIAL_DEBUG_IO_BASE)
-
- /* Load a TLB entry for the UART, so that ppc4xx_progress() can use
- * the UARTs nice and early. We use a 4k real==virtual mapping. */
-
- lis r3,SERIAL_DEBUG_IO_BASE@h
- ori r3,r3,SERIAL_DEBUG_IO_BASE@l
- mr r4,r3
- clrrwi r4,r4,12
- ori r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G)
-
- clrrwi r3,r3,12
- ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K))
-
- li r0,0 /* TLB slot 0 */
- tlbwe r4,r0,TLB_DATA
- tlbwe r3,r0,TLB_TAG
-#endif /* CONFIG_SERIAL_DEBUG_TEXT && SERIAL_DEBUG_IO_BASE */
-
isync
/* Establish the exception vector base
@@ -995,24 +973,7 @@ empty_zero_page:
.space 4096
.globl swapper_pg_dir
swapper_pg_dir:
- .space 4096
-
-
-/* Stack for handling critical exceptions from kernel mode */
- .section .bss
- .align 12
-exception_stack_bottom:
- .space 4096
-critical_stack_top:
- .globl exception_stack_top
-exception_stack_top:
-
-/* This space gets a copy of optional info passed to us by the bootstrap
- * which is used to pass parameters into the kernel like root=/dev/sda1, etc.
- */
- .globl cmd_line
-cmd_line:
- .space 512
+ .space PGD_TABLE_SIZE
/* Room for two PTE pointers, usually the kernel and current user pointers
* to their respective root page table.
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index accb39d4991..c334f53453f 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -28,16 +28,17 @@
* option) any later version.
*/
+#include <linux/init.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/pgtable.h>
-#include <asm/ibm4xx.h>
-#include <asm/ibm44x.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+#include <asm/synch.h>
#include "head_booke.h"
@@ -52,190 +53,47 @@
* r7 - End of kernel command line string
*
*/
- .text
-_GLOBAL(_stext)
-_GLOBAL(_start)
+ __HEAD
+_ENTRY(_stext);
+_ENTRY(_start);
/*
* Reserve a word at a fixed location to store the address
* of abatron_pteptrs
*/
nop
-/*
- * Save parameters we are passed
- */
- mr r31,r3
- mr r30,r4
- mr r29,r5
- mr r28,r6
- mr r27,r7
+ mr r31,r3 /* save device tree ptr */
li r24,0 /* CPU number */
+#ifdef CONFIG_RELOCATABLE
/*
- * Set up the initial MMU state
- *
- * We are still executing code at the virtual address
- * mappings set by the firmware for the base of RAM.
- *
- * We first invalidate all TLB entries but the one
- * we are running from. We then load the KERNELBASE
- * mappings so we can begin to use kernel addresses
- * natively and so the interrupt vector locations are
- * permanently pinned (necessary since Book E
- * implementations always have translation enabled).
- *
- * TODO: Use the known TLB entry we are running from to
- * determine which physical region we are located
- * in. This can be used to determine where in RAM
- * (on a shared CPU system) or PCI memory space
- * (on a DRAMless system) we are located.
- * For now, we assume a perfect world which means
- * we are located at the base of DRAM (physical 0).
- */
-
-/*
- * Search TLB for entry that we are currently using.
- * Invalidate all entries but the one we are using.
- */
- /* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */
- mfspr r3,SPRN_PID /* Get PID */
- mfmsr r4 /* Get MSR */
- andi. r4,r4,MSR_IS@l /* TS=1? */
- beq wmmucr /* If not, leave STS=0 */
- oris r3,r3,PPC44x_MMUCR_STS@h /* Set STS=1 */
-wmmucr: mtspr SPRN_MMUCR,r3 /* Put MMUCR */
- sync
-
- bl invstr /* Find our address */
-invstr: mflr r5 /* Make it accessible */
- tlbsx r23,0,r5 /* Find entry we are in */
- li r4,0 /* Start at TLB entry 0 */
- li r3,0 /* Set PAGEID inval value */
-1: cmpw r23,r4 /* Is this our entry? */
- beq skpinv /* If so, skip the inval */
- tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */
-skpinv: addi r4,r4,1 /* Increment */
- cmpwi r4,64 /* Are we done? */
- bne 1b /* If not, repeat */
- isync /* If so, context change */
-
-/*
- * Configure and load pinned entry into TLB slot 63.
+ * Relocate ourselves to the current runtime address.
+ * This is called only by the Boot CPU.
+ * "relocate" is called with our current runtime virutal
+ * address.
+ * r21 will be loaded with the physical runtime address of _stext
*/
+ bl 0f /* Get our runtime address */
+0: mflr r21 /* Make it accessible */
+ addis r21,r21,(_stext - 0b)@ha
+ addi r21,r21,(_stext - 0b)@l /* Get our current runtime base */
- lis r3,KERNELBASE@h /* Load the kernel virtual address */
- ori r3,r3,KERNELBASE@l
-
- /* Kernel is at the base of RAM */
- li r4, 0 /* Load the kernel physical address */
-
- /* Load the kernel PID = 0 */
- li r0,0
- mtspr SPRN_PID,r0
- sync
-
- /* Initialize MMUCR */
- li r5,0
- mtspr SPRN_MMUCR,r5
- sync
-
- /* pageid fields */
- clrrwi r3,r3,10 /* Mask off the effective page number */
- ori r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_256M
-
- /* xlat fields */
- clrrwi r4,r4,10 /* Mask off the real page number */
- /* ERPN is 0 for first 4GB page */
-
- /* attrib fields */
- /* Added guarded bit to protect against speculative loads/stores */
- li r5,0
- ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
-
- li r0,63 /* TLB slot 63 */
-
- tlbwe r3,r0,PPC44x_TLB_PAGEID /* Load the pageid fields */
- tlbwe r4,r0,PPC44x_TLB_XLAT /* Load the translation fields */
- tlbwe r5,r0,PPC44x_TLB_ATTRIB /* Load the attrib/access fields */
-
- /* Force context change */
- mfmsr r0
- mtspr SPRN_SRR1, r0
- lis r0,3f@h
- ori r0,r0,3f@l
- mtspr SPRN_SRR0,r0
- sync
- rfi
-
- /* If necessary, invalidate original entry we used */
-3: cmpwi r23,63
- beq 4f
- li r6,0
- tlbwe r6,r23,PPC44x_TLB_PAGEID
- isync
-
-4:
-#ifdef CONFIG_SERIAL_TEXT_DEBUG
/*
- * Add temporary UART mapping for early debug.
- * We can map UART registers wherever we want as long as they don't
- * interfere with other system mappings (e.g. with pinned entries).
- * For an example of how we handle this - see ocotea.h. --ebs
+ * We have the runtime (virutal) address of our base.
+ * We calculate our shift of offset from a 256M page.
+ * We could map the 256M page we belong to at PAGE_OFFSET and
+ * get going from there.
*/
- /* pageid fields */
- lis r3,UART0_IO_BASE@h
- ori r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_4K
-
- /* xlat fields */
- lis r4,UART0_PHYS_IO_BASE@h /* RPN depends on SoC */
-#ifndef CONFIG_440EP
- ori r4,r4,0x0001 /* ERPN is 1 for second 4GB page */
+ lis r4,KERNELBASE@h
+ ori r4,r4,KERNELBASE@l
+ rlwinm r6,r21,0,4,31 /* r6 = PHYS_START % 256M */
+ rlwinm r5,r4,0,4,31 /* r5 = KERNELBASE % 256M */
+ subf r3,r5,r6 /* r3 = r6 - r5 */
+ add r3,r4,r3 /* Required Virutal Address */
+
+ bl relocate
#endif
- /* attrib fields */
- li r5,0
- ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_I | PPC44x_TLB_G)
-
- li r0,0 /* TLB slot 0 */
-
- tlbwe r3,r0,PPC44x_TLB_PAGEID /* Load the pageid fields */
- tlbwe r4,r0,PPC44x_TLB_XLAT /* Load the translation fields */
- tlbwe r5,r0,PPC44x_TLB_ATTRIB /* Load the attrib/access fields */
-
- /* Force context change */
- isync
-#endif /* CONFIG_SERIAL_TEXT_DEBUG */
-
- /* Establish the interrupt vector offsets */
- SET_IVOR(0, CriticalInput);
- SET_IVOR(1, MachineCheck);
- SET_IVOR(2, DataStorage);
- SET_IVOR(3, InstructionStorage);
- SET_IVOR(4, ExternalInput);
- SET_IVOR(5, Alignment);
- SET_IVOR(6, Program);
- SET_IVOR(7, FloatingPointUnavailable);
- SET_IVOR(8, SystemCall);
- SET_IVOR(9, AuxillaryProcessorUnavailable);
- SET_IVOR(10, Decrementer);
- SET_IVOR(11, FixedIntervalTimer);
- SET_IVOR(12, WatchdogTimer);
- SET_IVOR(13, DataTLBError);
- SET_IVOR(14, InstructionTLBError);
- SET_IVOR(15, Debug);
-
- /* Establish the interrupt vector base */
- lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */
- mtspr SPRN_IVPR,r4
-
-#ifdef CONFIG_440EP
- /* Clear DAPUIB flag in CCR0 (enable APU between CPU and FPU) */
- mfspr r2,SPRN_CCR0
- lis r3,0xffef
- ori r3,r3,0xffff
- and r2,r2,r3
- mtspr SPRN_CCR0,r2
- isync
-#endif
+ bl init_cpu_state
/*
* This is where the main kernel code starts.
@@ -247,7 +105,7 @@ skpinv: addi r4,r4,1 /* Increment */
/* ptr to current thread */
addi r4,r2,THREAD /* init task's THREAD */
- mtspr SPRN_SPRG3,r4
+ mtspr SPRN_SPRG_THREAD,r4
/* stack */
lis r1,init_thread_union@h
@@ -257,14 +115,94 @@ skpinv: addi r4,r4,1 /* Increment */
bl early_init
+#ifdef CONFIG_RELOCATABLE
+ /*
+ * Relocatable kernel support based on processing of dynamic
+ * relocation entries.
+ *
+ * r25 will contain RPN/ERPN for the start address of memory
+ * r21 will contain the current offset of _stext
+ */
+ lis r3,kernstart_addr@ha
+ la r3,kernstart_addr@l(r3)
+
+ /*
+ * Compute the kernstart_addr.
+ * kernstart_addr => (r6,r8)
+ * kernstart_addr & ~0xfffffff => (r6,r7)
+ */
+ rlwinm r6,r25,0,28,31 /* ERPN. Bits 32-35 of Address */
+ rlwinm r7,r25,0,0,3 /* RPN - assuming 256 MB page size */
+ rlwinm r8,r21,0,4,31 /* r8 = (_stext & 0xfffffff) */
+ or r8,r7,r8 /* Compute the lower 32bit of kernstart_addr */
+
+ /* Store kernstart_addr */
+ stw r6,0(r3) /* higher 32bit */
+ stw r8,4(r3) /* lower 32bit */
+
+ /*
+ * Compute the virt_phys_offset :
+ * virt_phys_offset = stext.run - kernstart_addr
+ *
+ * stext.run = (KERNELBASE & ~0xfffffff) + (kernstart_addr & 0xfffffff)
+ * When we relocate, we have :
+ *
+ * (kernstart_addr & 0xfffffff) = (stext.run & 0xfffffff)
+ *
+ * hence:
+ * virt_phys_offset = (KERNELBASE & ~0xfffffff) - (kernstart_addr & ~0xfffffff)
+ *
+ */
+
+ /* KERNELBASE&~0xfffffff => (r4,r5) */
+ li r4, 0 /* higer 32bit */
+ lis r5,KERNELBASE@h
+ rlwinm r5,r5,0,0,3 /* Align to 256M, lower 32bit */
+
+ /*
+ * 64bit subtraction.
+ */
+ subfc r5,r7,r5
+ subfe r4,r6,r4
+
+ /* Store virt_phys_offset */
+ lis r3,virt_phys_offset@ha
+ la r3,virt_phys_offset@l(r3)
+
+ stw r4,0(r3)
+ stw r5,4(r3)
+
+#elif defined(CONFIG_DYNAMIC_MEMSTART)
+ /*
+ * Mapping based, page aligned dynamic kernel loading.
+ *
+ * r25 will contain RPN/ERPN for the start address of memory
+ *
+ * Add the difference between KERNELBASE and PAGE_OFFSET to the
+ * start of physical memory to get kernstart_addr.
+ */
+ lis r3,kernstart_addr@ha
+ la r3,kernstart_addr@l(r3)
+
+ lis r4,KERNELBASE@h
+ ori r4,r4,KERNELBASE@l
+ lis r5,PAGE_OFFSET@h
+ ori r5,r5,PAGE_OFFSET@l
+ subf r4,r5,r4
+
+ rlwinm r6,r25,0,28,31 /* ERPN */
+ rlwinm r7,r25,0,0,3 /* RPN - assuming 256 MB page size */
+ add r7,r7,r4
+
+ stw r6,0(r3)
+ stw r7,4(r3)
+#endif
+
/*
* Decide what sort of machine this is and initialize the MMU.
*/
- mr r3,r31
- mr r4,r30
- mr r5,r29
- mr r6,r28
- mr r7,r27
+ li r3,0
+ mr r4,r31
bl machine_init
bl MMU_init
@@ -278,6 +216,10 @@ skpinv: addi r4,r4,1 /* Increment */
stw r5, 0(r4) /* Save abatron_pteptrs at a fixed location */
stw r6, 0(r5)
+ /* Clear the Machine Check Syndrome Register */
+ li r0,0
+ mtspr SPRN_MCSR,r0
+
/* Let's move on */
lis r4,start_kernel@h
ori r4,r4,start_kernel@l
@@ -306,128 +248,22 @@ skpinv: addi r4,r4,1 /* Increment */
interrupt_base:
/* Critical Input Interrupt */
- CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception)
+ CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception)
/* Machine Check Interrupt */
-#ifdef CONFIG_440A
- MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
-#else
- CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
-#endif
+ CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \
+ machine_check_exception)
+ MCHECK_EXCEPTION(0x0210, MachineCheckA, machine_check_exception)
/* Data Storage Interrupt */
- START_EXCEPTION(DataStorage)
- mtspr SPRN_SPRG0, r10 /* Save some working registers */
- mtspr SPRN_SPRG1, r11
- mtspr SPRN_SPRG4W, r12
- mtspr SPRN_SPRG5W, r13
- mfcr r11
- mtspr SPRN_SPRG7W, r11
-
- /*
- * Check if it was a store fault, if not then bail
- * because a user tried to access a kernel or
- * read-protected page. Otherwise, get the
- * offending address and handle it.
- */
- mfspr r10, SPRN_ESR
- andis. r10, r10, ESR_ST@h
- beq 2f
-
- mfspr r10, SPRN_DEAR /* Get faulting address */
-
- /* If we are faulting a kernel address, we have to use the
- * kernel page tables.
- */
- lis r11, TASK_SIZE@h
- cmplw r10, r11
- blt+ 3f
- lis r11, swapper_pg_dir@h
- ori r11, r11, swapper_pg_dir@l
-
- mfspr r12,SPRN_MMUCR
- rlwinm r12,r12,0,0,23 /* Clear TID */
-
- b 4f
-
- /* Get the PGD for the current thread */
-3:
- mfspr r11,SPRN_SPRG3
- lwz r11,PGDIR(r11)
-
- /* Load PID into MMUCR TID */
- mfspr r12,SPRN_MMUCR /* Get MMUCR */
- mfspr r13,SPRN_PID /* Get PID */
- rlwimi r12,r13,0,24,31 /* Set TID */
-
-4:
- mtspr SPRN_MMUCR,r12
-
- rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */
- lwzx r11, r12, r11 /* Get pgd/pmd entry */
- rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */
- beq 2f /* Bail if no table */
-
- rlwimi r12, r10, 23, 20, 28 /* Compute pte address */
- lwz r11, 4(r12) /* Get pte entry */
-
- andi. r13, r11, _PAGE_RW /* Is it writeable? */
- beq 2f /* Bail if not */
-
- /* Update 'changed'.
- */
- ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
- stw r11, 4(r12) /* Update Linux page table */
-
- li r13, PPC44x_TLB_SR@l /* Set SR */
- rlwimi r13, r11, 29, 29, 29 /* SX = _PAGE_HWEXEC */
- rlwimi r13, r11, 0, 30, 30 /* SW = _PAGE_RW */
- rlwimi r13, r11, 29, 28, 28 /* UR = _PAGE_USER */
- rlwimi r12, r11, 31, 26, 26 /* (_PAGE_USER>>1)->r12 */
- rlwimi r12, r11, 29, 30, 30 /* (_PAGE_USER>>3)->r12 */
- and r12, r12, r11 /* HWEXEC/RW & USER */
- rlwimi r13, r12, 0, 26, 26 /* UX = HWEXEC & USER */
- rlwimi r13, r12, 3, 27, 27 /* UW = RW & USER */
-
- rlwimi r11,r13,0,26,31 /* Insert static perms */
-
- rlwinm r11,r11,0,20,15 /* Clear U0-U3 */
-
- /* find the TLB index that caused the fault. It has to be here. */
- tlbsx r10, 0, r10
-
- tlbwe r11, r10, PPC44x_TLB_ATTRIB /* Write ATTRIB */
-
- /* Done...restore registers and get out of here.
- */
- mfspr r11, SPRN_SPRG7R
- mtcr r11
- mfspr r13, SPRN_SPRG5R
- mfspr r12, SPRN_SPRG4R
+ DATA_STORAGE_EXCEPTION
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
- rfi /* Force context change */
-
-2:
- /*
- * The bailout. Restore registers to pre-exception conditions
- * and call the heavyweights to help us out.
- */
- mfspr r11, SPRN_SPRG7R
- mtcr r11
- mfspr r13, SPRN_SPRG5R
- mfspr r12, SPRN_SPRG4R
-
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
- b data_access
-
- /* Instruction Storage Interrupt */
+ /* Instruction Storage Interrupt */
INSTRUCTION_STORAGE_EXCEPTION
/* External Input Interrupt */
- EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, \
+ do_IRQ, EXC_XFER_LITE)
/* Alignment Interrupt */
ALIGNMENT_EXCEPTION
@@ -439,46 +275,48 @@ interrupt_base:
#ifdef CONFIG_PPC_FPU
FP_UNAVAILABLE_EXCEPTION
#else
- EXCEPTION(0x2010, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \
+ FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
#endif
-
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- NORMAL_EXCEPTION_PROLOG
+ NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL)
EXC_XFER_EE_LITE(0x0c00, DoSyscall)
- /* Auxillary Processor Unavailable Interrupt */
- EXCEPTION(0x2020, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
+ /* Auxiliary Processor Unavailable Interrupt */
+ EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
+ AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
- EXCEPTION(0x1010, FixedIntervalTimer, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \
+ unknown_exception, EXC_XFER_EE)
/* Watchdog Timer Interrupt */
/* TODO: Add watchdog support */
#ifdef CONFIG_BOOKE_WDT
- CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException)
+ CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, WatchdogException)
#else
- CRITICAL_EXCEPTION(0x1020, WatchdogTimer, unknown_exception)
+ CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, unknown_exception)
#endif
/* Data TLB Error Interrupt */
- START_EXCEPTION(DataTLBError)
- mtspr SPRN_SPRG0, r10 /* Save some working registers */
- mtspr SPRN_SPRG1, r11
- mtspr SPRN_SPRG4W, r12
- mtspr SPRN_SPRG5W, r13
+ START_EXCEPTION(DataTLBError44x)
+ mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
+ mtspr SPRN_SPRG_WSCRATCH1, r11
+ mtspr SPRN_SPRG_WSCRATCH2, r12
+ mtspr SPRN_SPRG_WSCRATCH3, r13
mfcr r11
- mtspr SPRN_SPRG7W, r11
+ mtspr SPRN_SPRG_WSCRATCH4, r11
mfspr r10, SPRN_DEAR /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
*/
- lis r11, TASK_SIZE@h
+ lis r11, PAGE_OFFSET@h
cmplw r10, r11
blt+ 3f
lis r11, swapper_pg_dir@h
@@ -491,7 +329,7 @@ interrupt_base:
/* Get the PGD for the current thread */
3:
- mfspr r11,SPRN_SPRG3
+ mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
/* Load PID into MMUCR TID */
@@ -502,33 +340,74 @@ interrupt_base:
4:
mtspr SPRN_MMUCR,r12
- rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */
+ /* Mask of required permission bits. Note that while we
+ * do copy ESR:ST to _PAGE_RW position as trying to write
+ * to an RO page is pretty common, we don't do it with
+ * _PAGE_DIRTY. We could do it, but it's a fairly rare
+ * event so I'd rather take the overhead when it happens
+ * rather than adding an instruction here. We should measure
+ * whether the whole thing is worth it in the first place
+ * as we could avoid loading SPRN_ESR completely in the first
+ * place...
+ *
+ * TODO: Is it worth doing that mfspr & rlwimi in the first
+ * place or can we save a couple of instructions here ?
+ */
+ mfspr r12,SPRN_ESR
+ li r13,_PAGE_PRESENT|_PAGE_ACCESSED
+ rlwimi r13,r12,10,30,30
+
+ /* Load the PTE */
+ /* Compute pgdir/pmd offset */
+ rlwinm r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK_BIT, 29
lwzx r11, r12, r11 /* Get pgd/pmd entry */
rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */
beq 2f /* Bail if no table */
- rlwimi r12, r10, 23, 20, 28 /* Compute pte address */
- lwz r11, 4(r12) /* Get pte entry */
- andi. r13, r11, _PAGE_PRESENT /* Is the page present? */
- beq 2f /* Bail if not present */
+ /* Compute pte address */
+ rlwimi r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK_BIT, 28
+ lwz r11, 0(r12) /* Get high word of pte entry */
+ lwz r12, 4(r12) /* Get low word of pte entry */
+
+ lis r10,tlb_44x_index@ha
- ori r11, r11, _PAGE_ACCESSED
- stw r11, 4(r12)
+ andc. r13,r13,r12 /* Check permission */
+
+ /* Load the next available TLB index */
+ lwz r13,tlb_44x_index@l(r10)
+
+ bne 2f /* Bail if permission mismach */
+
+ /* Increment, rollover, and store TLB index */
+ addi r13,r13,1
+
+ /* Compare with watermark (instruction gets patched) */
+ .globl tlb_44x_patch_hwater_D
+tlb_44x_patch_hwater_D:
+ cmpwi 0,r13,1 /* reserve entries */
+ ble 5f
+ li r13,0
+5:
+ /* Store the next available TLB index */
+ stw r13,tlb_44x_index@l(r10)
+
+ /* Re-load the faulting address */
+ mfspr r10,SPRN_DEAR
/* Jump to common tlb load */
- b finish_tlb_load
+ b finish_tlb_load_44x
2:
/* The bailout. Restore registers to pre-exception conditions
* and call the heavyweights to help us out.
*/
- mfspr r11, SPRN_SPRG7R
+ mfspr r11, SPRN_SPRG_RSCRATCH4
mtcr r11
- mfspr r13, SPRN_SPRG5R
- mfspr r12, SPRN_SPRG4R
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
- b data_access
+ mfspr r13, SPRN_SPRG_RSCRATCH3
+ mfspr r12, SPRN_SPRG_RSCRATCH2
+ mfspr r11, SPRN_SPRG_RSCRATCH1
+ mfspr r10, SPRN_SPRG_RSCRATCH0
+ b DataStorage
/* Instruction TLB Error Interrupt */
/*
@@ -536,19 +415,19 @@ interrupt_base:
* information from different registers and bailout
* to a different point.
*/
- START_EXCEPTION(InstructionTLBError)
- mtspr SPRN_SPRG0, r10 /* Save some working registers */
- mtspr SPRN_SPRG1, r11
- mtspr SPRN_SPRG4W, r12
- mtspr SPRN_SPRG5W, r13
+ START_EXCEPTION(InstructionTLBError44x)
+ mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
+ mtspr SPRN_SPRG_WSCRATCH1, r11
+ mtspr SPRN_SPRG_WSCRATCH2, r12
+ mtspr SPRN_SPRG_WSCRATCH3, r13
mfcr r11
- mtspr SPRN_SPRG7W, r11
+ mtspr SPRN_SPRG_WSCRATCH4, r11
mfspr r10, SPRN_SRR0 /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
*/
- lis r11, TASK_SIZE@h
+ lis r11, PAGE_OFFSET@h
cmplw r10, r11
blt+ 3f
lis r11, swapper_pg_dir@h
@@ -561,7 +440,7 @@ interrupt_base:
/* Get the PGD for the current thread */
3:
- mfspr r11,SPRN_SPRG3
+ mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
/* Load PID into MMUCR TID */
@@ -572,154 +451,338 @@ interrupt_base:
4:
mtspr SPRN_MMUCR,r12
- rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */
+ /* Make up the required permissions */
+ li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+
+ /* Compute pgdir/pmd offset */
+ rlwinm r12, r10, PPC44x_PGD_OFF_SHIFT, PPC44x_PGD_OFF_MASK_BIT, 29
lwzx r11, r12, r11 /* Get pgd/pmd entry */
rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */
beq 2f /* Bail if no table */
- rlwimi r12, r10, 23, 20, 28 /* Compute pte address */
- lwz r11, 4(r12) /* Get pte entry */
- andi. r13, r11, _PAGE_PRESENT /* Is the page present? */
- beq 2f /* Bail if not present */
+ /* Compute pte address */
+ rlwimi r12, r10, PPC44x_PTE_ADD_SHIFT, PPC44x_PTE_ADD_MASK_BIT, 28
+ lwz r11, 0(r12) /* Get high word of pte entry */
+ lwz r12, 4(r12) /* Get low word of pte entry */
+
+ lis r10,tlb_44x_index@ha
+
+ andc. r13,r13,r12 /* Check permission */
+
+ /* Load the next available TLB index */
+ lwz r13,tlb_44x_index@l(r10)
+
+ bne 2f /* Bail if permission mismach */
+
+ /* Increment, rollover, and store TLB index */
+ addi r13,r13,1
+
+ /* Compare with watermark (instruction gets patched) */
+ .globl tlb_44x_patch_hwater_I
+tlb_44x_patch_hwater_I:
+ cmpwi 0,r13,1 /* reserve entries */
+ ble 5f
+ li r13,0
+5:
+ /* Store the next available TLB index */
+ stw r13,tlb_44x_index@l(r10)
- ori r11, r11, _PAGE_ACCESSED
- stw r11, 4(r12)
+ /* Re-load the faulting address */
+ mfspr r10,SPRN_SRR0
/* Jump to common TLB load point */
- b finish_tlb_load
+ b finish_tlb_load_44x
2:
/* The bailout. Restore registers to pre-exception conditions
* and call the heavyweights to help us out.
*/
- mfspr r11, SPRN_SPRG7R
+ mfspr r11, SPRN_SPRG_RSCRATCH4
mtcr r11
- mfspr r13, SPRN_SPRG5R
- mfspr r12, SPRN_SPRG4R
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
+ mfspr r13, SPRN_SPRG_RSCRATCH3
+ mfspr r12, SPRN_SPRG_RSCRATCH2
+ mfspr r11, SPRN_SPRG_RSCRATCH1
+ mfspr r10, SPRN_SPRG_RSCRATCH0
b InstructionStorage
- /* Debug Interrupt */
- DEBUG_EXCEPTION
-
/*
- * Local functions
- */
- /*
- * Data TLB exceptions will bail out to this point
- * if they can't resolve the lightweight TLB fault.
- */
-data_access:
- NORMAL_EXCEPTION_PROLOG
- mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
- stw r5,_ESR(r11)
- mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
- EXC_XFER_EE_LITE(0x0300, handle_page_fault)
-
-/*
-
* Both the instruction and data TLB miss get to this
* point to load the TLB.
* r10 - EA of fault
- * r11 - available to use
- * r12 - Pointer to the 64-bit PTE
- * r13 - available to use
+ * r11 - PTE high word value
+ * r12 - PTE low word value
+ * r13 - TLB index
* MMUCR - loaded with proper value when we get here
* Upon exit, we reload everything and RFI.
*/
-finish_tlb_load:
- /*
- * We set execute, because we don't have the granularity to
- * properly set this at the page level (Linux problem).
- * If shared is set, we cause a zero PID->TID load.
- * Many of these bits are software only. Bits we don't set
- * here we (properly should) assume have the appropriate value.
- */
-
- /* Load the next available TLB index */
- lis r13, tlb_44x_index@ha
- lwz r13, tlb_44x_index@l(r13)
- /* Load the TLB high watermark */
- lis r11, tlb_44x_hwater@ha
- lwz r11, tlb_44x_hwater@l(r11)
-
- /* Increment, rollover, and store TLB index */
- addi r13, r13, 1
- cmpw 0, r13, r11 /* reserve entries */
- ble 7f
- li r13, 0
-7:
- /* Store the next available TLB index */
- lis r11, tlb_44x_index@ha
- stw r13, tlb_44x_index@l(r11)
-
- lwz r11, 0(r12) /* Get MS word of PTE */
- lwz r12, 4(r12) /* Get LS word of PTE */
- rlwimi r11, r12, 0, 0 , 19 /* Insert RPN */
- tlbwe r11, r13, PPC44x_TLB_XLAT /* Write XLAT */
+finish_tlb_load_44x:
+ /* Combine RPN & ERPN an write WS 0 */
+ rlwimi r11,r12,0,0,31-PAGE_SHIFT
+ tlbwe r11,r13,PPC44x_TLB_XLAT
/*
- * Create PAGEID. This is the faulting address,
+ * Create WS1. This is the faulting address (EPN),
* page size, and valid flag.
*/
- li r11, PPC44x_TLB_VALID | PPC44x_TLB_4K
- rlwimi r10, r11, 0, 20, 31 /* Insert valid and page size */
- tlbwe r10, r13, PPC44x_TLB_PAGEID /* Write PAGEID */
-
- li r10, PPC44x_TLB_SR@l /* Set SR */
- rlwimi r10, r12, 0, 30, 30 /* Set SW = _PAGE_RW */
- rlwimi r10, r12, 29, 29, 29 /* SX = _PAGE_HWEXEC */
- rlwimi r10, r12, 29, 28, 28 /* UR = _PAGE_USER */
- rlwimi r11, r12, 31, 26, 26 /* (_PAGE_USER>>1)->r12 */
- and r11, r12, r11 /* HWEXEC & USER */
- rlwimi r10, r11, 0, 26, 26 /* UX = HWEXEC & USER */
-
- rlwimi r12, r10, 0, 26, 31 /* Insert static perms */
- rlwinm r12, r12, 0, 20, 15 /* Clear U0-U3 */
- tlbwe r12, r13, PPC44x_TLB_ATTRIB /* Write ATTRIB */
+ li r11,PPC44x_TLB_VALID | PPC44x_TLBE_SIZE
+ /* Insert valid and page size */
+ rlwimi r10,r11,0,PPC44x_PTE_ADD_MASK_BIT,31
+ tlbwe r10,r13,PPC44x_TLB_PAGEID /* Write PAGEID */
+
+ /* And WS 2 */
+ li r10,0xf85 /* Mask to apply from PTE */
+ rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */
+ and r11,r12,r10 /* Mask PTE bits to keep */
+ andi. r10,r12,_PAGE_USER /* User page ? */
+ beq 1f /* nope, leave U bits empty */
+ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
+1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */
/* Done...restore registers and get out of here.
*/
- mfspr r11, SPRN_SPRG7R
+ mfspr r11, SPRN_SPRG_RSCRATCH4
mtcr r11
- mfspr r13, SPRN_SPRG5R
- mfspr r12, SPRN_SPRG4R
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
+ mfspr r13, SPRN_SPRG_RSCRATCH3
+ mfspr r12, SPRN_SPRG_RSCRATCH2
+ mfspr r11, SPRN_SPRG_RSCRATCH1
+ mfspr r10, SPRN_SPRG_RSCRATCH0
rfi /* Force context change */
-/*
- * Global functions
+/* TLB error interrupts for 476
*/
+#ifdef CONFIG_PPC_47x
+ START_EXCEPTION(DataTLBError47x)
+ mtspr SPRN_SPRG_WSCRATCH0,r10 /* Save some working registers */
+ mtspr SPRN_SPRG_WSCRATCH1,r11
+ mtspr SPRN_SPRG_WSCRATCH2,r12
+ mtspr SPRN_SPRG_WSCRATCH3,r13
+ mfcr r11
+ mtspr SPRN_SPRG_WSCRATCH4,r11
+ mfspr r10,SPRN_DEAR /* Get faulting address */
+
+ /* If we are faulting a kernel address, we have to use the
+ * kernel page tables.
+ */
+ lis r11,PAGE_OFFSET@h
+ cmplw cr0,r10,r11
+ blt+ 3f
+ lis r11,swapper_pg_dir@h
+ ori r11,r11, swapper_pg_dir@l
+ li r12,0 /* MMUCR = 0 */
+ b 4f
+
+ /* Get the PGD for the current thread and setup MMUCR */
+3: mfspr r11,SPRN_SPRG3
+ lwz r11,PGDIR(r11)
+ mfspr r12,SPRN_PID /* Get PID */
+4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */
+
+ /* Mask of required permission bits. Note that while we
+ * do copy ESR:ST to _PAGE_RW position as trying to write
+ * to an RO page is pretty common, we don't do it with
+ * _PAGE_DIRTY. We could do it, but it's a fairly rare
+ * event so I'd rather take the overhead when it happens
+ * rather than adding an instruction here. We should measure
+ * whether the whole thing is worth it in the first place
+ * as we could avoid loading SPRN_ESR completely in the first
+ * place...
+ *
+ * TODO: Is it worth doing that mfspr & rlwimi in the first
+ * place or can we save a couple of instructions here ?
+ */
+ mfspr r12,SPRN_ESR
+ li r13,_PAGE_PRESENT|_PAGE_ACCESSED
+ rlwimi r13,r12,10,30,30
+
+ /* Load the PTE */
+ /* Compute pgdir/pmd offset */
+ rlwinm r12,r10,PPC44x_PGD_OFF_SHIFT,PPC44x_PGD_OFF_MASK_BIT,29
+ lwzx r11,r12,r11 /* Get pgd/pmd entry */
+
+ /* Word 0 is EPN,V,TS,DSIZ */
+ li r12,PPC47x_TLB0_VALID | PPC47x_TLBE_SIZE
+ rlwimi r10,r12,0,32-PAGE_SHIFT,31 /* Insert valid and page size*/
+ li r12,0
+ tlbwe r10,r12,0
+
+ /* XXX can we do better ? Need to make sure tlbwe has established
+ * latch V bit in MMUCR0 before the PTE is loaded further down */
+#ifdef CONFIG_SMP
+ isync
+#endif
+
+ rlwinm. r12,r11,0,0,20 /* Extract pt base address */
+ /* Compute pte address */
+ rlwimi r12,r10,PPC44x_PTE_ADD_SHIFT,PPC44x_PTE_ADD_MASK_BIT,28
+ beq 2f /* Bail if no table */
+ lwz r11,0(r12) /* Get high word of pte entry */
+
+ /* XXX can we do better ? maybe insert a known 0 bit from r11 into the
+ * bottom of r12 to create a data dependency... We can also use r10
+ * as destination nowadays
+ */
+#ifdef CONFIG_SMP
+ lwsync
+#endif
+ lwz r12,4(r12) /* Get low word of pte entry */
+
+ andc. r13,r13,r12 /* Check permission */
+
+ /* Jump to common tlb load */
+ beq finish_tlb_load_47x
+
+2: /* The bailout. Restore registers to pre-exception conditions
+ * and call the heavyweights to help us out.
+ */
+ mfspr r11,SPRN_SPRG_RSCRATCH4
+ mtcr r11
+ mfspr r13,SPRN_SPRG_RSCRATCH3
+ mfspr r12,SPRN_SPRG_RSCRATCH2
+ mfspr r11,SPRN_SPRG_RSCRATCH1
+ mfspr r10,SPRN_SPRG_RSCRATCH0
+ b DataStorage
+
+ /* Instruction TLB Error Interrupt */
+ /*
+ * Nearly the same as above, except we get our
+ * information from different registers and bailout
+ * to a different point.
+ */
+ START_EXCEPTION(InstructionTLBError47x)
+ mtspr SPRN_SPRG_WSCRATCH0,r10 /* Save some working registers */
+ mtspr SPRN_SPRG_WSCRATCH1,r11
+ mtspr SPRN_SPRG_WSCRATCH2,r12
+ mtspr SPRN_SPRG_WSCRATCH3,r13
+ mfcr r11
+ mtspr SPRN_SPRG_WSCRATCH4,r11
+ mfspr r10,SPRN_SRR0 /* Get faulting address */
+
+ /* If we are faulting a kernel address, we have to use the
+ * kernel page tables.
+ */
+ lis r11,PAGE_OFFSET@h
+ cmplw cr0,r10,r11
+ blt+ 3f
+ lis r11,swapper_pg_dir@h
+ ori r11,r11, swapper_pg_dir@l
+ li r12,0 /* MMUCR = 0 */
+ b 4f
+
+ /* Get the PGD for the current thread and setup MMUCR */
+3: mfspr r11,SPRN_SPRG_THREAD
+ lwz r11,PGDIR(r11)
+ mfspr r12,SPRN_PID /* Get PID */
+4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */
+
+ /* Make up the required permissions */
+ li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+
+ /* Load PTE */
+ /* Compute pgdir/pmd offset */
+ rlwinm r12,r10,PPC44x_PGD_OFF_SHIFT,PPC44x_PGD_OFF_MASK_BIT,29
+ lwzx r11,r12,r11 /* Get pgd/pmd entry */
+
+ /* Word 0 is EPN,V,TS,DSIZ */
+ li r12,PPC47x_TLB0_VALID | PPC47x_TLBE_SIZE
+ rlwimi r10,r12,0,32-PAGE_SHIFT,31 /* Insert valid and page size*/
+ li r12,0
+ tlbwe r10,r12,0
+
+ /* XXX can we do better ? Need to make sure tlbwe has established
+ * latch V bit in MMUCR0 before the PTE is loaded further down */
+#ifdef CONFIG_SMP
+ isync
+#endif
+
+ rlwinm. r12,r11,0,0,20 /* Extract pt base address */
+ /* Compute pte address */
+ rlwimi r12,r10,PPC44x_PTE_ADD_SHIFT,PPC44x_PTE_ADD_MASK_BIT,28
+ beq 2f /* Bail if no table */
+
+ lwz r11,0(r12) /* Get high word of pte entry */
+ /* XXX can we do better ? maybe insert a known 0 bit from r11 into the
+ * bottom of r12 to create a data dependency... We can also use r10
+ * as destination nowadays
+ */
+#ifdef CONFIG_SMP
+ lwsync
+#endif
+ lwz r12,4(r12) /* Get low word of pte entry */
+
+ andc. r13,r13,r12 /* Check permission */
+
+ /* Jump to common TLB load point */
+ beq finish_tlb_load_47x
+
+2: /* The bailout. Restore registers to pre-exception conditions
+ * and call the heavyweights to help us out.
+ */
+ mfspr r11, SPRN_SPRG_RSCRATCH4
+ mtcr r11
+ mfspr r13, SPRN_SPRG_RSCRATCH3
+ mfspr r12, SPRN_SPRG_RSCRATCH2
+ mfspr r11, SPRN_SPRG_RSCRATCH1
+ mfspr r10, SPRN_SPRG_RSCRATCH0
+ b InstructionStorage
/*
- * extern void giveup_altivec(struct task_struct *prev)
- *
- * The 44x core does not have an AltiVec unit.
+ * Both the instruction and data TLB miss get to this
+ * point to load the TLB.
+ * r10 - free to use
+ * r11 - PTE high word value
+ * r12 - PTE low word value
+ * r13 - free to use
+ * MMUCR - loaded with proper value when we get here
+ * Upon exit, we reload everything and RFI.
*/
-_GLOBAL(giveup_altivec)
- blr
+finish_tlb_load_47x:
+ /* Combine RPN & ERPN an write WS 1 */
+ rlwimi r11,r12,0,0,31-PAGE_SHIFT
+ tlbwe r11,r13,1
+
+ /* And make up word 2 */
+ li r10,0xf85 /* Mask to apply from PTE */
+ rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */
+ and r11,r12,r10 /* Mask PTE bits to keep */
+ andi. r10,r12,_PAGE_USER /* User page ? */
+ beq 1f /* nope, leave U bits empty */
+ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
+1: tlbwe r11,r13,2
+
+ /* Done...restore registers and get out of here.
+ */
+ mfspr r11, SPRN_SPRG_RSCRATCH4
+ mtcr r11
+ mfspr r13, SPRN_SPRG_RSCRATCH3
+ mfspr r12, SPRN_SPRG_RSCRATCH2
+ mfspr r11, SPRN_SPRG_RSCRATCH1
+ mfspr r10, SPRN_SPRG_RSCRATCH0
+ rfi
+
+#endif /* CONFIG_PPC_47x */
+
+ /* Debug Interrupt */
+ /*
+ * This statement needs to exist at the end of the IVPR
+ * definition just in case you end up taking a debug
+ * exception within another exception.
+ */
+ DEBUG_CRIT_EXCEPTION
+
+interrupt_end:
/*
- * extern void giveup_fpu(struct task_struct *prev)
- *
- * The 44x core does not have an FPU.
+ * Global functions
*/
-#ifndef CONFIG_PPC_FPU
-_GLOBAL(giveup_fpu)
- blr
-#endif
/*
- * extern void abort(void)
- *
- * At present, this routine just applies a system reset.
+ * Adjust the machine check IVOR on 440A cores
*/
-_GLOBAL(abort)
- mfspr r13,SPRN_DBCR0
- oris r13,r13,DBCR0_RST_SYSTEM@h
- mtspr SPRN_DBCR0,r13
+_GLOBAL(__fixup_440A_mcheck)
+ li r3,MachineCheckA@l
+ mtspr SPRN_IVOR1,r3
+ sync
+ blr
_GLOBAL(set_context)
@@ -736,40 +799,466 @@ _GLOBAL(set_context)
blr
/*
+ * Init CPU state. This is called at boot time or for secondary CPUs
+ * to setup initial TLB entries, setup IVORs, etc...
+ *
+ */
+_GLOBAL(init_cpu_state)
+ mflr r22
+#ifdef CONFIG_PPC_47x
+ /* We use the PVR to differenciate 44x cores from 476 */
+ mfspr r3,SPRN_PVR
+ srwi r3,r3,16
+ cmplwi cr0,r3,PVR_476FPE@h
+ beq head_start_47x
+ cmplwi cr0,r3,PVR_476@h
+ beq head_start_47x
+ cmplwi cr0,r3,PVR_476_ISS@h
+ beq head_start_47x
+#endif /* CONFIG_PPC_47x */
+
+/*
+ * In case the firmware didn't do it, we apply some workarounds
+ * that are good for all 440 core variants here
+ */
+ mfspr r3,SPRN_CCR0
+ rlwinm r3,r3,0,0,27 /* disable icache prefetch */
+ isync
+ mtspr SPRN_CCR0,r3
+ isync
+ sync
+
+/*
+ * Set up the initial MMU state for 44x
+ *
+ * We are still executing code at the virtual address
+ * mappings set by the firmware for the base of RAM.
+ *
+ * We first invalidate all TLB entries but the one
+ * we are running from. We then load the KERNELBASE
+ * mappings so we can begin to use kernel addresses
+ * natively and so the interrupt vector locations are
+ * permanently pinned (necessary since Book E
+ * implementations always have translation enabled).
+ *
+ * TODO: Use the known TLB entry we are running from to
+ * determine which physical region we are located
+ * in. This can be used to determine where in RAM
+ * (on a shared CPU system) or PCI memory space
+ * (on a DRAMless system) we are located.
+ * For now, we assume a perfect world which means
+ * we are located at the base of DRAM (physical 0).
+ */
+
+/*
+ * Search TLB for entry that we are currently using.
+ * Invalidate all entries but the one we are using.
+ */
+ /* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */
+ mfspr r3,SPRN_PID /* Get PID */
+ mfmsr r4 /* Get MSR */
+ andi. r4,r4,MSR_IS@l /* TS=1? */
+ beq wmmucr /* If not, leave STS=0 */
+ oris r3,r3,PPC44x_MMUCR_STS@h /* Set STS=1 */
+wmmucr: mtspr SPRN_MMUCR,r3 /* Put MMUCR */
+ sync
+
+ bl invstr /* Find our address */
+invstr: mflr r5 /* Make it accessible */
+ tlbsx r23,0,r5 /* Find entry we are in */
+ li r4,0 /* Start at TLB entry 0 */
+ li r3,0 /* Set PAGEID inval value */
+1: cmpw r23,r4 /* Is this our entry? */
+ beq skpinv /* If so, skip the inval */
+ tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */
+skpinv: addi r4,r4,1 /* Increment */
+ cmpwi r4,64 /* Are we done? */
+ bne 1b /* If not, repeat */
+ isync /* If so, context change */
+
+/*
+ * Configure and load pinned entry into TLB slot 63.
+ */
+#ifdef CONFIG_NONSTATIC_KERNEL
+ /*
+ * In case of a NONSTATIC_KERNEL we reuse the TLB XLAT
+ * entries of the initial mapping set by the boot loader.
+ * The XLAT entry is stored in r25
+ */
+
+ /* Read the XLAT entry for our current mapping */
+ tlbre r25,r23,PPC44x_TLB_XLAT
+
+ lis r3,KERNELBASE@h
+ ori r3,r3,KERNELBASE@l
+
+ /* Use our current RPN entry */
+ mr r4,r25
+#else
+
+ lis r3,PAGE_OFFSET@h
+ ori r3,r3,PAGE_OFFSET@l
+
+ /* Kernel is at the base of RAM */
+ li r4, 0 /* Load the kernel physical address */
+#endif
+
+ /* Load the kernel PID = 0 */
+ li r0,0
+ mtspr SPRN_PID,r0
+ sync
+
+ /* Initialize MMUCR */
+ li r5,0
+ mtspr SPRN_MMUCR,r5
+ sync
+
+ /* pageid fields */
+ clrrwi r3,r3,10 /* Mask off the effective page number */
+ ori r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_256M
+
+ /* xlat fields */
+ clrrwi r4,r4,10 /* Mask off the real page number */
+ /* ERPN is 0 for first 4GB page */
+
+ /* attrib fields */
+ /* Added guarded bit to protect against speculative loads/stores */
+ li r5,0
+ ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
+
+ li r0,63 /* TLB slot 63 */
+
+ tlbwe r3,r0,PPC44x_TLB_PAGEID /* Load the pageid fields */
+ tlbwe r4,r0,PPC44x_TLB_XLAT /* Load the translation fields */
+ tlbwe r5,r0,PPC44x_TLB_ATTRIB /* Load the attrib/access fields */
+
+ /* Force context change */
+ mfmsr r0
+ mtspr SPRN_SRR1, r0
+ lis r0,3f@h
+ ori r0,r0,3f@l
+ mtspr SPRN_SRR0,r0
+ sync
+ rfi
+
+ /* If necessary, invalidate original entry we used */
+3: cmpwi r23,63
+ beq 4f
+ li r6,0
+ tlbwe r6,r23,PPC44x_TLB_PAGEID
+ isync
+
+4:
+#ifdef CONFIG_PPC_EARLY_DEBUG_44x
+ /* Add UART mapping for early debug. */
+
+ /* pageid fields */
+ lis r3,PPC44x_EARLY_DEBUG_VIRTADDR@h
+ ori r3,r3,PPC44x_TLB_VALID|PPC44x_TLB_TS|PPC44x_TLB_64K
+
+ /* xlat fields */
+ lis r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h
+ ori r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH
+
+ /* attrib fields */
+ li r5,(PPC44x_TLB_SW|PPC44x_TLB_SR|PPC44x_TLB_I|PPC44x_TLB_G)
+ li r0,62 /* TLB slot 0 */
+
+ tlbwe r3,r0,PPC44x_TLB_PAGEID
+ tlbwe r4,r0,PPC44x_TLB_XLAT
+ tlbwe r5,r0,PPC44x_TLB_ATTRIB
+
+ /* Force context change */
+ isync
+#endif /* CONFIG_PPC_EARLY_DEBUG_44x */
+
+ /* Establish the interrupt vector offsets */
+ SET_IVOR(0, CriticalInput);
+ SET_IVOR(1, MachineCheck);
+ SET_IVOR(2, DataStorage);
+ SET_IVOR(3, InstructionStorage);
+ SET_IVOR(4, ExternalInput);
+ SET_IVOR(5, Alignment);
+ SET_IVOR(6, Program);
+ SET_IVOR(7, FloatingPointUnavailable);
+ SET_IVOR(8, SystemCall);
+ SET_IVOR(9, AuxillaryProcessorUnavailable);
+ SET_IVOR(10, Decrementer);
+ SET_IVOR(11, FixedIntervalTimer);
+ SET_IVOR(12, WatchdogTimer);
+ SET_IVOR(13, DataTLBError44x);
+ SET_IVOR(14, InstructionTLBError44x);
+ SET_IVOR(15, DebugCrit);
+
+ b head_start_common
+
+
+#ifdef CONFIG_PPC_47x
+
+#ifdef CONFIG_SMP
+
+/* Entry point for secondary 47x processors */
+_GLOBAL(start_secondary_47x)
+ mr r24,r3 /* CPU number */
+
+ bl init_cpu_state
+
+ /* Now we need to bolt the rest of kernel memory which
+ * is done in C code. We must be careful because our task
+ * struct or our stack can (and will probably) be out
+ * of reach of the initial 256M TLB entry, so we use a
+ * small temporary stack in .bss for that. This works
+ * because only one CPU at a time can be in this code
+ */
+ lis r1,temp_boot_stack@h
+ ori r1,r1,temp_boot_stack@l
+ addi r1,r1,1024-STACK_FRAME_OVERHEAD
+ li r0,0
+ stw r0,0(r1)
+ bl mmu_init_secondary
+
+ /* Now we can get our task struct and real stack pointer */
+
+ /* Get current_thread_info and current */
+ lis r1,secondary_ti@ha
+ lwz r1,secondary_ti@l(r1)
+ lwz r2,TI_TASK(r1)
+
+ /* Current stack pointer */
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ li r0,0
+ stw r0,0(r1)
+
+ /* Kernel stack for exception entry in SPRG3 */
+ addi r4,r2,THREAD /* init task's THREAD */
+ mtspr SPRN_SPRG3,r4
+
+ b start_secondary
+
+#endif /* CONFIG_SMP */
+
+/*
+ * Set up the initial MMU state for 44x
+ *
+ * We are still executing code at the virtual address
+ * mappings set by the firmware for the base of RAM.
+ */
+
+head_start_47x:
+ /* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */
+ mfspr r3,SPRN_PID /* Get PID */
+ mfmsr r4 /* Get MSR */
+ andi. r4,r4,MSR_IS@l /* TS=1? */
+ beq 1f /* If not, leave STS=0 */
+ oris r3,r3,PPC47x_MMUCR_STS@h /* Set STS=1 */
+1: mtspr SPRN_MMUCR,r3 /* Put MMUCR */
+ sync
+
+ /* Find the entry we are running from */
+ bl 1f
+1: mflr r23
+ tlbsx r23,0,r23
+ tlbre r24,r23,0
+ tlbre r25,r23,1
+ tlbre r26,r23,2
+
+/*
+ * Cleanup time
+ */
+
+ /* Initialize MMUCR */
+ li r5,0
+ mtspr SPRN_MMUCR,r5
+ sync
+
+clear_all_utlb_entries:
+
+ #; Set initial values.
+
+ addis r3,0,0x8000
+ addi r4,0,0
+ addi r5,0,0
+ b clear_utlb_entry
+
+ #; Align the loop to speed things up.
+
+ .align 6
+
+clear_utlb_entry:
+
+ tlbwe r4,r3,0
+ tlbwe r5,r3,1
+ tlbwe r5,r3,2
+ addis r3,r3,0x2000
+ cmpwi r3,0
+ bne clear_utlb_entry
+ addis r3,0,0x8000
+ addis r4,r4,0x100
+ cmpwi r4,0
+ bne clear_utlb_entry
+
+ #; Restore original entry.
+
+ oris r23,r23,0x8000 /* specify the way */
+ tlbwe r24,r23,0
+ tlbwe r25,r23,1
+ tlbwe r26,r23,2
+
+/*
+ * Configure and load pinned entry into TLB for the kernel core
+ */
+
+ lis r3,PAGE_OFFSET@h
+ ori r3,r3,PAGE_OFFSET@l
+
+ /* Load the kernel PID = 0 */
+ li r0,0
+ mtspr SPRN_PID,r0
+ sync
+
+ /* Word 0 */
+ clrrwi r3,r3,12 /* Mask off the effective page number */
+ ori r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_256M
+
+ /* Word 1 - use r25. RPN is the same as the original entry */
+
+ /* Word 2 */
+ li r5,0
+ ori r5,r5,PPC47x_TLB2_S_RWX
+#ifdef CONFIG_SMP
+ ori r5,r5,PPC47x_TLB2_M
+#endif
+
+ /* We write to way 0 and bolted 0 */
+ lis r0,0x8800
+ tlbwe r3,r0,0
+ tlbwe r25,r0,1
+ tlbwe r5,r0,2
+
+/*
+ * Configure SSPCR, ISPCR and USPCR for now to search everything, we can fix
+ * them up later
+ */
+ LOAD_REG_IMMEDIATE(r3, 0x9abcdef0)
+ mtspr SPRN_SSPCR,r3
+ mtspr SPRN_USPCR,r3
+ LOAD_REG_IMMEDIATE(r3, 0x12345670)
+ mtspr SPRN_ISPCR,r3
+
+ /* Force context change */
+ mfmsr r0
+ mtspr SPRN_SRR1, r0
+ lis r0,3f@h
+ ori r0,r0,3f@l
+ mtspr SPRN_SRR0,r0
+ sync
+ rfi
+
+ /* Invalidate original entry we used */
+3:
+ rlwinm r24,r24,0,21,19 /* clear the "valid" bit */
+ tlbwe r24,r23,0
+ addi r24,0,0
+ tlbwe r24,r23,1
+ tlbwe r24,r23,2
+ isync /* Clear out the shadow TLB entries */
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_44x
+ /* Add UART mapping for early debug. */
+
+ /* Word 0 */
+ lis r3,PPC44x_EARLY_DEBUG_VIRTADDR@h
+ ori r3,r3,PPC47x_TLB0_VALID | PPC47x_TLB0_TS | PPC47x_TLB0_1M
+
+ /* Word 1 */
+ lis r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW@h
+ ori r4,r4,CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH
+
+ /* Word 2 */
+ li r5,(PPC47x_TLB2_S_RW | PPC47x_TLB2_IMG)
+
+ /* Bolted in way 0, bolt slot 5, we -hope- we don't hit the same
+ * congruence class as the kernel, we need to make sure of it at
+ * some point
+ */
+ lis r0,0x8d00
+ tlbwe r3,r0,0
+ tlbwe r4,r0,1
+ tlbwe r5,r0,2
+
+ /* Force context change */
+ isync
+#endif /* CONFIG_PPC_EARLY_DEBUG_44x */
+
+ /* Establish the interrupt vector offsets */
+ SET_IVOR(0, CriticalInput);
+ SET_IVOR(1, MachineCheckA);
+ SET_IVOR(2, DataStorage);
+ SET_IVOR(3, InstructionStorage);
+ SET_IVOR(4, ExternalInput);
+ SET_IVOR(5, Alignment);
+ SET_IVOR(6, Program);
+ SET_IVOR(7, FloatingPointUnavailable);
+ SET_IVOR(8, SystemCall);
+ SET_IVOR(9, AuxillaryProcessorUnavailable);
+ SET_IVOR(10, Decrementer);
+ SET_IVOR(11, FixedIntervalTimer);
+ SET_IVOR(12, WatchdogTimer);
+ SET_IVOR(13, DataTLBError47x);
+ SET_IVOR(14, InstructionTLBError47x);
+ SET_IVOR(15, DebugCrit);
+
+ /* We configure icbi to invalidate 128 bytes at a time since the
+ * current 32-bit kernel code isn't too happy with icache != dcache
+ * block size
+ */
+ mfspr r3,SPRN_CCR0
+ oris r3,r3,0x0020
+ mtspr SPRN_CCR0,r3
+ isync
+
+#endif /* CONFIG_PPC_47x */
+
+/*
+ * Here we are back to code that is common between 44x and 47x
+ *
+ * We proceed to further kernel initialization and return to the
+ * main kernel entry
+ */
+head_start_common:
+ /* Establish the interrupt vector base */
+ lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */
+ mtspr SPRN_IVPR,r4
+
+ /*
+ * If the kernel was loaded at a non-zero 256 MB page, we need to
+ * mask off the most significant 4 bits to get the relative address
+ * from the start of physical memory
+ */
+ rlwinm r22,r22,0,4,31
+ addis r22,r22,PAGE_OFFSET@h
+ mtlr r22
+ isync
+ blr
+
+/*
* We put a few things here that have to be page-aligned. This stuff
* goes at the beginning of the data segment, which is page-aligned.
*/
.data
- .align 12
+ .align PAGE_SHIFT
.globl sdata
sdata:
.globl empty_zero_page
empty_zero_page:
- .space 4096
+ .space PAGE_SIZE
/*
* To support >32-bit physical addresses, we use an 8KB pgdir.
*/
.globl swapper_pg_dir
swapper_pg_dir:
- .space 8192
-
-/* Reserved 4k for the critical exception stack & 4k for the machine
- * check stack per CPU for kernel mode exceptions */
- .section .bss
- .align 12
-exception_stack_bottom:
- .space BOOKE_EXCEPTION_STACK_SIZE
- .globl exception_stack_top
-exception_stack_top:
-
-/*
- * This space gets a copy of optional info passed to us by the bootstrap
- * which is used to pass parameters into the kernel like root=/dev/sda1, etc.
- */
- .globl cmd_line
-cmd_line:
- .space 512
+ .space PGD_TABLE_SIZE
/*
* Room for two PTE pointers, usually the kernel and current user pointers
@@ -777,3 +1266,9 @@ cmd_line:
*/
abatron_pteptrs:
.space 8
+
+#ifdef CONFIG_SMP
+ .align 12
+temp_boot_stack:
+ .space 1024
+#endif /* CONFIG_SMP */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 3065b472b95..a95145d7f61 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -12,8 +12,9 @@
* Adapted for 64bit PowerPC by Dave Engebretsen, Peter Bergner, and
* Mike Corrigan {engebret|bergner|mikejc}@us.ibm.com
*
- * This file contains the low-level support and setup for the
- * PowerPC-64 platform, including trap and interrupt dispatch.
+ * This file contains the entry point for the 64-bit kernel along
+ * with some early initialization code common to all 64-bit powerpc
+ * variants.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -22,6 +23,7 @@
*/
#include <linux/threads.h>
+#include <linux/init.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -31,60 +33,54 @@
#include <asm/cputable.h>
#include <asm/setup.h>
#include <asm/hvcall.h>
-#include <asm/iseries/lpar_map.h>
#include <asm/thread_info.h>
+#include <asm/firmware.h>
+#include <asm/page_64.h>
+#include <asm/irqflags.h>
+#include <asm/kvm_book3s_asm.h>
+#include <asm/ptrace.h>
+#include <asm/hw_irq.h>
-#ifdef CONFIG_PPC_ISERIES
-#define DO_SOFT_DISABLE
-#endif
-
-/*
- * We layout physical memory as follows:
- * 0x0000 - 0x00ff : Secondary processor spin code
- * 0x0100 - 0x2fff : pSeries Interrupt prologs
- * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs
- * 0x6000 - 0x6fff : Initial (CPU0) segment table
- * 0x7000 - 0x7fff : FWNMI data area
- * 0x8000 - : Early init and support code
- */
-
-/*
- * SPRG Usage
- *
- * Register Definition
- *
- * SPRG0 reserved for hypervisor
- * SPRG1 temp - used to save gpr
- * SPRG2 temp - used to save gpr
- * SPRG3 virt addr of paca
+/* The physical memory is laid out such that the secondary processor
+ * spin code sits at 0x0000...0x00ff. On server, the vectors follow
+ * using the layout described in exceptions-64s.S
*/
/*
* Entering into this code we make the following assumptions:
- * For pSeries:
+ *
+ * For pSeries or server processors:
* 1. The MMU is off & open firmware is running in real mode.
* 2. The kernel is entered at __start
+ * -or- For OPAL entry:
+ * 1. The MMU is off, processor in HV mode, primary CPU enters at 0
+ * with device-tree in gpr3. We also get OPAL base in r8 and
+ * entry in r9 for debugging purposes
+ * 2. Secondary processors enter at 0x60 with PIR in gpr3
*
- * For iSeries:
- * 1. The MMU is on (as it always is for iSeries)
- * 2. The kernel is entered at system_reset_iSeries
+ * For Book3E processors:
+ * 1. The MMU is on running in AS0 in a state defined in ePAPR
+ * 2. The kernel is entered at __start
*/
.text
.globl _stext
_stext:
-#ifdef CONFIG_PPC_MULTIPLATFORM
_GLOBAL(__start)
/* NOP this out unconditionally */
BEGIN_FTR_SECTION
- b .__start_initialization_multiplatform
+ FIXUP_ENDIAN
+ b __start_initialization_multiplatform
END_FTR_SECTION(0, 1)
-#endif /* CONFIG_PPC_MULTIPLATFORM */
/* Catch branch to 0 in real mode */
trap
- /* Secondary processors spin on this value until it goes to 1. */
+ /* Secondary processors spin on this value until it becomes non-zero.
+ * When non-zero, it contains the real address of the function the cpu
+ * should jump to.
+ */
+ .balign 8
.globl __secondary_hold_spinloop
__secondary_hold_spinloop:
.llong 0x0
@@ -95,46 +91,75 @@ __secondary_hold_spinloop:
__secondary_hold_acknowledge:
.llong 0x0
-#ifdef CONFIG_PPC_ISERIES
- /*
- * At offset 0x20, there is a pointer to iSeries LPAR data.
- * This is required by the hypervisor
+#ifdef CONFIG_RELOCATABLE
+ /* This flag is set to 1 by a loader if the kernel should run
+ * at the loaded address instead of the linked address. This
+ * is used by kexec-tools to keep the the kdump kernel in the
+ * crash_kernel region. The loader is responsible for
+ * observing the alignment requirement.
*/
- . = 0x20
- .llong hvReleaseData-KERNELBASE
-#endif /* CONFIG_PPC_ISERIES */
+ /* Do not move this variable as kexec-tools knows about it. */
+ . = 0x5c
+ .globl __run_at_load
+__run_at_load:
+ .long 0x72756e30 /* "run0" -- relocate to 0 by default */
+#endif
. = 0x60
/*
- * The following code is used on pSeries to hold secondary processors
- * in a spin loop after they have been freed from OpenFirmware, but
+ * The following code is used to hold secondary processors
+ * in a spin loop after they have entered the kernel, but
* before the bulk of the kernel has been relocated. This code
* is relocated to physical address 0x60 before prom_init is run.
* All of it must fit below the first exception vector at 0x100.
+ * Use .globl here not _GLOBAL because we want __secondary_hold
+ * to be the actual text address, not a descriptor.
*/
-_GLOBAL(__secondary_hold)
+ .globl __secondary_hold
+__secondary_hold:
+ FIXUP_ENDIAN
+#ifndef CONFIG_PPC_BOOK3E
mfmsr r24
ori r24,r24,MSR_RI
mtmsrd r24 /* RI on */
-
+#endif
/* Grab our physical cpu number */
mr r24,r3
+ /* stash r4 for book3e */
+ mr r25,r4
/* Tell the master cpu we're here */
/* Relocation is off & we are located at an address less */
/* than 0x100, so only need to grab low order offset. */
- std r24,__secondary_hold_acknowledge@l(0)
+ std r24,__secondary_hold_acknowledge-_stext(0)
sync
+ li r26,0
+#ifdef CONFIG_PPC_BOOK3E
+ tovirt(r26,r26)
+#endif
/* All secondary cpus wait here until told to start. */
-100: ld r4,__secondary_hold_spinloop@l(0)
- cmpdi 0,r4,1
- bne 100b
+100: ld r12,__secondary_hold_spinloop-_stext(r26)
+ cmpdi 0,r12,0
+ beq 100b
#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
- LOAD_REG_IMMEDIATE(r4, .generic_secondary_smp_init)
- mtctr r4
+#ifdef CONFIG_PPC_BOOK3E
+ tovirt(r12,r12)
+#endif
+ mtctr r12
mr r3,r24
+ /*
+ * it may be the case that other platforms have r4 right to
+ * begin with, this gives us some safety in case it is not
+ */
+#ifdef CONFIG_PPC_BOOK3E
+ mr r4,r25
+#else
+ li r4,0
+#endif
+ /* Make sure that patched code is visible */
+ isync
bctr
#else
BUG_OPCODE
@@ -147,1451 +172,145 @@ exception_marker:
.text
/*
- * The following macros define the code that appears as
- * the prologue to each of the exception handlers. They
- * are split into two parts to allow a single kernel binary
- * to be used for pSeries and iSeries.
- * LOL. One day... - paulus
- */
-
-/*
- * We make as much of the exception code common between native
- * exception handlers (including pSeries LPAR) and iSeries LPAR
- * implementations as possible.
- */
-
-/*
- * This is the start of the interrupt handlers for pSeries
- * This code runs with relocation off.
- */
-#define EX_R9 0
-#define EX_R10 8
-#define EX_R11 16
-#define EX_R12 24
-#define EX_R13 32
-#define EX_SRR0 40
-#define EX_DAR 48
-#define EX_DSISR 56
-#define EX_CCR 60
-#define EX_R3 64
-#define EX_LR 72
-
-/*
- * We're short on space and time in the exception prolog, so we can't
- * use the normal SET_REG_IMMEDIATE macro. Normally we just need the
- * low halfword of the address, but for Kdump we need the whole low
- * word.
- */
-#ifdef CONFIG_CRASH_DUMP
-#define LOAD_HANDLER(reg, label) \
- oris reg,reg,(label)@h; /* virt addr of handler ... */ \
- ori reg,reg,(label)@l; /* .. and the rest */
-#else
-#define LOAD_HANDLER(reg, label) \
- ori reg,reg,(label)@l; /* virt addr of handler ... */
-#endif
-
-/*
- * Equal to EXCEPTION_PROLOG_PSERIES, except that it forces 64bit mode.
- * The firmware calls the registered system_reset_fwnmi and
- * machine_check_fwnmi handlers in 32bit mode if the cpu happens to run
- * a 32bit application at the time of the event.
- * This firmware bug is present on POWER4 and JS20.
- */
-#define EXCEPTION_PROLOG_PSERIES_FORCE_64BIT(area, label) \
- mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \
- std r9,area+EX_R9(r13); /* save r9 - r12 */ \
- std r10,area+EX_R10(r13); \
- std r11,area+EX_R11(r13); \
- std r12,area+EX_R12(r13); \
- mfspr r9,SPRN_SPRG1; \
- std r9,area+EX_R13(r13); \
- mfcr r9; \
- clrrdi r12,r13,32; /* get high part of &label */ \
- mfmsr r10; \
- /* force 64bit mode */ \
- li r11,5; /* MSR_SF_LG|MSR_ISF_LG */ \
- rldimi r10,r11,61,0; /* insert into top 3 bits */ \
- /* done 64bit mode */ \
- mfspr r11,SPRN_SRR0; /* save SRR0 */ \
- LOAD_HANDLER(r12,label) \
- ori r10,r10,MSR_IR|MSR_DR|MSR_RI; \
- mtspr SPRN_SRR0,r12; \
- mfspr r12,SPRN_SRR1; /* and SRR1 */ \
- mtspr SPRN_SRR1,r10; \
- rfid; \
- b . /* prevent speculative execution */
-
-#define EXCEPTION_PROLOG_PSERIES(area, label) \
- mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \
- std r9,area+EX_R9(r13); /* save r9 - r12 */ \
- std r10,area+EX_R10(r13); \
- std r11,area+EX_R11(r13); \
- std r12,area+EX_R12(r13); \
- mfspr r9,SPRN_SPRG1; \
- std r9,area+EX_R13(r13); \
- mfcr r9; \
- clrrdi r12,r13,32; /* get high part of &label */ \
- mfmsr r10; \
- mfspr r11,SPRN_SRR0; /* save SRR0 */ \
- LOAD_HANDLER(r12,label) \
- ori r10,r10,MSR_IR|MSR_DR|MSR_RI; \
- mtspr SPRN_SRR0,r12; \
- mfspr r12,SPRN_SRR1; /* and SRR1 */ \
- mtspr SPRN_SRR1,r10; \
- rfid; \
- b . /* prevent speculative execution */
-
-/*
- * This is the start of the interrupt handlers for iSeries
- * This code runs with relocation on.
- */
-#define EXCEPTION_PROLOG_ISERIES_1(area) \
- mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \
- std r9,area+EX_R9(r13); /* save r9 - r12 */ \
- std r10,area+EX_R10(r13); \
- std r11,area+EX_R11(r13); \
- std r12,area+EX_R12(r13); \
- mfspr r9,SPRN_SPRG1; \
- std r9,area+EX_R13(r13); \
- mfcr r9
-
-#define EXCEPTION_PROLOG_ISERIES_2 \
- mfmsr r10; \
- ld r12,PACALPPACAPTR(r13); \
- ld r11,LPPACASRR0(r12); \
- ld r12,LPPACASRR1(r12); \
- ori r10,r10,MSR_RI; \
- mtmsrd r10,1
-
-/*
- * The common exception prolog is used for all except a few exceptions
- * such as a segment miss on a kernel address. We have to be prepared
- * to take another exception from the point where we first touch the
- * kernel stack onwards.
- *
- * On entry r13 points to the paca, r9-r13 are saved in the paca,
- * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
- * SRR1, and relocation is on.
- */
-#define EXCEPTION_PROLOG_COMMON(n, area) \
- andi. r10,r12,MSR_PR; /* See if coming from user */ \
- mr r10,r1; /* Save r1 */ \
- subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \
- beq- 1f; \
- ld r1,PACAKSAVE(r13); /* kernel stack to use */ \
-1: cmpdi cr1,r1,0; /* check if r1 is in userspace */ \
- bge- cr1,bad_stack; /* abort if it is */ \
- std r9,_CCR(r1); /* save CR in stackframe */ \
- std r11,_NIP(r1); /* save SRR0 in stackframe */ \
- std r12,_MSR(r1); /* save SRR1 in stackframe */ \
- std r10,0(r1); /* make stack chain pointer */ \
- std r0,GPR0(r1); /* save r0 in stackframe */ \
- std r10,GPR1(r1); /* save r1 in stackframe */ \
- ACCOUNT_CPU_USER_ENTRY(r9, r10); \
- std r2,GPR2(r1); /* save r2 in stackframe */ \
- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
- ld r9,area+EX_R9(r13); /* move r9, r10 to stackframe */ \
- ld r10,area+EX_R10(r13); \
- std r9,GPR9(r1); \
- std r10,GPR10(r1); \
- ld r9,area+EX_R11(r13); /* move r11 - r13 to stackframe */ \
- ld r10,area+EX_R12(r13); \
- ld r11,area+EX_R13(r13); \
- std r9,GPR11(r1); \
- std r10,GPR12(r1); \
- std r11,GPR13(r1); \
- ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
- mflr r9; /* save LR in stackframe */ \
- std r9,_LINK(r1); \
- mfctr r10; /* save CTR in stackframe */ \
- std r10,_CTR(r1); \
- mfspr r11,SPRN_XER; /* save XER in stackframe */ \
- std r11,_XER(r1); \
- li r9,(n)+1; \
- std r9,_TRAP(r1); /* set trap number */ \
- li r10,0; \
- ld r11,exception_marker@toc(r2); \
- std r10,RESULT(r1); /* clear regs->result */ \
- std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */
-
-/*
- * Exception vectors.
- */
-#define STD_EXCEPTION_PSERIES(n, label) \
- . = n; \
- .globl label##_pSeries; \
-label##_pSeries: \
- HMT_MEDIUM; \
- mtspr SPRN_SPRG1,r13; /* save r13 */ \
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
-
-#define HSTD_EXCEPTION_PSERIES(n, label) \
- . = n; \
- .globl label##_pSeries; \
-label##_pSeries: \
- HMT_MEDIUM; \
- mtspr SPRN_SPRG1,r20; /* save r20 */ \
- mfspr r20,SPRN_HSRR0; /* copy HSRR0 to SRR0 */ \
- mtspr SPRN_SRR0,r20; \
- mfspr r20,SPRN_HSRR1; /* copy HSRR0 to SRR0 */ \
- mtspr SPRN_SRR1,r20; \
- mfspr r20,SPRN_SPRG1; /* restore r20 */ \
- mtspr SPRN_SPRG1,r13; /* save r13 */ \
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
-
-
-#define STD_EXCEPTION_ISERIES(n, label, area) \
- .globl label##_iSeries; \
-label##_iSeries: \
- HMT_MEDIUM; \
- mtspr SPRN_SPRG1,r13; /* save r13 */ \
- EXCEPTION_PROLOG_ISERIES_1(area); \
- EXCEPTION_PROLOG_ISERIES_2; \
- b label##_common
-
-#define MASKABLE_EXCEPTION_ISERIES(n, label) \
- .globl label##_iSeries; \
-label##_iSeries: \
- HMT_MEDIUM; \
- mtspr SPRN_SPRG1,r13; /* save r13 */ \
- EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN); \
- lbz r10,PACAPROCENABLED(r13); \
- cmpwi 0,r10,0; \
- beq- label##_iSeries_masked; \
- EXCEPTION_PROLOG_ISERIES_2; \
- b label##_common; \
-
-#ifdef DO_SOFT_DISABLE
-#define DISABLE_INTS \
- lbz r10,PACAPROCENABLED(r13); \
- li r11,0; \
- std r10,SOFTE(r1); \
- mfmsr r10; \
- stb r11,PACAPROCENABLED(r13); \
- ori r10,r10,MSR_EE; \
- mtmsrd r10,1
-
-#define ENABLE_INTS \
- lbz r10,PACAPROCENABLED(r13); \
- mfmsr r11; \
- std r10,SOFTE(r1); \
- ori r11,r11,MSR_EE; \
- mtmsrd r11,1
-
-#else /* hard enable/disable interrupts */
-#define DISABLE_INTS
-
-#define ENABLE_INTS \
- ld r12,_MSR(r1); \
- mfmsr r11; \
- rlwimi r11,r12,0,MSR_EE; \
- mtmsrd r11,1
-
-#endif
-
-#define STD_EXCEPTION_COMMON(trap, label, hdlr) \
- .align 7; \
- .globl label##_common; \
-label##_common: \
- EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \
- DISABLE_INTS; \
- bl .save_nvgprs; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- bl hdlr; \
- b .ret_from_except
-
-/*
- * Like STD_EXCEPTION_COMMON, but for exceptions that can occur
- * in the idle task and therefore need the special idle handling.
- */
-#define STD_EXCEPTION_COMMON_IDLE(trap, label, hdlr) \
- .align 7; \
- .globl label##_common; \
-label##_common: \
- EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \
- FINISH_NAP; \
- DISABLE_INTS; \
- bl .save_nvgprs; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- bl hdlr; \
- b .ret_from_except
-
-#define STD_EXCEPTION_COMMON_LITE(trap, label, hdlr) \
- .align 7; \
- .globl label##_common; \
-label##_common: \
- EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \
- FINISH_NAP; \
- DISABLE_INTS; \
- bl .ppc64_runlatch_on; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- bl hdlr; \
- b .ret_from_except_lite
-
-/*
- * When the idle code in power4_idle puts the CPU into NAP mode,
- * it has to do so in a loop, and relies on the external interrupt
- * and decrementer interrupt entry code to get it out of the loop.
- * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags
- * to signal that it is in the loop and needs help to get out.
- */
-#ifdef CONFIG_PPC_970_NAP
-#define FINISH_NAP \
-BEGIN_FTR_SECTION \
- clrrdi r11,r1,THREAD_SHIFT; \
- ld r9,TI_LOCAL_FLAGS(r11); \
- andi. r10,r9,_TLF_NAPPING; \
- bnel power4_fixup_nap; \
-END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
-#else
-#define FINISH_NAP
-#endif
-
-/*
- * Start of pSeries system interrupt routines
- */
- . = 0x100
- .globl __start_interrupts
-__start_interrupts:
-
- STD_EXCEPTION_PSERIES(0x100, system_reset)
-
- . = 0x200
-_machine_check_pSeries:
- HMT_MEDIUM
- mtspr SPRN_SPRG1,r13 /* save r13 */
- EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
-
- . = 0x300
- .globl data_access_pSeries
-data_access_pSeries:
- HMT_MEDIUM
- mtspr SPRN_SPRG1,r13
-BEGIN_FTR_SECTION
- mtspr SPRN_SPRG2,r12
- mfspr r13,SPRN_DAR
- mfspr r12,SPRN_DSISR
- srdi r13,r13,60
- rlwimi r13,r12,16,0x20
- mfcr r12
- cmpwi r13,0x2c
- beq .do_stab_bolted_pSeries
- mtcrf 0x80,r12
- mfspr r12,SPRN_SPRG2
-END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common)
-
- . = 0x380
- .globl data_access_slb_pSeries
-data_access_slb_pSeries:
- HMT_MEDIUM
- mtspr SPRN_SPRG1,r13
- mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r3,SPRN_DAR
- std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
- mfcr r9
-#ifdef __DISABLED__
- /* Keep that around for when we re-implement dynamic VSIDs */
- cmpdi r3,0
- bge slb_miss_user_pseries
-#endif /* __DISABLED__ */
- std r10,PACA_EXSLB+EX_R10(r13)
- std r11,PACA_EXSLB+EX_R11(r13)
- std r12,PACA_EXSLB+EX_R12(r13)
- mfspr r10,SPRN_SPRG1
- std r10,PACA_EXSLB+EX_R13(r13)
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- b .slb_miss_realmode /* Rel. branch works in real mode */
-
- STD_EXCEPTION_PSERIES(0x400, instruction_access)
-
- . = 0x480
- .globl instruction_access_slb_pSeries
-instruction_access_slb_pSeries:
- HMT_MEDIUM
- mtspr SPRN_SPRG1,r13
- mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */
- mfcr r9
-#ifdef __DISABLED__
- /* Keep that around for when we re-implement dynamic VSIDs */
- cmpdi r3,0
- bge slb_miss_user_pseries
-#endif /* __DISABLED__ */
- std r10,PACA_EXSLB+EX_R10(r13)
- std r11,PACA_EXSLB+EX_R11(r13)
- std r12,PACA_EXSLB+EX_R12(r13)
- mfspr r10,SPRN_SPRG1
- std r10,PACA_EXSLB+EX_R13(r13)
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- b .slb_miss_realmode /* Rel. branch works in real mode */
-
- STD_EXCEPTION_PSERIES(0x500, hardware_interrupt)
- STD_EXCEPTION_PSERIES(0x600, alignment)
- STD_EXCEPTION_PSERIES(0x700, program_check)
- STD_EXCEPTION_PSERIES(0x800, fp_unavailable)
- STD_EXCEPTION_PSERIES(0x900, decrementer)
- STD_EXCEPTION_PSERIES(0xa00, trap_0a)
- STD_EXCEPTION_PSERIES(0xb00, trap_0b)
-
- . = 0xc00
- .globl system_call_pSeries
-system_call_pSeries:
- HMT_MEDIUM
- mr r9,r13
- mfmsr r10
- mfspr r13,SPRN_SPRG3
- mfspr r11,SPRN_SRR0
- clrrdi r12,r13,32
- oris r12,r12,system_call_common@h
- ori r12,r12,system_call_common@l
- mtspr SPRN_SRR0,r12
- ori r10,r10,MSR_IR|MSR_DR|MSR_RI
- mfspr r12,SPRN_SRR1
- mtspr SPRN_SRR1,r10
- rfid
- b . /* prevent speculative execution */
-
- STD_EXCEPTION_PSERIES(0xd00, single_step)
- STD_EXCEPTION_PSERIES(0xe00, trap_0e)
-
- /* We need to deal with the Altivec unavailable exception
- * here which is at 0xf20, thus in the middle of the
- * prolog code of the PerformanceMonitor one. A little
- * trickery is thus necessary
- */
- . = 0xf00
- b performance_monitor_pSeries
-
- STD_EXCEPTION_PSERIES(0xf20, altivec_unavailable)
-
-#ifdef CONFIG_CBE_RAS
- HSTD_EXCEPTION_PSERIES(0x1200, cbe_system_error)
-#endif /* CONFIG_CBE_RAS */
- STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint)
-#ifdef CONFIG_CBE_RAS
- HSTD_EXCEPTION_PSERIES(0x1600, cbe_maintenance)
-#endif /* CONFIG_CBE_RAS */
- STD_EXCEPTION_PSERIES(0x1700, altivec_assist)
-#ifdef CONFIG_CBE_RAS
- HSTD_EXCEPTION_PSERIES(0x1800, cbe_thermal)
-#endif /* CONFIG_CBE_RAS */
-
- . = 0x3000
-
-/*** pSeries interrupt support ***/
-
- /* moved from 0xf00 */
- STD_EXCEPTION_PSERIES(., performance_monitor)
-
- .align 7
-_GLOBAL(do_stab_bolted_pSeries)
- mtcrf 0x80,r12
- mfspr r12,SPRN_SPRG2
- EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
-
-/*
- * We have some room here we use that to put
- * the peries slb miss user trampoline code so it's reasonably
- * away from slb_miss_user_common to avoid problems with rfid
- *
- * This is used for when the SLB miss handler has to go virtual,
- * which doesn't happen for now anymore but will once we re-implement
- * dynamic VSIDs for shared page tables
- */
-#ifdef __DISABLED__
-slb_miss_user_pseries:
- std r10,PACA_EXGEN+EX_R10(r13)
- std r11,PACA_EXGEN+EX_R11(r13)
- std r12,PACA_EXGEN+EX_R12(r13)
- mfspr r10,SPRG1
- ld r11,PACA_EXSLB+EX_R9(r13)
- ld r12,PACA_EXSLB+EX_R3(r13)
- std r10,PACA_EXGEN+EX_R13(r13)
- std r11,PACA_EXGEN+EX_R9(r13)
- std r12,PACA_EXGEN+EX_R3(r13)
- clrrdi r12,r13,32
- mfmsr r10
- mfspr r11,SRR0 /* save SRR0 */
- ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
- ori r10,r10,MSR_IR|MSR_DR|MSR_RI
- mtspr SRR0,r12
- mfspr r12,SRR1 /* and SRR1 */
- mtspr SRR1,r10
- rfid
- b . /* prevent spec. execution */
-#endif /* __DISABLED__ */
-
-/*
- * Vectors for the FWNMI option. Share common code.
- */
- .globl system_reset_fwnmi
- .align 7
-system_reset_fwnmi:
- HMT_MEDIUM
- mtspr SPRN_SPRG1,r13 /* save r13 */
- EXCEPTION_PROLOG_PSERIES_FORCE_64BIT(PACA_EXGEN, system_reset_common)
-
- .globl machine_check_fwnmi
- .align 7
-machine_check_fwnmi:
- HMT_MEDIUM
- mtspr SPRN_SPRG1,r13 /* save r13 */
- EXCEPTION_PROLOG_PSERIES_FORCE_64BIT(PACA_EXMC, machine_check_common)
-
-#ifdef CONFIG_PPC_ISERIES
-/*** ISeries-LPAR interrupt handlers ***/
-
- STD_EXCEPTION_ISERIES(0x200, machine_check, PACA_EXMC)
-
- .globl data_access_iSeries
-data_access_iSeries:
- mtspr SPRN_SPRG1,r13
-BEGIN_FTR_SECTION
- mtspr SPRN_SPRG2,r12
- mfspr r13,SPRN_DAR
- mfspr r12,SPRN_DSISR
- srdi r13,r13,60
- rlwimi r13,r12,16,0x20
- mfcr r12
- cmpwi r13,0x2c
- beq .do_stab_bolted_iSeries
- mtcrf 0x80,r12
- mfspr r12,SPRN_SPRG2
-END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
- EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN)
- EXCEPTION_PROLOG_ISERIES_2
- b data_access_common
-
-.do_stab_bolted_iSeries:
- mtcrf 0x80,r12
- mfspr r12,SPRN_SPRG2
- EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
- EXCEPTION_PROLOG_ISERIES_2
- b .do_stab_bolted
-
- .globl data_access_slb_iSeries
-data_access_slb_iSeries:
- mtspr SPRN_SPRG1,r13 /* save r13 */
- mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r3,SPRN_DAR
- std r9,PACA_EXSLB+EX_R9(r13)
- mfcr r9
-#ifdef __DISABLED__
- cmpdi r3,0
- bge slb_miss_user_iseries
-#endif
- std r10,PACA_EXSLB+EX_R10(r13)
- std r11,PACA_EXSLB+EX_R11(r13)
- std r12,PACA_EXSLB+EX_R12(r13)
- mfspr r10,SPRN_SPRG1
- std r10,PACA_EXSLB+EX_R13(r13)
- ld r12,PACALPPACAPTR(r13)
- ld r12,LPPACASRR1(r12)
- b .slb_miss_realmode
-
- STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN)
-
- .globl instruction_access_slb_iSeries
-instruction_access_slb_iSeries:
- mtspr SPRN_SPRG1,r13 /* save r13 */
- mfspr r13,SPRN_SPRG3 /* get paca address into r13 */
- std r3,PACA_EXSLB+EX_R3(r13)
- ld r3,PACALPPACAPTR(r13)
- ld r3,LPPACASRR0(r3) /* get SRR0 value */
- std r9,PACA_EXSLB+EX_R9(r13)
- mfcr r9
-#ifdef __DISABLED__
- cmpdi r3,0
- bge .slb_miss_user_iseries
-#endif
- std r10,PACA_EXSLB+EX_R10(r13)
- std r11,PACA_EXSLB+EX_R11(r13)
- std r12,PACA_EXSLB+EX_R12(r13)
- mfspr r10,SPRN_SPRG1
- std r10,PACA_EXSLB+EX_R13(r13)
- ld r12,PACALPPACAPTR(r13)
- ld r12,LPPACASRR1(r12)
- b .slb_miss_realmode
-
-#ifdef __DISABLED__
-slb_miss_user_iseries:
- std r10,PACA_EXGEN+EX_R10(r13)
- std r11,PACA_EXGEN+EX_R11(r13)
- std r12,PACA_EXGEN+EX_R12(r13)
- mfspr r10,SPRG1
- ld r11,PACA_EXSLB+EX_R9(r13)
- ld r12,PACA_EXSLB+EX_R3(r13)
- std r10,PACA_EXGEN+EX_R13(r13)
- std r11,PACA_EXGEN+EX_R9(r13)
- std r12,PACA_EXGEN+EX_R3(r13)
- EXCEPTION_PROLOG_ISERIES_2
- b slb_miss_user_common
-#endif
-
- MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt)
- STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN)
- STD_EXCEPTION_ISERIES(0x700, program_check, PACA_EXGEN)
- STD_EXCEPTION_ISERIES(0x800, fp_unavailable, PACA_EXGEN)
- MASKABLE_EXCEPTION_ISERIES(0x900, decrementer)
- STD_EXCEPTION_ISERIES(0xa00, trap_0a, PACA_EXGEN)
- STD_EXCEPTION_ISERIES(0xb00, trap_0b, PACA_EXGEN)
-
- .globl system_call_iSeries
-system_call_iSeries:
- mr r9,r13
- mfspr r13,SPRN_SPRG3
- EXCEPTION_PROLOG_ISERIES_2
- b system_call_common
-
- STD_EXCEPTION_ISERIES( 0xd00, single_step, PACA_EXGEN)
- STD_EXCEPTION_ISERIES( 0xe00, trap_0e, PACA_EXGEN)
- STD_EXCEPTION_ISERIES( 0xf00, performance_monitor, PACA_EXGEN)
-
- .globl system_reset_iSeries
-system_reset_iSeries:
- mfspr r13,SPRN_SPRG3 /* Get paca address */
- mfmsr r24
- ori r24,r24,MSR_RI
- mtmsrd r24 /* RI on */
- lhz r24,PACAPACAINDEX(r13) /* Get processor # */
- cmpwi 0,r24,0 /* Are we processor 0? */
- beq .__start_initialization_iSeries /* Start up the first processor */
- mfspr r4,SPRN_CTRLF
- li r5,CTRL_RUNLATCH /* Turn off the run light */
- andc r4,r4,r5
- mtspr SPRN_CTRLT,r4
-
-1:
- HMT_LOW
-#ifdef CONFIG_SMP
- lbz r23,PACAPROCSTART(r13) /* Test if this processor
- * should start */
- sync
- LOAD_REG_IMMEDIATE(r3,current_set)
- sldi r28,r24,3 /* get current_set[cpu#] */
- ldx r3,r3,r28
- addi r1,r3,THREAD_SIZE
- subi r1,r1,STACK_FRAME_OVERHEAD
-
- cmpwi 0,r23,0
- beq iSeries_secondary_smp_loop /* Loop until told to go */
- bne .__secondary_start /* Loop until told to go */
-iSeries_secondary_smp_loop:
- /* Let the Hypervisor know we are alive */
- /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
- lis r3,0x8002
- rldicr r3,r3,32,15 /* r0 = (r3 << 32) & 0xffff000000000000 */
-#else /* CONFIG_SMP */
- /* Yield the processor. This is required for non-SMP kernels
- which are running on multi-threaded machines. */
- lis r3,0x8000
- rldicr r3,r3,32,15 /* r3 = (r3 << 32) & 0xffff000000000000 */
- addi r3,r3,18 /* r3 = 0x8000000000000012 which is "yield" */
- li r4,0 /* "yield timed" */
- li r5,-1 /* "yield forever" */
-#endif /* CONFIG_SMP */
- li r0,-1 /* r0=-1 indicates a Hypervisor call */
- sc /* Invoke the hypervisor via a system call */
- mfspr r13,SPRN_SPRG3 /* Put r13 back ???? */
- b 1b /* If SMP not configured, secondaries
- * loop forever */
-
- .globl decrementer_iSeries_masked
-decrementer_iSeries_masked:
- /* We may not have a valid TOC pointer in here. */
- li r11,1
- ld r12,PACALPPACAPTR(r13)
- stb r11,LPPACADECRINT(r12)
- LOAD_REG_IMMEDIATE(r12, tb_ticks_per_jiffy)
- lwz r12,0(r12)
- mtspr SPRN_DEC,r12
- /* fall through */
-
- .globl hardware_interrupt_iSeries_masked
-hardware_interrupt_iSeries_masked:
- mtcrf 0x80,r9 /* Restore regs */
- ld r12,PACALPPACAPTR(r13)
- ld r11,LPPACASRR0(r12)
- ld r12,LPPACASRR1(r12)
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r12
- ld r9,PACA_EXGEN+EX_R9(r13)
- ld r10,PACA_EXGEN+EX_R10(r13)
- ld r11,PACA_EXGEN+EX_R11(r13)
- ld r12,PACA_EXGEN+EX_R12(r13)
- ld r13,PACA_EXGEN+EX_R13(r13)
- rfid
- b . /* prevent speculative execution */
-#endif /* CONFIG_PPC_ISERIES */
-
-/*** Common interrupt handlers ***/
-
- STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception)
-
- /*
- * Machine check is different because we use a different
- * save area: PACA_EXMC instead of PACA_EXGEN.
- */
- .align 7
- .globl machine_check_common
-machine_check_common:
- EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
- FINISH_NAP
- DISABLE_INTS
- bl .save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl .machine_check_exception
- b .ret_from_except
-
- STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt)
- STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception)
- STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
- STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
- STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
- STD_EXCEPTION_COMMON_IDLE(0xf00, performance_monitor, .performance_monitor_exception)
- STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
-#ifdef CONFIG_ALTIVEC
- STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception)
-#else
- STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception)
-#endif
-#ifdef CONFIG_CBE_RAS
- STD_EXCEPTION_COMMON(0x1200, cbe_system_error, .cbe_system_error_exception)
- STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, .cbe_maintenance_exception)
- STD_EXCEPTION_COMMON(0x1800, cbe_thermal, .cbe_thermal_exception)
-#endif /* CONFIG_CBE_RAS */
-
-/*
- * Here we have detected that the kernel stack pointer is bad.
- * R9 contains the saved CR, r13 points to the paca,
- * r10 contains the (bad) kernel stack pointer,
- * r11 and r12 contain the saved SRR0 and SRR1.
- * We switch to using an emergency stack, save the registers there,
- * and call kernel_bad_stack(), which panics.
+ * On server, we include the exception vectors code here as it
+ * relies on absolute addressing which is only possible within
+ * this compilation unit
*/
-bad_stack:
- ld r1,PACAEMERGSP(r13)
- subi r1,r1,64+INT_FRAME_SIZE
- std r9,_CCR(r1)
- std r10,GPR1(r1)
- std r11,_NIP(r1)
- std r12,_MSR(r1)
- mfspr r11,SPRN_DAR
- mfspr r12,SPRN_DSISR
- std r11,_DAR(r1)
- std r12,_DSISR(r1)
- mflr r10
- mfctr r11
- mfxer r12
- std r10,_LINK(r1)
- std r11,_CTR(r1)
- std r12,_XER(r1)
- SAVE_GPR(0,r1)
- SAVE_GPR(2,r1)
- SAVE_4GPRS(3,r1)
- SAVE_2GPRS(7,r1)
- SAVE_10GPRS(12,r1)
- SAVE_10GPRS(22,r1)
- addi r11,r1,INT_FRAME_SIZE
- std r11,0(r1)
- li r12,0
- std r12,0(r11)
- ld r2,PACATOC(r13)
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl .kernel_bad_stack
- b 1b
-
-/*
- * Return from an exception with minimal checks.
- * The caller is assumed to have done EXCEPTION_PROLOG_COMMON.
- * If interrupts have been enabled, or anything has been
- * done that might have changed the scheduling status of
- * any task or sent any task a signal, you should use
- * ret_from_except or ret_from_except_lite instead of this.
- */
- .globl fast_exception_return
-fast_exception_return:
- ld r12,_MSR(r1)
- ld r11,_NIP(r1)
- andi. r3,r12,MSR_RI /* check if RI is set */
- beq- unrecov_fer
-
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
- andi. r3,r12,MSR_PR
- beq 2f
- ACCOUNT_CPU_USER_EXIT(r3, r4)
-2:
+#ifdef CONFIG_PPC_BOOK3S
+#include "exceptions-64s.S"
#endif
- ld r3,_CCR(r1)
- ld r4,_LINK(r1)
- ld r5,_CTR(r1)
- ld r6,_XER(r1)
- mtcr r3
- mtlr r4
- mtctr r5
- mtxer r6
- REST_GPR(0, r1)
- REST_8GPRS(2, r1)
-
- mfmsr r10
- clrrdi r10,r10,2 /* clear RI (LE is 0 already) */
- mtmsrd r10,1
-
- mtspr SPRN_SRR1,r12
- mtspr SPRN_SRR0,r11
- REST_4GPRS(10, r1)
- ld r1,GPR1(r1)
- rfid
- b . /* prevent speculative execution */
-
-unrecov_fer:
- bl .save_nvgprs
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl .unrecoverable_exception
- b 1b
-
-/*
- * Here r13 points to the paca, r9 contains the saved CR,
- * SRR0 and SRR1 are saved in r11 and r12,
- * r9 - r13 are saved in paca->exgen.
- */
- .align 7
- .globl data_access_common
-data_access_common:
- mfspr r10,SPRN_DAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_DSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- li r5,0x300
- b .do_hash_page /* Try to handle as hpte fault */
-
- .align 7
- .globl instruction_access_common
-instruction_access_common:
- EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
- ld r3,_NIP(r1)
- andis. r4,r12,0x5820
- li r5,0x400
- b .do_hash_page /* Try to handle as hpte fault */
-
-/*
- * Here is the common SLB miss user that is used when going to virtual
- * mode for SLB misses, that is currently not used
- */
-#ifdef __DISABLED__
- .align 7
- .globl slb_miss_user_common
-slb_miss_user_common:
- mflr r10
- std r3,PACA_EXGEN+EX_DAR(r13)
- stw r9,PACA_EXGEN+EX_CCR(r13)
- std r10,PACA_EXGEN+EX_LR(r13)
- std r11,PACA_EXGEN+EX_SRR0(r13)
- bl .slb_allocate_user
-
- ld r10,PACA_EXGEN+EX_LR(r13)
- ld r3,PACA_EXGEN+EX_R3(r13)
- lwz r9,PACA_EXGEN+EX_CCR(r13)
- ld r11,PACA_EXGEN+EX_SRR0(r13)
- mtlr r10
- beq- slb_miss_fault
-
- andi. r10,r12,MSR_RI /* check for unrecoverable exception */
- beq- unrecov_user_slb
- mfmsr r10
-
-.machine push
-.machine "power4"
- mtcrf 0x80,r9
-.machine pop
-
- clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */
- mtmsrd r10,1
-
- mtspr SRR0,r11
- mtspr SRR1,r12
-
- ld r9,PACA_EXGEN+EX_R9(r13)
- ld r10,PACA_EXGEN+EX_R10(r13)
- ld r11,PACA_EXGEN+EX_R11(r13)
- ld r12,PACA_EXGEN+EX_R12(r13)
- ld r13,PACA_EXGEN+EX_R13(r13)
- rfid
- b .
-
-slb_miss_fault:
- EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
- ld r4,PACA_EXGEN+EX_DAR(r13)
- li r5,0
- std r4,_DAR(r1)
- std r5,_DSISR(r1)
- b .handle_page_fault
-
-unrecov_user_slb:
- EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
- DISABLE_INTS
- bl .save_nvgprs
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl .unrecoverable_exception
- b 1b
-
-#endif /* __DISABLED__ */
-
-
-/*
- * r13 points to the PACA, r9 contains the saved CR,
- * r12 contain the saved SRR1, SRR0 is still ready for return
- * r3 has the faulting address
- * r9 - r13 are saved in paca->exslb.
- * r3 is saved in paca->slb_r3
- * We assume we aren't going to take any exceptions during this procedure.
- */
-_GLOBAL(slb_miss_realmode)
- mflr r10
-
- stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
- std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
-
- bl .slb_allocate_realmode
-
- /* All done -- return from exception. */
-
- ld r10,PACA_EXSLB+EX_LR(r13)
- ld r3,PACA_EXSLB+EX_R3(r13)
- lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
-#ifdef CONFIG_PPC_ISERIES
- ld r11,PACALPPACAPTR(r13)
- ld r11,LPPACASRR0(r11) /* get SRR0 value */
-#endif /* CONFIG_PPC_ISERIES */
-
- mtlr r10
-
- andi. r10,r12,MSR_RI /* check for unrecoverable exception */
- beq- unrecov_slb
-
-.machine push
-.machine "power4"
- mtcrf 0x80,r9
- mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-.machine pop
-
-#ifdef CONFIG_PPC_ISERIES
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r12
-#endif /* CONFIG_PPC_ISERIES */
- ld r9,PACA_EXSLB+EX_R9(r13)
- ld r10,PACA_EXSLB+EX_R10(r13)
- ld r11,PACA_EXSLB+EX_R11(r13)
- ld r12,PACA_EXSLB+EX_R12(r13)
- ld r13,PACA_EXSLB+EX_R13(r13)
- rfid
- b . /* prevent speculative execution */
-
-unrecov_slb:
- EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
- DISABLE_INTS
- bl .save_nvgprs
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl .unrecoverable_exception
- b 1b
-
- .align 7
- .globl hardware_interrupt_common
- .globl hardware_interrupt_entry
-hardware_interrupt_common:
- EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN)
- FINISH_NAP
-hardware_interrupt_entry:
- DISABLE_INTS
- bl .ppc64_runlatch_on
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl .do_IRQ
- b .ret_from_except_lite
-
-#ifdef CONFIG_PPC_970_NAP
-power4_fixup_nap:
- andc r9,r9,r10
- std r9,TI_LOCAL_FLAGS(r11)
- ld r10,_LINK(r1) /* make idle task do the */
- std r10,_NIP(r1) /* equivalent of a blr */
- blr
-#endif
-
- .align 7
- .globl alignment_common
-alignment_common:
- mfspr r10,SPRN_DAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_DSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
- bl .save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- ENABLE_INTS
- bl .alignment_exception
- b .ret_from_except
-
- .align 7
- .globl program_check_common
-program_check_common:
- EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
- bl .save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- ENABLE_INTS
- bl .program_check_exception
- b .ret_from_except
-
- .align 7
- .globl fp_unavailable_common
-fp_unavailable_common:
- EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
- bne .load_up_fpu /* if from user, just load it up */
- bl .save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- ENABLE_INTS
- bl .kernel_fp_unavailable_exception
- BUG_OPCODE
-
- .align 7
- .globl altivec_unavailable_common
-altivec_unavailable_common:
- EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
-#ifdef CONFIG_ALTIVEC
-BEGIN_FTR_SECTION
- bne .load_up_altivec /* if from user, just load it up */
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-#endif
- bl .save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- ENABLE_INTS
- bl .altivec_unavailable_exception
- b .ret_from_except
-
-#ifdef CONFIG_ALTIVEC
-/*
- * load_up_altivec(unused, unused, tsk)
- * Disable VMX for the task which had it previously,
- * and save its vector registers in its thread_struct.
- * Enables the VMX for use in the kernel on return.
- * On SMP we know the VMX is free, since we give it up every
- * switch (ie, no lazy save of the vector registers).
- * On entry: r13 == 'current' && last_task_used_altivec != 'current'
- */
-_STATIC(load_up_altivec)
- mfmsr r5 /* grab the current MSR */
- oris r5,r5,MSR_VEC@h
- mtmsrd r5 /* enable use of VMX now */
- isync
-
-/*
- * For SMP, we don't do lazy VMX switching because it just gets too
- * horrendously complex, especially when a task switches from one CPU
- * to another. Instead we call giveup_altvec in switch_to.
- * VRSAVE isn't dealt with here, that is done in the normal context
- * switch code. Note that we could rely on vrsave value to eventually
- * avoid saving all of the VREGs here...
- */
-#ifndef CONFIG_SMP
- ld r3,last_task_used_altivec@got(r2)
- ld r4,0(r3)
- cmpdi 0,r4,0
- beq 1f
- /* Save VMX state to last_task_used_altivec's THREAD struct */
- addi r4,r4,THREAD
- SAVE_32VRS(0,r5,r4)
- mfvscr vr0
- li r10,THREAD_VSCR
- stvx vr0,r10,r4
- /* Disable VMX for last_task_used_altivec */
- ld r5,PT_REGS(r4)
- ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
- lis r6,MSR_VEC@h
- andc r4,r4,r6
- std r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#endif /* CONFIG_SMP */
- /* Hack: if we get an altivec unavailable trap with VRSAVE
- * set to all zeros, we assume this is a broken application
- * that fails to set it properly, and thus we switch it to
- * all 1's
- */
- mfspr r4,SPRN_VRSAVE
- cmpdi 0,r4,0
- bne+ 1f
- li r4,-1
- mtspr SPRN_VRSAVE,r4
-1:
- /* enable use of VMX after return */
- ld r4,PACACURRENT(r13)
- addi r5,r4,THREAD /* Get THREAD */
- oris r12,r12,MSR_VEC@h
- std r12,_MSR(r1)
- li r4,1
- li r10,THREAD_VSCR
- stw r4,THREAD_USED_VR(r5)
- lvx vr0,r10,r5
- mtvscr vr0
- REST_32VRS(0,r4,r5)
-#ifndef CONFIG_SMP
- /* Update last_task_used_math to 'current' */
- subi r4,r5,THREAD /* Back to 'current' */
- std r4,0(r3)
-#endif /* CONFIG_SMP */
- /* restore registers and return */
- b fast_exception_return
-#endif /* CONFIG_ALTIVEC */
-
-/*
- * Hash table stuff
- */
- .align 7
-_GLOBAL(do_hash_page)
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
-
- andis. r0,r4,0xa450 /* weird error? */
- bne- .handle_page_fault /* if not, try to insert a HPTE */
-BEGIN_FTR_SECTION
- andis. r0,r4,0x0020 /* Is it a segment table fault? */
- bne- .do_ste_alloc /* If so handle it */
-END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
-
- /*
- * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
- * accessing a userspace segment (even from the kernel). We assume
- * kernel addresses always have the high bit set.
- */
- rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */
- rotldi r0,r3,15 /* Move high bit into MSR_PR posn */
- orc r0,r12,r0 /* MSR_PR | ~high_bit */
- rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */
- ori r4,r4,1 /* add _PAGE_PRESENT */
- rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */
-
- /*
- * On iSeries, we soft-disable interrupts here, then
- * hard-enable interrupts so that the hash_page code can spin on
- * the hash_table_lock without problems on a shared processor.
- */
- DISABLE_INTS
-
- /*
- * r3 contains the faulting address
- * r4 contains the required access permissions
- * r5 contains the trap number
- *
- * at return r3 = 0 for success
- */
- bl .hash_page /* build HPTE if possible */
- cmpdi r3,0 /* see if hash_page succeeded */
+_GLOBAL(generic_secondary_thread_init)
+ mr r24,r3
-#ifdef DO_SOFT_DISABLE
- /*
- * If we had interrupts soft-enabled at the point where the
- * DSI/ISI occurred, and an interrupt came in during hash_page,
- * handle it now.
- * We jump to ret_from_except_lite rather than fast_exception_return
- * because ret_from_except_lite will check for and handle pending
- * interrupts if necessary.
- */
- beq .ret_from_except_lite
- /* For a hash failure, we don't bother re-enabling interrupts */
- ble- 12f
+ /* turn on 64-bit mode */
+ bl enable_64b_mode
- /*
- * hash_page couldn't handle it, set soft interrupt enable back
- * to what it was before the trap. Note that .local_irq_restore
- * handles any interrupts pending at this point.
- */
- ld r3,SOFTE(r1)
- bl .local_irq_restore
- b 11f
-#else
- beq fast_exception_return /* Return from exception on success */
- ble- 12f /* Failure return from hash_page */
+ /* get a valid TOC pointer, wherever we're mapped at */
+ bl relative_toc
+ tovirt(r2,r2)
- /* fall through */
+#ifdef CONFIG_PPC_BOOK3E
+ /* Book3E initialization */
+ mr r3,r24
+ bl book3e_secondary_thread_init
#endif
-
-/* Here we have a page fault that hash_page can't handle. */
-_GLOBAL(handle_page_fault)
- ENABLE_INTS
-11: ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl .do_page_fault
- cmpdi r3,0
- beq+ .ret_from_except_lite
- bl .save_nvgprs
- mr r5,r3
- addi r3,r1,STACK_FRAME_OVERHEAD
- lwz r4,_DAR(r1)
- bl .bad_page_fault
- b .ret_from_except
-
-/* We have a page fault that hash_page could handle but HV refused
- * the PTE insertion
- */
-12: bl .save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- lwz r4,_DAR(r1)
- bl .low_hash_fault
- b .ret_from_except
-
- /* here we have a segment miss */
-_GLOBAL(do_ste_alloc)
- bl .ste_allocate /* try to insert stab entry */
- cmpdi r3,0
- beq+ fast_exception_return
- b .handle_page_fault
-
-/*
- * r13 points to the PACA, r9 contains the saved CR,
- * r11 and r12 contain the saved SRR0 and SRR1.
- * r9 - r13 are saved in paca->exslb.
- * We assume we aren't going to take any exceptions during this procedure.
- * We assume (DAR >> 60) == 0xc.
- */
- .align 7
-_GLOBAL(do_stab_bolted)
- stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
- std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */
-
- /* Hash to the primary group */
- ld r10,PACASTABVIRT(r13)
- mfspr r11,SPRN_DAR
- srdi r11,r11,28
- rldimi r10,r11,7,52 /* r10 = first ste of the group */
-
- /* Calculate VSID */
- /* This is a kernel address, so protovsid = ESID */
- ASM_VSID_SCRAMBLE(r11, r9)
- rldic r9,r11,12,16 /* r9 = vsid << 12 */
-
- /* Search the primary group for a free entry */
-1: ld r11,0(r10) /* Test valid bit of the current ste */
- andi. r11,r11,0x80
- beq 2f
- addi r10,r10,16
- andi. r11,r10,0x70
- bne 1b
-
- /* Stick for only searching the primary group for now. */
- /* At least for now, we use a very simple random castout scheme */
- /* Use the TB as a random number ; OR in 1 to avoid entry 0 */
- mftb r11
- rldic r11,r11,4,57 /* r11 = (r11 << 4) & 0x70 */
- ori r11,r11,0x10
-
- /* r10 currently points to an ste one past the group of interest */
- /* make it point to the randomly selected entry */
- subi r10,r10,128
- or r10,r10,r11 /* r10 is the entry to invalidate */
-
- isync /* mark the entry invalid */
- ld r11,0(r10)
- rldicl r11,r11,56,1 /* clear the valid bit */
- rotldi r11,r11,8
- std r11,0(r10)
- sync
-
- clrrdi r11,r11,28 /* Get the esid part of the ste */
- slbie r11
-
-2: std r9,8(r10) /* Store the vsid part of the ste */
- eieio
-
- mfspr r11,SPRN_DAR /* Get the new esid */
- clrrdi r11,r11,28 /* Permits a full 32b of ESID */
- ori r11,r11,0x90 /* Turn on valid and kp */
- std r11,0(r10) /* Put new entry back into the stab */
-
- sync
-
- /* All done -- return from exception. */
- lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
- ld r11,PACA_EXSLB+EX_SRR0(r13) /* get saved SRR0 */
-
- andi. r10,r12,MSR_RI
- beq- unrecov_slb
-
- mtcrf 0x80,r9 /* restore CR */
-
- mfmsr r10
- clrrdi r10,r10,2
- mtmsrd r10,1
-
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r12
- ld r9,PACA_EXSLB+EX_R9(r13)
- ld r10,PACA_EXSLB+EX_R10(r13)
- ld r11,PACA_EXSLB+EX_R11(r13)
- ld r12,PACA_EXSLB+EX_R12(r13)
- ld r13,PACA_EXSLB+EX_R13(r13)
- rfid
- b . /* prevent speculative execution */
-
-/*
- * Space for CPU0's segment table.
- *
- * On iSeries, the hypervisor must fill in at least one entry before
- * we get control (with relocate on). The address is give to the hv
- * as a page number (see xLparMap in lpardata.c), so this must be at a
- * fixed address (the linker can't compute (u64)&initial_stab >>
- * PAGE_SHIFT).
- */
- . = STAB0_OFFSET /* 0x6000 */
- .globl initial_stab
-initial_stab:
- .space 4096
-
-/*
- * Data area reserved for FWNMI option.
- * This address (0x7000) is fixed by the RPA.
- */
- .= 0x7000
- .globl fwnmi_data_area
-fwnmi_data_area:
-
- /* iSeries does not use the FWNMI stuff, so it is safe to put
- * this here, even if we later allow kernels that will boot on
- * both pSeries and iSeries */
-#ifdef CONFIG_PPC_ISERIES
- . = LPARMAP_PHYS
-#include "lparmap.s"
-/*
- * This ".text" is here for old compilers that generate a trailing
- * .note section when compiling .c files to .s
- */
- .text
-#endif /* CONFIG_PPC_ISERIES */
-
- . = 0x8000
+ b generic_secondary_common_init
/*
* On pSeries and most other platforms, secondary processors spin
* in the following code.
* At entry, r3 = this processor's number (physical cpu id)
+ *
+ * On Book3E, r4 = 1 to indicate that the initial TLB entry for
+ * this core already exists (setup via some other mechanism such
+ * as SCOM before entry).
*/
_GLOBAL(generic_secondary_smp_init)
+ FIXUP_ENDIAN
mr r24,r3
-
+ mr r25,r4
+
/* turn on 64-bit mode */
- bl .enable_64b_mode
- isync
+ bl enable_64b_mode
+
+ /* get a valid TOC pointer, wherever we're mapped at */
+ bl relative_toc
+ tovirt(r2,r2)
+
+#ifdef CONFIG_PPC_BOOK3E
+ /* Book3E initialization */
+ mr r3,r24
+ mr r4,r25
+ bl book3e_secondary_core_init
+#endif
+generic_secondary_common_init:
/* Set up a paca value for this processor. Since we have the
* physical cpu id in r24, we need to search the pacas to find
* which logical id maps to our physical one.
*/
- LOAD_REG_IMMEDIATE(r13, paca) /* Get base vaddr of paca array */
+ LOAD_REG_ADDR(r13, paca) /* Load paca pointer */
+ ld r13,0(r13) /* Get base vaddr of paca array */
+#ifndef CONFIG_SMP
+ addi r13,r13,PACA_SIZE /* know r13 if used accidentally */
+ b kexec_wait /* wait for next kernel if !SMP */
+#else
+ LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */
+ lwz r7,0(r7) /* also the max paca allocated */
li r5,0 /* logical cpu id */
1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */
cmpw r6,r24 /* Compare to our id */
beq 2f
addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */
addi r5,r5,1
- cmpwi r5,NR_CPUS
+ cmpw r5,r7 /* Check if more pacas exist */
blt 1b
mr r3,r24 /* not found, copy phys to r3 */
- b .kexec_wait /* next kernel might do better */
+ b kexec_wait /* next kernel might do better */
+
+2: SET_PACA(r13)
+#ifdef CONFIG_PPC_BOOK3E
+ addi r12,r13,PACA_EXTLB /* and TLB exc frame in another */
+ mtspr SPRN_SPRG_TLB_EXFRAME,r12
+#endif
-2: mtspr SPRN_SPRG3,r13 /* Save vaddr of paca in SPRG3 */
/* From now on, r24 is expected to be logical cpuid */
mr r24,r5
-3: HMT_LOW
- lbz r23,PACAPROCSTART(r13) /* Test if this processor should */
- /* start. */
- sync
-
-#ifndef CONFIG_SMP
- b 3b /* Never go on non-SMP */
-#else
- cmpwi 0,r23,0
- beq 3b /* Loop until told to go */
/* See if we need to call a cpu state restore handler */
- LOAD_REG_IMMEDIATE(r23, cur_cpu_spec)
+ LOAD_REG_ADDR(r23, cur_cpu_spec)
ld r23,0(r23)
- ld r23,CPU_SPEC_RESTORE(r23)
- cmpdi 0,r23,0
- beq 4f
- ld r23,0(r23)
- mtctr r23
- bctrl
-
-4: /* Create a temp kernel stack for use before relocation is on. */
- ld r1,PACAEMERGSP(r13)
- subi r1,r1,STACK_FRAME_OVERHEAD
-
- b .__secondary_start
+ ld r12,CPU_SPEC_RESTORE(r23)
+ cmpdi 0,r12,0
+ beq 3f
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
+ ld r12,0(r12)
#endif
+ mtctr r12
+ bctrl
-#ifdef CONFIG_PPC_ISERIES
-_STATIC(__start_initialization_iSeries)
- /* Clear out the BSS */
- LOAD_REG_IMMEDIATE(r11,__bss_stop)
- LOAD_REG_IMMEDIATE(r8,__bss_start)
- sub r11,r11,r8 /* bss size */
- addi r11,r11,7 /* round up to an even double word */
- rldicl. r11,r11,61,3 /* shift right by 3 */
- beq 4f
- addi r8,r8,-8
- li r0,0
- mtctr r11 /* zero this many doublewords */
-3: stdu r0,8(r8)
- bdnz 3b
-4:
- LOAD_REG_IMMEDIATE(r1,init_thread_union)
- addi r1,r1,THREAD_SIZE
- li r0,0
- stdu r0,-STACK_FRAME_OVERHEAD(r1)
-
- LOAD_REG_IMMEDIATE(r3,cpu_specs)
- LOAD_REG_IMMEDIATE(r4,cur_cpu_spec)
- li r5,0
- bl .identify_cpu
-
- LOAD_REG_IMMEDIATE(r2,__toc_start)
- addi r2,r2,0x4000
- addi r2,r2,0x4000
+3: LOAD_REG_ADDR(r3, spinning_secondaries) /* Decrement spinning_secondaries */
+ lwarx r4,0,r3
+ subi r4,r4,1
+ stwcx. r4,0,r3
+ bne 3b
+ isync
- bl .iSeries_early_setup
- bl .early_setup
+4: HMT_LOW
+ lbz r23,PACAPROCSTART(r13) /* Test if this processor should */
+ /* start. */
+ cmpwi 0,r23,0
+ beq 4b /* Loop until told to go */
- /* relocation is on at this point */
+ sync /* order paca.run and cur_cpu_spec */
+ isync /* In case code patching happened */
- b .start_here_common
-#endif /* CONFIG_PPC_ISERIES */
+ /* Create a temp kernel stack for use before relocation is on. */
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,STACK_FRAME_OVERHEAD
-#ifdef CONFIG_PPC_MULTIPLATFORM
+ b __secondary_start
+#endif /* SMP */
-_STATIC(__mmu_off)
+/*
+ * Turn the MMU off.
+ * Assumes we're mapped EA == RA if the MMU is on.
+ */
+#ifdef CONFIG_PPC_BOOK3S
+__mmu_off:
mfmsr r3
andi. r0,r3,MSR_IR|MSR_DR
beqlr
+ mflr r4
andc r3,r3,r0
mtspr SPRN_SRR0,r4
mtspr SPRN_SRR1,r3
sync
rfid
b . /* prevent speculative execution */
+#endif
/*
@@ -1605,22 +324,39 @@ _STATIC(__mmu_off)
* DT block, r4 is a physical pointer to the kernel itself
*
*/
-_GLOBAL(__start_initialization_multiplatform)
-#ifdef CONFIG_PPC_MULTIPLATFORM
+__start_initialization_multiplatform:
+ /* Make sure we are running in 64 bits mode */
+ bl enable_64b_mode
+
+ /* Get TOC pointer (current runtime address) */
+ bl relative_toc
+
+ /* find out where we are now */
+ bcl 20,31,$+4
+0: mflr r26 /* r26 = runtime addr here */
+ addis r26,r26,(_stext - 0b)@ha
+ addi r26,r26,(_stext - 0b)@l /* current runtime base addr */
+
/*
* Are we booted from a PROM Of-type client-interface ?
*/
cmpldi cr0,r5,0
- bne .__boot_from_prom /* yes -> prom */
-#endif
-
+ beq 1f
+ b __boot_from_prom /* yes -> prom */
+1:
/* Save parameters */
mr r31,r3
mr r30,r4
+#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
+ /* Save OPAL entry */
+ mr r28,r8
+ mr r29,r9
+#endif
- /* Make sure we are running in 64 bits mode */
- bl .enable_64b_mode
-
+#ifdef CONFIG_PPC_BOOK3E
+ bl start_initialization_book3e
+ b __after_prom_start
+#else
/* Setup some critical 970 SPRs before switching MMU off */
mfspr r0,SPRN_PVR
srwi r0,r0,16
@@ -1629,18 +365,19 @@ _GLOBAL(__start_initialization_multiplatform)
cmpwi r0,0x3c /* 970FX */
beq 1f
cmpwi r0,0x44 /* 970MP */
+ beq 1f
+ cmpwi r0,0x45 /* 970GX */
bne 2f
-1: bl .__cpu_preinit_ppc970
+1: bl __cpu_preinit_ppc970
2:
- /* Switch off MMU if not already */
- LOAD_REG_IMMEDIATE(r4, .__after_prom_start - KERNELBASE)
- add r4,r4,r30
- bl .__mmu_off
- b .__after_prom_start
+ /* Switch off MMU if not already off */
+ bl __mmu_off
+ b __after_prom_start
+#endif /* CONFIG_PPC_BOOK3E */
-#ifdef CONFIG_PPC_MULTIPLATFORM
-_STATIC(__boot_from_prom)
+__boot_from_prom:
+#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
/* Save parameters */
mr r31,r3
mr r30,r4
@@ -1651,22 +388,15 @@ _STATIC(__boot_from_prom)
/*
* Align the stack to 16-byte boundary
* Depending on the size and layout of the ELF sections in the initial
- * boot binary, the stack pointer will be unalignet on PowerMac
+ * boot binary, the stack pointer may be unaligned on PowerMac
*/
rldicr r1,r1,0,59
- /* Make sure we are running in 64 bits mode */
- bl .enable_64b_mode
-
- /* put a relocation offset into r3 */
- bl .reloc_offset
-
- LOAD_REG_IMMEDIATE(r2,__toc_start)
- addi r2,r2,0x4000
- addi r2,r2,0x4000
-
- /* Relocate the TOC from a virt addr to a real addr */
- add r2,r2,r3
+#ifdef CONFIG_RELOCATABLE
+ /* Relocate code for where we are now */
+ mr r3,r26
+ bl relocate
+#endif
/* Restore parameters */
mr r3,r31
@@ -1676,66 +406,81 @@ _STATIC(__boot_from_prom)
mr r7,r27
/* Do all of the interaction with OF client interface */
- bl .prom_init
- /* We never return */
+ mr r8,r26
+ bl prom_init
+#endif /* #CONFIG_PPC_OF_BOOT_TRAMPOLINE */
+
+ /* We never return. We also hit that trap if trying to boot
+ * from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */
trap
-#endif
-/*
- * At this point, r3 contains the physical address we are running at,
- * returned by prom_init()
- */
-_STATIC(__after_prom_start)
+__after_prom_start:
+#ifdef CONFIG_RELOCATABLE
+ /* process relocations for the final address of the kernel */
+ lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */
+ sldi r25,r25,32
+ lwz r7,__run_at_load-_stext(r26)
+ cmplwi cr0,r7,1 /* flagged to stay where we are ? */
+ bne 1f
+ add r25,r25,r26
+1: mr r3,r25
+ bl relocate
+#endif
/*
- * We need to run with __start at physical address PHYSICAL_START.
+ * We need to run with _stext at physical address PHYSICAL_START.
* This will leave some code in the first 256B of
* real memory, which are reserved for software use.
- * The remainder of the first page is loaded with the fixed
- * interrupt vectors. The next two pages are filled with
- * unknown exception placeholders.
*
* Note: This process overwrites the OF exception vectors.
- * r26 == relocation offset
- * r27 == KERNELBASE
*/
- bl .reloc_offset
- mr r26,r3
- LOAD_REG_IMMEDIATE(r27, KERNELBASE)
-
- LOAD_REG_IMMEDIATE(r3, PHYSICAL_START) /* target addr */
-
- // XXX FIXME: Use phys returned by OF (r30)
- add r4,r27,r26 /* source addr */
- /* current address of _start */
- /* i.e. where we are running */
- /* the source addr */
-
- cmpdi r4,0 /* In some cases the loader may */
- beq .start_here_multiplatform /* have already put us at zero */
- /* so we can skip the copy. */
- LOAD_REG_IMMEDIATE(r5,copy_to_here) /* # bytes of memory to copy */
- sub r5,r5,r27
-
+ li r3,0 /* target addr */
+#ifdef CONFIG_PPC_BOOK3E
+ tovirt(r3,r3) /* on booke, we already run at PAGE_OFFSET */
+#endif
+ mr. r4,r26 /* In some cases the loader may */
+ beq 9f /* have already put us at zero */
li r6,0x100 /* Start offset, the first 0x100 */
/* bytes were copied earlier. */
+#ifdef CONFIG_PPC_BOOK3E
+ tovirt(r6,r6) /* on booke, we already run at PAGE_OFFSET */
+#endif
+
+#ifdef CONFIG_RELOCATABLE
+/*
+ * Check if the kernel has to be running as relocatable kernel based on the
+ * variable __run_at_load, if it is set the kernel is treated as relocatable
+ * kernel, otherwise it will be moved to PHYSICAL_START
+ */
+ lwz r7,__run_at_load-_stext(r26)
+ cmplwi cr0,r7,1
+ bne 3f
- bl .copy_and_flush /* copy the first n bytes */
+ /* just copy interrupts */
+ LOAD_REG_IMMEDIATE(r5, __end_interrupts - _stext)
+ b 5f
+3:
+#endif
+ lis r5,(copy_to_here - _stext)@ha
+ addi r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
+
+ bl copy_and_flush /* copy the first n bytes */
/* this includes the code being */
/* executed here. */
-
- LOAD_REG_IMMEDIATE(r0, 4f) /* Jump to the copy of this code */
- mtctr r0 /* that we just made/relocated */
+ addis r8,r3,(4f - _stext)@ha /* Jump to the copy of this code */
+ addi r12,r8,(4f - _stext)@l /* that we just made */
+ mtctr r12
bctr
-4: LOAD_REG_IMMEDIATE(r5,klimit)
- add r5,r5,r26
- ld r5,0(r5) /* get the value of klimit */
- sub r5,r5,r27
- bl .copy_and_flush /* copy the rest */
- b .start_here_multiplatform
+.balign 8
+p_end: .llong _end - _stext
+
+4: /* Now copy the rest of the kernel up to _end */
+ addis r5,r26,(p_end - _stext)@ha
+ ld r5,(p_end - _stext)@l(r5) /* get _end */
+5: bl copy_and_flush /* copy the rest */
-#endif /* CONFIG_PPC_MULTIPLATFORM */
+9: b start_here_multiplatform
/*
* Copy routine used to copy the kernel to start at physical address 0
@@ -1770,6 +515,7 @@ _GLOBAL(copy_and_flush)
sync
addi r5,r5,8
addi r6,r6,8
+ isync
blr
.align 8
@@ -1798,11 +544,23 @@ __secondary_start_pmac_0:
_GLOBAL(pmac_secondary_start)
/* turn on 64-bit mode */
- bl .enable_64b_mode
+ bl enable_64b_mode
+
+ li r0,0
+ mfspr r3,SPRN_HID4
+ rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */
+ sync
+ mtspr SPRN_HID4,r3
isync
+ sync
+ slbia
+
+ /* get TOC pointer (real address) */
+ bl relative_toc
+ tovirt(r2,r2)
/* Copy some CPU settings from CPU 0 */
- bl .__restore_cpu_ppc970
+ bl __restore_cpu_ppc970
/* pSeries do that early though I don't think we really need it */
mfmsr r3
@@ -1810,16 +568,25 @@ _GLOBAL(pmac_secondary_start)
mtmsrd r3 /* RI on */
/* Set up a paca value for this processor. */
- LOAD_REG_IMMEDIATE(r4, paca) /* Get base vaddr of paca array */
- mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */
+ LOAD_REG_ADDR(r4,paca) /* Load paca pointer */
+ ld r4,0(r4) /* Get base vaddr of paca array */
+ mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */
add r13,r13,r4 /* for this processor. */
- mtspr SPRN_SPRG3,r13 /* Save vaddr of paca in SPRG3 */
+ SET_PACA(r13) /* Save vaddr of paca in an SPRG*/
+
+ /* Mark interrupts soft and hard disabled (they might be enabled
+ * in the PACA when doing hotplug)
+ */
+ li r0,0
+ stb r0,PACASOFTIRQEN(r13)
+ li r0,PACA_IRQ_HARD_DIS
+ stb r0,PACAIRQHAPPENED(r13)
/* Create a temp kernel stack for use before relocation is on. */
ld r1,PACAEMERGSP(r13)
subi r1,r1,STACK_FRAME_OVERHEAD
- b .__secondary_start
+ b __secondary_start
#endif /* CONFIG_PPC_PMAC */
@@ -1831,88 +598,138 @@ _GLOBAL(pmac_secondary_start)
* 1. Processor number
* 2. Segment table pointer (virtual address)
* On entry the following are set:
- * r1 = stack pointer. vaddr for iSeries, raddr (temp stack) for pSeries
- * r24 = cpu# (in Linux terms)
- * r13 = paca virtual address
- * SPRG3 = paca virtual address
+ * r1 = stack pointer (real addr of temp stack)
+ * r24 = cpu# (in Linux terms)
+ * r13 = paca virtual address
+ * SPRG_PACA = paca virtual address
*/
-_GLOBAL(__secondary_start)
+ .section ".text";
+ .align 2 ;
+
+ .globl __secondary_start
+__secondary_start:
/* Set thread priority to MEDIUM */
HMT_MEDIUM
- /* Load TOC */
- ld r2,PACATOC(r13)
+ /* Initialize the kernel stack */
+ LOAD_REG_ADDR(r3, current_set)
+ sldi r28,r24,3 /* get current_set[cpu#] */
+ ldx r14,r3,r28
+ addi r14,r14,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ std r14,PACAKSAVE(r13)
/* Do early setup for that CPU (stab, slb, hash table pointer) */
- bl .early_setup_secondary
+ bl early_setup_secondary
- /* Initialize the kernel stack. Just a repeat for iSeries. */
- LOAD_REG_ADDR(r3, current_set)
- sldi r28,r24,3 /* get current_set[cpu#] */
- ldx r1,r3,r28
- addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
- std r1,PACAKSAVE(r13)
+ /*
+ * setup the new stack pointer, but *don't* use this until
+ * translation is on.
+ */
+ mr r1, r14
/* Clear backchain so we get nice backtraces */
li r7,0
mtlr r7
+ /* Mark interrupts soft and hard disabled (they might be enabled
+ * in the PACA when doing hotplug)
+ */
+ stb r7,PACASOFTIRQEN(r13)
+ li r0,PACA_IRQ_HARD_DIS
+ stb r0,PACAIRQHAPPENED(r13)
+
/* enable MMU and jump to start_secondary */
- LOAD_REG_ADDR(r3, .start_secondary_prolog)
+ LOAD_REG_ADDR(r3, start_secondary_prolog)
LOAD_REG_IMMEDIATE(r4, MSR_KERNEL)
-#ifdef DO_SOFT_DISABLE
- ori r4,r4,MSR_EE
-#endif
+
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- rfid
+ RFI
b . /* prevent speculative execution */
/*
* Running with relocation on at this point. All we want to do is
- * zero the stack back-chain pointer before going into C code.
+ * zero the stack back-chain pointer and get the TOC virtual address
+ * before going into C code.
+ */
+start_secondary_prolog:
+ ld r2,PACATOC(r13)
+ li r3,0
+ std r3,0(r1) /* Zero the stack frame pointer */
+ bl start_secondary
+ b .
+/*
+ * Reset stack pointer and call start_secondary
+ * to continue with online operation when woken up
+ * from cede in cpu offline.
*/
-_GLOBAL(start_secondary_prolog)
+_GLOBAL(start_secondary_resume)
+ ld r1,PACAKSAVE(r13) /* Reload kernel stack pointer */
li r3,0
std r3,0(r1) /* Zero the stack frame pointer */
- bl .start_secondary
+ bl start_secondary
b .
#endif
/*
* This subroutine clobbers r11 and r12
*/
-_GLOBAL(enable_64b_mode)
+enable_64b_mode:
mfmsr r11 /* grab the current MSR */
- li r12,1
- rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
- or r11,r11,r12
- li r12,1
- rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
+#ifdef CONFIG_PPC_BOOK3E
+ oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */
+ mtmsr r11
+#else /* CONFIG_PPC_BOOK3E */
+ li r12,(MSR_64BIT | MSR_ISF)@highest
+ sldi r12,r12,48
or r11,r11,r12
mtmsrd r11
isync
+#endif
+ blr
+
+/*
+ * This puts the TOC pointer into r2, offset by 0x8000 (as expected
+ * by the toolchain). It computes the correct value for wherever we
+ * are running at the moment, using position-independent code.
+ *
+ * Note: The compiler constructs pointers using offsets from the
+ * TOC in -mcmodel=medium mode. After we relocate to 0 but before
+ * the MMU is on we need our TOC to be a virtual address otherwise
+ * these pointers will be real addresses which may get stored and
+ * accessed later with the MMU on. We use tovirt() at the call
+ * sites to handle this.
+ */
+_GLOBAL(relative_toc)
+ mflr r0
+ bcl 20,31,$+4
+0: mflr r11
+ ld r2,(p_toc - 0b)(r11)
+ add r2,r2,r11
+ mtlr r0
blr
-#ifdef CONFIG_PPC_MULTIPLATFORM
+.balign 8
+p_toc: .llong __toc_start + 0x8000 - 0b
+
/*
* This is where the main kernel code starts.
*/
-_STATIC(start_here_multiplatform)
- /* get a new offset, now that the kernel has moved. */
- bl .reloc_offset
- mr r26,r3
+start_here_multiplatform:
+ /* set up the TOC */
+ bl relative_toc
+ tovirt(r2,r2)
/* Clear out the BSS. It may have been done in prom_init,
* already but that's irrelevant since prom_init will soon
* be detached from the kernel completely. Besides, we need
* to clear it now for kexec-style entry.
*/
- LOAD_REG_IMMEDIATE(r11,__bss_stop)
- LOAD_REG_IMMEDIATE(r8,__bss_start)
+ LOAD_REG_ADDR(r11,__bss_stop)
+ LOAD_REG_ADDR(r8,__bss_start)
sub r11,r11,r8 /* bss size */
addi r11,r11,7 /* round up to an even double word */
- rldicl. r11,r11,61,3 /* shift right by 3 */
+ srdi. r11,r11,3 /* shift right by 3 */
beq 4f
addi r8,r8,-8
li r0,0
@@ -1921,93 +738,75 @@ _STATIC(start_here_multiplatform)
bdnz 3b
4:
+#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
+ /* Setup OPAL entry */
+ LOAD_REG_ADDR(r11, opal)
+ std r28,0(r11);
+ std r29,8(r11);
+#endif
+
+#ifndef CONFIG_PPC_BOOK3E
mfmsr r6
ori r6,r6,MSR_RI
mtmsrd r6 /* RI on */
+#endif
- /* The following gets the stack and TOC set up with the regs */
+#ifdef CONFIG_RELOCATABLE
+ /* Save the physical address we're running at in kernstart_addr */
+ LOAD_REG_ADDR(r4, kernstart_addr)
+ clrldi r0,r25,2
+ std r0,0(r4)
+#endif
+
+ /* The following gets the stack set up with the regs */
/* pointing to the real addr of the kernel stack. This is */
/* all done to support the C function call below which sets */
/* up the htab. This is done because we have relocated the */
/* kernel but are still running in real mode. */
- LOAD_REG_IMMEDIATE(r3,init_thread_union)
- add r3,r3,r26
+ LOAD_REG_ADDR(r3,init_thread_union)
- /* set up a stack pointer (physical address) */
+ /* set up a stack pointer */
addi r1,r3,THREAD_SIZE
li r0,0
stdu r0,-STACK_FRAME_OVERHEAD(r1)
- /* set up the TOC (physical address) */
- LOAD_REG_IMMEDIATE(r2,__toc_start)
- addi r2,r2,0x4000
- addi r2,r2,0x4000
- add r2,r2,r26
-
- LOAD_REG_IMMEDIATE(r3, cpu_specs)
- add r3,r3,r26
- LOAD_REG_IMMEDIATE(r4,cur_cpu_spec)
- add r4,r4,r26
- mr r5,r26
- bl .identify_cpu
-
/* Do very early kernel initializations, including initial hash table,
* stab and slb setup before we turn on relocation. */
/* Restore parameters passed from prom_init/kexec */
mr r3,r31
- bl .early_setup
+ bl early_setup /* also sets r13 and SPRG_PACA */
- LOAD_REG_IMMEDIATE(r3, .start_here_common)
- LOAD_REG_IMMEDIATE(r4, MSR_KERNEL)
+ LOAD_REG_ADDR(r3, start_here_common)
+ ld r4,PACAKMSR(r13)
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- rfid
+ RFI
b . /* prevent speculative execution */
-#endif /* CONFIG_PPC_MULTIPLATFORM */
/* This is where all platforms converge execution */
-_STATIC(start_here_common)
- /* relocation is on at this point */
-
- /* The following code sets up the SP and TOC now that we are */
- /* running with translation enabled. */
-
- LOAD_REG_IMMEDIATE(r3,init_thread_union)
- /* set up the stack */
- addi r1,r3,THREAD_SIZE
- li r0,0
- stdu r0,-STACK_FRAME_OVERHEAD(r1)
-
- /* Apply the CPUs-specific fixups (nop out sections not relevant
- * to this CPU
- */
- li r3,0
- bl .do_cpu_ftr_fixups
-
- /* ptr to current */
- LOAD_REG_IMMEDIATE(r4, init_task)
- std r4,PACACURRENT(r13)
+start_here_common:
+ /* relocation is on at this point */
+ std r1,PACAKSAVE(r13)
- /* Load the TOC */
+ /* Load the TOC (virtual address) */
ld r2,PACATOC(r13)
- std r1,PACAKSAVE(r13)
- bl .setup_system
+ /* Do more system initializations in virtual mode */
+ bl setup_system
- /* Load up the kernel context */
-5:
-#ifdef DO_SOFT_DISABLE
- li r5,0
- stb r5,PACAPROCENABLED(r13) /* Soft Disabled */
- mfmsr r5
- ori r5,r5,MSR_EE /* Hard Enabled */
- mtmsrd r5
-#endif
+ /* Mark interrupts soft and hard disabled (they might be enabled
+ * in the PACA when doing hotplug)
+ */
+ li r0,0
+ stb r0,PACASOFTIRQEN(r13)
+ li r0,PACA_IRQ_HARD_DIS
+ stb r0,PACAIRQHAPPENED(r13)
- bl .start_kernel
+ /* Generic kernel entry */
+ bl start_kernel
/* Not reached */
BUG_OPCODE
@@ -2026,12 +825,4 @@ empty_zero_page:
.globl swapper_pg_dir
swapper_pg_dir:
- .space PAGE_SIZE
-
-/*
- * This space gets a copy of optional info passed to us by the bootstrap
- * Used to pass parameters into the kernel like root=/dev/sda1, etc.
- */
- .globl cmd_line
-cmd_line:
- .space COMMAND_LINE_SIZE
+ .space PGD_TABLE_SIZE
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 901be47a02a..7ee876d2adb 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -19,6 +19,7 @@
*
*/
+#include <linux/init.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -28,6 +29,7 @@
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
/* Macro to make the code more readable. */
#ifdef CONFIG_8xx_CPU6
@@ -38,12 +40,9 @@
#else
#define DO_8xx_CPU6(val, reg)
#endif
- .text
- .globl _stext
-_stext:
- .text
- .globl _start
-_start:
+ __HEAD
+_ENTRY(_stext);
+_ENTRY(_start);
/* MPC8xx
* This port was done on an MBX board with an 860. Right now I only
@@ -73,18 +72,11 @@ _start:
* in the first level table, but that would require many changes to the
* Linux page directory/table functions that I don't want to do right now.
*
- * I used to use SPRG2 for a temporary register in the TLB handler, but it
- * has since been put to other uses. I now use a hack to save a register
- * and the CCR at memory location 0.....Someday I'll fix this.....
* -- Dan
*/
.globl __start
__start:
- mr r31,r3 /* save parameters */
- mr r30,r4
- mr r29,r5
- mr r28,r6
- mr r27,r7
+ mr r31,r3 /* save device tree ptr */
/* We have to turn on the MMU right away so we get cache modes
* set correctly.
@@ -112,8 +104,8 @@ turn_on_mmu:
* task's thread_struct.
*/
#define EXCEPTION_PROLOG \
- mtspr SPRN_SPRG0,r10; \
- mtspr SPRN_SPRG1,r11; \
+ mtspr SPRN_SPRG_SCRATCH0,r10; \
+ mtspr SPRN_SPRG_SCRATCH1,r11; \
mfcr r10; \
EXCEPTION_PROLOG_1; \
EXCEPTION_PROLOG_2
@@ -123,7 +115,7 @@ turn_on_mmu:
andi. r11,r11,MSR_PR; \
tophys(r11,r1); /* use tophys(r1) if kernel */ \
beq 1f; \
- mfspr r11,SPRN_SPRG3; \
+ mfspr r11,SPRN_SPRG_THREAD; \
lwz r11,THREAD_INFO-THREAD(r11); \
addi r11,r11,THREAD_SIZE; \
tophys(r11,r11); \
@@ -135,9 +127,9 @@ turn_on_mmu:
stw r10,_CCR(r11); /* save registers */ \
stw r12,GPR12(r11); \
stw r9,GPR9(r11); \
- mfspr r10,SPRN_SPRG0; \
+ mfspr r10,SPRN_SPRG_SCRATCH0; \
stw r10,GPR10(r11); \
- mfspr r12,SPRN_SPRG1; \
+ mfspr r12,SPRN_SPRG_SCRATCH1; \
stw r12,GPR11(r11); \
mflr r10; \
stw r10,_LINK(r11); \
@@ -208,6 +200,8 @@ MachineCheck:
EXCEPTION_PROLOG
mfspr r4,SPRN_DAR
stw r4,_DAR(r11)
+ li r5,0x00f0
+ mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */
mfspr r5,SPRN_DSISR
stw r5,_DSISR(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -224,7 +218,9 @@ DataAccess:
stw r10,_DSISR(r11)
mr r5,r10
mfspr r4,SPRN_DAR
- EXC_XFER_EE_LITE(0x300, handle_page_fault)
+ li r10,0x00f0
+ mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */
+ EXC_XFER_LITE(0x300, handle_page_fault)
/* Instruction access exception.
* This is "never generated" by the MPC8xx. We jump to it for other
@@ -235,7 +231,7 @@ InstructionAccess:
EXCEPTION_PROLOG
mr r4,r12
mr r5,r9
- EXC_XFER_EE_LITE(0x400, handle_page_fault)
+ EXC_XFER_LITE(0x400, handle_page_fault)
/* External interrupt */
EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
@@ -246,6 +242,8 @@ Alignment:
EXCEPTION_PROLOG
mfspr r4,SPRN_DAR
stw r4,_DAR(r11)
+ li r5,0x00f0
+ mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */
mfspr r5,SPRN_DSISR
stw r5,_DSISR(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -298,9 +296,20 @@ InstructionTLBMiss:
DO_8xx_CPU6(0x3f80, r3)
mtspr SPRN_M_TW, r10 /* Save a couple of working registers */
mfcr r10
+#ifdef CONFIG_8xx_CPU6
stw r10, 0(r0)
stw r11, 4(r0)
+#else
+ mtspr SPRN_DAR, r10
+ mtspr SPRN_SPRG2, r11
+#endif
mfspr r10, SPRN_SRR0 /* Get effective address of fault */
+#ifdef CONFIG_8xx_CPU15
+ addi r11, r10, 0x1000
+ tlbie r11
+ addi r11, r10, -0x1000
+ tlbie r11
+#endif
DO_8xx_CPU6(0x3780, r3)
mtspr SPRN_MD_EPN, r10 /* Have to use MD_EPN for walk, MI_EPN can't */
mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */
@@ -308,12 +317,16 @@ InstructionTLBMiss:
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
*/
+#ifdef CONFIG_MODULES
+ /* Only modules will cause ITLB Misses as we always
+ * pin the first 8MB of kernel memory */
andi. r11, r10, 0x0800 /* Address >= 0x80000000 */
beq 3f
lis r11, swapper_pg_dir@h
ori r11, r11, swapper_pg_dir@l
rlwimi r10, r11, 0, 2, 19
3:
+#endif
lwz r11, 0(r10) /* Get the level 1 entry */
rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */
beq 2f /* If zero, don't try to find a pte */
@@ -329,28 +342,59 @@ InstructionTLBMiss:
mfspr r11, SPRN_MD_TWC /* ....and get the pte address */
lwz r10, 0(r11) /* Get the pte */
- ori r10, r10, _PAGE_ACCESSED
- stw r10, 0(r11)
-
+#ifdef CONFIG_SWAP
+ andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT
+ cmpwi cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT
+ bne- cr0, 2f
+#endif
/* The Linux PTE won't go exactly into the MMU TLB.
- * Software indicator bits 21, 22 and 28 must be clear.
+ * Software indicator bits 21 and 28 must be clear.
* Software indicator bits 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
* of the MMU.
*/
-2: li r11, 0x00f0
- rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */
+ li r11, 0x00f0
+ rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */
DO_8xx_CPU6(0x2d80, r3)
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
- mfspr r10, SPRN_M_TW /* Restore registers */
+ /* Restore registers */
+#ifndef CONFIG_8xx_CPU6
+ mfspr r10, SPRN_DAR
+ mtcr r10
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+ mfspr r11, SPRN_SPRG2
+#else
lwz r11, 0(r0)
mtcr r11
lwz r11, 4(r0)
-#ifdef CONFIG_8xx_CPU6
lwz r3, 8(r0)
#endif
+ mfspr r10, SPRN_M_TW
rfi
+2:
+ mfspr r11, SPRN_SRR1
+ /* clear all error bits as TLB Miss
+ * sets a few unconditionally
+ */
+ rlwinm r11, r11, 0, 0xffff
+ mtspr SPRN_SRR1, r11
+
+ /* Restore registers */
+#ifndef CONFIG_8xx_CPU6
+ mfspr r10, SPRN_DAR
+ mtcr r10
+ li r11, 0x00f0
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+ mfspr r11, SPRN_SPRG2
+#else
+ lwz r11, 0(r0)
+ mtcr r11
+ lwz r11, 4(r0)
+ lwz r3, 8(r0)
+#endif
+ mfspr r10, SPRN_M_TW
+ b InstructionAccess
. = 0x1200
DataStoreTLBMiss:
@@ -360,8 +404,13 @@ DataStoreTLBMiss:
DO_8xx_CPU6(0x3f80, r3)
mtspr SPRN_M_TW, r10 /* Save a couple of working registers */
mfcr r10
+#ifdef CONFIG_8xx_CPU6
stw r10, 0(r0)
stw r11, 4(r0)
+#else
+ mtspr SPRN_DAR, r10
+ mtspr SPRN_SPRG2, r11
+#endif
mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */
/* If we are faulting a kernel address, we have to use the
@@ -392,15 +441,38 @@ DataStoreTLBMiss:
* above.
*/
rlwimi r11, r10, 0, 27, 27
+ /* Insert the WriteThru flag into the TWC from the Linux PTE.
+ * It is bit 25 in the Linux PTE and bit 30 in the TWC
+ */
+ rlwimi r11, r10, 32-5, 30, 30
DO_8xx_CPU6(0x3b80, r3)
mtspr SPRN_MD_TWC, r11
- mfspr r11, SPRN_MD_TWC /* get the pte address again */
- ori r10, r10, _PAGE_ACCESSED
- stw r10, 0(r11)
+ /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set.
+ * We also need to know if the insn is a load/store, so:
+ * Clear _PAGE_PRESENT and load that which will
+ * trap into DTLB Error with store bit set accordinly.
+ */
+ /* PRESENT=0x1, ACCESSED=0x20
+ * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
+ * r10 = (r10 & ~PRESENT) | r11;
+ */
+#ifdef CONFIG_SWAP
+ rlwinm r11, r10, 32-5, _PAGE_PRESENT
+ and r11, r11, r10
+ rlwimi r10, r11, 0, _PAGE_PRESENT
+#endif
+ /* Honour kernel RO, User NA */
+ /* 0x200 == Extended encoding, bit 22 */
+ rlwimi r10, r10, 32-2, 0x200 /* Copy USER to bit 22, 0x200 */
+ /* r11 = (r10 & _PAGE_RW) >> 1 */
+ rlwinm r11, r10, 32-1, 0x200
+ or r10, r11, r10
+ /* invert RW and 0x200 bits */
+ xori r10, r10, _PAGE_RW | 0x200
/* The Linux PTE won't go exactly into the MMU TLB.
- * Software indicator bits 21, 22 and 28 must be clear.
+ * Software indicator bits 22 and 28 must be clear.
* Software indicator bits 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
* of the MMU.
@@ -410,13 +482,20 @@ DataStoreTLBMiss:
DO_8xx_CPU6(0x3d80, r3)
mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
- mfspr r10, SPRN_M_TW /* Restore registers */
+ /* Restore registers */
+#ifndef CONFIG_8xx_CPU6
+ mfspr r10, SPRN_DAR
+ mtcr r10
+ mtspr SPRN_DAR, r11 /* Tag DAR */
+ mfspr r11, SPRN_SPRG2
+#else
+ mtspr SPRN_DAR, r11 /* Tag DAR */
lwz r11, 0(r0)
mtcr r11
lwz r11, 4(r0)
-#ifdef CONFIG_8xx_CPU6
lwz r3, 8(r0)
#endif
+ mfspr r10, SPRN_M_TW
rfi
/* This is an instruction TLB error on the MPC8xx. This could be due
@@ -446,88 +525,10 @@ DataTLBError:
stw r10, 0(r0)
stw r11, 4(r0)
- /* First, make sure this was a store operation.
- */
- mfspr r10, SPRN_DSISR
- andis. r11, r10, 0x0200 /* If set, indicates store op */
- beq 2f
-
- /* The EA of a data TLB miss is automatically stored in the MD_EPN
- * register. The EA of a data TLB error is automatically stored in
- * the DAR, but not the MD_EPN register. We must copy the 20 most
- * significant bits of the EA from the DAR to MD_EPN before we
- * start walking the page tables. We also need to copy the CASID
- * value from the M_CASID register.
- * Addendum: The EA of a data TLB error is _supposed_ to be stored
- * in DAR, but it seems that this doesn't happen in some cases, such
- * as when the error is due to a dcbi instruction to a page with a
- * TLB that doesn't have the changed bit set. In such cases, there
- * does not appear to be any way to recover the EA of the error
- * since it is neither in DAR nor MD_EPN. As a workaround, the
- * _PAGE_HWWRITE bit is set for all kernel data pages when the PTEs
- * are initialized in mapin_ram(). This will avoid the problem,
- * assuming we only use the dcbi instruction on kernel addresses.
- */
mfspr r10, SPRN_DAR
- rlwinm r11, r10, 0, 0, 19
- ori r11, r11, MD_EVALID
- mfspr r10, SPRN_M_CASID
- rlwimi r11, r10, 0, 28, 31
- DO_8xx_CPU6(0x3780, r3)
- mtspr SPRN_MD_EPN, r11
-
- mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */
-
- /* If we are faulting a kernel address, we have to use the
- * kernel page tables.
- */
- andi. r11, r10, 0x0800
- beq 3f
- lis r11, swapper_pg_dir@h
- ori r11, r11, swapper_pg_dir@l
- rlwimi r10, r11, 0, 2, 19
-3:
- lwz r11, 0(r10) /* Get the level 1 entry */
- rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */
- beq 2f /* If zero, bail */
-
- /* We have a pte table, so fetch the pte from the table.
- */
- ori r11, r11, 1 /* Set valid bit in physical L2 page */
- DO_8xx_CPU6(0x3b80, r3)
- mtspr SPRN_MD_TWC, r11 /* Load pte table base address */
- mfspr r11, SPRN_MD_TWC /* ....and get the pte address */
- lwz r10, 0(r11) /* Get the pte */
-
- andi. r11, r10, _PAGE_RW /* Is it writeable? */
- beq 2f /* Bail out if not */
-
- /* Update 'changed', among others.
- */
- ori r10, r10, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
- mfspr r11, SPRN_MD_TWC /* Get pte address again */
- stw r10, 0(r11) /* and update pte in table */
-
- /* The Linux PTE won't go exactly into the MMU TLB.
- * Software indicator bits 21, 22 and 28 must be clear.
- * Software indicator bits 24, 25, 26, and 27 must be
- * set. All other Linux PTE bits control the behavior
- * of the MMU.
- */
- li r11, 0x00f0
- rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */
- DO_8xx_CPU6(0x3d80, r3)
- mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
-
- mfspr r10, SPRN_M_TW /* Restore registers */
- lwz r11, 0(r0)
- mtcr r11
- lwz r11, 4(r0)
-#ifdef CONFIG_8xx_CPU6
- lwz r3, 8(r0)
-#endif
- rfi
-2:
+ cmpwi cr0, r10, 0x00f0
+ beq- FixupDAR /* must be a buggy dcbX, icbi insn. */
+DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR */
mfspr r10, SPRN_M_TW /* Restore registers */
lwz r11, 0(r0)
mtcr r11
@@ -556,9 +557,139 @@ DataTLBError:
. = 0x2000
- .globl giveup_fpu
-giveup_fpu:
- blr
+/* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
+ * by decoding the registers used by the dcbx instruction and adding them.
+ * DAR is set to the calculated address and r10 also holds the EA on exit.
+ */
+ /* define if you don't want to use self modifying code */
+#define NO_SELF_MODIFYING_CODE
+FixupDAR:/* Entry point for dcbx workaround. */
+ /* fetch instruction from memory. */
+ mfspr r10, SPRN_SRR0
+ andis. r11, r10, 0x8000 /* Address >= 0x80000000 */
+ DO_8xx_CPU6(0x3780, r3)
+ mtspr SPRN_MD_EPN, r10
+ mfspr r11, SPRN_M_TWB /* Get level 1 table entry address */
+ beq- 3f /* Branch if user space */
+ lis r11, (swapper_pg_dir-PAGE_OFFSET)@h
+ ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
+ rlwimi r11, r10, 32-20, 0xffc /* r11 = r11&~0xffc|(r10>>20)&0xffc */
+3: lwz r11, 0(r11) /* Get the level 1 entry */
+ DO_8xx_CPU6(0x3b80, r3)
+ mtspr SPRN_MD_TWC, r11 /* Load pte table base address */
+ mfspr r11, SPRN_MD_TWC /* ....and get the pte address */
+ lwz r11, 0(r11) /* Get the pte */
+ /* concat physical page address(r11) and page offset(r10) */
+ rlwimi r11, r10, 0, 20, 31
+ lwz r11,0(r11)
+/* Check if it really is a dcbx instruction. */
+/* dcbt and dcbtst does not generate DTLB Misses/Errors,
+ * no need to include them here */
+ srwi r10, r11, 26 /* check if major OP code is 31 */
+ cmpwi cr0, r10, 31
+ bne- 141f
+ rlwinm r10, r11, 0, 21, 30
+ cmpwi cr0, r10, 2028 /* Is dcbz? */
+ beq+ 142f
+ cmpwi cr0, r10, 940 /* Is dcbi? */
+ beq+ 142f
+ cmpwi cr0, r10, 108 /* Is dcbst? */
+ beq+ 144f /* Fix up store bit! */
+ cmpwi cr0, r10, 172 /* Is dcbf? */
+ beq+ 142f
+ cmpwi cr0, r10, 1964 /* Is icbi? */
+ beq+ 142f
+141: mfspr r10, SPRN_DAR /* r10 must hold DAR at exit */
+ b DARFixed /* Nope, go back to normal TLB processing */
+
+144: mfspr r10, SPRN_DSISR
+ rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */
+ mtspr SPRN_DSISR, r10
+142: /* continue, it was a dcbx, dcbi instruction. */
+#ifdef CONFIG_8xx_CPU6
+ lwz r3, 8(r0) /* restore r3 from memory */
+#endif
+#ifndef NO_SELF_MODIFYING_CODE
+ andis. r10,r11,0x1f /* test if reg RA is r0 */
+ li r10,modified_instr@l
+ dcbtst r0,r10 /* touch for store */
+ rlwinm r11,r11,0,0,20 /* Zero lower 10 bits */
+ oris r11,r11,640 /* Transform instr. to a "add r10,RA,RB" */
+ ori r11,r11,532
+ stw r11,0(r10) /* store add/and instruction */
+ dcbf 0,r10 /* flush new instr. to memory. */
+ icbi 0,r10 /* invalidate instr. cache line */
+ lwz r11, 4(r0) /* restore r11 from memory */
+ mfspr r10, SPRN_M_TW /* restore r10 from M_TW */
+ isync /* Wait until new instr is loaded from memory */
+modified_instr:
+ .space 4 /* this is where the add instr. is stored */
+ bne+ 143f
+ subf r10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */
+143: mtdar r10 /* store faulting EA in DAR */
+ b DARFixed /* Go back to normal TLB handling */
+#else
+ mfctr r10
+ mtdar r10 /* save ctr reg in DAR */
+ rlwinm r10, r11, 24, 24, 28 /* offset into jump table for reg RB */
+ addi r10, r10, 150f@l /* add start of table */
+ mtctr r10 /* load ctr with jump address */
+ xor r10, r10, r10 /* sum starts at zero */
+ bctr /* jump into table */
+150:
+ add r10, r10, r0 ;b 151f
+ add r10, r10, r1 ;b 151f
+ add r10, r10, r2 ;b 151f
+ add r10, r10, r3 ;b 151f
+ add r10, r10, r4 ;b 151f
+ add r10, r10, r5 ;b 151f
+ add r10, r10, r6 ;b 151f
+ add r10, r10, r7 ;b 151f
+ add r10, r10, r8 ;b 151f
+ add r10, r10, r9 ;b 151f
+ mtctr r11 ;b 154f /* r10 needs special handling */
+ mtctr r11 ;b 153f /* r11 needs special handling */
+ add r10, r10, r12 ;b 151f
+ add r10, r10, r13 ;b 151f
+ add r10, r10, r14 ;b 151f
+ add r10, r10, r15 ;b 151f
+ add r10, r10, r16 ;b 151f
+ add r10, r10, r17 ;b 151f
+ add r10, r10, r18 ;b 151f
+ add r10, r10, r19 ;b 151f
+ add r10, r10, r20 ;b 151f
+ add r10, r10, r21 ;b 151f
+ add r10, r10, r22 ;b 151f
+ add r10, r10, r23 ;b 151f
+ add r10, r10, r24 ;b 151f
+ add r10, r10, r25 ;b 151f
+ add r10, r10, r26 ;b 151f
+ add r10, r10, r27 ;b 151f
+ add r10, r10, r28 ;b 151f
+ add r10, r10, r29 ;b 151f
+ add r10, r10, r30 ;b 151f
+ add r10, r10, r31
+151:
+ rlwinm. r11,r11,19,24,28 /* offset into jump table for reg RA */
+ beq 152f /* if reg RA is zero, don't add it */
+ addi r11, r11, 150b@l /* add start of table */
+ mtctr r11 /* load ctr with jump address */
+ rlwinm r11,r11,0,16,10 /* make sure we don't execute this more than once */
+ bctr /* jump into table */
+152:
+ mfdar r11
+ mtctr r11 /* restore ctr reg from DAR */
+ mtdar r10 /* save fault EA to DAR */
+ b DARFixed /* Go back to normal TLB handling */
+
+ /* special handling for r10,r11 since these are modified already */
+153: lwz r11, 4(r0) /* load r11 from memory */
+ b 155f
+154: mfspr r11, SPRN_M_TW /* load r10 from M_TW */
+155: add r10, r10, r11 /* add it */
+ mfctr r11 /* restore r11 */
+ b 151b
+#endif
/*
* This is where the main kernel code starts.
@@ -571,9 +702,7 @@ start_here:
/* ptr to phys current thread */
tophys(r4,r2)
addi r4,r4,THREAD /* init task's THREAD */
- mtspr SPRN_SPRG3,r4
- li r3,0
- mtspr SPRN_SPRG2,r3 /* 0 => r1 has kernel sp */
+ mtspr SPRN_SPRG_THREAD,r4
/* stack */
lis r1,init_thread_union@ha
@@ -586,11 +715,8 @@ start_here:
/*
* Decide what sort of machine this is and initialize the MMU.
*/
- mr r3,r31
- mr r4,r30
- mr r5,r29
- mr r6,r28
- mr r7,r27
+ li r3,0
+ mr r4,r31
bl machine_init
bl MMU_init
@@ -656,12 +782,12 @@ start_here:
*/
initial_mmu:
tlbia /* Invalidate all TLB entries */
-#ifdef CONFIG_PIN_TLB
+/* Always pin the first 8 MB ITLB to prevent ITLB
+ misses while mucking around with SRR0/SRR1 in asm
+*/
lis r8, MI_RSV4I@h
ori r8, r8, 0x1c00
-#else
- li r8, 0
-#endif
+
mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */
#ifdef CONFIG_PIN_TLB
@@ -730,13 +856,16 @@ initial_mmu:
mtspr SPRN_MD_TWC, r9
li r11, MI_BOOTINIT /* Create RPN for address 0 */
addis r11, r11, 0x0080 /* Add 8M */
- mtspr SPRN_MD_RPN, r8
+ mtspr SPRN_MD_RPN, r11
+
+ addi r10, r10, 0x0100
+ mtspr SPRN_MD_CTR, r10
addis r8, r8, 0x0080 /* Add 8M */
mtspr SPRN_MD_EPN, r8
mtspr SPRN_MD_TWC, r9
addis r11, r11, 0x0080 /* Add 8M */
- mtspr SPRN_MD_RPN, r8
+ mtspr SPRN_MD_RPN, r11
#endif
/* Since the cache is enabled according to the information we
@@ -835,14 +964,6 @@ empty_zero_page:
swapper_pg_dir:
.space 4096
-/*
- * This space gets a copy of optional info passed to us by the bootstrap
- * Used to pass parameters into the kernel like root=/dev/sda1, etc.
- */
- .globl cmd_line
-cmd_line:
- .space 512
-
/* Room for two PTE table poiners, usually the kernel and current user
* pointer to their respective root page table (pgdir).
*/
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 8536e767616..a620203f7de 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -1,6 +1,10 @@
#ifndef __HEAD_BOOKE_H__
#define __HEAD_BOOKE_H__
+#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
+#include <asm/kvm_asm.h>
+#include <asm/kvm_booke_hv_asm.h>
+
/*
* Macros used for common Book-e exception handling
*/
@@ -10,42 +14,63 @@
mtspr SPRN_IVOR##vector_number,r26; \
sync
-#define NORMAL_EXCEPTION_PROLOG \
- mtspr SPRN_SPRG0,r10; /* save two registers to work with */\
- mtspr SPRN_SPRG1,r11; \
- mtspr SPRN_SPRG4W,r1; \
- mfcr r10; /* save CR in r10 for now */\
- mfspr r11,SPRN_SRR1; /* check whether user or kernel */\
- andi. r11,r11,MSR_PR; \
+#if (THREAD_SHIFT < 15)
+#define ALLOC_STACK_FRAME(reg, val) \
+ addi reg,reg,val
+#else
+#define ALLOC_STACK_FRAME(reg, val) \
+ addis reg,reg,val@ha; \
+ addi reg,reg,val@l
+#endif
+
+/*
+ * Macro used to get to thread save registers.
+ * Note that entries 0-3 are used for the prolog code, and the remaining
+ * entries are available for specific exception use in the event a handler
+ * requires more than 4 scratch registers.
+ */
+#define THREAD_NORMSAVE(offset) (THREAD_NORMSAVES + (offset * 4))
+
+#define NORMAL_EXCEPTION_PROLOG(intno) \
+ mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \
+ mfspr r10, SPRN_SPRG_THREAD; \
+ stw r11, THREAD_NORMSAVE(0)(r10); \
+ stw r13, THREAD_NORMSAVE(2)(r10); \
+ mfcr r13; /* save CR in r13 for now */\
+ mfspr r11, SPRN_SRR1; \
+ DO_KVM BOOKE_INTERRUPT_##intno SPRN_SRR1; \
+ andi. r11, r11, MSR_PR; /* check whether user or kernel */\
+ mr r11, r1; \
beq 1f; \
- mfspr r1,SPRN_SPRG3; /* if from user, start at top of */\
- lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack */\
- addi r1,r1,THREAD_SIZE; \
-1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\
- mr r11,r1; \
- stw r10,_CCR(r11); /* save various registers */\
+ /* if from user, start at top of this thread's kernel stack */ \
+ lwz r11, THREAD_INFO-THREAD(r10); \
+ ALLOC_STACK_FRAME(r11, THREAD_SIZE); \
+1 : subi r11, r11, INT_FRAME_SIZE; /* Allocate exception frame */ \
+ stw r13, _CCR(r11); /* save various registers */ \
stw r12,GPR12(r11); \
stw r9,GPR9(r11); \
- mfspr r10,SPRN_SPRG0; \
- stw r10,GPR10(r11); \
- mfspr r12,SPRN_SPRG1; \
+ mfspr r13, SPRN_SPRG_RSCRATCH0; \
+ stw r13, GPR10(r11); \
+ lwz r12, THREAD_NORMSAVE(0)(r10); \
stw r12,GPR11(r11); \
+ lwz r13, THREAD_NORMSAVE(2)(r10); /* restore r13 */ \
mflr r10; \
stw r10,_LINK(r11); \
- mfspr r10,SPRN_SPRG4R; \
mfspr r12,SPRN_SRR0; \
- stw r10,GPR1(r11); \
+ stw r1, GPR1(r11); \
mfspr r9,SPRN_SRR1; \
- stw r10,0(r11); \
+ stw r1, 0(r11); \
+ mr r1, r11; \
rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
stw r0,GPR0(r11); \
+ lis r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \
+ addi r10, r10, STACK_FRAME_REGS_MARKER@l; \
+ stw r10, 8(r11); \
SAVE_4GPRS(3, r11); \
SAVE_2GPRS(7, r11)
/* To handle the additional exception priority levels on 40x and Book-E
- * processors we allocate a 4k stack per additional priority level. The various
- * head_xxx.S files allocate space (exception_stack_top) for each priority's
- * stack times the number of CPUs
+ * processors we allocate a stack per additional priority level.
*
* On 40x critical is the only additional level
* On 44x/e500 we have critical and machine check
@@ -56,32 +81,32 @@
* is necessary since the MMU is always on, for Book-E parts, and the stacks
* are offset from KERNELBASE.
*
+ * There is some space optimization to be had here if desired. However
+ * to allow for a common kernel with support for debug exceptions either
+ * going to critical or their own debug level we aren't currently
+ * providing configurations that micro-optimize space usage.
*/
-#define BOOKE_EXCEPTION_STACK_SIZE (8192)
-/* CRIT_SPRG only used in critical exception handling */
-#define CRIT_SPRG SPRN_SPRG2
-/* MCHECK_SPRG only used in machine check exception handling */
-#define MCHECK_SPRG SPRN_SPRG6W
+#define MC_STACK_BASE mcheckirq_ctx
+#define CRIT_STACK_BASE critirq_ctx
-#define MCHECK_STACK_TOP (exception_stack_top - 4096)
-#define CRIT_STACK_TOP (exception_stack_top)
+/* only on e500mc/e200 */
+#define DBG_STACK_BASE dbgirq_ctx
-/* only on e200 for now */
-#define DEBUG_STACK_TOP (exception_stack_top - 4096)
-#define DEBUG_SPRG SPRN_SPRG6W
+#define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE)
#ifdef CONFIG_SMP
#define BOOKE_LOAD_EXC_LEVEL_STACK(level) \
mfspr r8,SPRN_PIR; \
- mulli r8,r8,BOOKE_EXCEPTION_STACK_SIZE; \
- neg r8,r8; \
- addis r8,r8,level##_STACK_TOP@ha; \
- addi r8,r8,level##_STACK_TOP@l
+ slwi r8,r8,2; \
+ addis r8,r8,level##_STACK_BASE@ha; \
+ lwz r8,level##_STACK_BASE@l(r8); \
+ addi r8,r8,EXC_LVL_FRAME_OVERHEAD;
#else
#define BOOKE_LOAD_EXC_LEVEL_STACK(level) \
- lis r8,level##_STACK_TOP@h; \
- ori r8,r8,level##_STACK_TOP@l
+ lis r8,level##_STACK_BASE@ha; \
+ lwz r8,level##_STACK_BASE@l(r8); \
+ addi r8,r8,EXC_LVL_FRAME_OVERHEAD;
#endif
/*
@@ -92,25 +117,40 @@
* registers as the normal prolog above. Instead we use a portion of the
* critical/machine check exception stack at low physical addresses.
*/
-#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, exc_level_srr0, exc_level_srr1) \
- mtspr exc_level##_SPRG,r8; \
+#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, intno, exc_level_srr0, exc_level_srr1) \
+ mtspr SPRN_SPRG_WSCRATCH_##exc_level,r8; \
BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \
- stw r10,GPR10-INT_FRAME_SIZE(r8); \
- stw r11,GPR11-INT_FRAME_SIZE(r8); \
- mfcr r10; /* save CR in r10 for now */\
+ stw r9,GPR9(r8); /* save various registers */\
+ mfcr r9; /* save CR in r9 for now */\
+ stw r10,GPR10(r8); \
+ stw r11,GPR11(r8); \
+ stw r9,_CCR(r8); /* save CR on stack */\
mfspr r11,exc_level_srr1; /* check whether user or kernel */\
+ DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \
andi. r11,r11,MSR_PR; \
- mr r11,r8; \
- mfspr r8,exc_level##_SPRG; \
+ mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
+ lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
+ addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\
beq 1f; \
/* COMING FROM USER MODE */ \
- mfspr r11,SPRN_SPRG3; /* if from user, start at top of */\
- lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
- addi r11,r11,THREAD_SIZE; \
-1: subi r11,r11,INT_FRAME_SIZE; /* Allocate an exception frame */\
- stw r10,_CCR(r11); /* save various registers */\
- stw r12,GPR12(r11); \
+ stw r9,_CCR(r11); /* save CR */\
+ lwz r10,GPR10(r8); /* copy regs from exception stack */\
+ lwz r9,GPR9(r8); \
+ stw r10,GPR10(r11); \
+ lwz r10,GPR11(r8); \
stw r9,GPR9(r11); \
+ stw r10,GPR11(r11); \
+ b 2f; \
+ /* COMING FROM PRIV MODE */ \
+1: lwz r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r11); \
+ lwz r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r11); \
+ stw r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r8); \
+ stw r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r8); \
+ lwz r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r11); \
+ stw r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r8); \
+ mr r11,r8; \
+2: mfspr r8,SPRN_SPRG_RSCRATCH_##exc_level; \
+ stw r12,GPR12(r11); /* save various registers */\
mflr r10; \
stw r10,_LINK(r11); \
mfspr r12,SPRN_DEAR; /* save DEAR and ESR in the frame */\
@@ -127,12 +167,30 @@
SAVE_4GPRS(3, r11); \
SAVE_2GPRS(7, r11)
-#define CRITICAL_EXCEPTION_PROLOG \
- EXC_LEVEL_EXCEPTION_PROLOG(CRIT, SPRN_CSRR0, SPRN_CSRR1)
+#define CRITICAL_EXCEPTION_PROLOG(intno) \
+ EXC_LEVEL_EXCEPTION_PROLOG(CRIT, intno, SPRN_CSRR0, SPRN_CSRR1)
#define DEBUG_EXCEPTION_PROLOG \
- EXC_LEVEL_EXCEPTION_PROLOG(DEBUG, SPRN_DSRR0, SPRN_DSRR1)
+ EXC_LEVEL_EXCEPTION_PROLOG(DBG, DEBUG, SPRN_DSRR0, SPRN_DSRR1)
#define MCHECK_EXCEPTION_PROLOG \
- EXC_LEVEL_EXCEPTION_PROLOG(MCHECK, SPRN_MCSRR0, SPRN_MCSRR1)
+ EXC_LEVEL_EXCEPTION_PROLOG(MC, MACHINE_CHECK, \
+ SPRN_MCSRR0, SPRN_MCSRR1)
+
+/*
+ * Guest Doorbell -- this is a bit odd in that uses GSRR0/1 despite
+ * being delivered to the host. This exception can only happen
+ * inside a KVM guest -- so we just handle up to the DO_KVM rather
+ * than try to fit this into one of the existing prolog macros.
+ */
+#define GUEST_DOORBELL_EXCEPTION \
+ START_EXCEPTION(GuestDoorbell); \
+ mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \
+ mfspr r10, SPRN_SPRG_THREAD; \
+ stw r11, THREAD_NORMSAVE(0)(r10); \
+ mfspr r11, SPRN_SRR1; \
+ stw r13, THREAD_NORMSAVE(2)(r10); \
+ mfcr r13; /* save CR in r13 for now */\
+ DO_KVM BOOKE_INTERRUPT_GUEST_DBELL SPRN_GSRR1; \
+ trap
/*
* Exception vectors.
@@ -141,21 +199,16 @@
.align 5; \
label:
-#define FINISH_EXCEPTION(func) \
- bl transfer_to_handler_full; \
- .long func; \
- .long ret_from_except_full
-
-#define EXCEPTION(n, label, hdlr, xfer) \
+#define EXCEPTION(n, intno, label, hdlr, xfer) \
START_EXCEPTION(label); \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(intno); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
xfer(n, hdlr)
-#define CRITICAL_EXCEPTION(n, label, hdlr) \
- START_EXCEPTION(label); \
- CRITICAL_EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
+#define CRITICAL_EXCEPTION(n, intno, label, hdlr) \
+ START_EXCEPTION(label); \
+ CRITICAL_EXCEPTION_PROLOG(intno); \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
NOCOPY, crit_transfer_to_handler, \
ret_from_crit_exc)
@@ -166,7 +219,7 @@ label:
mfspr r5,SPRN_ESR; \
stw r5,_ESR(r11); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
+ EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
NOCOPY, mcheck_transfer_to_handler, \
ret_from_mcheck_exc)
@@ -212,9 +265,8 @@ label:
* save (and later restore) the MSR via SPRN_CSRR1, which will still have
* the MSR_DE bit set.
*/
-#ifdef CONFIG_E200
-#define DEBUG_EXCEPTION \
- START_EXCEPTION(Debug); \
+#define DEBUG_DEBUG_EXCEPTION \
+ START_EXCEPTION(DebugDebug); \
DEBUG_EXCEPTION_PROLOG; \
\
/* \
@@ -223,25 +275,25 @@ label:
* the code where the exception occurred (since exception entry \
* doesn't turn off DE automatically). We simulate the effect \
* of turning off DE on entry to an exception handler by turning \
- * off DE in the CSRR1 value and clearing the debug status. \
+ * off DE in the DSRR1 value and clearing the debug status. \
*/ \
mfspr r10,SPRN_DBSR; /* check single-step/branch taken */ \
- andis. r10,r10,DBSR_IC@h; \
+ andis. r10,r10,(DBSR_IC|DBSR_BT)@h; \
beq+ 2f; \
\
- lis r10,KERNELBASE@h; /* check if exception in vectors */ \
- ori r10,r10,KERNELBASE@l; \
+ lis r10,interrupt_base@h; /* check if exception in vectors */ \
+ ori r10,r10,interrupt_base@l; \
cmplw r12,r10; \
blt+ 2f; /* addr below exception vectors */ \
\
- lis r10,Debug@h; \
- ori r10,r10,Debug@l; \
+ lis r10,interrupt_end@h; \
+ ori r10,r10,interrupt_end@l; \
cmplw r12,r10; \
bgt+ 2f; /* addr above exception vectors */ \
\
/* here it looks like we got an inappropriate debug exception. */ \
1: rlwinm r9,r9,0,~MSR_DE; /* clear DE in the CDRR1 value */ \
- lis r10,DBSR_IC@h; /* clear the IC event */ \
+ lis r10,(DBSR_IC|DBSR_BT)@h; /* clear the IC event */ \
mtspr SPRN_DBSR,r10; \
/* restore state and get out */ \
lwz r10,_CCR(r11); \
@@ -252,23 +304,23 @@ label:
mtspr SPRN_DSRR1,r9; \
lwz r9,GPR9(r11); \
lwz r12,GPR12(r11); \
- mtspr DEBUG_SPRG,r8; \
- BOOKE_LOAD_EXC_LEVEL_STACK(DEBUG); /* r8 points to the debug stack */ \
- lwz r10,GPR10-INT_FRAME_SIZE(r8); \
- lwz r11,GPR11-INT_FRAME_SIZE(r8); \
- mfspr r8,DEBUG_SPRG; \
+ mtspr SPRN_SPRG_WSCRATCH_DBG,r8; \
+ BOOKE_LOAD_EXC_LEVEL_STACK(DBG); /* r8 points to the debug stack */ \
+ lwz r10,GPR10(r8); \
+ lwz r11,GPR11(r8); \
+ mfspr r8,SPRN_SPRG_RSCRATCH_DBG; \
\
- RFDI; \
+ PPC_RFDI; \
b .; \
\
- /* continue normal handling for a critical exception... */ \
+ /* continue normal handling for a debug exception... */ \
2: mfspr r4,SPRN_DBSR; \
addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc)
-#else
-#define DEBUG_EXCEPTION \
- START_EXCEPTION(Debug); \
- CRITICAL_EXCEPTION_PROLOG; \
+ EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc)
+
+#define DEBUG_CRIT_EXCEPTION \
+ START_EXCEPTION(DebugCrit); \
+ CRITICAL_EXCEPTION_PROLOG(DEBUG); \
\
/* \
* If there is a single step or branch-taken exception in an \
@@ -279,22 +331,22 @@ label:
* off DE in the CSRR1 value and clearing the debug status. \
*/ \
mfspr r10,SPRN_DBSR; /* check single-step/branch taken */ \
- andis. r10,r10,DBSR_IC@h; \
+ andis. r10,r10,(DBSR_IC|DBSR_BT)@h; \
beq+ 2f; \
\
- lis r10,KERNELBASE@h; /* check if exception in vectors */ \
- ori r10,r10,KERNELBASE@l; \
+ lis r10,interrupt_base@h; /* check if exception in vectors */ \
+ ori r10,r10,interrupt_base@l; \
cmplw r12,r10; \
blt+ 2f; /* addr below exception vectors */ \
\
- lis r10,Debug@h; \
- ori r10,r10,Debug@l; \
+ lis r10,interrupt_end@h; \
+ ori r10,r10,interrupt_end@l; \
cmplw r12,r10; \
bgt+ 2f; /* addr above exception vectors */ \
\
/* here it looks like we got an inappropriate debug exception. */ \
1: rlwinm r9,r9,0,~MSR_DE; /* clear DE in the CSRR1 value */ \
- lis r10,DBSR_IC@h; /* clear the IC event */ \
+ lis r10,(DBSR_IC|DBSR_BT)@h; /* clear the IC event */ \
mtspr SPRN_DBSR,r10; \
/* restore state and get out */ \
lwz r10,_CCR(r11); \
@@ -305,11 +357,11 @@ label:
mtspr SPRN_CSRR1,r9; \
lwz r9,GPR9(r11); \
lwz r12,GPR12(r11); \
- mtspr CRIT_SPRG,r8; \
+ mtspr SPRN_SPRG_WSCRATCH_CRIT,r8; \
BOOKE_LOAD_EXC_LEVEL_STACK(CRIT); /* r8 points to the debug stack */ \
- lwz r10,GPR10-INT_FRAME_SIZE(r8); \
- lwz r11,GPR11-INT_FRAME_SIZE(r8); \
- mfspr r8,CRIT_SPRG; \
+ lwz r10,GPR10(r8); \
+ lwz r11,GPR11(r8); \
+ mfspr r8,SPRN_SPRG_RSCRATCH_CRIT; \
\
rfci; \
b .; \
@@ -318,20 +370,27 @@ label:
2: mfspr r4,SPRN_DBSR; \
addi r3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
-#endif
+
+#define DATA_STORAGE_EXCEPTION \
+ START_EXCEPTION(DataStorage) \
+ NORMAL_EXCEPTION_PROLOG(DATA_STORAGE); \
+ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
+ stw r5,_ESR(r11); \
+ mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \
+ EXC_XFER_LITE(0x0300, handle_page_fault)
#define INSTRUCTION_STORAGE_EXCEPTION \
START_EXCEPTION(InstructionStorage) \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(INST_STORAGE); \
mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
stw r5,_ESR(r11); \
mr r4,r12; /* Pass SRR0 as arg2 */ \
li r5,0; /* Pass zero as arg3 */ \
- EXC_XFER_EE_LITE(0x0400, handle_page_fault)
+ EXC_XFER_LITE(0x0400, handle_page_fault)
#define ALIGNMENT_EXCEPTION \
START_EXCEPTION(Alignment) \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(ALIGNMENT); \
mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \
stw r4,_DEAR(r11); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
@@ -339,7 +398,7 @@ label:
#define PROGRAM_EXCEPTION \
START_EXCEPTION(Program) \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(PROGRAM); \
mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \
stw r4,_ESR(r11); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
@@ -347,7 +406,7 @@ label:
#define DECREMENTER_EXCEPTION \
START_EXCEPTION(Decrementer) \
- NORMAL_EXCEPTION_PROLOG; \
+ NORMAL_EXCEPTION_PROLOG(DECREMENTER); \
lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \
mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \
addi r3,r1,STACK_FRAME_OVERHEAD; \
@@ -355,9 +414,32 @@ label:
#define FP_UNAVAILABLE_EXCEPTION \
START_EXCEPTION(FloatingPointUnavailable) \
- NORMAL_EXCEPTION_PROLOG; \
- bne load_up_fpu; /* if from user, just load it up */ \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
+ NORMAL_EXCEPTION_PROLOG(FP_UNAVAIL); \
+ beq 1f; \
+ bl load_up_fpu; /* if from user, just load it up */ \
+ b fast_exception_return; \
+1: addi r3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception)
+#ifndef __ASSEMBLY__
+struct exception_regs {
+ unsigned long mas0;
+ unsigned long mas1;
+ unsigned long mas2;
+ unsigned long mas3;
+ unsigned long mas6;
+ unsigned long mas7;
+ unsigned long srr0;
+ unsigned long srr1;
+ unsigned long csrr0;
+ unsigned long csrr1;
+ unsigned long dsrr0;
+ unsigned long dsrr1;
+ unsigned long saved_ksp_limit;
+};
+
+/* ensure this structure is always sized to a multiple of the stack alignment */
+#define STACK_EXC_LVL_FRAME_SIZE _ALIGN_UP(sizeof (struct exception_regs), 16)
+
+#endif /* __ASSEMBLY__ */
#endif /* __HEAD_BOOKE_H__ */
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 66877bdfe0b..b497188a94a 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -2,27 +2,27 @@
* Kernel execution entry point code.
*
* Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
- * Initial PowerPC version.
+ * Initial PowerPC version.
* Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu>
- * Rewritten for PReP
+ * Rewritten for PReP
* Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
- * Low-level exception handers, MMU support, and rewrite.
+ * Low-level exception handers, MMU support, and rewrite.
* Copyright (c) 1997 Dan Malek <dmalek@jlc.net>
- * PowerPC 8xx modifications.
+ * PowerPC 8xx modifications.
* Copyright (c) 1998-1999 TiVo, Inc.
- * PowerPC 403GCX modifications.
+ * PowerPC 403GCX modifications.
* Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
- * PowerPC 403GCX/405GP modifications.
+ * PowerPC 403GCX/405GP modifications.
* Copyright 2000 MontaVista Software Inc.
* PPC405 modifications
- * PowerPC 403GCX/405GP modifications.
- * Author: MontaVista Software, Inc.
- * frank_rowand@mvista.com or source@mvista.com
- * debbie_chu@mvista.com
+ * PowerPC 403GCX/405GP modifications.
+ * Author: MontaVista Software, Inc.
+ * frank_rowand@mvista.com or source@mvista.com
+ * debbie_chu@mvista.com
* Copyright 2002-2004 MontaVista Software, Inc.
- * PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
+ * PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
* Copyright 2004 Freescale Semiconductor, Inc
- * PowerPC e500 modifications, Kumar Gala <galak@kernel.crashing.org>
+ * PowerPC e500 modifications, Kumar Gala <galak@kernel.crashing.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
@@ -30,6 +30,7 @@
* option) any later version.
*/
+#include <linux/init.h>
#include <linux/threads.h>
#include <asm/processor.h>
#include <asm/page.h>
@@ -39,6 +40,8 @@
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+#include <asm/ptrace.h>
#include "head_booke.h"
/* As with the other PowerPC ports, it is expected that when code
@@ -52,29 +55,93 @@
* r7 - End of kernel command line string
*
*/
- .text
-_GLOBAL(_stext)
-_GLOBAL(_start)
+ __HEAD
+_ENTRY(_stext);
+_ENTRY(_start);
/*
* Reserve a word at a fixed location to store the address
* of abatron_pteptrs
*/
nop
-/*
- * Save parameters we are passed
- */
- mr r31,r3
- mr r30,r4
- mr r29,r5
- mr r28,r6
- mr r27,r7
- li r24,0 /* CPU number */
+
+ /* Translate device tree address to physical, save in r30/r31 */
+ bl get_phys_addr
+ mr r30,r3
+ mr r31,r4
+
+ li r25,0 /* phys kernel start (low) */
+ li r24,0 /* CPU number */
+ li r23,0 /* phys kernel start (high) */
+
+#ifdef CONFIG_RELOCATABLE
+ LOAD_REG_ADDR_PIC(r3, _stext) /* Get our current runtime base */
+
+ /* Translate _stext address to physical, save in r23/r25 */
+ bl get_phys_addr
+ mr r23,r3
+ mr r25,r4
+
+ bl 0f
+0: mflr r8
+ addis r3,r8,(is_second_reloc - 0b)@ha
+ lwz r19,(is_second_reloc - 0b)@l(r3)
+
+ /* Check if this is the second relocation. */
+ cmpwi r19,1
+ bne 1f
+
+ /*
+ * For the second relocation, we already get the real memstart_addr
+ * from device tree. So we will map PAGE_OFFSET to memstart_addr,
+ * then the virtual address of start kernel should be:
+ * PAGE_OFFSET + (kernstart_addr - memstart_addr)
+ * Since the offset between kernstart_addr and memstart_addr should
+ * never be beyond 1G, so we can just use the lower 32bit of them
+ * for the calculation.
+ */
+ lis r3,PAGE_OFFSET@h
+
+ addis r4,r8,(kernstart_addr - 0b)@ha
+ addi r4,r4,(kernstart_addr - 0b)@l
+ lwz r5,4(r4)
+
+ addis r6,r8,(memstart_addr - 0b)@ha
+ addi r6,r6,(memstart_addr - 0b)@l
+ lwz r7,4(r6)
+
+ subf r5,r7,r5
+ add r3,r3,r5
+ b 2f
+
+1:
+ /*
+ * We have the runtime (virutal) address of our base.
+ * We calculate our shift of offset from a 64M page.
+ * We could map the 64M page we belong to at PAGE_OFFSET and
+ * get going from there.
+ */
+ lis r4,KERNELBASE@h
+ ori r4,r4,KERNELBASE@l
+ rlwinm r6,r25,0,0x3ffffff /* r6 = PHYS_START % 64M */
+ rlwinm r5,r4,0,0x3ffffff /* r5 = KERNELBASE % 64M */
+ subf r3,r5,r6 /* r3 = r6 - r5 */
+ add r3,r4,r3 /* Required Virtual Address */
+
+2: bl relocate
+
+ /*
+ * For the second relocation, we already set the right tlb entries
+ * for the kernel space, so skip the code in fsl_booke_entry_mapping.S
+ */
+ cmpwi r19,1
+ beq set_ivor
+#endif
/* We try to not make any assumptions about how the boot loader
* setup or used the TLBs. We invalidate all mappings from the
* boot loader and load a single entry in TLB1[0] to map the
- * first 16M of kernel memory. Any boot info passed from the
- * bootloader needs to live in this first 16M.
+ * first 64M of kernel memory. Any boot info passed from the
+ * bootloader needs to live in this first 64M.
*
* Requirement on bootloader:
* - The page we're executing in needs to reside in TLB1 and
@@ -89,175 +156,13 @@ _GLOBAL(_start)
* if needed
*/
-/* 1. Find the index of the entry we're executing in */
- bl invstr /* Find our address */
-invstr: mflr r6 /* Make it accessible */
- mfmsr r7
- rlwinm r4,r7,27,31,31 /* extract MSR[IS] */
- mfspr r7, SPRN_PID0
- slwi r7,r7,16
- or r7,r7,r4
- mtspr SPRN_MAS6,r7
- tlbsx 0,r6 /* search MSR[IS], SPID=PID0 */
-#ifndef CONFIG_E200
- mfspr r7,SPRN_MAS1
- andis. r7,r7,MAS1_VALID@h
- bne match_TLB
- mfspr r7,SPRN_PID1
- slwi r7,r7,16
- or r7,r7,r4
- mtspr SPRN_MAS6,r7
- tlbsx 0,r6 /* search MSR[IS], SPID=PID1 */
- mfspr r7,SPRN_MAS1
- andis. r7,r7,MAS1_VALID@h
- bne match_TLB
- mfspr r7, SPRN_PID2
- slwi r7,r7,16
- or r7,r7,r4
- mtspr SPRN_MAS6,r7
- tlbsx 0,r6 /* Fall through, we had to match */
-#endif
-match_TLB:
- mfspr r7,SPRN_MAS0
- rlwinm r3,r7,16,20,31 /* Extract MAS0(Entry) */
-
- mfspr r7,SPRN_MAS1 /* Insure IPROT set */
- oris r7,r7,MAS1_IPROT@h
- mtspr SPRN_MAS1,r7
- tlbwe
-
-/* 2. Invalidate all entries except the entry we're executing in */
- mfspr r9,SPRN_TLB1CFG
- andi. r9,r9,0xfff
- li r6,0 /* Set Entry counter to 0 */
-1: lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */
- rlwimi r7,r6,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r6) */
- mtspr SPRN_MAS0,r7
- tlbre
- mfspr r7,SPRN_MAS1
- rlwinm r7,r7,0,2,31 /* Clear MAS1 Valid and IPROT */
- cmpw r3,r6
- beq skpinv /* Dont update the current execution TLB */
- mtspr SPRN_MAS1,r7
- tlbwe
- isync
-skpinv: addi r6,r6,1 /* Increment */
- cmpw r6,r9 /* Are we done? */
- bne 1b /* If not, repeat */
-
- /* Invalidate TLB0 */
- li r6,0x04
- tlbivax 0,r6
-#ifdef CONFIG_SMP
- tlbsync
-#endif
- /* Invalidate TLB1 */
- li r6,0x0c
- tlbivax 0,r6
-#ifdef CONFIG_SMP
- tlbsync
-#endif
- msync
-
-/* 3. Setup a temp mapping and jump to it */
- andi. r5, r3, 0x1 /* Find an entry not used and is non-zero */
- addi r5, r5, 0x1
- lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */
- rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */
- mtspr SPRN_MAS0,r7
- tlbre
-
- /* Just modify the entry ID and EPN for the temp mapping */
- lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */
- rlwimi r7,r5,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r5) */
- mtspr SPRN_MAS0,r7
- xori r6,r4,1 /* Setup TMP mapping in the other Address space */
- slwi r6,r6,12
- oris r6,r6,(MAS1_VALID|MAS1_IPROT)@h
- ori r6,r6,(MAS1_TSIZE(BOOKE_PAGESZ_4K))@l
- mtspr SPRN_MAS1,r6
- mfspr r6,SPRN_MAS2
- li r7,0 /* temp EPN = 0 */
- rlwimi r7,r6,0,20,31
- mtspr SPRN_MAS2,r7
- tlbwe
-
- xori r6,r4,1
- slwi r6,r6,5 /* setup new context with other address space */
- bl 1f /* Find our address */
-1: mflr r9
- rlwimi r7,r9,0,20,31
- addi r7,r7,24
- mtspr SPRN_SRR0,r7
- mtspr SPRN_SRR1,r6
- rfi
+_ENTRY(__early_start)
-/* 4. Clear out PIDs & Search info */
- li r6,0
- mtspr SPRN_PID0,r6
-#ifndef CONFIG_E200
- mtspr SPRN_PID1,r6
- mtspr SPRN_PID2,r6
-#endif
- mtspr SPRN_MAS6,r6
-
-/* 5. Invalidate mapping we started in */
- lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */
- rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */
- mtspr SPRN_MAS0,r7
- tlbre
- li r6,0
- mtspr SPRN_MAS1,r6
- tlbwe
- /* Invalidate TLB1 */
- li r9,0x0c
- tlbivax 0,r9
-#ifdef CONFIG_SMP
- tlbsync
-#endif
- msync
-
-/* 6. Setup KERNELBASE mapping in TLB1[0] */
- lis r6,0x1000 /* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */
- mtspr SPRN_MAS0,r6
- lis r6,(MAS1_VALID|MAS1_IPROT)@h
- ori r6,r6,(MAS1_TSIZE(BOOKE_PAGESZ_16M))@l
- mtspr SPRN_MAS1,r6
- li r7,0
- lis r6,KERNELBASE@h
- ori r6,r6,KERNELBASE@l
- rlwimi r6,r7,0,20,31
- mtspr SPRN_MAS2,r6
- li r7,(MAS3_SX|MAS3_SW|MAS3_SR)
- mtspr SPRN_MAS3,r7
- tlbwe
-
-/* 7. Jump to KERNELBASE mapping */
- lis r7,MSR_KERNEL@h
- ori r7,r7,MSR_KERNEL@l
- bl 1f /* Find our address */
-1: mflr r9
- rlwimi r6,r9,0,20,31
- addi r6,r6,24
- mtspr SPRN_SRR0,r6
- mtspr SPRN_SRR1,r7
- rfi /* start execution out of TLB1[0] entry */
-
-/* 8. Clear out the temp mapping */
- lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */
- rlwimi r7,r5,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r5) */
- mtspr SPRN_MAS0,r7
- tlbre
- mtspr SPRN_MAS1,r8
- tlbwe
- /* Invalidate TLB1 */
- li r9,0x0c
- tlbivax 0,r9
-#ifdef CONFIG_SMP
- tlbsync
-#endif
- msync
+#define ENTRY_MAPPING_BOOT_SETUP
+#include "fsl_booke_entry_mapping.S"
+#undef ENTRY_MAPPING_BOOT_SETUP
+set_ivor:
/* Establish the interrupt vector offsets */
SET_IVOR(0, CriticalInput);
SET_IVOR(1, MachineCheck);
@@ -274,24 +179,18 @@ skpinv: addi r6,r6,1 /* Increment */
SET_IVOR(12, WatchdogTimer);
SET_IVOR(13, DataTLBError);
SET_IVOR(14, InstructionTLBError);
- SET_IVOR(15, Debug);
- SET_IVOR(32, SPEUnavailable);
- SET_IVOR(33, SPEFloatingPointData);
- SET_IVOR(34, SPEFloatingPointRound);
-#ifndef CONFIG_E200
- SET_IVOR(35, PerformanceMonitor);
-#endif
+ SET_IVOR(15, DebugCrit);
/* Establish the interrupt vector base */
lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */
mtspr SPRN_IVPR,r4
/* Setup the defaults for TLB entries */
- li r2,(MAS4_TSIZED(BOOKE_PAGESZ_4K))@l
+ li r2,(MAS4_TSIZED(BOOK3E_PAGESZ_4K))@l
#ifdef CONFIG_E200
oris r2,r2,MAS4_TLBSELD(1)@h
#endif
- mtspr SPRN_MAS4, r2
+ mtspr SPRN_MAS4, r2
#if 0
/* Enable DOZE */
@@ -299,12 +198,6 @@ skpinv: addi r6,r6,1 /* Increment */
oris r2,r2,HID0_DOZE@h
mtspr SPRN_HID0, r2
#endif
-#ifdef CONFIG_E200
- /* enable dedicated debug exception handling resources (Debug APU) */
- mfspr r2,SPRN_HID0
- ori r2,r2,HID0_DAPUEN@l
- mtspr SPRN_HID0,r2
-#endif
#if !defined(CONFIG_BDI_SWITCH)
/*
@@ -319,6 +212,17 @@ skpinv: addi r6,r6,1 /* Increment */
mtspr SPRN_DBSR,r2
#endif
+#ifdef CONFIG_SMP
+ /* Check to see if we're the second processor, and jump
+ * to the secondary_start code if so
+ */
+ LOAD_REG_ADDR_PIC(r24, boot_cpuid)
+ lwz r24, 0(r24)
+ cmpwi r24, -1
+ mfspr r24,SPRN_PIR
+ bne __secondary_start
+#endif
+
/*
* This is where the main kernel code starts.
*/
@@ -329,7 +233,7 @@ skpinv: addi r6,r6,1 /* Increment */
/* ptr to current thread */
addi r4,r2,THREAD /* init task's THREAD */
- mtspr SPRN_SPRG3,r4
+ mtspr SPRN_SPRG_THREAD,r4
/* stack */
lis r1,init_thread_union@h
@@ -337,20 +241,39 @@ skpinv: addi r6,r6,1 /* Increment */
li r0,0
stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+ CURRENT_THREAD_INFO(r22, r1)
+ stw r24, TI_CPU(r22)
+
bl early_init
- mfspr r3,SPRN_TLB1CFG
- andi. r3,r3,0xfff
- lis r4,num_tlbcam_entries@ha
- stw r3,num_tlbcam_entries@l(r4)
+#ifdef CONFIG_RELOCATABLE
+ mr r3,r30
+ mr r4,r31
+#ifdef CONFIG_PHYS_64BIT
+ mr r5,r23
+ mr r6,r25
+#else
+ mr r5,r25
+#endif
+ bl relocate_init
+#endif
+
+#ifdef CONFIG_DYNAMIC_MEMSTART
+ lis r3,kernstart_addr@ha
+ la r3,kernstart_addr@l(r3)
+#ifdef CONFIG_PHYS_64BIT
+ stw r23,0(r3)
+ stw r25,4(r3)
+#else
+ stw r25,0(r3)
+#endif
+#endif
+
/*
* Decide what sort of machine this is and initialize the MMU.
*/
- mr r3,r31
- mr r4,r30
- mr r5,r29
- mr r6,r28
- mr r7,r27
+ mr r3,r30
+ mr r4,r31
bl machine_init
bl MMU_init
@@ -384,18 +307,33 @@ skpinv: addi r6,r6,1 /* Increment */
* if we find the pte (fall through):
* r11 is low pte word
* r12 is pointer to the pte
+ * r10 is the pshift from the PGD, if we're a hugepage
*/
#ifdef CONFIG_PTE_64BIT
-#define PTE_FLAGS_OFFSET 4
+#ifdef CONFIG_HUGETLB_PAGE
+#define FIND_PTE \
+ rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \
+ lwzx r11, r12, r11; /* Get pgd/pmd entry */ \
+ rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \
+ blt 1000f; /* Normal non-huge page */ \
+ beq 2f; /* Bail if no table */ \
+ oris r11, r11, PD_HUGE@h; /* Put back address bit */ \
+ andi. r10, r11, HUGEPD_SHIFT_MASK@l; /* extract size field */ \
+ xor r12, r10, r11; /* drop size bits from pointer */ \
+ b 1001f; \
+1000: rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \
+ li r10, 0; /* clear r10 */ \
+1001: lwz r11, 4(r12); /* Get pte entry */
+#else
#define FIND_PTE \
- rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \
+ rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \
lwzx r11, r12, r11; /* Get pgd/pmd entry */ \
rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \
beq 2f; /* Bail if no table */ \
rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \
lwz r11, 4(r12); /* Get pte entry */
-#else
-#define PTE_FLAGS_OFFSET 0
+#endif /* HUGEPAGE */
+#else /* !PTE_64BIT */
#define FIND_PTE \
rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \
lwz r11, 0(r11); /* Get L1 entry */ \
@@ -424,109 +362,35 @@ skpinv: addi r6,r6,1 /* Increment */
interrupt_base:
/* Critical Input Interrupt */
- CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception)
+ CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception)
/* Machine Check Interrupt */
#ifdef CONFIG_E200
/* no RFMCI, MCSRRs on E200 */
- CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
+ CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \
+ machine_check_exception)
#else
MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
#endif
/* Data Storage Interrupt */
START_EXCEPTION(DataStorage)
- mtspr SPRN_SPRG0, r10 /* Save some working registers */
- mtspr SPRN_SPRG1, r11
- mtspr SPRN_SPRG4W, r12
- mtspr SPRN_SPRG5W, r13
- mfcr r11
- mtspr SPRN_SPRG7W, r11
-
- /*
- * Check if it was a store fault, if not then bail
- * because a user tried to access a kernel or
- * read-protected page. Otherwise, get the
- * offending address and handle it.
- */
- mfspr r10, SPRN_ESR
- andis. r10, r10, ESR_ST@h
- beq 2f
-
- mfspr r10, SPRN_DEAR /* Get faulting address */
-
- /* If we are faulting a kernel address, we have to use the
- * kernel page tables.
- */
- lis r11, TASK_SIZE@h
- ori r11, r11, TASK_SIZE@l
- cmplw 0, r10, r11
- bge 2f
-
- /* Get the PGD for the current thread */
-3:
- mfspr r11,SPRN_SPRG3
- lwz r11,PGDIR(r11)
-4:
- FIND_PTE
-
- /* Are _PAGE_USER & _PAGE_RW set & _PAGE_HWWRITE not? */
- andi. r13, r11, _PAGE_RW|_PAGE_USER|_PAGE_HWWRITE
- cmpwi 0, r13, _PAGE_RW|_PAGE_USER
- bne 2f /* Bail if not */
-
- /* Update 'changed'. */
- ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
- stw r11, PTE_FLAGS_OFFSET(r12) /* Update Linux page table */
-
- /* MAS2 not updated as the entry does exist in the tlb, this
- fault taken to detect state transition (eg: COW -> DIRTY)
- */
- andi. r11, r11, _PAGE_HWEXEC
- rlwimi r11, r11, 31, 27, 27 /* SX <- _PAGE_HWEXEC */
- ori r11, r11, (MAS3_UW|MAS3_SW|MAS3_UR|MAS3_SR)@l /* set static perms */
-
- /* update search PID in MAS6, AS = 0 */
- mfspr r12, SPRN_PID0
- slwi r12, r12, 16
- mtspr SPRN_MAS6, r12
-
- /* find the TLB index that caused the fault. It has to be here. */
- tlbsx 0, r10
-
- /* only update the perm bits, assume the RPN is fine */
- mfspr r12, SPRN_MAS3
- rlwimi r12, r11, 0, 20, 31
- mtspr SPRN_MAS3,r12
- tlbwe
-
- /* Done...restore registers and get out of here. */
- mfspr r11, SPRN_SPRG7R
- mtcr r11
- mfspr r13, SPRN_SPRG5R
- mfspr r12, SPRN_SPRG4R
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
- rfi /* Force context change */
-
-2:
- /*
- * The bailout. Restore registers to pre-exception conditions
- * and call the heavyweights to help us out.
- */
- mfspr r11, SPRN_SPRG7R
- mtcr r11
- mfspr r13, SPRN_SPRG5R
- mfspr r12, SPRN_SPRG4R
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
- b data_access
+ NORMAL_EXCEPTION_PROLOG(DATA_STORAGE)
+ mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
+ stw r5,_ESR(r11)
+ mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
+ andis. r10,r5,(ESR_ILK|ESR_DLK)@h
+ bne 1f
+ EXC_XFER_LITE(0x0300, handle_page_fault)
+1:
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_EE_LITE(0x0300, CacheLockingException)
/* Instruction Storage Interrupt */
INSTRUCTION_STORAGE_EXCEPTION
/* External Input Interrupt */
- EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ, EXC_XFER_LITE)
/* Alignment Interrupt */
ALIGNMENT_EXCEPTION
@@ -540,49 +404,59 @@ interrupt_base:
#else
#ifdef CONFIG_E200
/* E200 treats 'normal' floating point instructions as FP Unavail exception */
- EXCEPTION(0x0800, FloatingPointUnavailable, program_check_exception, EXC_XFER_EE)
+ EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
+ program_check_exception, EXC_XFER_EE)
#else
- EXCEPTION(0x0800, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
+ unknown_exception, EXC_XFER_EE)
#endif
#endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- NORMAL_EXCEPTION_PROLOG
+ NORMAL_EXCEPTION_PROLOG(SYSCALL)
EXC_XFER_EE_LITE(0x0c00, DoSyscall)
- /* Auxillary Processor Unavailable Interrupt */
- EXCEPTION(0x2900, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
+ /* Auxiliary Processor Unavailable Interrupt */
+ EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
+ unknown_exception, EXC_XFER_EE)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
- EXCEPTION(0x3100, FixedIntervalTimer, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x3100, FIT, FixedIntervalTimer, \
+ unknown_exception, EXC_XFER_EE)
/* Watchdog Timer Interrupt */
#ifdef CONFIG_BOOKE_WDT
- CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException)
+ CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, WatchdogException)
#else
- CRITICAL_EXCEPTION(0x3200, WatchdogTimer, unknown_exception)
+ CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, unknown_exception)
#endif
/* Data TLB Error Interrupt */
START_EXCEPTION(DataTLBError)
- mtspr SPRN_SPRG0, r10 /* Save some working registers */
- mtspr SPRN_SPRG1, r11
- mtspr SPRN_SPRG4W, r12
- mtspr SPRN_SPRG5W, r13
- mfcr r11
- mtspr SPRN_SPRG7W, r11
+ mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
+ mfspr r10, SPRN_SPRG_THREAD
+ stw r11, THREAD_NORMSAVE(0)(r10)
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+ mfspr r11, SPRN_SRR1
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+#endif
+ stw r12, THREAD_NORMSAVE(1)(r10)
+ stw r13, THREAD_NORMSAVE(2)(r10)
+ mfcr r13
+ stw r13, THREAD_NORMSAVE(3)(r10)
+ DO_KVM BOOKE_INTERRUPT_DTLB_MISS SPRN_SRR1
mfspr r10, SPRN_DEAR /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
*/
- lis r11, TASK_SIZE@h
- ori r11, r11, TASK_SIZE@l
+ lis r11, PAGE_OFFSET@h
cmplw 5, r10, r11
blt 5, 3f
lis r11, swapper_pg_dir@h
@@ -596,33 +470,60 @@ interrupt_base:
/* Get the PGD for the current thread */
3:
- mfspr r11,SPRN_SPRG3
+ mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
4:
+ /* Mask of required permission bits. Note that while we
+ * do copy ESR:ST to _PAGE_RW position as trying to write
+ * to an RO page is pretty common, we don't do it with
+ * _PAGE_DIRTY. We could do it, but it's a fairly rare
+ * event so I'd rather take the overhead when it happens
+ * rather than adding an instruction here. We should measure
+ * whether the whole thing is worth it in the first place
+ * as we could avoid loading SPRN_ESR completely in the first
+ * place...
+ *
+ * TODO: Is it worth doing that mfspr & rlwimi in the first
+ * place or can we save a couple of instructions here ?
+ */
+ mfspr r12,SPRN_ESR
+#ifdef CONFIG_PTE_64BIT
+ li r13,_PAGE_PRESENT
+ oris r13,r13,_PAGE_ACCESSED@h
+#else
+ li r13,_PAGE_PRESENT|_PAGE_ACCESSED
+#endif
+ rlwimi r13,r12,11,29,29
+
FIND_PTE
- andi. r13, r11, _PAGE_PRESENT /* Is the page present? */
- beq 2f /* Bail if not present */
+ andc. r13,r13,r11 /* Check permission */
#ifdef CONFIG_PTE_64BIT
- lwz r13, 0(r12)
+#ifdef CONFIG_SMP
+ subf r13,r11,r12 /* create false data dep */
+ lwzx r13,r11,r13 /* Get upper pte bits */
+#else
+ lwz r13,0(r12) /* Get upper pte bits */
#endif
- ori r11, r11, _PAGE_ACCESSED
- stw r11, PTE_FLAGS_OFFSET(r12)
+#endif
+
+ bne 2f /* Bail if permission/valid mismach */
- /* Jump to common tlb load */
+ /* Jump to common tlb load */
b finish_tlb_load
2:
/* The bailout. Restore registers to pre-exception conditions
* and call the heavyweights to help us out.
*/
- mfspr r11, SPRN_SPRG7R
+ mfspr r10, SPRN_SPRG_THREAD
+ lwz r11, THREAD_NORMSAVE(3)(r10)
mtcr r11
- mfspr r13, SPRN_SPRG5R
- mfspr r12, SPRN_SPRG4R
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
- b data_access
+ lwz r13, THREAD_NORMSAVE(2)(r10)
+ lwz r12, THREAD_NORMSAVE(1)(r10)
+ lwz r11, THREAD_NORMSAVE(0)(r10)
+ mfspr r10, SPRN_SPRG_RSCRATCH0
+ b DataStorage
/* Instruction TLB Error Interrupt */
/*
@@ -631,19 +532,25 @@ interrupt_base:
* to a different point.
*/
START_EXCEPTION(InstructionTLBError)
- mtspr SPRN_SPRG0, r10 /* Save some working registers */
- mtspr SPRN_SPRG1, r11
- mtspr SPRN_SPRG4W, r12
- mtspr SPRN_SPRG5W, r13
- mfcr r11
- mtspr SPRN_SPRG7W, r11
+ mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */
+ mfspr r10, SPRN_SPRG_THREAD
+ stw r11, THREAD_NORMSAVE(0)(r10)
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+ mfspr r11, SPRN_SRR1
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+#endif
+ stw r12, THREAD_NORMSAVE(1)(r10)
+ stw r13, THREAD_NORMSAVE(2)(r10)
+ mfcr r13
+ stw r13, THREAD_NORMSAVE(3)(r10)
+ DO_KVM BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR1
mfspr r10, SPRN_SRR0 /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
*/
- lis r11, TASK_SIZE@h
- ori r11, r11, TASK_SIZE@l
+ lis r11, PAGE_OFFSET@h
cmplw 5, r10, r11
blt 5, 3f
lis r11, swapper_pg_dir@h
@@ -653,23 +560,42 @@ interrupt_base:
rlwinm r12,r12,0,16,1
mtspr SPRN_MAS1,r12
+ /* Make up the required permissions for kernel code */
+#ifdef CONFIG_PTE_64BIT
+ li r13,_PAGE_PRESENT | _PAGE_BAP_SX
+ oris r13,r13,_PAGE_ACCESSED@h
+#else
+ li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+#endif
b 4f
/* Get the PGD for the current thread */
3:
- mfspr r11,SPRN_SPRG3
+ mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+ /* Make up the required permissions for user code */
+#ifdef CONFIG_PTE_64BIT
+ li r13,_PAGE_PRESENT | _PAGE_BAP_UX
+ oris r13,r13,_PAGE_ACCESSED@h
+#else
+ li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+#endif
+
4:
FIND_PTE
- andi. r13, r11, _PAGE_PRESENT /* Is the page present? */
- beq 2f /* Bail if not present */
+ andc. r13,r13,r11 /* Check permission */
#ifdef CONFIG_PTE_64BIT
- lwz r13, 0(r12)
+#ifdef CONFIG_SMP
+ subf r13,r11,r12 /* create false data dep */
+ lwzx r13,r11,r13 /* Get upper pte bits */
+#else
+ lwz r13,0(r12) /* Get upper pte bits */
+#endif
#endif
- ori r11, r11, _PAGE_ACCESSED
- stw r11, PTE_FLAGS_OFFSET(r12)
+
+ bne 2f /* Bail if permission mismach */
/* Jump to common TLB load point */
b finish_tlb_load
@@ -678,116 +604,196 @@ interrupt_base:
/* The bailout. Restore registers to pre-exception conditions
* and call the heavyweights to help us out.
*/
- mfspr r11, SPRN_SPRG7R
+ mfspr r10, SPRN_SPRG_THREAD
+ lwz r11, THREAD_NORMSAVE(3)(r10)
mtcr r11
- mfspr r13, SPRN_SPRG5R
- mfspr r12, SPRN_SPRG4R
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
+ lwz r13, THREAD_NORMSAVE(2)(r10)
+ lwz r12, THREAD_NORMSAVE(1)(r10)
+ lwz r11, THREAD_NORMSAVE(0)(r10)
+ mfspr r10, SPRN_SPRG_RSCRATCH0
b InstructionStorage
#ifdef CONFIG_SPE
/* SPE Unavailable */
START_EXCEPTION(SPEUnavailable)
- NORMAL_EXCEPTION_PROLOG
- bne load_up_spe
- addi r3,r1,STACK_FRAME_OVERHEAD
+ NORMAL_EXCEPTION_PROLOG(SPE_ALTIVEC_UNAVAIL)
+ beq 1f
+ bl load_up_spe
+ b fast_exception_return
+1: addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_EE_LITE(0x2010, KernelSPE)
#else
- EXCEPTION(0x2020, SPEUnavailable, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2020, SPE_ALTIVEC_UNAVAIL, SPEUnavailable, \
+ unknown_exception, EXC_XFER_EE)
#endif /* CONFIG_SPE */
/* SPE Floating Point Data */
#ifdef CONFIG_SPE
- EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE);
-#else
- EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception, EXC_XFER_EE)
-#endif /* CONFIG_SPE */
+ EXCEPTION(0x2030, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData,
+ SPEFloatingPointException, EXC_XFER_EE)
/* SPE Floating Point Round */
- EXCEPTION(0x2050, SPEFloatingPointRound, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
+ SPEFloatingPointRoundException, EXC_XFER_EE)
+#else
+ EXCEPTION(0x2040, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData,
+ unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
+ unknown_exception, EXC_XFER_EE)
+#endif /* CONFIG_SPE */
/* Performance Monitor */
- EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD)
+ EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \
+ performance_monitor_exception, EXC_XFER_STD)
+ EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception, EXC_XFER_STD)
+
+ CRITICAL_EXCEPTION(0x2080, DOORBELL_CRITICAL, \
+ CriticalDoorbell, unknown_exception)
/* Debug Interrupt */
- DEBUG_EXCEPTION
+ DEBUG_DEBUG_EXCEPTION
+ DEBUG_CRIT_EXCEPTION
+
+ GUEST_DOORBELL_EXCEPTION
+
+ CRITICAL_EXCEPTION(0, GUEST_DBELL_CRIT, CriticalGuestDoorbell, \
+ unknown_exception)
+
+ /* Hypercall */
+ EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_EE)
+
+ /* Embedded Hypervisor Privilege */
+ EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_EE)
+
+interrupt_end:
/*
* Local functions
*/
- /*
- * Data TLB exceptions will bail out to this point
- * if they can't resolve the lightweight TLB fault.
- */
-data_access:
- NORMAL_EXCEPTION_PROLOG
- mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
- stw r5,_ESR(r11)
- mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
- andis. r10,r5,(ESR_ILK|ESR_DLK)@h
- bne 1f
- EXC_XFER_EE_LITE(0x0300, handle_page_fault)
-1:
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE_LITE(0x0300, CacheLockingException)
-
/*
-
* Both the instruction and data TLB miss get to this
* point to load the TLB.
- * r10 - EA of fault
- * r11 - TLB (info from Linux PTE)
- * r12, r13 - available to use
- * CR5 - results of addr < TASK_SIZE
+ * r10 - tsize encoding (if HUGETLB_PAGE) or available to use
+ * r11 - TLB (info from Linux PTE)
+ * r12 - available to use
+ * r13 - upper bits of PTE (if PTE_64BIT) or available to use
+ * CR5 - results of addr >= PAGE_OFFSET
* MAS0, MAS1 - loaded with proper value when we get here
* MAS2, MAS3 - will need additional info from Linux PTE
* Upon exit, we reload everything and RFI.
*/
finish_tlb_load:
+#ifdef CONFIG_HUGETLB_PAGE
+ cmpwi 6, r10, 0 /* check for huge page */
+ beq 6, finish_tlb_load_cont /* !huge */
+
+ /* Alas, we need more scratch registers for hugepages */
+ mfspr r12, SPRN_SPRG_THREAD
+ stw r14, THREAD_NORMSAVE(4)(r12)
+ stw r15, THREAD_NORMSAVE(5)(r12)
+ stw r16, THREAD_NORMSAVE(6)(r12)
+ stw r17, THREAD_NORMSAVE(7)(r12)
+
+ /* Get the next_tlbcam_idx percpu var */
+#ifdef CONFIG_SMP
+ lwz r12, THREAD_INFO-THREAD(r12)
+ lwz r15, TI_CPU(r12)
+ lis r14, __per_cpu_offset@h
+ ori r14, r14, __per_cpu_offset@l
+ rlwinm r15, r15, 2, 0, 29
+ lwzx r16, r14, r15
+#else
+ li r16, 0
+#endif
+ lis r17, next_tlbcam_idx@h
+ ori r17, r17, next_tlbcam_idx@l
+ add r17, r17, r16 /* r17 = *next_tlbcam_idx */
+ lwz r15, 0(r17) /* r15 = next_tlbcam_idx */
+
+ lis r14, MAS0_TLBSEL(1)@h /* select TLB1 (TLBCAM) */
+ rlwimi r14, r15, 16, 4, 15 /* next_tlbcam_idx entry */
+ mtspr SPRN_MAS0, r14
+
+ /* Extract TLB1CFG(NENTRY) */
+ mfspr r16, SPRN_TLB1CFG
+ andi. r16, r16, 0xfff
+
+ /* Update next_tlbcam_idx, wrapping when necessary */
+ addi r15, r15, 1
+ cmpw r15, r16
+ blt 100f
+ lis r14, tlbcam_index@h
+ ori r14, r14, tlbcam_index@l
+ lwz r15, 0(r14)
+100: stw r15, 0(r17)
+
+ /*
+ * Calc MAS1_TSIZE from r10 (which has pshift encoded)
+ * tlb_enc = (pshift - 10).
+ */
+ subi r15, r10, 10
+ mfspr r16, SPRN_MAS1
+ rlwimi r16, r15, 7, 20, 24
+ mtspr SPRN_MAS1, r16
+
+ /* copy the pshift for use later */
+ mr r14, r10
+
+ /* fall through */
+
+#endif /* CONFIG_HUGETLB_PAGE */
+
/*
* We set execute, because we don't have the granularity to
* properly set this at the page level (Linux problem).
* Many of these bits are software only. Bits we don't set
* here we (properly should) assume have the appropriate value.
*/
-
- mfspr r12, SPRN_MAS2
+finish_tlb_load_cont:
#ifdef CONFIG_PTE_64BIT
- rlwimi r12, r11, 26, 24, 31 /* extract ...WIMGE from pte */
+ rlwinm r12, r11, 32-2, 26, 31 /* Move in perm bits */
+ andi. r10, r11, _PAGE_DIRTY
+ bne 1f
+ li r10, MAS3_SW | MAS3_UW
+ andc r12, r12, r10
+1: rlwimi r12, r13, 20, 0, 11 /* grab RPN[32:43] */
+ rlwimi r12, r11, 20, 12, 19 /* grab RPN[44:51] */
+2: mtspr SPRN_MAS3, r12
+BEGIN_MMU_FTR_SECTION
+ srwi r10, r13, 12 /* grab RPN[12:31] */
+ mtspr SPRN_MAS7, r10
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
#else
- rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */
-#endif
- mtspr SPRN_MAS2, r12
-
- bge 5, 1f
-
- /* is user addr */
- andi. r12, r11, (_PAGE_USER | _PAGE_HWWRITE | _PAGE_HWEXEC)
+ li r10, (_PAGE_EXEC | _PAGE_PRESENT)
+ mr r13, r11
+ rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */
+ and r12, r11, r10
andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */
- srwi r10, r12, 1
- or r12, r12, r10 /* Copy user perms into supervisor */
- iseleq r12, 0, r12
- b 2f
-
- /* is kernel addr */
-1: rlwinm r12, r11, 31, 29, 29 /* Extract _PAGE_HWWRITE into SW */
- ori r12, r12, (MAS3_SX | MAS3_SR)
+ slwi r10, r12, 1
+ or r10, r10, r12
+ iseleq r12, r12, r10
+ rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */
+ mtspr SPRN_MAS3, r13
+#endif
+ mfspr r12, SPRN_MAS2
#ifdef CONFIG_PTE_64BIT
-2: rlwimi r12, r13, 24, 0, 7 /* grab RPN[32:39] */
- rlwimi r12, r11, 24, 8, 19 /* grab RPN[40:51] */
- mtspr SPRN_MAS3, r12
-BEGIN_FTR_SECTION
- srwi r10, r13, 8 /* grab RPN[8:31] */
- mtspr SPRN_MAS7, r10
-END_FTR_SECTION_IFSET(CPU_FTR_BIG_PHYS)
+ rlwimi r12, r11, 32-19, 27, 31 /* extract WIMGE from pte */
#else
-2: rlwimi r11, r12, 0, 20, 31 /* Extract RPN from PTE and merge with perms */
- mtspr SPRN_MAS3, r11
+ rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */
#endif
+#ifdef CONFIG_HUGETLB_PAGE
+ beq 6, 3f /* don't mask if page isn't huge */
+ li r13, 1
+ slw r13, r13, r14
+ subi r13, r13, 1
+ rlwinm r13, r13, 0, 0, 19 /* bottom bits used for WIMGE/etc */
+ andc r12, r12, r13 /* mask off ea bits within the page */
+#endif
+3: mtspr SPRN_MAS2, r12
+
#ifdef CONFIG_E200
/* Round robin TLB1 entries assignment */
mfspr r12, SPRN_MAS0
@@ -810,25 +816,34 @@ END_FTR_SECTION_IFSET(CPU_FTR_BIG_PHYS)
lwz r13, tlbcam_index@l(r13)
rlwimi r12, r13, 0, 20, 31
7:
- mtspr SPRN_MAS0,r12
+ mtspr SPRN_MAS0,r12
#endif /* CONFIG_E200 */
+tlb_write_entry:
tlbwe
/* Done...restore registers and get out of here. */
- mfspr r11, SPRN_SPRG7R
+ mfspr r10, SPRN_SPRG_THREAD
+#ifdef CONFIG_HUGETLB_PAGE
+ beq 6, 8f /* skip restore for 4k page faults */
+ lwz r14, THREAD_NORMSAVE(4)(r10)
+ lwz r15, THREAD_NORMSAVE(5)(r10)
+ lwz r16, THREAD_NORMSAVE(6)(r10)
+ lwz r17, THREAD_NORMSAVE(7)(r10)
+#endif
+8: lwz r11, THREAD_NORMSAVE(3)(r10)
mtcr r11
- mfspr r13, SPRN_SPRG5R
- mfspr r12, SPRN_SPRG4R
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
+ lwz r13, THREAD_NORMSAVE(2)(r10)
+ lwz r12, THREAD_NORMSAVE(1)(r10)
+ lwz r11, THREAD_NORMSAVE(0)(r10)
+ mfspr r10, SPRN_SPRG_RSCRATCH0
rfi /* Force context change */
#ifdef CONFIG_SPE
/* Note that the SPE support is closely modeled after the AltiVec
* support. Changes to one are likely to be applicable to the
* other! */
-load_up_spe:
+_GLOBAL(load_up_spe)
/*
* Disable SPE for the task which had SPE previously,
* and save its SPE registers in its thread_struct.
@@ -851,46 +866,32 @@ load_up_spe:
cmpi 0,r4,0
beq 1f
addi r4,r4,THREAD /* want THREAD of last_task_used_spe */
- SAVE_32EVRS(0,r10,r4)
- evxor evr10, evr10, evr10 /* clear out evr10 */
+ SAVE_32EVRS(0,r10,r4,THREAD_EVR0)
+ evxor evr10, evr10, evr10 /* clear out evr10 */
evmwumiaa evr10, evr10, evr10 /* evr10 <- ACC = 0 * 0 + ACC */
li r5,THREAD_ACC
- evstddx evr10, r4, r5 /* save off accumulator */
+ evstddx evr10, r4, r5 /* save off accumulator */
lwz r5,PT_REGS(r4)
lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5)
lis r10,MSR_SPE@h
andc r4,r4,r10 /* disable SPE for previous task */
stw r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
-#endif /* CONFIG_SMP */
+#endif /* !CONFIG_SMP */
/* enable use of SPE after return */
oris r9,r9,MSR_SPE@h
- mfspr r5,SPRN_SPRG3 /* current task's THREAD (phys) */
+ mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
li r4,1
li r10,THREAD_ACC
stw r4,THREAD_USED_SPE(r5)
evlddx evr4,r10,r5
evmra evr4,evr4
- REST_32EVRS(0,r10,r5)
+ REST_32EVRS(0,r10,r5,THREAD_EVR0)
#ifndef CONFIG_SMP
subi r4,r5,THREAD
stw r4,last_task_used_spe@l(r3)
-#endif /* CONFIG_SMP */
- /* restore registers and return */
-2: REST_4GPRS(3, r11)
- lwz r10,_CCR(r11)
- REST_GPR(1, r11)
- mtcr r10
- lwz r10,_LINK(r11)
- mtlr r10
- REST_GPR(10, r11)
- mtspr SPRN_SRR1,r9
- mtspr SPRN_SRR0,r12
- REST_GPR(9, r11)
- REST_GPR(12, r11)
- lwz r11,GPR11(r11)
- SYNC
- rfi
+#endif /* !CONFIG_SMP */
+ blr
/*
* SPE unavailable trap from kernel - print a message, but let
@@ -900,49 +901,104 @@ KernelSPE:
lwz r3,_MSR(r1)
oris r3,r3,MSR_SPE@h
stw r3,_MSR(r1) /* enable use of SPE after return */
+#ifdef CONFIG_PRINTK
lis r3,87f@h
ori r3,r3,87f@l
mr r4,r2 /* current */
lwz r5,_NIP(r1)
bl printk
+#endif
b ret_from_except
+#ifdef CONFIG_PRINTK
87: .string "SPE used in kernel (task=%p, pc=%x) \n"
+#endif
.align 4,0
#endif /* CONFIG_SPE */
/*
- * Global functions
+ * Translate the effec addr in r3 to phys addr. The phys addr will be put
+ * into r3(higher 32bit) and r4(lower 32bit)
*/
+get_phys_addr:
+ mfmsr r8
+ mfspr r9,SPRN_PID
+ rlwinm r9,r9,16,0x3fff0000 /* turn PID into MAS6[SPID] */
+ rlwimi r9,r8,28,0x00000001 /* turn MSR[DS] into MAS6[SAS] */
+ mtspr SPRN_MAS6,r9
+
+ tlbsx 0,r3 /* must succeed */
+
+ mfspr r8,SPRN_MAS1
+ mfspr r12,SPRN_MAS3
+ rlwinm r9,r8,25,0x1f /* r9 = log2(page size) */
+ li r10,1024
+ slw r10,r10,r9 /* r10 = page size */
+ addi r10,r10,-1
+ and r11,r3,r10 /* r11 = page offset */
+ andc r4,r12,r10 /* r4 = page base */
+ or r4,r4,r11 /* r4 = devtree phys addr */
+#ifdef CONFIG_PHYS_64BIT
+ mfspr r3,SPRN_MAS7
+#endif
+ blr
/*
- * extern void loadcam_entry(unsigned int index)
- *
- * Load TLBCAM[index] entry in to the L2 CAM MMU
+ * Global functions
*/
-_GLOBAL(loadcam_entry)
- lis r4,TLBCAM@ha
- addi r4,r4,TLBCAM@l
- mulli r5,r3,20
- add r3,r5,r4
- lwz r4,0(r3)
- mtspr SPRN_MAS0,r4
- lwz r4,4(r3)
- mtspr SPRN_MAS1,r4
- lwz r4,8(r3)
- mtspr SPRN_MAS2,r4
- lwz r4,12(r3)
- mtspr SPRN_MAS3,r4
- tlbwe
- isync
+
+/* Adjust or setup IVORs for e200 */
+_GLOBAL(__setup_e200_ivors)
+ li r3,DebugDebug@l
+ mtspr SPRN_IVOR15,r3
+ li r3,SPEUnavailable@l
+ mtspr SPRN_IVOR32,r3
+ li r3,SPEFloatingPointData@l
+ mtspr SPRN_IVOR33,r3
+ li r3,SPEFloatingPointRound@l
+ mtspr SPRN_IVOR34,r3
+ sync
blr
-/*
- * extern void giveup_altivec(struct task_struct *prev)
- *
- * The e500 core does not have an AltiVec unit.
- */
-_GLOBAL(giveup_altivec)
+/* Adjust or setup IVORs for e500v1/v2 */
+_GLOBAL(__setup_e500_ivors)
+ li r3,DebugCrit@l
+ mtspr SPRN_IVOR15,r3
+ li r3,SPEUnavailable@l
+ mtspr SPRN_IVOR32,r3
+ li r3,SPEFloatingPointData@l
+ mtspr SPRN_IVOR33,r3
+ li r3,SPEFloatingPointRound@l
+ mtspr SPRN_IVOR34,r3
+ li r3,PerformanceMonitor@l
+ mtspr SPRN_IVOR35,r3
+ sync
+ blr
+
+/* Adjust or setup IVORs for e500mc */
+_GLOBAL(__setup_e500mc_ivors)
+ li r3,DebugDebug@l
+ mtspr SPRN_IVOR15,r3
+ li r3,PerformanceMonitor@l
+ mtspr SPRN_IVOR35,r3
+ li r3,Doorbell@l
+ mtspr SPRN_IVOR36,r3
+ li r3,CriticalDoorbell@l
+ mtspr SPRN_IVOR37,r3
+ sync
+ blr
+
+/* setup ehv ivors for */
+_GLOBAL(__setup_ehv_ivors)
+ li r3,GuestDoorbell@l
+ mtspr SPRN_IVOR38,r3
+ li r3,CriticalGuestDoorbell@l
+ mtspr SPRN_IVOR39,r3
+ li r3,Hypercall@l
+ mtspr SPRN_IVOR40,r3
+ li r3,Ehvpriv@l
+ mtspr SPRN_IVOR41,r3
+ sync
blr
#ifdef CONFIG_SPE
@@ -953,7 +1009,6 @@ _GLOBAL(giveup_altivec)
_GLOBAL(giveup_spe)
mfmsr r5
oris r5,r5,MSR_SPE@h
- SYNC
mtmsr r5 /* enable use of SPE now */
isync
cmpi 0,r3,0
@@ -961,13 +1016,11 @@ _GLOBAL(giveup_spe)
addi r3,r3,THREAD /* want THREAD of task */
lwz r5,PT_REGS(r3)
cmpi 0,r5,0
- SAVE_32EVRS(0, r4, r3)
- evxor evr6, evr6, evr6 /* clear out evr6 */
+ SAVE_32EVRS(0, r4, r3, THREAD_EVR0)
+ evxor evr6, evr6, evr6 /* clear out evr6 */
evmwumiaa evr6, evr6, evr6 /* evr6 <- ACC = 0 * 0 + ACC */
li r4,THREAD_ACC
- evstddx evr6, r4, r3 /* save off accumulator */
- mfspr r6,SPRN_SPEFSCR
- stw r6,THREAD_SPEFSCR(r3) /* save spefscr register value */
+ evstddx evr6, r4, r3 /* save off accumulator */
beq 1f
lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5)
lis r3,MSR_SPE@h
@@ -978,36 +1031,26 @@ _GLOBAL(giveup_spe)
li r5,0
lis r4,last_task_used_spe@ha
stw r5,last_task_used_spe@l(r4)
-#endif /* CONFIG_SMP */
+#endif /* !CONFIG_SMP */
blr
#endif /* CONFIG_SPE */
/*
- * extern void giveup_fpu(struct task_struct *prev)
- *
- * Not all FSL Book-E cores have an FPU
- */
-#ifndef CONFIG_PPC_FPU
-_GLOBAL(giveup_fpu)
- blr
-#endif
-
-/*
* extern void abort(void)
*
* At present, this routine just applies a system reset.
*/
_GLOBAL(abort)
li r13,0
- mtspr SPRN_DBCR0,r13 /* disable all debug events */
+ mtspr SPRN_DBCR0,r13 /* disable all debug events */
isync
mfmsr r13
ori r13,r13,MSR_DE@l /* Enable Debug Events */
mtmsr r13
isync
- mfspr r13,SPRN_DBCR0
- lis r13,(DBCR0_IDM|DBCR0_RST_CHIP)@h
- mtspr SPRN_DBCR0,r13
+ mfspr r13,SPRN_DBCR0
+ lis r13,(DBCR0_IDM|DBCR0_RST_CHIP)@h
+ mtspr SPRN_DBCR0,r13
isync
_GLOBAL(set_context)
@@ -1024,6 +1067,255 @@ _GLOBAL(set_context)
isync /* Force context change */
blr
+_GLOBAL(flush_dcache_L1)
+ mfspr r3,SPRN_L1CFG0
+
+ rlwinm r5,r3,9,3 /* Extract cache block size */
+ twlgti r5,1 /* Only 32 and 64 byte cache blocks
+ * are currently defined.
+ */
+ li r4,32
+ subfic r6,r5,2 /* r6 = log2(1KiB / cache block size) -
+ * log2(number of ways)
+ */
+ slw r5,r4,r5 /* r5 = cache block size */
+
+ rlwinm r7,r3,0,0xff /* Extract number of KiB in the cache */
+ mulli r7,r7,13 /* An 8-way cache will require 13
+ * loads per set.
+ */
+ slw r7,r7,r6
+
+ /* save off HID0 and set DCFA */
+ mfspr r8,SPRN_HID0
+ ori r9,r8,HID0_DCFA@l
+ mtspr SPRN_HID0,r9
+ isync
+
+ lis r4,KERNELBASE@h
+ mtctr r7
+
+1: lwz r3,0(r4) /* Load... */
+ add r4,r4,r5
+ bdnz 1b
+
+ msync
+ lis r4,KERNELBASE@h
+ mtctr r7
+
+1: dcbf 0,r4 /* ...and flush. */
+ add r4,r4,r5
+ bdnz 1b
+
+ /* restore HID0 */
+ mtspr SPRN_HID0,r8
+ isync
+
+ blr
+
+/* Flush L1 d-cache, invalidate and disable d-cache and i-cache */
+_GLOBAL(__flush_disable_L1)
+ mflr r10
+ bl flush_dcache_L1 /* Flush L1 d-cache */
+ mtlr r10
+
+ mfspr r4, SPRN_L1CSR0 /* Invalidate and disable d-cache */
+ li r5, 2
+ rlwimi r4, r5, 0, 3
+
+ msync
+ isync
+ mtspr SPRN_L1CSR0, r4
+ isync
+
+1: mfspr r4, SPRN_L1CSR0 /* Wait for the invalidate to finish */
+ andi. r4, r4, 2
+ bne 1b
+
+ mfspr r4, SPRN_L1CSR1 /* Invalidate and disable i-cache */
+ li r5, 2
+ rlwimi r4, r5, 0, 3
+
+ mtspr SPRN_L1CSR1, r4
+ isync
+
+ blr
+
+#ifdef CONFIG_SMP
+/* When we get here, r24 needs to hold the CPU # */
+ .globl __secondary_start
+__secondary_start:
+ LOAD_REG_ADDR_PIC(r3, tlbcam_index)
+ lwz r3,0(r3)
+ mtctr r3
+ li r26,0 /* r26 safe? */
+
+ bl switch_to_as1
+ mr r27,r3 /* tlb entry */
+ /* Load each CAM entry */
+1: mr r3,r26
+ bl loadcam_entry
+ addi r26,r26,1
+ bdnz 1b
+ mr r3,r27 /* tlb entry */
+ LOAD_REG_ADDR_PIC(r4, memstart_addr)
+ lwz r4,0(r4)
+ mr r5,r25 /* phys kernel start */
+ rlwinm r5,r5,0,~0x3ffffff /* aligned 64M */
+ subf r4,r5,r4 /* memstart_addr - phys kernel start */
+ li r5,0 /* no device tree */
+ li r6,0 /* not boot cpu */
+ bl restore_to_as0
+
+
+ lis r3,__secondary_hold_acknowledge@h
+ ori r3,r3,__secondary_hold_acknowledge@l
+ stw r24,0(r3)
+
+ li r3,0
+ mr r4,r24 /* Why? */
+ bl call_setup_cpu
+
+ /* get current_thread_info and current */
+ lis r1,secondary_ti@ha
+ lwz r1,secondary_ti@l(r1)
+ lwz r2,TI_TASK(r1)
+
+ /* stack */
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ li r0,0
+ stw r0,0(r1)
+
+ /* ptr to current thread */
+ addi r4,r2,THREAD /* address of our thread_struct */
+ mtspr SPRN_SPRG_THREAD,r4
+
+ /* Setup the defaults for TLB entries */
+ li r4,(MAS4_TSIZED(BOOK3E_PAGESZ_4K))@l
+ mtspr SPRN_MAS4,r4
+
+ /* Jump to start_secondary */
+ lis r4,MSR_KERNEL@h
+ ori r4,r4,MSR_KERNEL@l
+ lis r3,start_secondary@h
+ ori r3,r3,start_secondary@l
+ mtspr SPRN_SRR0,r3
+ mtspr SPRN_SRR1,r4
+ sync
+ rfi
+ sync
+
+ .globl __secondary_hold_acknowledge
+__secondary_hold_acknowledge:
+ .long -1
+#endif
+
+/*
+ * Create a tlb entry with the same effective and physical address as
+ * the tlb entry used by the current running code. But set the TS to 1.
+ * Then switch to the address space 1. It will return with the r3 set to
+ * the ESEL of the new created tlb.
+ */
+_GLOBAL(switch_to_as1)
+ mflr r5
+
+ /* Find a entry not used */
+ mfspr r3,SPRN_TLB1CFG
+ andi. r3,r3,0xfff
+ mfspr r4,SPRN_PID
+ rlwinm r4,r4,16,0x3fff0000 /* turn PID into MAS6[SPID] */
+ mtspr SPRN_MAS6,r4
+1: lis r4,0x1000 /* Set MAS0(TLBSEL) = 1 */
+ addi r3,r3,-1
+ rlwimi r4,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */
+ mtspr SPRN_MAS0,r4
+ tlbre
+ mfspr r4,SPRN_MAS1
+ andis. r4,r4,MAS1_VALID@h
+ bne 1b
+
+ /* Get the tlb entry used by the current running code */
+ bl 0f
+0: mflr r4
+ tlbsx 0,r4
+
+ mfspr r4,SPRN_MAS1
+ ori r4,r4,MAS1_TS /* Set the TS = 1 */
+ mtspr SPRN_MAS1,r4
+
+ mfspr r4,SPRN_MAS0
+ rlwinm r4,r4,0,~MAS0_ESEL_MASK
+ rlwimi r4,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */
+ mtspr SPRN_MAS0,r4
+ tlbwe
+ isync
+ sync
+
+ mfmsr r4
+ ori r4,r4,MSR_IS | MSR_DS
+ mtspr SPRN_SRR0,r5
+ mtspr SPRN_SRR1,r4
+ sync
+ rfi
+
+/*
+ * Restore to the address space 0 and also invalidate the tlb entry created
+ * by switch_to_as1.
+ * r3 - the tlb entry which should be invalidated
+ * r4 - __pa(PAGE_OFFSET in AS1) - __pa(PAGE_OFFSET in AS0)
+ * r5 - device tree virtual address. If r4 is 0, r5 is ignored.
+ * r6 - boot cpu
+*/
+_GLOBAL(restore_to_as0)
+ mflr r0
+
+ bl 0f
+0: mflr r9
+ addi r9,r9,1f - 0b
+
+ /*
+ * We may map the PAGE_OFFSET in AS0 to a different physical address,
+ * so we need calculate the right jump and device tree address based
+ * on the offset passed by r4.
+ */
+ add r9,r9,r4
+ add r5,r5,r4
+ add r0,r0,r4
+
+2: mfmsr r7
+ li r8,(MSR_IS | MSR_DS)
+ andc r7,r7,r8
+
+ mtspr SPRN_SRR0,r9
+ mtspr SPRN_SRR1,r7
+ sync
+ rfi
+
+ /* Invalidate the temporary tlb entry for AS1 */
+1: lis r9,0x1000 /* Set MAS0(TLBSEL) = 1 */
+ rlwimi r9,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */
+ mtspr SPRN_MAS0,r9
+ tlbre
+ mfspr r9,SPRN_MAS1
+ rlwinm r9,r9,0,2,31 /* Clear MAS1 Valid and IPPROT */
+ mtspr SPRN_MAS1,r9
+ tlbwe
+ isync
+
+ cmpwi r4,0
+ cmpwi cr1,r6,0
+ cror eq,4*cr1+eq,eq
+ bne 3f /* offset != 0 && is_boot_cpu */
+ mtlr r0
+ blr
+
+ /*
+ * The PAGE_OFFSET will map to a different physical address,
+ * jump to _start to do another relocation again.
+ */
+3: mr r3,r5
+ bl _start
+
/*
* We put a few things here that have to be page-aligned. This stuff
* goes at the beginning of the data segment, which is page-aligned.
@@ -1037,24 +1329,7 @@ empty_zero_page:
.space 4096
.globl swapper_pg_dir
swapper_pg_dir:
- .space 4096
-
-/* Reserved 4k for the critical exception stack & 4k for the machine
- * check stack per CPU for kernel mode exceptions */
- .section .bss
- .align 12
-exception_stack_bottom:
- .space BOOKE_EXCEPTION_STACK_SIZE * NR_CPUS
- .globl exception_stack_top
-exception_stack_top:
-
-/*
- * This space gets a copy of optional info passed to us by the bootstrap
- * which is used to pass parameters into the kernel like root=/dev/sda1, etc.
- */
- .globl cmd_line
-cmd_line:
- .space 512
+ .space PGD_TABLE_SIZE
/*
* Room for two PTE pointers, usually the kernel and current user pointers
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
new file mode 100644
index 00000000000..0bb5918faaa
--- /dev/null
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -0,0 +1,366 @@
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers. Derived from
+ * "arch/x86/kernel/hw_breakpoint.c"
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2010 IBM Corporation
+ * Author: K.Prasad <prasad@linux.vnet.ibm.com>
+ *
+ */
+
+#include <linux/hw_breakpoint.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
+#include <linux/percpu.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+
+#include <asm/hw_breakpoint.h>
+#include <asm/processor.h>
+#include <asm/sstep.h>
+#include <asm/uaccess.h>
+
+/*
+ * Stores the breakpoints currently in use on each breakpoint address
+ * register for every cpu
+ */
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg);
+
+/*
+ * Returns total number of data or instruction breakpoints available.
+ */
+int hw_breakpoint_slots(int type)
+{
+ if (type == TYPE_DATA)
+ return HBP_NUM;
+ return 0; /* no instruction breakpoints available */
+}
+
+/*
+ * Install a perf counter breakpoint.
+ *
+ * We seek a free debug address register and use it for this
+ * breakpoint.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+int arch_install_hw_breakpoint(struct perf_event *bp)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ struct perf_event **slot = &__get_cpu_var(bp_per_reg);
+
+ *slot = bp;
+
+ /*
+ * Do not install DABR values if the instruction must be single-stepped.
+ * If so, DABR will be populated in single_step_dabr_instruction().
+ */
+ if (current->thread.last_hit_ubp != bp)
+ __set_breakpoint(info);
+
+ return 0;
+}
+
+/*
+ * Uninstall the breakpoint contained in the given counter.
+ *
+ * First we search the debug address register it uses and then we disable
+ * it.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+{
+ struct perf_event **slot = &__get_cpu_var(bp_per_reg);
+
+ if (*slot != bp) {
+ WARN_ONCE(1, "Can't find the breakpoint");
+ return;
+ }
+
+ *slot = NULL;
+ hw_breakpoint_disable();
+}
+
+/*
+ * Perform cleanup of arch-specific counters during unregistration
+ * of the perf-event
+ */
+void arch_unregister_hw_breakpoint(struct perf_event *bp)
+{
+ /*
+ * If the breakpoint is unregistered between a hw_breakpoint_handler()
+ * and the single_step_dabr_instruction(), then cleanup the breakpoint
+ * restoration variables to prevent dangling pointers.
+ */
+ if (bp->ctx && bp->ctx->task)
+ bp->ctx->task->thread.last_hit_ubp = NULL;
+}
+
+/*
+ * Check for virtual address in kernel space.
+ */
+int arch_check_bp_in_kernelspace(struct perf_event *bp)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+ return is_kernel_addr(info->address);
+}
+
+int arch_bp_generic_fields(int type, int *gen_bp_type)
+{
+ *gen_bp_type = 0;
+ if (type & HW_BRK_TYPE_READ)
+ *gen_bp_type |= HW_BREAKPOINT_R;
+ if (type & HW_BRK_TYPE_WRITE)
+ *gen_bp_type |= HW_BREAKPOINT_W;
+ if (*gen_bp_type == 0)
+ return -EINVAL;
+ return 0;
+}
+
+/*
+ * Validate the arch-specific HW Breakpoint register settings
+ */
+int arch_validate_hwbkpt_settings(struct perf_event *bp)
+{
+ int ret = -EINVAL, length_max;
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+ if (!bp)
+ return ret;
+
+ info->type = HW_BRK_TYPE_TRANSLATE;
+ if (bp->attr.bp_type & HW_BREAKPOINT_R)
+ info->type |= HW_BRK_TYPE_READ;
+ if (bp->attr.bp_type & HW_BREAKPOINT_W)
+ info->type |= HW_BRK_TYPE_WRITE;
+ if (info->type == HW_BRK_TYPE_TRANSLATE)
+ /* must set alteast read or write */
+ return ret;
+ if (!(bp->attr.exclude_user))
+ info->type |= HW_BRK_TYPE_USER;
+ if (!(bp->attr.exclude_kernel))
+ info->type |= HW_BRK_TYPE_KERNEL;
+ if (!(bp->attr.exclude_hv))
+ info->type |= HW_BRK_TYPE_HYP;
+ info->address = bp->attr.bp_addr;
+ info->len = bp->attr.bp_len;
+
+ /*
+ * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8)
+ * and breakpoint addresses are aligned to nearest double-word
+ * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the
+ * 'symbolsize' should satisfy the check below.
+ */
+ length_max = 8; /* DABR */
+ if (cpu_has_feature(CPU_FTR_DAWR)) {
+ length_max = 512 ; /* 64 doublewords */
+ /* DAWR region can't cross 512 boundary */
+ if ((bp->attr.bp_addr >> 10) !=
+ ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 10))
+ return -EINVAL;
+ }
+ if (info->len >
+ (length_max - (info->address & HW_BREAKPOINT_ALIGN)))
+ return -EINVAL;
+ return 0;
+}
+
+/*
+ * Restores the breakpoint on the debug registers.
+ * Invoke this function if it is known that the execution context is
+ * about to change to cause loss of MSR_SE settings.
+ */
+void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
+{
+ struct arch_hw_breakpoint *info;
+
+ if (likely(!tsk->thread.last_hit_ubp))
+ return;
+
+ info = counter_arch_bp(tsk->thread.last_hit_ubp);
+ regs->msr &= ~MSR_SE;
+ __set_breakpoint(info);
+ tsk->thread.last_hit_ubp = NULL;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_handler(struct die_args *args)
+{
+ int rc = NOTIFY_STOP;
+ struct perf_event *bp;
+ struct pt_regs *regs = args->regs;
+ int stepped = 1;
+ struct arch_hw_breakpoint *info;
+ unsigned int instr;
+ unsigned long dar = regs->dar;
+
+ /* Disable breakpoints during exception handling */
+ hw_breakpoint_disable();
+
+ /*
+ * The counter may be concurrently released but that can only
+ * occur from a call_rcu() path. We can then safely fetch
+ * the breakpoint, use its callback, touch its counter
+ * while we are in an rcu_read_lock() path.
+ */
+ rcu_read_lock();
+
+ bp = __get_cpu_var(bp_per_reg);
+ if (!bp)
+ goto out;
+ info = counter_arch_bp(bp);
+
+ /*
+ * Return early after invoking user-callback function without restoring
+ * DABR if the breakpoint is from ptrace which always operates in
+ * one-shot mode. The ptrace-ed process will receive the SIGTRAP signal
+ * generated in do_dabr().
+ */
+ if (bp->overflow_handler == ptrace_triggered) {
+ perf_bp_event(bp, regs);
+ rc = NOTIFY_DONE;
+ goto out;
+ }
+
+ /*
+ * Verify if dar lies within the address range occupied by the symbol
+ * being watched to filter extraneous exceptions. If it doesn't,
+ * we still need to single-step the instruction, but we don't
+ * generate an event.
+ */
+ info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ if (!((bp->attr.bp_addr <= dar) &&
+ (dar - bp->attr.bp_addr < bp->attr.bp_len)))
+ info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+
+ /* Do not emulate user-space instructions, instead single-step them */
+ if (user_mode(regs)) {
+ current->thread.last_hit_ubp = bp;
+ regs->msr |= MSR_SE;
+ goto out;
+ }
+
+ stepped = 0;
+ instr = 0;
+ if (!__get_user_inatomic(instr, (unsigned int *) regs->nip))
+ stepped = emulate_step(regs, instr);
+
+ /*
+ * emulate_step() could not execute it. We've failed in reliably
+ * handling the hw-breakpoint. Unregister it and throw a warning
+ * message to let the user know about it.
+ */
+ if (!stepped) {
+ WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
+ "0x%lx will be disabled.", info->address);
+ perf_event_disable(bp);
+ goto out;
+ }
+ /*
+ * As a policy, the callback is invoked in a 'trigger-after-execute'
+ * fashion
+ */
+ if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
+ perf_bp_event(bp, regs);
+
+ __set_breakpoint(info);
+out:
+ rcu_read_unlock();
+ return rc;
+}
+
+/*
+ * Handle single-step exceptions following a DABR hit.
+ */
+int __kprobes single_step_dabr_instruction(struct die_args *args)
+{
+ struct pt_regs *regs = args->regs;
+ struct perf_event *bp = NULL;
+ struct arch_hw_breakpoint *info;
+
+ bp = current->thread.last_hit_ubp;
+ /*
+ * Check if we are single-stepping as a result of a
+ * previous HW Breakpoint exception
+ */
+ if (!bp)
+ return NOTIFY_DONE;
+
+ info = counter_arch_bp(bp);
+
+ /*
+ * We shall invoke the user-defined callback function in the single
+ * stepping handler to confirm to 'trigger-after-execute' semantics
+ */
+ if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
+ perf_bp_event(bp, regs);
+
+ __set_breakpoint(info);
+ current->thread.last_hit_ubp = NULL;
+
+ /*
+ * If the process was being single-stepped by ptrace, let the
+ * other single-step actions occur (e.g. generate SIGTRAP).
+ */
+ if (test_thread_flag(TIF_SINGLESTEP))
+ return NOTIFY_DONE;
+
+ return NOTIFY_STOP;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_exceptions_notify(
+ struct notifier_block *unused, unsigned long val, void *data)
+{
+ int ret = NOTIFY_DONE;
+
+ switch (val) {
+ case DIE_DABR_MATCH:
+ ret = hw_breakpoint_handler(data);
+ break;
+ case DIE_SSTEP:
+ ret = single_step_dabr_instruction(data);
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Release the user breakpoints used by ptrace
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+ struct thread_struct *t = &tsk->thread;
+
+ unregister_hw_breakpoint(t->ptrace_bps[0]);
+ t->ptrace_bps[0] = NULL;
+}
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+ /* TODO */
+}
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
index 124dbcba94a..1114d13ac19 100644
--- a/arch/powerpc/kernel/ibmebus.c
+++ b/arch/powerpc/kernel/ibmebus.c
@@ -2,60 +2,73 @@
* IBM PowerPC IBM eBus Infrastructure Support.
*
* Copyright (c) 2005 IBM Corporation
+ * Joachim Fenkes <fenkes@de.ibm.com>
* Heiko J Schick <schickhj@de.ibm.com>
- *
+ *
* All rights reserved.
*
- * This source code is distributed under a dual license of GPL v2.0 and OpenIB
- * BSD.
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
*
* OpenIB BSD License
*
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
*
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
*
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
* and/or other materials
- * provided with the distribution.
+ * provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/init.h>
+#include <linux/export.h>
#include <linux/console.h>
#include <linux/kobject.h>
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/of_platform.h>
#include <asm/ibmebus.h>
-#include <asm/abs_addr.h>
-static struct ibmebus_dev ibmebus_bus_device = { /* fake "parent" device */
- .name = ibmebus_bus_device.ofdev.dev.bus_id,
- .ofdev.dev.bus_id = "ibmebus",
- .ofdev.dev.bus = &ibmebus_bus_type,
+static struct device ibmebus_bus_device = { /* fake "parent" device */
+ .init_name = "ibmebus",
+};
+
+struct bus_type ibmebus_bus_type;
+
+/* These devices will automatically be added to the bus during init */
+static struct of_device_id __initdata ibmebus_matches[] = {
+ { .compatible = "IBM,lhca" },
+ { .compatible = "IBM,lhea" },
+ {},
};
static void *ibmebus_alloc_coherent(struct device *dev,
size_t size,
dma_addr_t *dma_handle,
- gfp_t flag)
+ gfp_t flag,
+ struct dma_attrs *attrs)
{
void *mem;
-
+
mem = kmalloc(size, flag);
*dma_handle = (dma_addr_t)mem;
@@ -63,329 +76,684 @@ static void *ibmebus_alloc_coherent(struct device *dev,
}
static void ibmebus_free_coherent(struct device *dev,
- size_t size, void *vaddr,
- dma_addr_t dma_handle)
+ size_t size, void *vaddr,
+ dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
{
kfree(vaddr);
}
-static dma_addr_t ibmebus_map_single(struct device *dev,
- void *ptr,
- size_t size,
- enum dma_data_direction direction)
+static dma_addr_t ibmebus_map_page(struct device *dev,
+ struct page *page,
+ unsigned long offset,
+ size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
- return (dma_addr_t)(ptr);
+ return (dma_addr_t)(page_address(page) + offset);
}
-static void ibmebus_unmap_single(struct device *dev,
- dma_addr_t dma_addr,
- size_t size,
- enum dma_data_direction direction)
+static void ibmebus_unmap_page(struct device *dev,
+ dma_addr_t dma_addr,
+ size_t size,
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
return;
}
static int ibmebus_map_sg(struct device *dev,
- struct scatterlist *sg,
- int nents, enum dma_data_direction direction)
+ struct scatterlist *sgl,
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
+ struct scatterlist *sg;
int i;
-
- for (i = 0; i < nents; i++) {
- sg[i].dma_address = (dma_addr_t)page_address(sg[i].page)
- + sg[i].offset;
- sg[i].dma_length = sg[i].length;
+
+ for_each_sg(sgl, sg, nents, i) {
+ sg->dma_address = (dma_addr_t) sg_virt(sg);
+ sg->dma_length = sg->length;
}
-
+
return nents;
}
static void ibmebus_unmap_sg(struct device *dev,
struct scatterlist *sg,
- int nents, enum dma_data_direction direction)
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
return;
}
static int ibmebus_dma_supported(struct device *dev, u64 mask)
{
- return 1;
+ return mask == DMA_BIT_MASK(64);
}
-struct dma_mapping_ops ibmebus_dma_ops = {
- .alloc_coherent = ibmebus_alloc_coherent,
- .free_coherent = ibmebus_free_coherent,
- .map_single = ibmebus_map_single,
- .unmap_single = ibmebus_unmap_single,
- .map_sg = ibmebus_map_sg,
- .unmap_sg = ibmebus_unmap_sg,
- .dma_supported = ibmebus_dma_supported,
+static u64 ibmebus_dma_get_required_mask(struct device *dev)
+{
+ return DMA_BIT_MASK(64);
+}
+
+static struct dma_map_ops ibmebus_dma_ops = {
+ .alloc = ibmebus_alloc_coherent,
+ .free = ibmebus_free_coherent,
+ .map_sg = ibmebus_map_sg,
+ .unmap_sg = ibmebus_unmap_sg,
+ .dma_supported = ibmebus_dma_supported,
+ .get_required_mask = ibmebus_dma_get_required_mask,
+ .map_page = ibmebus_map_page,
+ .unmap_page = ibmebus_unmap_page,
};
-static int ibmebus_bus_probe(struct device *dev)
+static int ibmebus_match_path(struct device *dev, void *data)
{
- struct ibmebus_dev *ibmebusdev = to_ibmebus_dev(dev);
- struct ibmebus_driver *ibmebusdrv = to_ibmebus_driver(dev->driver);
- const struct of_device_id *id;
- int error = -ENODEV;
-
- if (!ibmebusdrv->probe)
- return error;
-
- id = of_match_device(ibmebusdrv->id_table, &ibmebusdev->ofdev);
- if (id) {
- error = ibmebusdrv->probe(ibmebusdev, id);
- }
-
- return error;
+ struct device_node *dn = to_platform_device(dev)->dev.of_node;
+ return (dn->full_name &&
+ (strcasecmp((char *)data, dn->full_name) == 0));
}
-static int ibmebus_bus_remove(struct device *dev)
+static int ibmebus_match_node(struct device *dev, void *data)
{
- struct ibmebus_dev *ibmebusdev = to_ibmebus_dev(dev);
- struct ibmebus_driver *ibmebusdrv = to_ibmebus_driver(dev->driver);
-
- if (ibmebusdrv->remove) {
- return ibmebusdrv->remove(ibmebusdev);
+ return to_platform_device(dev)->dev.of_node == data;
+}
+
+static int ibmebus_create_device(struct device_node *dn)
+{
+ struct platform_device *dev;
+ int ret;
+
+ dev = of_device_alloc(dn, NULL, &ibmebus_bus_device);
+ if (!dev)
+ return -ENOMEM;
+
+ dev->dev.bus = &ibmebus_bus_type;
+ dev->dev.archdata.dma_ops = &ibmebus_dma_ops;
+
+ ret = of_device_add(dev);
+ if (ret)
+ platform_device_put(dev);
+ return ret;
+}
+
+static int ibmebus_create_devices(const struct of_device_id *matches)
+{
+ struct device_node *root, *child;
+ int ret = 0;
+
+ root = of_find_node_by_path("/");
+
+ for_each_child_of_node(root, child) {
+ if (!of_match_node(matches, child))
+ continue;
+
+ if (bus_find_device(&ibmebus_bus_type, NULL, child,
+ ibmebus_match_node))
+ continue;
+
+ ret = ibmebus_create_device(child);
+ if (ret) {
+ printk(KERN_ERR "%s: failed to create device (%i)",
+ __func__, ret);
+ of_node_put(child);
+ break;
+ }
}
-
- return 0;
+
+ of_node_put(root);
+ return ret;
}
-static void __devinit ibmebus_dev_release(struct device *dev)
+int ibmebus_register_driver(struct platform_driver *drv)
{
- of_node_put(to_ibmebus_dev(dev)->ofdev.node);
- kfree(to_ibmebus_dev(dev));
+ /* If the driver uses devices that ibmebus doesn't know, add them */
+ ibmebus_create_devices(drv->driver.of_match_table);
+
+ drv->driver.bus = &ibmebus_bus_type;
+ return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL(ibmebus_register_driver);
+
+void ibmebus_unregister_driver(struct platform_driver *drv)
+{
+ driver_unregister(&drv->driver);
}
+EXPORT_SYMBOL(ibmebus_unregister_driver);
-static ssize_t ibmebusdev_show_name(struct device *dev,
- struct device_attribute *attr, char *buf)
+int ibmebus_request_irq(u32 ist, irq_handler_t handler,
+ unsigned long irq_flags, const char *devname,
+ void *dev_id)
{
- return sprintf(buf, "%s\n", to_ibmebus_dev(dev)->name);
+ unsigned int irq = irq_create_mapping(NULL, ist);
+
+ if (irq == NO_IRQ)
+ return -EINVAL;
+
+ return request_irq(irq, handler, irq_flags, devname, dev_id);
}
-static DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, ibmebusdev_show_name,
- NULL);
+EXPORT_SYMBOL(ibmebus_request_irq);
-static struct ibmebus_dev* __devinit ibmebus_register_device_common(
- struct ibmebus_dev *dev, const char *name)
+void ibmebus_free_irq(u32 ist, void *dev_id)
{
- int err = 0;
+ unsigned int irq = irq_find_mapping(NULL, ist);
- dev->name = name;
- dev->ofdev.dev.parent = &ibmebus_bus_device.ofdev.dev;
- dev->ofdev.dev.bus = &ibmebus_bus_type;
- dev->ofdev.dev.release = ibmebus_dev_release;
+ free_irq(irq, dev_id);
+ irq_dispose_mapping(irq);
+}
+EXPORT_SYMBOL(ibmebus_free_irq);
+
+static char *ibmebus_chomp(const char *in, size_t count)
+{
+ char *out = kmalloc(count + 1, GFP_KERNEL);
- /* An ibmebusdev is based on a of_device. We have to change the
- * bus type to use our own DMA mapping operations.
- */
- if ((err = of_device_register(&dev->ofdev)) != 0) {
- printk(KERN_ERR "%s: failed to register device (%d).\n",
- __FUNCTION__, err);
+ if (!out)
return NULL;
+
+ memcpy(out, in, count);
+ out[count] = '\0';
+ if (out[count - 1] == '\n')
+ out[count - 1] = '\0';
+
+ return out;
+}
+
+static ssize_t ibmebus_store_probe(struct bus_type *bus,
+ const char *buf, size_t count)
+{
+ struct device_node *dn = NULL;
+ char *path;
+ ssize_t rc = 0;
+
+ path = ibmebus_chomp(buf, count);
+ if (!path)
+ return -ENOMEM;
+
+ if (bus_find_device(&ibmebus_bus_type, NULL, path,
+ ibmebus_match_path)) {
+ printk(KERN_WARNING "%s: %s has already been probed\n",
+ __func__, path);
+ rc = -EEXIST;
+ goto out;
}
-
- device_create_file(&dev->ofdev.dev, &dev_attr_name);
-
- return dev;
+
+ if ((dn = of_find_node_by_path(path))) {
+ rc = ibmebus_create_device(dn);
+ of_node_put(dn);
+ } else {
+ printk(KERN_WARNING "%s: no such device node: %s\n",
+ __func__, path);
+ rc = -ENODEV;
+ }
+
+out:
+ kfree(path);
+ if (rc)
+ return rc;
+ return count;
}
+static BUS_ATTR(probe, S_IWUSR, NULL, ibmebus_store_probe);
-static struct ibmebus_dev* __devinit ibmebus_register_device_node(
- struct device_node *dn)
+static ssize_t ibmebus_store_remove(struct bus_type *bus,
+ const char *buf, size_t count)
{
- struct ibmebus_dev *dev;
- const char *loc_code;
- int length;
+ struct device *dev;
+ char *path;
- loc_code = get_property(dn, "ibm,loc-code", NULL);
- if (!loc_code) {
- printk(KERN_WARNING "%s: node %s missing 'ibm,loc-code'\n",
- __FUNCTION__, dn->name ? dn->name : "<unknown>");
- return NULL;
- }
-
- if (strlen(loc_code) == 0) {
- printk(KERN_WARNING "%s: 'ibm,loc-code' is invalid\n",
- __FUNCTION__);
- return NULL;
+ path = ibmebus_chomp(buf, count);
+ if (!path)
+ return -ENOMEM;
+
+ if ((dev = bus_find_device(&ibmebus_bus_type, NULL, path,
+ ibmebus_match_path))) {
+ of_device_unregister(to_platform_device(dev));
+
+ kfree(path);
+ return count;
+ } else {
+ printk(KERN_WARNING "%s: %s not on the bus\n",
+ __func__, path);
+
+ kfree(path);
+ return -ENODEV;
}
+}
+static BUS_ATTR(remove, S_IWUSR, NULL, ibmebus_store_remove);
- dev = kmalloc(sizeof(struct ibmebus_dev), GFP_KERNEL);
- if (!dev) {
- return NULL;
+static struct attribute *ibmbus_bus_attrs[] = {
+ &bus_attr_probe.attr,
+ &bus_attr_remove.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(ibmbus_bus);
+
+static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv)
+{
+ const struct of_device_id *matches = drv->of_match_table;
+
+ if (!matches)
+ return 0;
+
+ return of_match_device(matches, dev) != NULL;
+}
+
+static int ibmebus_bus_device_probe(struct device *dev)
+{
+ int error = -ENODEV;
+ struct platform_driver *drv;
+ struct platform_device *of_dev;
+
+ drv = to_platform_driver(dev->driver);
+ of_dev = to_platform_device(dev);
+
+ if (!drv->probe)
+ return error;
+
+ of_dev_get(of_dev);
+
+ if (of_driver_match_device(dev, dev->driver))
+ error = drv->probe(of_dev);
+ if (error)
+ of_dev_put(of_dev);
+
+ return error;
+}
+
+static int ibmebus_bus_device_remove(struct device *dev)
+{
+ struct platform_device *of_dev = to_platform_device(dev);
+ struct platform_driver *drv = to_platform_driver(dev->driver);
+
+ if (dev->driver && drv->remove)
+ drv->remove(of_dev);
+ return 0;
+}
+
+static void ibmebus_bus_device_shutdown(struct device *dev)
+{
+ struct platform_device *of_dev = to_platform_device(dev);
+ struct platform_driver *drv = to_platform_driver(dev->driver);
+
+ if (dev->driver && drv->shutdown)
+ drv->shutdown(of_dev);
+}
+
+/*
+ * ibmebus_bus_device_attrs
+ */
+static ssize_t devspec_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *ofdev;
+
+ ofdev = to_platform_device(dev);
+ return sprintf(buf, "%s\n", ofdev->dev.of_node->full_name);
+}
+
+static ssize_t name_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct platform_device *ofdev;
+
+ ofdev = to_platform_device(dev);
+ return sprintf(buf, "%s\n", ofdev->dev.of_node->name);
+}
+
+static ssize_t modalias_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ ssize_t len = of_device_get_modalias(dev, buf, PAGE_SIZE - 2);
+ buf[len] = '\n';
+ buf[len+1] = 0;
+ return len+1;
+}
+
+struct device_attribute ibmebus_bus_device_attrs[] = {
+ __ATTR_RO(devspec),
+ __ATTR_RO(name),
+ __ATTR_RO(modalias),
+ __ATTR_NULL
+};
+
+#ifdef CONFIG_PM_SLEEP
+static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg)
+{
+ struct platform_device *of_dev = to_platform_device(dev);
+ struct platform_driver *drv = to_platform_driver(dev->driver);
+ int ret = 0;
+
+ if (dev->driver && drv->suspend)
+ ret = drv->suspend(of_dev, mesg);
+ return ret;
+}
+
+static int ibmebus_bus_legacy_resume(struct device *dev)
+{
+ struct platform_device *of_dev = to_platform_device(dev);
+ struct platform_driver *drv = to_platform_driver(dev->driver);
+ int ret = 0;
+
+ if (dev->driver && drv->resume)
+ ret = drv->resume(of_dev);
+ return ret;
+}
+
+static int ibmebus_bus_pm_prepare(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (drv && drv->pm && drv->pm->prepare)
+ ret = drv->pm->prepare(dev);
+
+ return ret;
+}
+
+static void ibmebus_bus_pm_complete(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+
+ if (drv && drv->pm && drv->pm->complete)
+ drv->pm->complete(dev);
+}
+
+#ifdef CONFIG_SUSPEND
+
+static int ibmebus_bus_pm_suspend(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->suspend)
+ ret = drv->pm->suspend(dev);
+ } else {
+ ret = ibmebus_bus_legacy_suspend(dev, PMSG_SUSPEND);
}
- memset(dev, 0, sizeof(struct ibmebus_dev));
-
- dev->ofdev.node = of_node_get(dn);
-
- length = strlen(loc_code);
- memcpy(dev->ofdev.dev.bus_id, loc_code
- + (length - min(length, BUS_ID_SIZE - 1)),
- min(length, BUS_ID_SIZE - 1));
-
- /* Register with generic device framework. */
- if (ibmebus_register_device_common(dev, dn->name) == NULL) {
- kfree(dev);
- return NULL;
+
+ return ret;
+}
+
+static int ibmebus_bus_pm_suspend_noirq(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->suspend_noirq)
+ ret = drv->pm->suspend_noirq(dev);
}
- return dev;
+ return ret;
}
-static void ibmebus_probe_of_nodes(char* name)
+static int ibmebus_bus_pm_resume(struct device *dev)
{
- struct device_node *dn = NULL;
-
- while ((dn = of_find_node_by_name(dn, name))) {
- if (ibmebus_register_device_node(dn) == NULL) {
- of_node_put(dn);
-
- return;
- }
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->resume)
+ ret = drv->pm->resume(dev);
+ } else {
+ ret = ibmebus_bus_legacy_resume(dev);
}
-
- of_node_put(dn);
-
- return;
+
+ return ret;
}
-static void ibmebus_add_devices_by_id(struct of_device_id *idt)
+static int ibmebus_bus_pm_resume_noirq(struct device *dev)
{
- while (strlen(idt->name) > 0) {
- ibmebus_probe_of_nodes(idt->name);
- idt++;
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->resume_noirq)
+ ret = drv->pm->resume_noirq(dev);
}
- return;
+ return ret;
}
-static int ibmebus_match_helper(struct device *dev, void *data)
+#else /* !CONFIG_SUSPEND */
+
+#define ibmebus_bus_pm_suspend NULL
+#define ibmebus_bus_pm_resume NULL
+#define ibmebus_bus_pm_suspend_noirq NULL
+#define ibmebus_bus_pm_resume_noirq NULL
+
+#endif /* !CONFIG_SUSPEND */
+
+#ifdef CONFIG_HIBERNATE_CALLBACKS
+
+static int ibmebus_bus_pm_freeze(struct device *dev)
{
- if (strcmp((char*)data, to_ibmebus_dev(dev)->name) == 0)
- return 1;
-
- return 0;
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->freeze)
+ ret = drv->pm->freeze(dev);
+ } else {
+ ret = ibmebus_bus_legacy_suspend(dev, PMSG_FREEZE);
+ }
+
+ return ret;
}
-static int ibmebus_unregister_device(struct device *dev)
+static int ibmebus_bus_pm_freeze_noirq(struct device *dev)
{
- device_remove_file(dev, &dev_attr_name);
- of_device_unregister(to_of_device(dev));
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
- return 0;
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->freeze_noirq)
+ ret = drv->pm->freeze_noirq(dev);
+ }
+
+ return ret;
}
-static void ibmebus_remove_devices_by_id(struct of_device_id *idt)
+static int ibmebus_bus_pm_thaw(struct device *dev)
{
- struct device *dev;
-
- while (strlen(idt->name) > 0) {
- while ((dev = bus_find_device(&ibmebus_bus_type, NULL,
- (void*)idt->name,
- ibmebus_match_helper))) {
- ibmebus_unregister_device(dev);
- }
- idt++;
-
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->thaw)
+ ret = drv->pm->thaw(dev);
+ } else {
+ ret = ibmebus_bus_legacy_resume(dev);
}
-
- return;
+
+ return ret;
}
-int ibmebus_register_driver(struct ibmebus_driver *drv)
+static int ibmebus_bus_pm_thaw_noirq(struct device *dev)
{
- int err = 0;
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
- drv->driver.name = drv->name;
- drv->driver.bus = &ibmebus_bus_type;
- drv->driver.probe = ibmebus_bus_probe;
- drv->driver.remove = ibmebus_bus_remove;
+ if (!drv)
+ return 0;
- if ((err = driver_register(&drv->driver) != 0))
- return err;
+ if (drv->pm) {
+ if (drv->pm->thaw_noirq)
+ ret = drv->pm->thaw_noirq(dev);
+ }
- ibmebus_add_devices_by_id(drv->id_table);
-
- return 0;
+ return ret;
}
-EXPORT_SYMBOL(ibmebus_register_driver);
-void ibmebus_unregister_driver(struct ibmebus_driver *drv)
-{
- driver_unregister(&drv->driver);
- ibmebus_remove_devices_by_id(drv->id_table);
+static int ibmebus_bus_pm_poweroff(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->poweroff)
+ ret = drv->pm->poweroff(dev);
+ } else {
+ ret = ibmebus_bus_legacy_suspend(dev, PMSG_HIBERNATE);
+ }
+
+ return ret;
}
-EXPORT_SYMBOL(ibmebus_unregister_driver);
-int ibmebus_request_irq(struct ibmebus_dev *dev,
- u32 ist,
- irqreturn_t (*handler)(int, void*, struct pt_regs *),
- unsigned long irq_flags, const char * devname,
- void *dev_id)
+static int ibmebus_bus_pm_poweroff_noirq(struct device *dev)
{
- unsigned int irq = irq_create_mapping(NULL, ist);
-
- if (irq == NO_IRQ)
- return -EINVAL;
-
- return request_irq(irq, handler,
- irq_flags, devname, dev_id);
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->poweroff_noirq)
+ ret = drv->pm->poweroff_noirq(dev);
+ }
+
+ return ret;
}
-EXPORT_SYMBOL(ibmebus_request_irq);
-void ibmebus_free_irq(struct ibmebus_dev *dev, u32 ist, void *dev_id)
+static int ibmebus_bus_pm_restore(struct device *dev)
{
- unsigned int irq = irq_find_mapping(NULL, ist);
-
- free_irq(irq, dev_id);
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
+ return 0;
+
+ if (drv->pm) {
+ if (drv->pm->restore)
+ ret = drv->pm->restore(dev);
+ } else {
+ ret = ibmebus_bus_legacy_resume(dev);
+ }
+
+ return ret;
}
-EXPORT_SYMBOL(ibmebus_free_irq);
-static int ibmebus_bus_match(struct device *dev, struct device_driver *drv)
-{
- const struct ibmebus_dev *ebus_dev = to_ibmebus_dev(dev);
- struct ibmebus_driver *ebus_drv = to_ibmebus_driver(drv);
- const struct of_device_id *ids = ebus_drv->id_table;
- const struct of_device_id *found_id;
-
- if (!ids)
+static int ibmebus_bus_pm_restore_noirq(struct device *dev)
+{
+ struct device_driver *drv = dev->driver;
+ int ret = 0;
+
+ if (!drv)
return 0;
-
- found_id = of_match_device(ids, &ebus_dev->ofdev);
- if (found_id)
- return 1;
-
- return 0;
+
+ if (drv->pm) {
+ if (drv->pm->restore_noirq)
+ ret = drv->pm->restore_noirq(dev);
+ }
+
+ return ret;
}
+#else /* !CONFIG_HIBERNATE_CALLBACKS */
+
+#define ibmebus_bus_pm_freeze NULL
+#define ibmebus_bus_pm_thaw NULL
+#define ibmebus_bus_pm_poweroff NULL
+#define ibmebus_bus_pm_restore NULL
+#define ibmebus_bus_pm_freeze_noirq NULL
+#define ibmebus_bus_pm_thaw_noirq NULL
+#define ibmebus_bus_pm_poweroff_noirq NULL
+#define ibmebus_bus_pm_restore_noirq NULL
+
+#endif /* !CONFIG_HIBERNATE_CALLBACKS */
+
+static struct dev_pm_ops ibmebus_bus_dev_pm_ops = {
+ .prepare = ibmebus_bus_pm_prepare,
+ .complete = ibmebus_bus_pm_complete,
+ .suspend = ibmebus_bus_pm_suspend,
+ .resume = ibmebus_bus_pm_resume,
+ .freeze = ibmebus_bus_pm_freeze,
+ .thaw = ibmebus_bus_pm_thaw,
+ .poweroff = ibmebus_bus_pm_poweroff,
+ .restore = ibmebus_bus_pm_restore,
+ .suspend_noirq = ibmebus_bus_pm_suspend_noirq,
+ .resume_noirq = ibmebus_bus_pm_resume_noirq,
+ .freeze_noirq = ibmebus_bus_pm_freeze_noirq,
+ .thaw_noirq = ibmebus_bus_pm_thaw_noirq,
+ .poweroff_noirq = ibmebus_bus_pm_poweroff_noirq,
+ .restore_noirq = ibmebus_bus_pm_restore_noirq,
+};
+
+#define IBMEBUS_BUS_PM_OPS_PTR (&ibmebus_bus_dev_pm_ops)
+
+#else /* !CONFIG_PM_SLEEP */
+
+#define IBMEBUS_BUS_PM_OPS_PTR NULL
+
+#endif /* !CONFIG_PM_SLEEP */
+
struct bus_type ibmebus_bus_type = {
- .name = "ibmebus",
- .match = ibmebus_bus_match,
+ .name = "ibmebus",
+ .uevent = of_device_uevent_modalias,
+ .bus_groups = ibmbus_bus_groups,
+ .match = ibmebus_bus_bus_match,
+ .probe = ibmebus_bus_device_probe,
+ .remove = ibmebus_bus_device_remove,
+ .shutdown = ibmebus_bus_device_shutdown,
+ .dev_attrs = ibmebus_bus_device_attrs,
+ .pm = IBMEBUS_BUS_PM_OPS_PTR,
};
EXPORT_SYMBOL(ibmebus_bus_type);
static int __init ibmebus_bus_init(void)
{
int err;
-
+
printk(KERN_INFO "IBM eBus Device Driver\n");
-
+
err = bus_register(&ibmebus_bus_type);
if (err) {
- printk(KERN_ERR ":%s: failed to register IBM eBus.\n",
- __FUNCTION__);
+ printk(KERN_ERR "%s: failed to register IBM eBus.\n",
+ __func__);
return err;
}
-
- err = device_register(&ibmebus_bus_device.ofdev.dev);
+
+ err = device_register(&ibmebus_bus_device);
if (err) {
- printk(KERN_WARNING "%s: device_register returned %i\n",
- __FUNCTION__, err);
+ printk(KERN_WARNING "%s: device_register returned %i\n",
+ __func__, err);
bus_unregister(&ibmebus_bus_type);
return err;
}
-
+
+ err = ibmebus_create_devices(ibmebus_matches);
+ if (err) {
+ device_unregister(&ibmebus_bus_device);
+ bus_unregister(&ibmebus_bus_type);
+ return err;
+ }
+
return 0;
}
-__initcall(ibmebus_bus_init);
+postcore_initcall(ibmebus_bus_init);
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 4180c3998b3..d7216c9abda 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -24,68 +24,59 @@
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/sysctl.h>
+#include <linux/tick.h>
-#include <asm/system.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/time.h>
#include <asm/machdep.h>
+#include <asm/runlatch.h>
#include <asm/smp.h>
-#ifdef CONFIG_HOTPLUG_CPU
-#define cpu_should_die() (cpu_is_offline(smp_processor_id()) && \
- system_state == SYSTEM_RUNNING)
-#else
-#define cpu_should_die() 0
-#endif
-/*
- * The body of the idle task.
- */
-void cpu_idle(void)
-{
- if (ppc_md.idle_loop)
- ppc_md.idle_loop(); /* doesn't return */
+unsigned long cpuidle_disable = IDLE_NO_OVERRIDE;
+EXPORT_SYMBOL(cpuidle_disable);
- set_thread_flag(TIF_POLLING_NRFLAG);
- while (1) {
- while (!need_resched() && !cpu_should_die()) {
- ppc64_runlatch_off();
-
- if (ppc_md.power_save) {
- clear_thread_flag(TIF_POLLING_NRFLAG);
- /*
- * smp_mb is so clearing of TIF_POLLING_NRFLAG
- * is ordered w.r.t. need_resched() test.
- */
- smp_mb();
- local_irq_disable();
-
- /* check again after disabling irqs */
- if (!need_resched() && !cpu_should_die())
- ppc_md.power_save();
+static int __init powersave_off(char *arg)
+{
+ ppc_md.power_save = NULL;
+ cpuidle_disable = IDLE_POWERSAVE_OFF;
+ return 0;
+}
+__setup("powersave=off", powersave_off);
- local_irq_enable();
- set_thread_flag(TIF_POLLING_NRFLAG);
+#ifdef CONFIG_HOTPLUG_CPU
+void arch_cpu_idle_dead(void)
+{
+ sched_preempt_enable_no_resched();
+ cpu_die();
+}
+#endif
- } else {
- /*
- * Go into low thread priority and possibly
- * low power mode.
- */
- HMT_low();
- HMT_very_low();
- }
- }
+void arch_cpu_idle(void)
+{
+ ppc64_runlatch_off();
- HMT_medium();
- ppc64_runlatch_on();
- if (cpu_should_die())
- cpu_die();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
+ if (ppc_md.power_save) {
+ ppc_md.power_save();
+ /*
+ * Some power_save functions return with
+ * interrupts enabled, some don't.
+ */
+ if (irqs_disabled())
+ local_irq_enable();
+ } else {
+ local_irq_enable();
+ /*
+ * Go into low thread priority and possibly
+ * low power mode.
+ */
+ HMT_low();
+ HMT_very_low();
}
+
+ HMT_medium();
+ ppc64_runlatch_on();
}
int powersave_nap;
@@ -94,26 +85,29 @@ int powersave_nap;
/*
* Register the sysctl to set/clear powersave_nap.
*/
-static ctl_table powersave_nap_ctl_table[]={
+static struct ctl_table powersave_nap_ctl_table[] = {
{
- .ctl_name = KERN_PPC_POWERSAVE_NAP,
.procname = "powersave-nap",
.data = &powersave_nap,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
- { 0, },
+ {}
};
-static ctl_table powersave_nap_sysctl_root[] = {
- { 1, "kernel", NULL, 0, 0755, powersave_nap_ctl_table, },
- { 0,},
+static struct ctl_table powersave_nap_sysctl_root[] = {
+ {
+ .procname = "kernel",
+ .mode = 0555,
+ .child = powersave_nap_ctl_table,
+ },
+ {}
};
static int __init
register_powersave_nap_sysctl(void)
{
- register_sysctl_table(powersave_nap_sysctl_root, 0);
+ register_sysctl_table(powersave_nap_sysctl_root);
return 0;
}
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index 01bcd52bbf8..1686916cc7f 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -135,7 +135,7 @@ BEGIN_FTR_SECTION
DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
- rlwinm r9,r1,0,0,31-THREAD_SHIFT /* current thread_info */
+ CURRENT_THREAD_INFO(r9, r1)
lwz r8,TI_LOCAL_FLAGS(r9) /* set napping bit */
ori r8,r8,_TLF_NAPPING /* so when we take an exception */
stw r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */
@@ -153,12 +153,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
* address of current. R11 points to the exception frame (physical
* address). We have to preserve r10.
*/
-_GLOBAL(power_save_6xx_restore)
+_GLOBAL(power_save_ppc32_restore)
lwz r9,_LINK(r11) /* interrupted in ppc6xx_idle: */
stw r9,_NIP(r11) /* make it do a blr */
#ifdef CONFIG_SMP
- mfspr r12,SPRN_SPRG3
+ CURRENT_THREAD_INFO(r12, r11)
lwz r11,TI_CPU(r12) /* get cpu number * 4 */
slwi r11,r11,2
#else
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S
new file mode 100644
index 00000000000..48c21acef91
--- /dev/null
+++ b/arch/powerpc/kernel/idle_book3e.S
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2010 IBM Corp, Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ *
+ * Generic idle routine for Book3E processors
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ppc-opcode.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/epapr_hcalls.h>
+
+/* 64-bit version only for now */
+#ifdef CONFIG_PPC64
+
+.macro BOOK3E_IDLE name loop
+_GLOBAL(\name)
+ /* Save LR for later */
+ mflr r0
+ std r0,16(r1)
+
+ /* Hard disable interrupts */
+ wrteei 0
+
+ /* Now check if an interrupt came in while we were soft disabled
+ * since we may otherwise lose it (doorbells etc...).
+ */
+ lbz r3,PACAIRQHAPPENED(r13)
+ cmpwi cr0,r3,0
+ bnelr
+
+ /* Now we are going to mark ourselves as soft and hard enabled in
+ * order to be able to take interrupts while asleep. We inform lockdep
+ * of that. We don't actually turn interrupts on just yet tho.
+ */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ stdu r1,-128(r1)
+ bl trace_hardirqs_on
+ addi r1,r1,128
+#endif
+ li r0,1
+ stb r0,PACASOFTIRQEN(r13)
+
+ /* Interrupts will make use return to LR, so get something we want
+ * in there
+ */
+ bl 1f
+
+ /* And return (interrupts are on) */
+ ld r0,16(r1)
+ mtlr r0
+ blr
+
+1: /* Let's set the _TLF_NAPPING flag so interrupts make us return
+ * to the right spot
+ */
+ CURRENT_THREAD_INFO(r11, r1)
+ ld r10,TI_LOCAL_FLAGS(r11)
+ ori r10,r10,_TLF_NAPPING
+ std r10,TI_LOCAL_FLAGS(r11)
+
+ /* We can now re-enable hard interrupts and go to sleep */
+ wrteei 1
+ \loop
+
+.endm
+
+.macro BOOK3E_IDLE_LOOP
+1:
+ PPC_WAIT(0)
+ b 1b
+.endm
+
+/* epapr_ev_idle_start below is patched with the proper hcall
+ opcodes during kernel initialization */
+.macro EPAPR_EV_IDLE_LOOP
+idle_loop:
+ LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE))
+
+.global epapr_ev_idle_start
+epapr_ev_idle_start:
+ li r3, -1
+ nop
+ nop
+ nop
+ b idle_loop
+.endm
+
+BOOK3E_IDLE epapr_ev_idle EPAPR_EV_IDLE_LOOP
+
+BOOK3E_IDLE book3e_idle BOOK3E_IDLE_LOOP
+
+#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S
new file mode 100644
index 00000000000..15448668988
--- /dev/null
+++ b/arch/powerpc/kernel/idle_e500.S
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ * Dave Liu <daveliu@freescale.com>
+ * copy from idle_6xx.S and modify for e500 based processor,
+ * implement the power_save function in idle.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+ .text
+
+_GLOBAL(e500_idle)
+ CURRENT_THREAD_INFO(r3, r1)
+ lwz r4,TI_LOCAL_FLAGS(r3) /* set napping bit */
+ ori r4,r4,_TLF_NAPPING /* so when we take an exception */
+ stw r4,TI_LOCAL_FLAGS(r3) /* it will return to our caller */
+
+#ifdef CONFIG_PPC_E500MC
+ wrteei 1
+1: wait
+
+ /*
+ * Guard against spurious wakeups (e.g. from a hypervisor) --
+ * any real interrupt will cause us to return to LR due to
+ * _TLF_NAPPING.
+ */
+ b 1b
+#else
+ /* Check if we can nap or doze, put HID0 mask in r3 */
+ lis r3,0
+BEGIN_FTR_SECTION
+ lis r3,HID0_DOZE@h
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE)
+
+BEGIN_FTR_SECTION
+ /* Now check if user enabled NAP mode */
+ lis r4,powersave_nap@ha
+ lwz r4,powersave_nap@l(r4)
+ cmpwi 0,r4,0
+ beq 1f
+ stwu r1,-16(r1)
+ mflr r0
+ stw r0,20(r1)
+ bl flush_dcache_L1
+ lwz r0,20(r1)
+ addi r1,r1,16
+ mtlr r0
+ lis r3,HID0_NAP@h
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
+BEGIN_FTR_SECTION
+ msync
+ li r7,L2CSR0_L2FL@l
+ mtspr SPRN_L2CSR0,r7
+2:
+ mfspr r7,SPRN_L2CSR0
+ andi. r4,r7,L2CSR0_L2FL@l
+ bne 2b
+END_FTR_SECTION_IFSET(CPU_FTR_L2CSR|CPU_FTR_CAN_NAP)
+1:
+ /* Go to NAP or DOZE now */
+ mfspr r4,SPRN_HID0
+ rlwinm r4,r4,0,~(HID0_DOZE|HID0_NAP|HID0_SLEEP)
+ or r4,r4,r3
+ isync
+ mtspr SPRN_HID0,r4
+ isync
+
+ mfmsr r7
+ oris r7,r7,MSR_WE@h
+ ori r7,r7,MSR_EE
+ msync
+ mtmsr r7
+ isync
+2: b 2b
+#endif /* !E500MC */
+
+/*
+ * Return from NAP/DOZE mode, restore some CPU specific registers,
+ * r2 containing physical address of current.
+ * r11 points to the exception frame (physical address).
+ * We have to preserve r10.
+ */
+_GLOBAL(power_save_ppc32_restore)
+ lwz r9,_LINK(r11) /* interrupted in e500_idle */
+ stw r9,_NIP(r11) /* make it do a blr */
+
+#ifdef CONFIG_SMP
+ CURRENT_THREAD_INFO(r12, r1)
+ lwz r11,TI_CPU(r12) /* get cpu number * 4 */
+ slwi r11,r11,2
+#else
+ li r11,0
+#endif
+
+ b transfer_to_handler_cont
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
index 30de81da7b4..f57a19348bd 100644
--- a/arch/powerpc/kernel/idle_power4.S
+++ b/arch/powerpc/kernel/idle_power4.S
@@ -14,6 +14,7 @@
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
+#include <asm/irqflags.h>
#undef DEBUG
@@ -29,16 +30,39 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
cmpwi 0,r4,0
beqlr
- /* Go to NAP now */
+ /* Hard disable interrupts */
+ mfmsr r7
+ rldicl r0,r7,48,1
+ rotldi r0,r0,16
+ mtmsrd r0,1
+
+ /* Check if something happened while soft-disabled */
+ lbz r0,PACAIRQHAPPENED(r13)
+ cmpwi cr0,r0,0
+ bnelr
+
+ /* Soft-enable interrupts */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-128(r1)
+ bl trace_hardirqs_on
+ addi r1,r1,128
+ ld r0,16(r1)
+ mtlr r0
+ mfmsr r7
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+ li r0,1
+ stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */
BEGIN_FTR_SECTION
DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
- clrrdi r9,r1,THREAD_SHIFT /* current thread_info */
+ CURRENT_THREAD_INFO(r9, r1)
ld r8,TI_LOCAL_FLAGS(r9) /* set napping bit */
ori r8,r8,_TLF_NAPPING /* so when we take an exception */
std r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */
- mfmsr r7
ori r7,r7,MSR_EE
oris r7,r7,MSR_POW@h
1: sync
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
new file mode 100644
index 00000000000..5cf3d367190
--- /dev/null
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -0,0 +1,187 @@
+/*
+ * This file contains the power_save function for Power7 CPUs.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ppc-opcode.h>
+#include <asm/hw_irq.h>
+#include <asm/kvm_book3s_asm.h>
+#include <asm/opal.h>
+
+#undef DEBUG
+
+/* Idle state entry routines */
+
+#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
+ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
+ std r0,0(r1); \
+ ptesync; \
+ ld r0,0(r1); \
+1: cmp cr0,r0,r0; \
+ bne 1b; \
+ IDLE_INST; \
+ b .
+
+ .text
+
+/*
+ * Pass requested state in r3:
+ * 0 - nap
+ * 1 - sleep
+ *
+ * To check IRQ_HAPPENED in r4
+ * 0 - don't check
+ * 1 - check
+ */
+_GLOBAL(power7_powersave_common)
+ /* Use r3 to pass state nap/sleep/winkle */
+ /* NAP is a state loss, we create a regs frame on the
+ * stack, fill it up with the state we care about and
+ * stick a pointer to it in PACAR1. We really only
+ * need to save PC, some CR bits and the NV GPRs,
+ * but for now an interrupt frame will do.
+ */
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-INT_FRAME_SIZE(r1)
+ std r0,_LINK(r1)
+ std r0,_NIP(r1)
+
+#ifndef CONFIG_SMP
+ /* Make sure FPU, VSX etc... are flushed as we may lose
+ * state when going to nap mode
+ */
+ bl discard_lazy_cpu_state
+#endif /* CONFIG_SMP */
+
+ /* Hard disable interrupts */
+ mfmsr r9
+ rldicl r9,r9,48,1
+ rotldi r9,r9,16
+ mtmsrd r9,1 /* hard-disable interrupts */
+
+ /* Check if something happened while soft-disabled */
+ lbz r0,PACAIRQHAPPENED(r13)
+ cmpwi cr0,r0,0
+ beq 1f
+ cmpwi cr0,r4,0
+ beq 1f
+ addi r1,r1,INT_FRAME_SIZE
+ ld r0,16(r1)
+ mtlr r0
+ blr
+
+1: /* We mark irqs hard disabled as this is the state we'll
+ * be in when returning and we need to tell arch_local_irq_restore()
+ * about it
+ */
+ li r0,PACA_IRQ_HARD_DIS
+ stb r0,PACAIRQHAPPENED(r13)
+
+ /* We haven't lost state ... yet */
+ li r0,0
+ stb r0,PACA_NAPSTATELOST(r13)
+
+ /* Continue saving state */
+ SAVE_GPR(2, r1)
+ SAVE_NVGPRS(r1)
+ mfcr r4
+ std r4,_CCR(r1)
+ std r9,_MSR(r1)
+ std r1,PACAR1(r13)
+
+_GLOBAL(power7_enter_nap_mode)
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /* Tell KVM we're napping */
+ li r4,KVM_HWTHREAD_IN_NAP
+ stb r4,HSTATE_HWTHREAD_STATE(r13)
+#endif
+ cmpwi cr0,r3,1
+ beq 2f
+ IDLE_STATE_ENTER_SEQ(PPC_NAP)
+ /* No return */
+2: IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+ /* No return */
+
+_GLOBAL(power7_idle)
+ /* Now check if user or arch enabled NAP mode */
+ LOAD_REG_ADDRBASE(r3,powersave_nap)
+ lwz r4,ADDROFF(powersave_nap)(r3)
+ cmpwi 0,r4,0
+ beqlr
+ li r3, 1
+ /* fall through */
+
+_GLOBAL(power7_nap)
+ mr r4,r3
+ li r3,0
+ b power7_powersave_common
+ /* No return */
+
+_GLOBAL(power7_sleep)
+ li r3,1
+ li r4,1
+ b power7_powersave_common
+ /* No return */
+
+_GLOBAL(power7_wakeup_tb_loss)
+ ld r2,PACATOC(r13);
+ ld r1,PACAR1(r13)
+
+ /* Time base re-sync */
+ li r0,OPAL_RESYNC_TIMEBASE
+ LOAD_REG_ADDR(r11,opal);
+ ld r12,8(r11);
+ ld r2,0(r11);
+ mtctr r12
+ bctrl
+
+ /* TODO: Check r3 for failure */
+
+ REST_NVGPRS(r1)
+ REST_GPR(2, r1)
+ ld r3,_CCR(r1)
+ ld r4,_MSR(r1)
+ ld r5,_NIP(r1)
+ addi r1,r1,INT_FRAME_SIZE
+ mtcr r3
+ mfspr r3,SPRN_SRR1 /* Return SRR1 */
+ mtspr SPRN_SRR1,r4
+ mtspr SPRN_SRR0,r5
+ rfid
+
+_GLOBAL(power7_wakeup_loss)
+ ld r1,PACAR1(r13)
+ REST_NVGPRS(r1)
+ REST_GPR(2, r1)
+ ld r3,_CCR(r1)
+ ld r4,_MSR(r1)
+ ld r5,_NIP(r1)
+ addi r1,r1,INT_FRAME_SIZE
+ mtcr r3
+ mtspr SPRN_SRR1,r4
+ mtspr SPRN_SRR0,r5
+ rfid
+
+_GLOBAL(power7_wakeup_noloss)
+ lbz r0,PACA_NAPSTATELOST(r13)
+ cmpwi r0,0
+ bne power7_wakeup_loss
+ ld r1,PACAR1(r13)
+ ld r4,_MSR(r1)
+ ld r5,_NIP(r1)
+ addi r1,r1,INT_FRAME_SIZE
+ mtspr SPRN_SRR1,r4
+ mtspr SPRN_SRR0,r5
+ rfid
diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c
deleted file mode 100644
index 941043ae040..00000000000
--- a/arch/powerpc/kernel/init_task.c
+++ /dev/null
@@ -1,36 +0,0 @@
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/init_task.h>
-#include <linux/fs.h>
-#include <linux/mqueue.h>
-#include <asm/uaccess.h>
-
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
-static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
-/*
- * Initial thread structure.
- *
- * We need to make sure that this is 16384-byte aligned due to the
- * way process stacks are handled. This is done by having a special
- * "init_task" linker map entry..
- */
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
-
-/*
- * Initial task structure.
- *
- * All other task structs will be allocated on slabs in fork.c
- */
-struct task_struct init_task = INIT_TASK(init_task);
-
-EXPORT_SYMBOL(init_task);
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
new file mode 100644
index 00000000000..24b968f8e4d
--- /dev/null
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -0,0 +1,212 @@
+/*
+ * Support PCI IO workaround
+ *
+ * Copyright (C) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ * IBM, Corp.
+ * (C) Copyright 2007-2008 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/sched.h> /* for init_mm */
+
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/pgtable.h>
+#include <asm/ppc-pci.h>
+#include <asm/io-workarounds.h>
+
+#define IOWA_MAX_BUS 8
+
+static struct iowa_bus iowa_busses[IOWA_MAX_BUS];
+static unsigned int iowa_bus_count;
+
+static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr)
+{
+ int i, j;
+ struct resource *res;
+ unsigned long vstart, vend;
+
+ for (i = 0; i < iowa_bus_count; i++) {
+ struct iowa_bus *bus = &iowa_busses[i];
+ struct pci_controller *phb = bus->phb;
+
+ if (vaddr) {
+ vstart = (unsigned long)phb->io_base_virt;
+ vend = vstart + phb->pci_io_size - 1;
+ if ((vaddr >= vstart) && (vaddr <= vend))
+ return bus;
+ }
+
+ if (paddr)
+ for (j = 0; j < 3; j++) {
+ res = &phb->mem_resources[j];
+ if (paddr >= res->start && paddr <= res->end)
+ return bus;
+ }
+ }
+
+ return NULL;
+}
+
+#ifdef CONFIG_PPC_INDIRECT_MMIO
+struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
+{
+ unsigned hugepage_shift;
+ struct iowa_bus *bus;
+ int token;
+
+ token = PCI_GET_ADDR_TOKEN(addr);
+
+ if (token && token <= iowa_bus_count)
+ bus = &iowa_busses[token - 1];
+ else {
+ unsigned long vaddr, paddr;
+ pte_t *ptep;
+
+ vaddr = (unsigned long)PCI_FIX_ADDR(addr);
+ if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
+ return NULL;
+
+ ptep = find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
+ &hugepage_shift);
+ if (ptep == NULL)
+ paddr = 0;
+ else {
+ /*
+ * we don't have hugepages backing iomem
+ */
+ WARN_ON(hugepage_shift);
+ paddr = pte_pfn(*ptep) << PAGE_SHIFT;
+ }
+ bus = iowa_pci_find(vaddr, paddr);
+
+ if (bus == NULL)
+ return NULL;
+ }
+
+ return bus;
+}
+#else /* CONFIG_PPC_INDIRECT_MMIO */
+struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
+{
+ return NULL;
+}
+#endif /* !CONFIG_PPC_INDIRECT_MMIO */
+
+#ifdef CONFIG_PPC_INDIRECT_PIO
+struct iowa_bus *iowa_pio_find_bus(unsigned long port)
+{
+ unsigned long vaddr = (unsigned long)pci_io_base + port;
+ return iowa_pci_find(vaddr, 0);
+}
+#else
+struct iowa_bus *iowa_pio_find_bus(unsigned long port)
+{
+ return NULL;
+}
+#endif
+
+#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \
+static ret iowa_##name at \
+{ \
+ struct iowa_bus *bus; \
+ bus = iowa_##space##_find_bus(aa); \
+ if (bus && bus->ops && bus->ops->name) \
+ return bus->ops->name al; \
+ return __do_##name al; \
+}
+
+#define DEF_PCI_AC_NORET(name, at, al, space, aa) \
+static void iowa_##name at \
+{ \
+ struct iowa_bus *bus; \
+ bus = iowa_##space##_find_bus(aa); \
+ if (bus && bus->ops && bus->ops->name) { \
+ bus->ops->name al; \
+ return; \
+ } \
+ __do_##name al; \
+}
+
+#include <asm/io-defs.h>
+
+#undef DEF_PCI_AC_RET
+#undef DEF_PCI_AC_NORET
+
+static const struct ppc_pci_io iowa_pci_io = {
+
+#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) .name = iowa_##name,
+#define DEF_PCI_AC_NORET(name, at, al, space, aa) .name = iowa_##name,
+
+#include <asm/io-defs.h>
+
+#undef DEF_PCI_AC_RET
+#undef DEF_PCI_AC_NORET
+
+};
+
+#ifdef CONFIG_PPC_INDIRECT_MMIO
+static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
+ unsigned long flags, void *caller)
+{
+ struct iowa_bus *bus;
+ void __iomem *res = __ioremap_caller(addr, size, flags, caller);
+ int busno;
+
+ bus = iowa_pci_find(0, (unsigned long)addr);
+ if (bus != NULL) {
+ busno = bus - iowa_busses;
+ PCI_SET_ADDR_TOKEN(res, busno + 1);
+ }
+ return res;
+}
+#else /* CONFIG_PPC_INDIRECT_MMIO */
+#define iowa_ioremap NULL
+#endif /* !CONFIG_PPC_INDIRECT_MMIO */
+
+/* Enable IO workaround */
+static void io_workaround_init(void)
+{
+ static int io_workaround_inited;
+
+ if (io_workaround_inited)
+ return;
+ ppc_pci_io = iowa_pci_io;
+ ppc_md.ioremap = iowa_ioremap;
+ io_workaround_inited = 1;
+}
+
+/* Register new bus to support workaround */
+void iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops,
+ int (*initfunc)(struct iowa_bus *, void *), void *data)
+{
+ struct iowa_bus *bus;
+ struct device_node *np = phb->dn;
+
+ io_workaround_init();
+
+ if (iowa_bus_count >= IOWA_MAX_BUS) {
+ pr_err("IOWA:Too many pci bridges, "
+ "workarounds disabled for %s\n", np->full_name);
+ return;
+ }
+
+ bus = &iowa_busses[iowa_bus_count];
+ bus->phb = phb;
+ bus->ops = ops;
+ bus->private = data;
+
+ if (initfunc)
+ if ((*initfunc)(bus, data))
+ return;
+
+ iowa_bus_count++;
+
+ pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name);
+}
+
diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c
index e98180686b3..2a2b4aeab80 100644
--- a/arch/powerpc/kernel/io.c
+++ b/arch/powerpc/kernel/io.c
@@ -19,25 +19,26 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/compiler.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <asm/io.h>
#include <asm/firmware.h>
#include <asm/bug.h>
-void _insb(volatile u8 __iomem *port, void *buf, long count)
+/* See definition in io.h */
+bool isa_io_special;
+
+void _insb(const volatile u8 __iomem *port, void *buf, long count)
{
u8 *tbuf = buf;
u8 tmp;
- BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
-
if (unlikely(count <= 0))
return;
asm volatile("sync");
do {
tmp = *port;
- asm volatile("eieio");
+ eieio();
*tbuf++ = tmp;
} while (--count != 0);
asm volatile("twi 0,%0,0; isync" : : "r" (tmp));
@@ -48,8 +49,6 @@ void _outsb(volatile u8 __iomem *port, const void *buf, long count)
{
const u8 *tbuf = buf;
- BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
-
if (unlikely(count <= 0))
return;
asm volatile("sync");
@@ -60,19 +59,17 @@ void _outsb(volatile u8 __iomem *port, const void *buf, long count)
}
EXPORT_SYMBOL(_outsb);
-void _insw_ns(volatile u16 __iomem *port, void *buf, long count)
+void _insw_ns(const volatile u16 __iomem *port, void *buf, long count)
{
u16 *tbuf = buf;
u16 tmp;
- BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
-
if (unlikely(count <= 0))
return;
asm volatile("sync");
do {
tmp = *port;
- asm volatile("eieio");
+ eieio();
*tbuf++ = tmp;
} while (--count != 0);
asm volatile("twi 0,%0,0; isync" : : "r" (tmp));
@@ -83,8 +80,6 @@ void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count)
{
const u16 *tbuf = buf;
- BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
-
if (unlikely(count <= 0))
return;
asm volatile("sync");
@@ -95,19 +90,17 @@ void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count)
}
EXPORT_SYMBOL(_outsw_ns);
-void _insl_ns(volatile u32 __iomem *port, void *buf, long count)
+void _insl_ns(const volatile u32 __iomem *port, void *buf, long count)
{
u32 *tbuf = buf;
u32 tmp;
- BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
-
if (unlikely(count <= 0))
return;
asm volatile("sync");
do {
tmp = *port;
- asm volatile("eieio");
+ eieio();
*tbuf++ = tmp;
} while (--count != 0);
asm volatile("twi 0,%0,0; isync" : : "r" (tmp));
@@ -118,8 +111,6 @@ void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count)
{
const u32 *tbuf = buf;
- BUG_ON(firmware_has_feature(FW_FEATURE_ISERIES));
-
if (unlikely(count <= 0))
return;
asm volatile("sync");
@@ -129,3 +120,91 @@ void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count)
asm volatile("sync");
}
EXPORT_SYMBOL(_outsl_ns);
+
+#define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0)
+
+notrace void
+_memset_io(volatile void __iomem *addr, int c, unsigned long n)
+{
+ void *p = (void __force *)addr;
+ u32 lc = c;
+ lc |= lc << 8;
+ lc |= lc << 16;
+
+ __asm__ __volatile__ ("sync" : : : "memory");
+ while(n && !IO_CHECK_ALIGN(p, 4)) {
+ *((volatile u8 *)p) = c;
+ p++;
+ n--;
+ }
+ while(n >= 4) {
+ *((volatile u32 *)p) = lc;
+ p += 4;
+ n -= 4;
+ }
+ while(n) {
+ *((volatile u8 *)p) = c;
+ p++;
+ n--;
+ }
+ __asm__ __volatile__ ("sync" : : : "memory");
+}
+EXPORT_SYMBOL(_memset_io);
+
+void _memcpy_fromio(void *dest, const volatile void __iomem *src,
+ unsigned long n)
+{
+ void *vsrc = (void __force *) src;
+
+ __asm__ __volatile__ ("sync" : : : "memory");
+ while(n && (!IO_CHECK_ALIGN(vsrc, 4) || !IO_CHECK_ALIGN(dest, 4))) {
+ *((u8 *)dest) = *((volatile u8 *)vsrc);
+ eieio();
+ vsrc++;
+ dest++;
+ n--;
+ }
+ while(n >= 4) {
+ *((u32 *)dest) = *((volatile u32 *)vsrc);
+ eieio();
+ vsrc += 4;
+ dest += 4;
+ n -= 4;
+ }
+ while(n) {
+ *((u8 *)dest) = *((volatile u8 *)vsrc);
+ eieio();
+ vsrc++;
+ dest++;
+ n--;
+ }
+ __asm__ __volatile__ ("sync" : : : "memory");
+}
+EXPORT_SYMBOL(_memcpy_fromio);
+
+void _memcpy_toio(volatile void __iomem *dest, const void *src, unsigned long n)
+{
+ void *vdest = (void __force *) dest;
+
+ __asm__ __volatile__ ("sync" : : : "memory");
+ while(n && (!IO_CHECK_ALIGN(vdest, 4) || !IO_CHECK_ALIGN(src, 4))) {
+ *((volatile u8 *)vdest) = *((u8 *)src);
+ src++;
+ vdest++;
+ n--;
+ }
+ while(n >= 4) {
+ *((volatile u32 *)vdest) = *((volatile u32 *)src);
+ src += 4;
+ vdest += 4;
+ n-=4;
+ }
+ while(n) {
+ *((volatile u8 *)vdest) = *((u8 *)src);
+ src++;
+ vdest++;
+ n--;
+ }
+ __asm__ __volatile__ ("sync" : : : "memory");
+}
+EXPORT_SYMBOL(_memcpy_toio);
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
index a13a93dfc65..12e48d56f77 100644
--- a/arch/powerpc/kernel/iomap.c
+++ b/arch/powerpc/kernel/iomap.c
@@ -3,34 +3,35 @@
*
* (C) Copyright 2004 Linus Torvalds
*/
-#include <linux/init.h>
#include <linux/pci.h>
#include <linux/mm.h>
+#include <linux/export.h>
#include <asm/io.h>
+#include <asm/pci-bridge.h>
/*
* Here comes the ppc64 implementation of the IOMAP
* interfaces.
*/
-unsigned int fastcall ioread8(void __iomem *addr)
+unsigned int ioread8(void __iomem *addr)
{
return readb(addr);
}
-unsigned int fastcall ioread16(void __iomem *addr)
+unsigned int ioread16(void __iomem *addr)
{
return readw(addr);
}
-unsigned int fastcall ioread16be(void __iomem *addr)
+unsigned int ioread16be(void __iomem *addr)
{
- return in_be16(addr);
+ return readw_be(addr);
}
-unsigned int fastcall ioread32(void __iomem *addr)
+unsigned int ioread32(void __iomem *addr)
{
return readl(addr);
}
-unsigned int fastcall ioread32be(void __iomem *addr)
+unsigned int ioread32be(void __iomem *addr)
{
- return in_be32(addr);
+ return readl_be(addr);
}
EXPORT_SYMBOL(ioread8);
EXPORT_SYMBOL(ioread16);
@@ -38,25 +39,25 @@ EXPORT_SYMBOL(ioread16be);
EXPORT_SYMBOL(ioread32);
EXPORT_SYMBOL(ioread32be);
-void fastcall iowrite8(u8 val, void __iomem *addr)
+void iowrite8(u8 val, void __iomem *addr)
{
writeb(val, addr);
}
-void fastcall iowrite16(u16 val, void __iomem *addr)
+void iowrite16(u16 val, void __iomem *addr)
{
writew(val, addr);
}
-void fastcall iowrite16be(u16 val, void __iomem *addr)
+void iowrite16be(u16 val, void __iomem *addr)
{
- out_be16(addr, val);
+ writew_be(val, addr);
}
-void fastcall iowrite32(u32 val, void __iomem *addr)
+void iowrite32(u32 val, void __iomem *addr)
{
writel(val, addr);
}
-void fastcall iowrite32be(u32 val, void __iomem *addr)
+void iowrite32be(u32 val, void __iomem *addr)
{
- out_be32(addr, val);
+ writel_be(val, addr);
}
EXPORT_SYMBOL(iowrite8);
EXPORT_SYMBOL(iowrite16);
@@ -74,15 +75,15 @@ EXPORT_SYMBOL(iowrite32be);
*/
void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
{
- _insb((u8 __iomem *) addr, dst, count);
+ readsb(addr, dst, count);
}
void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
{
- _insw_ns((u16 __iomem *) addr, dst, count);
+ readsw(addr, dst, count);
}
void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
{
- _insl_ns((u32 __iomem *) addr, dst, count);
+ readsl(addr, dst, count);
}
EXPORT_SYMBOL(ioread8_rep);
EXPORT_SYMBOL(ioread16_rep);
@@ -90,15 +91,15 @@ EXPORT_SYMBOL(ioread32_rep);
void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
{
- _outsb((u8 __iomem *) addr, src, count);
+ writesb(addr, src, count);
}
void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
{
- _outsw_ns((u16 __iomem *) addr, src, count);
+ writesw(addr, src, count);
}
void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
{
- _outsl_ns((u32 __iomem *) addr, src, count);
+ writesl(addr, src, count);
}
EXPORT_SYMBOL(iowrite8_rep);
EXPORT_SYMBOL(iowrite16_rep);
@@ -106,7 +107,7 @@ EXPORT_SYMBOL(iowrite32_rep);
void __iomem *ioport_map(unsigned long port, unsigned int len)
{
- return (void __iomem *) (port+pci_io_base);
+ return (void __iomem *) (port + _IO_BASE);
}
void ioport_unmap(void __iomem *addr)
@@ -116,27 +117,15 @@ void ioport_unmap(void __iomem *addr)
EXPORT_SYMBOL(ioport_map);
EXPORT_SYMBOL(ioport_unmap);
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
-{
- unsigned long start = pci_resource_start(dev, bar);
- unsigned long len = pci_resource_len(dev, bar);
- unsigned long flags = pci_resource_flags(dev, bar);
-
- if (!len)
- return NULL;
- if (max && len > max)
- len = max;
- if (flags & IORESOURCE_IO)
- return ioport_map(start, len);
- if (flags & IORESOURCE_MEM)
- return ioremap(start, len);
- /* What? */
- return NULL;
-}
-
+#ifdef CONFIG_PCI
void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
{
- /* Nothing to do */
+ if (isa_vaddr_is_ioport(addr))
+ return;
+ if (pcibios_vaddr_is_ioport(addr))
+ return;
+ iounmap(addr);
}
-EXPORT_SYMBOL(pci_iomap);
+
EXPORT_SYMBOL(pci_iounmap);
+#endif /* CONFIG_PCI */
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index ba069407172..88e3ec6e1d9 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -30,22 +30,29 @@
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/dma-mapping.h>
-#include <linux/init.h>
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
+#include <linux/iommu-helper.h>
+#include <linux/crash_dump.h>
+#include <linux/hash.h>
+#include <linux/fault-inject.h>
+#include <linux/pci.h>
+#include <linux/iommu.h>
+#include <linux/sched.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/kdump.h>
+#include <asm/fadump.h>
+#include <asm/vio.h>
+#include <asm/tce.h>
#define DBG(...)
-#ifdef CONFIG_IOMMU_VMERGE
-static int novmerge = 0;
-#else
-static int novmerge = 1;
-#endif
+static int novmerge;
+
+static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);
static int __init setup_iommu(char *str)
{
@@ -58,99 +65,232 @@ static int __init setup_iommu(char *str)
__setup("iommu=", setup_iommu);
-static unsigned long iommu_range_alloc(struct iommu_table *tbl,
+static DEFINE_PER_CPU(unsigned int, iommu_pool_hash);
+
+/*
+ * We precalculate the hash to avoid doing it on every allocation.
+ *
+ * The hash is important to spread CPUs across all the pools. For example,
+ * on a POWER7 with 4 way SMT we want interrupts on the primary threads and
+ * with 4 pools all primary threads would map to the same pool.
+ */
+static int __init setup_iommu_pool_hash(void)
+{
+ unsigned int i;
+
+ for_each_possible_cpu(i)
+ per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);
+
+ return 0;
+}
+subsys_initcall(setup_iommu_pool_hash);
+
+#ifdef CONFIG_FAIL_IOMMU
+
+static DECLARE_FAULT_ATTR(fail_iommu);
+
+static int __init setup_fail_iommu(char *str)
+{
+ return setup_fault_attr(&fail_iommu, str);
+}
+__setup("fail_iommu=", setup_fail_iommu);
+
+static bool should_fail_iommu(struct device *dev)
+{
+ return dev->archdata.fail_iommu && should_fail(&fail_iommu, 1);
+}
+
+static int __init fail_iommu_debugfs(void)
+{
+ struct dentry *dir = fault_create_debugfs_attr("fail_iommu",
+ NULL, &fail_iommu);
+
+ return PTR_ERR_OR_ZERO(dir);
+}
+late_initcall(fail_iommu_debugfs);
+
+static ssize_t fail_iommu_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", dev->archdata.fail_iommu);
+}
+
+static ssize_t fail_iommu_store(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ int i;
+
+ if (count > 0 && sscanf(buf, "%d", &i) > 0)
+ dev->archdata.fail_iommu = (i == 0) ? 0 : 1;
+
+ return count;
+}
+
+static DEVICE_ATTR(fail_iommu, S_IRUGO|S_IWUSR, fail_iommu_show,
+ fail_iommu_store);
+
+static int fail_iommu_bus_notify(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+
+ if (action == BUS_NOTIFY_ADD_DEVICE) {
+ if (device_create_file(dev, &dev_attr_fail_iommu))
+ pr_warn("Unable to create IOMMU fault injection sysfs "
+ "entries\n");
+ } else if (action == BUS_NOTIFY_DEL_DEVICE) {
+ device_remove_file(dev, &dev_attr_fail_iommu);
+ }
+
+ return 0;
+}
+
+static struct notifier_block fail_iommu_bus_notifier = {
+ .notifier_call = fail_iommu_bus_notify
+};
+
+static int __init fail_iommu_setup(void)
+{
+#ifdef CONFIG_PCI
+ bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier);
+#endif
+#ifdef CONFIG_IBMVIO
+ bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier);
+#endif
+
+ return 0;
+}
+/*
+ * Must execute after PCI and VIO subsystem have initialised but before
+ * devices are probed.
+ */
+arch_initcall(fail_iommu_setup);
+#else
+static inline bool should_fail_iommu(struct device *dev)
+{
+ return false;
+}
+#endif
+
+static unsigned long iommu_range_alloc(struct device *dev,
+ struct iommu_table *tbl,
unsigned long npages,
unsigned long *handle,
unsigned long mask,
unsigned int align_order)
{
- unsigned long n, end, i, start;
+ unsigned long n, end, start;
unsigned long limit;
int largealloc = npages > 15;
int pass = 0;
unsigned long align_mask;
+ unsigned long boundary_size;
+ unsigned long flags;
+ unsigned int pool_nr;
+ struct iommu_pool *pool;
align_mask = 0xffffffffffffffffl >> (64 - align_order);
/* This allocator was derived from x86_64's bit string search */
/* Sanity check */
- if (unlikely(npages) == 0) {
+ if (unlikely(npages == 0)) {
if (printk_ratelimit())
WARN_ON(1);
return DMA_ERROR_CODE;
}
- if (handle && *handle)
- start = *handle;
+ if (should_fail_iommu(dev))
+ return DMA_ERROR_CODE;
+
+ /*
+ * We don't need to disable preemption here because any CPU can
+ * safely use any IOMMU pool.
+ */
+ pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1);
+
+ if (largealloc)
+ pool = &(tbl->large_pool);
else
- start = largealloc ? tbl->it_largehint : tbl->it_hint;
+ pool = &(tbl->pools[pool_nr]);
+
+ spin_lock_irqsave(&(pool->lock), flags);
- /* Use only half of the table for small allocs (15 pages or less) */
- limit = largealloc ? tbl->it_size : tbl->it_halfpoint;
+again:
+ if ((pass == 0) && handle && *handle &&
+ (*handle >= pool->start) && (*handle < pool->end))
+ start = *handle;
+ else
+ start = pool->hint;
- if (largealloc && start < tbl->it_halfpoint)
- start = tbl->it_halfpoint;
+ limit = pool->end;
/* The case below can happen if we have a small segment appended
* to a large, or when the previous alloc was at the very end of
* the available space. If so, go back to the initial start.
*/
if (start >= limit)
- start = largealloc ? tbl->it_largehint : tbl->it_hint;
-
- again:
+ start = pool->start;
if (limit + tbl->it_offset > mask) {
limit = mask - tbl->it_offset + 1;
/* If we're constrained on address range, first try
* at the masked hint to avoid O(n) search complexity,
- * but on second pass, start at 0.
+ * but on second pass, start at 0 in pool 0.
*/
- if ((start & mask) >= limit || pass > 0)
- start = 0;
- else
+ if ((start & mask) >= limit || pass > 0) {
+ spin_unlock(&(pool->lock));
+ pool = &(tbl->pools[0]);
+ spin_lock(&(pool->lock));
+ start = pool->start;
+ } else {
start &= mask;
+ }
}
- n = find_next_zero_bit(tbl->it_map, limit, start);
-
- /* Align allocation */
- n = (n + align_mask) & ~align_mask;
-
- end = n + npages;
+ if (dev)
+ boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+ 1 << tbl->it_page_shift);
+ else
+ boundary_size = ALIGN(1UL << 32, 1 << tbl->it_page_shift);
+ /* 4GB boundary for iseries_hv_alloc and iseries_hv_map */
+
+ n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset,
+ boundary_size >> tbl->it_page_shift, align_mask);
+ if (n == -1) {
+ if (likely(pass == 0)) {
+ /* First try the pool from the start */
+ pool->hint = pool->start;
+ pass++;
+ goto again;
- if (unlikely(end >= limit)) {
- if (likely(pass < 2)) {
- /* First failure, just rescan the half of the table.
- * Second failure, rescan the other half of the table.
- */
- start = (largealloc ^ pass) ? tbl->it_halfpoint : 0;
- limit = pass ? tbl->it_size : limit;
+ } else if (pass <= tbl->nr_pools) {
+ /* Now try scanning all the other pools */
+ spin_unlock(&(pool->lock));
+ pool_nr = (pool_nr + 1) & (tbl->nr_pools - 1);
+ pool = &tbl->pools[pool_nr];
+ spin_lock(&(pool->lock));
+ pool->hint = pool->start;
pass++;
goto again;
+
} else {
- /* Third failure, give up */
+ /* Give up */
+ spin_unlock_irqrestore(&(pool->lock), flags);
return DMA_ERROR_CODE;
}
}
- for (i = n; i < end; i++)
- if (test_bit(i, tbl->it_map)) {
- start = i+1;
- goto again;
- }
-
- for (i = n; i < end; i++)
- __set_bit(i, tbl->it_map);
+ end = n + npages;
/* Bump the hint to a new block for small allocs. */
if (largealloc) {
/* Don't bump to new block to avoid fragmentation */
- tbl->it_largehint = end;
+ pool->hint = end;
} else {
/* Overflow will be taken care of at the next allocation */
- tbl->it_hint = (end + tbl->it_blocksize - 1) &
+ pool->hint = (end + tbl->it_blocksize - 1) &
~(tbl->it_blocksize - 1);
}
@@ -158,52 +298,60 @@ static unsigned long iommu_range_alloc(struct iommu_table *tbl,
if (handle)
*handle = end;
+ spin_unlock_irqrestore(&(pool->lock), flags);
+
return n;
}
-static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page,
- unsigned int npages, enum dma_data_direction direction,
- unsigned long mask, unsigned int align_order)
+static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
+ void *page, unsigned int npages,
+ enum dma_data_direction direction,
+ unsigned long mask, unsigned int align_order,
+ struct dma_attrs *attrs)
{
- unsigned long entry, flags;
+ unsigned long entry;
dma_addr_t ret = DMA_ERROR_CODE;
+ int build_fail;
- spin_lock_irqsave(&(tbl->it_lock), flags);
-
- entry = iommu_range_alloc(tbl, npages, NULL, mask, align_order);
+ entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);
- if (unlikely(entry == DMA_ERROR_CODE)) {
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
+ if (unlikely(entry == DMA_ERROR_CODE))
return DMA_ERROR_CODE;
- }
entry += tbl->it_offset; /* Offset into real TCE table */
- ret = entry << PAGE_SHIFT; /* Set the return dma address */
+ ret = entry << tbl->it_page_shift; /* Set the return dma address */
/* Put the TCEs in the HW table */
- ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & PAGE_MASK,
- direction);
-
+ build_fail = ppc_md.tce_build(tbl, entry, npages,
+ (unsigned long)page &
+ IOMMU_PAGE_MASK(tbl), direction, attrs);
+
+ /* ppc_md.tce_build() only returns non-zero for transient errors.
+ * Clean up the table bitmap in this case and return
+ * DMA_ERROR_CODE. For all other errors the functionality is
+ * not altered.
+ */
+ if (unlikely(build_fail)) {
+ __iommu_free(tbl, ret, npages);
+ return DMA_ERROR_CODE;
+ }
/* Flush/invalidate TLB caches if necessary */
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
-
/* Make sure updates are seen by hardware */
mb();
return ret;
}
-static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
- unsigned int npages)
+static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
{
unsigned long entry, free_entry;
- unsigned long i;
- entry = dma_addr >> PAGE_SHIFT;
+ entry = dma_addr >> tbl->it_page_shift;
free_entry = entry - tbl->it_offset;
if (((free_entry + npages) > tbl->it_size) ||
@@ -211,30 +359,65 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
if (printk_ratelimit()) {
printk(KERN_INFO "iommu_free: invalid entry\n");
printk(KERN_INFO "\tentry = 0x%lx\n", entry);
- printk(KERN_INFO "\tdma_addr = 0x%lx\n", (u64)dma_addr);
- printk(KERN_INFO "\tTable = 0x%lx\n", (u64)tbl);
- printk(KERN_INFO "\tbus# = 0x%lx\n", (u64)tbl->it_busno);
- printk(KERN_INFO "\tsize = 0x%lx\n", (u64)tbl->it_size);
- printk(KERN_INFO "\tstartOff = 0x%lx\n", (u64)tbl->it_offset);
- printk(KERN_INFO "\tindex = 0x%lx\n", (u64)tbl->it_index);
+ printk(KERN_INFO "\tdma_addr = 0x%llx\n", (u64)dma_addr);
+ printk(KERN_INFO "\tTable = 0x%llx\n", (u64)tbl);
+ printk(KERN_INFO "\tbus# = 0x%llx\n", (u64)tbl->it_busno);
+ printk(KERN_INFO "\tsize = 0x%llx\n", (u64)tbl->it_size);
+ printk(KERN_INFO "\tstartOff = 0x%llx\n", (u64)tbl->it_offset);
+ printk(KERN_INFO "\tindex = 0x%llx\n", (u64)tbl->it_index);
WARN_ON(1);
}
- return;
+
+ return false;
}
- ppc_md.tce_free(tbl, entry, npages);
-
- for (i = 0; i < npages; i++)
- __clear_bit(free_entry+i, tbl->it_map);
+ return true;
}
-static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
- unsigned int npages)
+static struct iommu_pool *get_pool(struct iommu_table *tbl,
+ unsigned long entry)
{
+ struct iommu_pool *p;
+ unsigned long largepool_start = tbl->large_pool.start;
+
+ /* The large pool is the last pool at the top of the table */
+ if (entry >= largepool_start) {
+ p = &tbl->large_pool;
+ } else {
+ unsigned int pool_nr = entry / tbl->poolsize;
+
+ BUG_ON(pool_nr > tbl->nr_pools);
+ p = &tbl->pools[pool_nr];
+ }
+
+ return p;
+}
+
+static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
+{
+ unsigned long entry, free_entry;
unsigned long flags;
+ struct iommu_pool *pool;
- spin_lock_irqsave(&(tbl->it_lock), flags);
+ entry = dma_addr >> tbl->it_page_shift;
+ free_entry = entry - tbl->it_offset;
+
+ pool = get_pool(tbl, free_entry);
+ if (!iommu_free_check(tbl, dma_addr, npages))
+ return;
+
+ ppc_md.tce_free(tbl, entry, npages);
+
+ spin_lock_irqsave(&(pool->lock), flags);
+ bitmap_clear(tbl->it_map, free_entry, npages);
+ spin_unlock_irqrestore(&(pool->lock), flags);
+}
+
+static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+ unsigned int npages)
+{
__iommu_free(tbl, dma_addr, npages);
/* Make sure TLB cache is flushed if the HW needs it. We do
@@ -243,19 +426,19 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
*/
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
-
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
}
int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
- struct scatterlist *sglist, int nelems,
- unsigned long mask, enum dma_data_direction direction)
+ struct scatterlist *sglist, int nelems,
+ unsigned long mask, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
dma_addr_t dma_next = 0, dma_addr;
- unsigned long flags;
struct scatterlist *s, *outs, *segstart;
- int outcount, incount;
+ int outcount, incount, i, build_fail = 0;
+ unsigned int align;
unsigned long handle;
+ unsigned int max_seg_size;
BUG_ON(direction == DMA_NONE);
@@ -270,11 +453,10 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
/* Init first segment length for backout at failure */
outs->dma_length = 0;
- DBG("mapping %d elements:\n", nelems);
-
- spin_lock_irqsave(&(tbl->it_lock), flags);
+ DBG("sg mapping %d elements:\n", nelems);
- for (s = outs; nelems; nelems--, s++) {
+ max_seg_size = dma_get_max_seg_size(dev);
+ for_each_sg(sglist, s, nelems, i) {
unsigned long vaddr, npages, entry, slen;
slen = s->length;
@@ -284,31 +466,40 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
continue;
}
/* Allocate iommu entries for that segment */
- vaddr = (unsigned long)page_address(s->page) + s->offset;
- npages = PAGE_ALIGN(vaddr + slen) - (vaddr & PAGE_MASK);
- npages >>= PAGE_SHIFT;
- entry = iommu_range_alloc(tbl, npages, &handle, mask >> PAGE_SHIFT, 0);
+ vaddr = (unsigned long) sg_virt(s);
+ npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE(tbl));
+ align = 0;
+ if (tbl->it_page_shift < PAGE_SHIFT && slen >= PAGE_SIZE &&
+ (vaddr & ~PAGE_MASK) == 0)
+ align = PAGE_SHIFT - tbl->it_page_shift;
+ entry = iommu_range_alloc(dev, tbl, npages, &handle,
+ mask >> tbl->it_page_shift, align);
DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);
/* Handle failure */
if (unlikely(entry == DMA_ERROR_CODE)) {
if (printk_ratelimit())
- printk(KERN_INFO "iommu_alloc failed, tbl %p vaddr %lx"
- " npages %lx\n", tbl, vaddr, npages);
+ dev_info(dev, "iommu_alloc failed, tbl %p "
+ "vaddr %lx npages %lu\n", tbl, vaddr,
+ npages);
goto failure;
}
/* Convert entry to a dma_addr_t */
entry += tbl->it_offset;
- dma_addr = entry << PAGE_SHIFT;
- dma_addr |= s->offset;
+ dma_addr = entry << tbl->it_page_shift;
+ dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl));
- DBG(" - %lx pages, entry: %lx, dma_addr: %lx\n",
+ DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n",
npages, entry, dma_addr);
/* Insert into HW table */
- ppc_md.tce_build(tbl, entry, npages, vaddr & PAGE_MASK, direction);
+ build_fail = ppc_md.tce_build(tbl, entry, npages,
+ vaddr & IOMMU_PAGE_MASK(tbl),
+ direction, attrs);
+ if(unlikely(build_fail))
+ goto failure;
/* If we are in an open segment, try merging */
if (segstart != s) {
@@ -316,14 +507,16 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
/* We cannot merge if:
* - allocated dma_addr isn't contiguous to previous allocation
*/
- if (novmerge || (dma_addr != dma_next)) {
+ if (novmerge || (dma_addr != dma_next) ||
+ (outs->dma_length + s->length > max_seg_size)) {
/* Can't merge: create a new segment */
segstart = s;
- outcount++; outs++;
+ outcount++;
+ outs = sg_next(outs);
DBG(" can't merge, new segment.\n");
} else {
outs->dma_length += s->length;
- DBG(" merged, new len: %lx\n", outs->dma_length);
+ DBG(" merged, new len: %ux\n", outs->dma_length);
}
}
@@ -344,15 +537,13 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
-
DBG("mapped %d elements:\n", outcount);
/* For the sake of iommu_unmap_sg, we clear out the length in the
* next entry of the sglist if we didn't fill the list completely
*/
if (outcount < incount) {
- outs++;
+ outs = sg_next(outs);
outs->dma_address = DMA_ERROR_CODE;
outs->dma_length = 0;
}
@@ -363,45 +554,46 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
return outcount;
failure:
- for (s = &sglist[0]; s <= outs; s++) {
+ for_each_sg(sglist, s, nelems, i) {
if (s->dma_length != 0) {
unsigned long vaddr, npages;
- vaddr = s->dma_address & PAGE_MASK;
- npages = (PAGE_ALIGN(s->dma_address + s->dma_length) - vaddr)
- >> PAGE_SHIFT;
+ vaddr = s->dma_address & IOMMU_PAGE_MASK(tbl);
+ npages = iommu_num_pages(s->dma_address, s->dma_length,
+ IOMMU_PAGE_SIZE(tbl));
__iommu_free(tbl, vaddr, npages);
s->dma_address = DMA_ERROR_CODE;
s->dma_length = 0;
}
+ if (s == outs)
+ break;
}
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
return 0;
}
void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
- int nelems, enum dma_data_direction direction)
+ int nelems, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
- unsigned long flags;
+ struct scatterlist *sg;
BUG_ON(direction == DMA_NONE);
if (!tbl)
return;
- spin_lock_irqsave(&(tbl->it_lock), flags);
-
+ sg = sglist;
while (nelems--) {
unsigned int npages;
- dma_addr_t dma_handle = sglist->dma_address;
+ dma_addr_t dma_handle = sg->dma_address;
- if (sglist->dma_length == 0)
+ if (sg->dma_length == 0)
break;
- npages = (PAGE_ALIGN(dma_handle + sglist->dma_length)
- - (dma_handle & PAGE_MASK)) >> PAGE_SHIFT;
+ npages = iommu_num_pages(dma_handle, sg->dma_length,
+ IOMMU_PAGE_SIZE(tbl));
__iommu_free(tbl, dma_handle, npages);
- sglist++;
+ sg = sg_next(sg);
}
/* Flush/invalidate TLBs if necessary. As for iommu_free(), we
@@ -410,44 +602,26 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
*/
if (ppc_md.tce_flush)
ppc_md.tce_flush(tbl);
-
- spin_unlock_irqrestore(&(tbl->it_lock), flags);
}
-/*
- * Build a iommu_table structure. This contains a bit map which
- * is used to manage allocation of the tce space.
- */
-struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
+static void iommu_table_clear(struct iommu_table *tbl)
{
- unsigned long sz;
- static int welcomed = 0;
- struct page *page;
-
- /* Set aside 1/4 of the table for large allocations. */
- tbl->it_halfpoint = tbl->it_size * 3 / 4;
-
- /* number of bytes needed for the bitmap */
- sz = (tbl->it_size + 7) >> 3;
-
- page = alloc_pages_node(nid, GFP_ATOMIC, get_order(sz));
- if (!page)
- panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
- tbl->it_map = page_address(page);
- memset(tbl->it_map, 0, sz);
-
- tbl->it_hint = 0;
- tbl->it_largehint = tbl->it_halfpoint;
- spin_lock_init(&tbl->it_lock);
+ /*
+ * In case of firmware assisted dump system goes through clean
+ * reboot process at the time of system crash. Hence it's safe to
+ * clear the TCE entries if firmware assisted dump is active.
+ */
+ if (!is_kdump_kernel() || is_fadump_active()) {
+ /* Clear the table in case firmware left allocations in it */
+ ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
+ return;
+ }
#ifdef CONFIG_CRASH_DUMP
if (ppc_md.tce_get) {
- unsigned long index, tceval;
- unsigned long tcecount = 0;
+ unsigned long index, tceval, tcecount = 0;
- /*
- * Reserve the existing mappings left by the first kernel.
- */
+ /* Reserve the existing mappings left by the first kernel. */
for (index = 0; index < tbl->it_size; index++) {
tceval = ppc_md.tce_get(tbl, index + tbl->it_offset);
/*
@@ -458,19 +632,72 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
tcecount++;
}
}
+
if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) {
- printk(KERN_WARNING "TCE table is full; ");
- printk(KERN_WARNING "freeing %d entries for the kdump boot\n",
+ printk(KERN_WARNING "TCE table is full; freeing ");
+ printk(KERN_WARNING "%d entries for the kdump boot\n",
KDUMP_MIN_TCE_ENTRIES);
for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES;
index < tbl->it_size; index++)
__clear_bit(index, tbl->it_map);
}
}
-#else
- /* Clear the hardware table in case firmware left allocations in it */
- ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
#endif
+}
+
+/*
+ * Build a iommu_table structure. This contains a bit map which
+ * is used to manage allocation of the tce space.
+ */
+struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
+{
+ unsigned long sz;
+ static int welcomed = 0;
+ struct page *page;
+ unsigned int i;
+ struct iommu_pool *p;
+
+ /* number of bytes needed for the bitmap */
+ sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
+
+ page = alloc_pages_node(nid, GFP_KERNEL, get_order(sz));
+ if (!page)
+ panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
+ tbl->it_map = page_address(page);
+ memset(tbl->it_map, 0, sz);
+
+ /*
+ * Reserve page 0 so it will not be used for any mappings.
+ * This avoids buggy drivers that consider page 0 to be invalid
+ * to crash the machine or even lose data.
+ */
+ if (tbl->it_offset == 0)
+ set_bit(0, tbl->it_map);
+
+ /* We only split the IOMMU table if we have 1GB or more of space */
+ if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))
+ tbl->nr_pools = IOMMU_NR_POOLS;
+ else
+ tbl->nr_pools = 1;
+
+ /* We reserve the top 1/4 of the table for large allocations */
+ tbl->poolsize = (tbl->it_size * 3 / 4) / tbl->nr_pools;
+
+ for (i = 0; i < tbl->nr_pools; i++) {
+ p = &tbl->pools[i];
+ spin_lock_init(&(p->lock));
+ p->start = tbl->poolsize * i;
+ p->hint = p->start;
+ p->end = p->start + tbl->poolsize;
+ }
+
+ p = &tbl->large_pool;
+ spin_lock_init(&(p->lock));
+ p->start = tbl->poolsize * i;
+ p->hint = p->start;
+ p->end = tbl->it_size;
+
+ iommu_table_clear(tbl);
if (!welcomed) {
printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
@@ -481,31 +708,37 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
return tbl;
}
-void iommu_free_table(struct device_node *dn)
+void iommu_free_table(struct iommu_table *tbl, const char *node_name)
{
- struct pci_dn *pdn = dn->data;
- struct iommu_table *tbl = pdn->iommu_table;
- unsigned long bitmap_sz, i;
+ unsigned long bitmap_sz;
unsigned int order;
if (!tbl || !tbl->it_map) {
- printk(KERN_ERR "%s: expected TCE map for %s\n", __FUNCTION__,
- dn->full_name);
+ printk(KERN_ERR "%s: expected TCE map for %s\n", __func__,
+ node_name);
return;
}
- /* verify that table contains no entries */
- /* it_size is in entries, and we're examining 64 at a time */
- for (i = 0; i < (tbl->it_size/64); i++) {
- if (tbl->it_map[i] != 0) {
- printk(KERN_WARNING "%s: Unexpected TCEs for %s\n",
- __FUNCTION__, dn->full_name);
- break;
- }
+ /*
+ * In case we have reserved the first bit, we should not emit
+ * the warning below.
+ */
+ if (tbl->it_offset == 0)
+ clear_bit(0, tbl->it_map);
+
+#ifdef CONFIG_IOMMU_API
+ if (tbl->it_group) {
+ iommu_group_put(tbl->it_group);
+ BUG_ON(tbl->it_group);
}
+#endif
+
+ /* verify that table contains no entries */
+ if (!bitmap_empty(tbl->it_map, tbl->it_size))
+ pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
/* calculate bitmap size in bytes */
- bitmap_sz = (tbl->it_size + 7) / 8;
+ bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
/* free bitmap */
order = get_order(bitmap_sz);
@@ -516,65 +749,78 @@ void iommu_free_table(struct device_node *dn)
}
/* Creates TCEs for a user provided buffer. The user buffer must be
- * contiguous real kernel storage (not vmalloc). The address of the buffer
- * passed here is the kernel (virtual) address of the buffer. The buffer
- * need not be page aligned, the dma_addr_t returned will point to the same
- * byte within the page as vaddr.
+ * contiguous real kernel storage (not vmalloc). The address passed here
+ * comprises a page address and offset into that page. The dma_addr_t
+ * returned will point to the same byte within the page as was passed in.
*/
-dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr,
- size_t size, unsigned long mask,
- enum dma_data_direction direction)
+dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
+ struct page *page, unsigned long offset, size_t size,
+ unsigned long mask, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
dma_addr_t dma_handle = DMA_ERROR_CODE;
+ void *vaddr;
unsigned long uaddr;
- unsigned int npages;
+ unsigned int npages, align;
BUG_ON(direction == DMA_NONE);
+ vaddr = page_address(page) + offset;
uaddr = (unsigned long)vaddr;
- npages = PAGE_ALIGN(uaddr + size) - (uaddr & PAGE_MASK);
- npages >>= PAGE_SHIFT;
+ npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
if (tbl) {
- dma_handle = iommu_alloc(tbl, vaddr, npages, direction,
- mask >> PAGE_SHIFT, 0);
+ align = 0;
+ if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE &&
+ ((unsigned long)vaddr & ~PAGE_MASK) == 0)
+ align = PAGE_SHIFT - tbl->it_page_shift;
+
+ dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
+ mask >> tbl->it_page_shift, align,
+ attrs);
if (dma_handle == DMA_ERROR_CODE) {
if (printk_ratelimit()) {
- printk(KERN_INFO "iommu_alloc failed, "
- "tbl %p vaddr %p npages %d\n",
- tbl, vaddr, npages);
+ dev_info(dev, "iommu_alloc failed, tbl %p "
+ "vaddr %p npages %d\n", tbl, vaddr,
+ npages);
}
} else
- dma_handle |= (uaddr & ~PAGE_MASK);
+ dma_handle |= (uaddr & ~IOMMU_PAGE_MASK(tbl));
}
return dma_handle;
}
-void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle,
- size_t size, enum dma_data_direction direction)
+void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
+ unsigned int npages;
+
BUG_ON(direction == DMA_NONE);
- if (tbl)
- iommu_free(tbl, dma_handle, (PAGE_ALIGN(dma_handle + size) -
- (dma_handle & PAGE_MASK)) >> PAGE_SHIFT);
+ if (tbl) {
+ npages = iommu_num_pages(dma_handle, size,
+ IOMMU_PAGE_SIZE(tbl));
+ iommu_free(tbl, dma_handle, npages);
+ }
}
/* Allocates a contiguous real buffer and creates mappings over it.
* Returns the virtual address of the buffer and sets dma_handle
* to the dma address (mapping) of the first page.
*/
-void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size,
- dma_addr_t *dma_handle, unsigned long mask, gfp_t flag, int node)
+void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
+ size_t size, dma_addr_t *dma_handle,
+ unsigned long mask, gfp_t flag, int node)
{
void *ret = NULL;
dma_addr_t mapping;
- unsigned int npages, order;
+ unsigned int order;
+ unsigned int nio_pages, io_order;
struct page *page;
size = PAGE_ALIGN(size);
- npages = size >> PAGE_SHIFT;
order = get_order(size);
/*
@@ -583,7 +829,8 @@ void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size,
* the tce tables.
*/
if (order >= IOMAP_MAX_ORDER) {
- printk("iommu_alloc_consistent size too large: 0x%lx\n", size);
+ dev_info(dev, "iommu_alloc_consistent size too large: 0x%lx\n",
+ size);
return NULL;
}
@@ -598,8 +845,10 @@ void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size,
memset(ret, 0, size);
/* Set up tces to cover the allocated range */
- mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL,
- mask >> PAGE_SHIFT, order);
+ nio_pages = size >> tbl->it_page_shift;
+ io_order = get_iommu_order(size, tbl);
+ mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
+ mask >> tbl->it_page_shift, io_order, NULL);
if (mapping == DMA_ERROR_CODE) {
free_pages((unsigned long)ret, order);
return NULL;
@@ -611,12 +860,315 @@ void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size,
void iommu_free_coherent(struct iommu_table *tbl, size_t size,
void *vaddr, dma_addr_t dma_handle)
{
- unsigned int npages;
-
if (tbl) {
+ unsigned int nio_pages;
+
+ size = PAGE_ALIGN(size);
+ nio_pages = size >> tbl->it_page_shift;
+ iommu_free(tbl, dma_handle, nio_pages);
size = PAGE_ALIGN(size);
- npages = size >> PAGE_SHIFT;
- iommu_free(tbl, dma_handle, npages);
free_pages((unsigned long)vaddr, get_order(size));
}
}
+
+#ifdef CONFIG_IOMMU_API
+/*
+ * SPAPR TCE API
+ */
+static void group_release(void *iommu_data)
+{
+ struct iommu_table *tbl = iommu_data;
+ tbl->it_group = NULL;
+}
+
+void iommu_register_group(struct iommu_table *tbl,
+ int pci_domain_number, unsigned long pe_num)
+{
+ struct iommu_group *grp;
+ char *name;
+
+ grp = iommu_group_alloc();
+ if (IS_ERR(grp)) {
+ pr_warn("powerpc iommu api: cannot create new group, err=%ld\n",
+ PTR_ERR(grp));
+ return;
+ }
+ tbl->it_group = grp;
+ iommu_group_set_iommudata(grp, tbl, group_release);
+ name = kasprintf(GFP_KERNEL, "domain%d-pe%lx",
+ pci_domain_number, pe_num);
+ if (!name)
+ return;
+ iommu_group_set_name(grp, name);
+ kfree(name);
+}
+
+enum dma_data_direction iommu_tce_direction(unsigned long tce)
+{
+ if ((tce & TCE_PCI_READ) && (tce & TCE_PCI_WRITE))
+ return DMA_BIDIRECTIONAL;
+ else if (tce & TCE_PCI_READ)
+ return DMA_TO_DEVICE;
+ else if (tce & TCE_PCI_WRITE)
+ return DMA_FROM_DEVICE;
+ else
+ return DMA_NONE;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_direction);
+
+void iommu_flush_tce(struct iommu_table *tbl)
+{
+ /* Flush/invalidate TLB caches if necessary */
+ if (ppc_md.tce_flush)
+ ppc_md.tce_flush(tbl);
+
+ /* Make sure updates are seen by hardware */
+ mb();
+}
+EXPORT_SYMBOL_GPL(iommu_flush_tce);
+
+int iommu_tce_clear_param_check(struct iommu_table *tbl,
+ unsigned long ioba, unsigned long tce_value,
+ unsigned long npages)
+{
+ /* ppc_md.tce_free() does not support any value but 0 */
+ if (tce_value)
+ return -EINVAL;
+
+ if (ioba & ~IOMMU_PAGE_MASK(tbl))
+ return -EINVAL;
+
+ ioba >>= tbl->it_page_shift;
+ if (ioba < tbl->it_offset)
+ return -EINVAL;
+
+ if ((ioba + npages) > (tbl->it_offset + tbl->it_size))
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check);
+
+int iommu_tce_put_param_check(struct iommu_table *tbl,
+ unsigned long ioba, unsigned long tce)
+{
+ if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ)))
+ return -EINVAL;
+
+ if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ))
+ return -EINVAL;
+
+ if (ioba & ~IOMMU_PAGE_MASK(tbl))
+ return -EINVAL;
+
+ ioba >>= tbl->it_page_shift;
+ if (ioba < tbl->it_offset)
+ return -EINVAL;
+
+ if ((ioba + 1) > (tbl->it_offset + tbl->it_size))
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);
+
+unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
+{
+ unsigned long oldtce;
+ struct iommu_pool *pool = get_pool(tbl, entry);
+
+ spin_lock(&(pool->lock));
+
+ oldtce = ppc_md.tce_get(tbl, entry);
+ if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))
+ ppc_md.tce_free(tbl, entry, 1);
+ else
+ oldtce = 0;
+
+ spin_unlock(&(pool->lock));
+
+ return oldtce;
+}
+EXPORT_SYMBOL_GPL(iommu_clear_tce);
+
+int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
+ unsigned long entry, unsigned long pages)
+{
+ unsigned long oldtce;
+ struct page *page;
+
+ for ( ; pages; --pages, ++entry) {
+ oldtce = iommu_clear_tce(tbl, entry);
+ if (!oldtce)
+ continue;
+
+ page = pfn_to_page(oldtce >> PAGE_SHIFT);
+ WARN_ON(!page);
+ if (page) {
+ if (oldtce & TCE_PCI_WRITE)
+ SetPageDirty(page);
+ put_page(page);
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_clear_tces_and_put_pages);
+
+/*
+ * hwaddr is a kernel virtual address here (0xc... bazillion),
+ * tce_build converts it to a physical address.
+ */
+int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
+ unsigned long hwaddr, enum dma_data_direction direction)
+{
+ int ret = -EBUSY;
+ unsigned long oldtce;
+ struct iommu_pool *pool = get_pool(tbl, entry);
+
+ spin_lock(&(pool->lock));
+
+ oldtce = ppc_md.tce_get(tbl, entry);
+ /* Add new entry if it is not busy */
+ if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)))
+ ret = ppc_md.tce_build(tbl, entry, 1, hwaddr, direction, NULL);
+
+ spin_unlock(&(pool->lock));
+
+ /* if (unlikely(ret))
+ pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
+ __func__, hwaddr, entry << IOMMU_PAGE_SHIFT(tbl),
+ hwaddr, ret); */
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_build);
+
+int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
+ unsigned long tce)
+{
+ int ret;
+ struct page *page = NULL;
+ unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
+ enum dma_data_direction direction = iommu_tce_direction(tce);
+
+ ret = get_user_pages_fast(tce & PAGE_MASK, 1,
+ direction != DMA_TO_DEVICE, &page);
+ if (unlikely(ret != 1)) {
+ /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n",
+ tce, entry << IOMMU_PAGE_SHIFT(tbl), ret); */
+ return -EFAULT;
+ }
+ hwaddr = (unsigned long) page_address(page) + offset;
+
+ ret = iommu_tce_build(tbl, entry, hwaddr, direction);
+ if (ret)
+ put_page(page);
+
+ if (ret < 0)
+ pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n",
+ __func__, entry << tbl->it_page_shift, tce, ret);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_put_tce_user_mode);
+
+int iommu_take_ownership(struct iommu_table *tbl)
+{
+ unsigned long sz = (tbl->it_size + 7) >> 3;
+
+ if (tbl->it_offset == 0)
+ clear_bit(0, tbl->it_map);
+
+ if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
+ pr_err("iommu_tce: it_map is not empty");
+ return -EBUSY;
+ }
+
+ memset(tbl->it_map, 0xff, sz);
+ iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
+
+ /*
+ * Disable iommu bypass, otherwise the user can DMA to all of
+ * our physical memory via the bypass window instead of just
+ * the pages that has been explicitly mapped into the iommu
+ */
+ if (tbl->set_bypass)
+ tbl->set_bypass(tbl, false);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_take_ownership);
+
+void iommu_release_ownership(struct iommu_table *tbl)
+{
+ unsigned long sz = (tbl->it_size + 7) >> 3;
+
+ iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
+ memset(tbl->it_map, 0, sz);
+
+ /* Restore bit#0 set by iommu_init_table() */
+ if (tbl->it_offset == 0)
+ set_bit(0, tbl->it_map);
+
+ /* The kernel owns the device now, we can restore the iommu bypass */
+ if (tbl->set_bypass)
+ tbl->set_bypass(tbl, true);
+}
+EXPORT_SYMBOL_GPL(iommu_release_ownership);
+
+int iommu_add_device(struct device *dev)
+{
+ struct iommu_table *tbl;
+ int ret = 0;
+
+ if (WARN_ON(dev->iommu_group)) {
+ pr_warn("iommu_tce: device %s is already in iommu group %d, skipping\n",
+ dev_name(dev),
+ iommu_group_id(dev->iommu_group));
+ return -EBUSY;
+ }
+
+ tbl = get_iommu_table_base(dev);
+ if (!tbl || !tbl->it_group) {
+ pr_debug("iommu_tce: skipping device %s with no tbl\n",
+ dev_name(dev));
+ return 0;
+ }
+
+ pr_debug("iommu_tce: adding %s to iommu group %d\n",
+ dev_name(dev), iommu_group_id(tbl->it_group));
+
+ if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) {
+ pr_err("iommu_tce: unsupported iommu page size.");
+ pr_err("%s has not been added\n", dev_name(dev));
+ return -EINVAL;
+ }
+
+ ret = iommu_group_add_device(tbl->it_group, dev);
+ if (ret < 0)
+ pr_err("iommu_tce: %s has not been added, ret=%d\n",
+ dev_name(dev), ret);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_add_device);
+
+void iommu_del_device(struct device *dev)
+{
+ /*
+ * Some devices might not have IOMMU table and group
+ * and we needn't detach them from the associated
+ * IOMMU groups
+ */
+ if (!dev->iommu_group) {
+ pr_debug("iommu_tce: skipping device %s with no tbl\n",
+ dev_name(dev));
+ return;
+ }
+
+ iommu_group_remove_device(dev);
+}
+EXPORT_SYMBOL_GPL(iommu_del_device);
+
+#endif /* CONFIG_IOMMU_API */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index b4432332341..248ee7e5beb 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -7,7 +7,6 @@
* Copyright (C) 1996-2001 Cort Dougan
* Adapted for Power Macintosh by Paul Mackerras
* Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -31,7 +30,7 @@
#undef DEBUG
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/threads.h>
#include <linux/kernel_stat.h>
#include <linux/signal.h>
@@ -53,9 +52,11 @@
#include <linux/mutex.h>
#include <linux/bootmem.h>
#include <linux/pci.h>
+#include <linux/debugfs.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
#include <asm/uaccess.h>
-#include <asm/system.h>
#include <asm/io.h>
#include <asm/pgtable.h>
#include <asm/irq.h>
@@ -64,837 +65,607 @@
#include <asm/ptrace.h>
#include <asm/machdep.h>
#include <asm/udbg.h>
-#ifdef CONFIG_PPC_ISERIES
+#include <asm/smp.h>
+#include <asm/debug.h>
+
+#ifdef CONFIG_PPC64
#include <asm/paca.h>
+#include <asm/firmware.h>
+#include <asm/lv1call.h>
#endif
+#define CREATE_TRACE_POINTS
+#include <asm/trace.h>
+
+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+EXPORT_PER_CPU_SYMBOL(irq_stat);
int __irq_offset_value;
-static int ppc_spurious_interrupts;
#ifdef CONFIG_PPC32
EXPORT_SYMBOL(__irq_offset_value);
atomic_t ppc_n_lost_interrupts;
-#ifndef CONFIG_PPC_MERGE
-#define NR_MASK_WORDS ((NR_IRQS + 31) / 32)
-unsigned long ppc_cached_irq_mask[NR_MASK_WORDS];
-#endif
-
#ifdef CONFIG_TAU_INT
extern int tau_initialized;
extern int tau_interrupts(int);
#endif
#endif /* CONFIG_PPC32 */
-#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_MERGE)
-extern atomic_t ipi_recv;
-extern atomic_t ipi_sent;
-#endif
-
#ifdef CONFIG_PPC64
-EXPORT_SYMBOL(irq_desc);
int distribute_irqs = 1;
-#endif /* CONFIG_PPC64 */
-int show_interrupts(struct seq_file *p, void *v)
+static inline notrace unsigned long get_irq_happened(void)
{
- int i = *(loff_t *)v, j;
- struct irqaction *action;
- irq_desc_t *desc;
- unsigned long flags;
+ unsigned long happened;
- if (i == 0) {
- seq_puts(p, " ");
- for_each_online_cpu(j)
- seq_printf(p, "CPU%d ", j);
- seq_putc(p, '\n');
- }
+ __asm__ __volatile__("lbz %0,%1(13)"
+ : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened)));
- if (i < NR_IRQS) {
- desc = get_irq_desc(i);
- spin_lock_irqsave(&desc->lock, flags);
- action = desc->action;
- if (!action || !action->handler)
- goto skip;
- seq_printf(p, "%3d: ", i);
-#ifdef CONFIG_SMP
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
-#else
- seq_printf(p, "%10u ", kstat_irqs(i));
-#endif /* CONFIG_SMP */
- if (desc->chip)
- seq_printf(p, " %s ", desc->chip->typename);
- else
- seq_puts(p, " None ");
- seq_printf(p, "%s", (desc->status & IRQ_LEVEL) ? "Level " : "Edge ");
- seq_printf(p, " %s", action->name);
- for (action = action->next; action; action = action->next)
- seq_printf(p, ", %s", action->name);
- seq_putc(p, '\n');
-skip:
- spin_unlock_irqrestore(&desc->lock, flags);
- } else if (i == NR_IRQS) {
-#ifdef CONFIG_PPC32
-#ifdef CONFIG_TAU_INT
- if (tau_initialized){
- seq_puts(p, "TAU: ");
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", tau_interrupts(j));
- seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n");
- }
-#endif
-#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_MERGE)
- /* should this be per processor send/receive? */
- seq_printf(p, "IPI (recv/sent): %10u/%u\n",
- atomic_read(&ipi_recv), atomic_read(&ipi_sent));
-#endif
-#endif /* CONFIG_PPC32 */
- seq_printf(p, "BAD: %10u\n", ppc_spurious_interrupts);
- }
- return 0;
+ return happened;
}
-#ifdef CONFIG_HOTPLUG_CPU
-void fixup_irqs(cpumask_t map)
+static inline notrace void set_soft_enabled(unsigned long enable)
{
- unsigned int irq;
- static int warned;
-
- for_each_irq(irq) {
- cpumask_t mask;
-
- if (irq_desc[irq].status & IRQ_PER_CPU)
- continue;
-
- cpus_and(mask, irq_desc[irq].affinity, map);
- if (any_online_cpu(mask) == NR_CPUS) {
- printk("Breaking affinity for irq %i\n", irq);
- mask = map;
- }
- if (irq_desc[irq].chip->set_affinity)
- irq_desc[irq].chip->set_affinity(irq, mask);
- else if (irq_desc[irq].action && !(warned++))
- printk("Cannot set affinity for irq %i\n", irq);
- }
-
- local_irq_enable();
- mdelay(1);
- local_irq_disable();
+ __asm__ __volatile__("stb %0,%1(13)"
+ : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
}
-#endif
-void do_IRQ(struct pt_regs *regs)
+static inline notrace int decrementer_check_overflow(void)
{
- unsigned int irq;
-#ifdef CONFIG_IRQSTACKS
- struct thread_info *curtp, *irqtp;
-#endif
-
- irq_enter();
+ u64 now = get_tb_or_rtc();
+ u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+
+ return now >= *next_tb;
+}
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
- /* Debugging check for stack overflow: is there less than 2KB free? */
- {
- long sp;
+/* This is called whenever we are re-enabling interrupts
+ * and returns either 0 (nothing to do) or 500/900/280/a00/e80 if
+ * there's an EE, DEC or DBELL to generate.
+ *
+ * This is called in two contexts: From arch_local_irq_restore()
+ * before soft-enabling interrupts, and from the exception exit
+ * path when returning from an interrupt from a soft-disabled to
+ * a soft enabled context. In both case we have interrupts hard
+ * disabled.
+ *
+ * We take care of only clearing the bits we handled in the
+ * PACA irq_happened field since we can only re-emit one at a
+ * time and we don't want to "lose" one.
+ */
+notrace unsigned int __check_irq_replay(void)
+{
+ /*
+ * We use local_paca rather than get_paca() to avoid all
+ * the debug_smp_processor_id() business in this low level
+ * function
+ */
+ unsigned char happened = local_paca->irq_happened;
- sp = __get_SP() & (THREAD_SIZE-1);
+ /* Clear bit 0 which we wouldn't clear otherwise */
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
- if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
- printk("do_IRQ: stack overflow: %ld\n",
- sp - sizeof(struct thread_info));
- dump_stack();
- }
+ /*
+ * Force the delivery of pending soft-disabled interrupts on PS3.
+ * Any HV call will have this side effect.
+ */
+ if (firmware_has_feature(FW_FEATURE_PS3_LV1)) {
+ u64 tmp, tmp2;
+ lv1_get_version_info(&tmp, &tmp2);
}
-#endif
/*
- * Every platform is required to implement ppc_md.get_irq.
- * This function will either return an irq number or -1 to
- * indicate there are no more pending.
- * The value -2 is for buggy hardware and means that this IRQ
- * has already been handled. -- Tom
+ * We may have missed a decrementer interrupt. We check the
+ * decrementer itself rather than the paca irq_happened field
+ * in case we also had a rollover while hard disabled
*/
- irq = ppc_md.get_irq(regs);
-
- if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) {
-#ifdef CONFIG_IRQSTACKS
- /* Switch to the irq stack to handle this */
- curtp = current_thread_info();
- irqtp = hardirq_ctx[smp_processor_id()];
- if (curtp != irqtp) {
- struct irq_desc *desc = irq_desc + irq;
- void *handler = desc->handle_irq;
- if (handler == NULL)
- handler = &__do_IRQ;
- irqtp->task = curtp->task;
- irqtp->flags = 0;
- call_handle_irq(irq, desc, regs, irqtp, handler);
- irqtp->task = NULL;
- if (irqtp->flags)
- set_bits(irqtp->flags, &curtp->flags);
- } else
-#endif
- generic_handle_irq(irq, regs);
- } else if (irq != NO_IRQ_IGNORE)
- /* That's not SMP safe ... but who cares ? */
- ppc_spurious_interrupts++;
-
- irq_exit();
-
-#ifdef CONFIG_PPC_ISERIES
- if (get_lppaca()->int_dword.fields.decr_int) {
- get_lppaca()->int_dword.fields.decr_int = 0;
- /* Signal a fake decrementer interrupt */
- timer_interrupt(regs);
- }
-#endif
-}
+ local_paca->irq_happened &= ~PACA_IRQ_DEC;
+ if ((happened & PACA_IRQ_DEC) || decrementer_check_overflow())
+ return 0x900;
+
+ /* Finally check if an external interrupt happened */
+ local_paca->irq_happened &= ~PACA_IRQ_EE;
+ if (happened & PACA_IRQ_EE)
+ return 0x500;
+
+#ifdef CONFIG_PPC_BOOK3E
+ /* Finally check if an EPR external interrupt happened
+ * this bit is typically set if we need to handle another
+ * "edge" interrupt from within the MPIC "EPR" handler
+ */
+ local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
+ if (happened & PACA_IRQ_EE_EDGE)
+ return 0x500;
-void __init init_IRQ(void)
-{
- ppc_md.init_IRQ();
-#ifdef CONFIG_PPC64
- irq_ctx_init();
-#endif
-}
+ local_paca->irq_happened &= ~PACA_IRQ_DBELL;
+ if (happened & PACA_IRQ_DBELL)
+ return 0x280;
+#else
+ local_paca->irq_happened &= ~PACA_IRQ_DBELL;
+ if (happened & PACA_IRQ_DBELL) {
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ return 0xe80;
+ return 0xa00;
+ }
+#endif /* CONFIG_PPC_BOOK3E */
+ /* There should be nothing left ! */
+ BUG_ON(local_paca->irq_happened != 0);
-#ifdef CONFIG_IRQSTACKS
-struct thread_info *softirq_ctx[NR_CPUS] __read_mostly;
-struct thread_info *hardirq_ctx[NR_CPUS] __read_mostly;
+ return 0;
+}
-void irq_ctx_init(void)
+notrace void arch_local_irq_restore(unsigned long en)
{
- struct thread_info *tp;
- int i;
+ unsigned char irq_happened;
+ unsigned int replay;
- for_each_possible_cpu(i) {
- memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
- tp = softirq_ctx[i];
- tp->cpu = i;
- tp->preempt_count = SOFTIRQ_OFFSET;
+ /* Write the new soft-enabled value */
+ set_soft_enabled(en);
+ if (!en)
+ return;
+ /*
+ * From this point onward, we can take interrupts, preempt,
+ * etc... unless we got hard-disabled. We check if an event
+ * happened. If none happened, we know we can just return.
+ *
+ * We may have preempted before the check below, in which case
+ * we are checking the "new" CPU instead of the old one. This
+ * is only a problem if an event happened on the "old" CPU.
+ *
+ * External interrupt events will have caused interrupts to
+ * be hard-disabled, so there is no problem, we
+ * cannot have preempted.
+ */
+ irq_happened = get_irq_happened();
+ if (!irq_happened)
+ return;
- memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
- tp = hardirq_ctx[i];
- tp->cpu = i;
- tp->preempt_count = HARDIRQ_OFFSET;
+ /*
+ * We need to hard disable to get a trusted value from
+ * __check_irq_replay(). We also need to soft-disable
+ * again to avoid warnings in there due to the use of
+ * per-cpu variables.
+ *
+ * We know that if the value in irq_happened is exactly 0x01
+ * then we are already hard disabled (there are other less
+ * common cases that we'll ignore for now), so we skip the
+ * (expensive) mtmsrd.
+ */
+ if (unlikely(irq_happened != PACA_IRQ_HARD_DIS))
+ __hard_irq_disable();
+#ifdef CONFIG_TRACE_IRQFLAGS
+ else {
+ /*
+ * We should already be hard disabled here. We had bugs
+ * where that wasn't the case so let's dbl check it and
+ * warn if we are wrong. Only do that when IRQ tracing
+ * is enabled as mfmsr() can be costly.
+ */
+ if (WARN_ON(mfmsr() & MSR_EE))
+ __hard_irq_disable();
}
-}
-
-static inline void do_softirq_onstack(void)
-{
- struct thread_info *curtp, *irqtp;
+#endif /* CONFIG_TRACE_IRQFLAG */
- curtp = current_thread_info();
- irqtp = softirq_ctx[smp_processor_id()];
- irqtp->task = curtp->task;
- call_do_softirq(irqtp);
- irqtp->task = NULL;
-}
+ set_soft_enabled(0);
-#else
-#define do_softirq_onstack() __do_softirq()
-#endif /* CONFIG_IRQSTACKS */
+ /*
+ * Check if anything needs to be re-emitted. We haven't
+ * soft-enabled yet to avoid warnings in decrementer_check_overflow
+ * accessing per-cpu variables
+ */
+ replay = __check_irq_replay();
-void do_softirq(void)
-{
- unsigned long flags;
+ /* We can soft-enable now */
+ set_soft_enabled(1);
- if (in_interrupt())
+ /*
+ * And replay if we have to. This will return with interrupts
+ * hard-enabled.
+ */
+ if (replay) {
+ __replay_interrupt(replay);
return;
+ }
- local_irq_save(flags);
-
- if (local_softirq_pending())
- do_softirq_onstack();
-
- local_irq_restore(flags);
+ /* Finally, let's ensure we are hard enabled */
+ __hard_irq_enable();
}
-EXPORT_SYMBOL(do_softirq);
-
+EXPORT_SYMBOL(arch_local_irq_restore);
/*
- * IRQ controller and virtual interrupts
+ * This is specifically called by assembly code to re-enable interrupts
+ * if they are currently disabled. This is typically called before
+ * schedule() or do_signal() when returning to userspace. We do it
+ * in C to avoid the burden of dealing with lockdep etc...
+ *
+ * NOTE: This is called with interrupts hard disabled but not marked
+ * as such in paca->irq_happened, so we need to resync this.
*/
-
-#ifdef CONFIG_PPC_MERGE
-
-static LIST_HEAD(irq_hosts);
-static spinlock_t irq_big_lock = SPIN_LOCK_UNLOCKED;
-static DEFINE_PER_CPU(unsigned int, irq_radix_reader);
-static unsigned int irq_radix_writer;
-struct irq_map_entry irq_map[NR_IRQS];
-static unsigned int irq_virq_count = NR_IRQS;
-static struct irq_host *irq_default_host;
-
-struct irq_host *irq_alloc_host(unsigned int revmap_type,
- unsigned int revmap_arg,
- struct irq_host_ops *ops,
- irq_hw_number_t inval_irq)
+void notrace restore_interrupts(void)
{
- struct irq_host *host;
- unsigned int size = sizeof(struct irq_host);
- unsigned int i;
- unsigned int *rmap;
- unsigned long flags;
-
- /* Allocate structure and revmap table if using linear mapping */
- if (revmap_type == IRQ_HOST_MAP_LINEAR)
- size += revmap_arg * sizeof(unsigned int);
- if (mem_init_done)
- host = kzalloc(size, GFP_KERNEL);
- else {
- host = alloc_bootmem(size);
- if (host)
- memset(host, 0, size);
- }
- if (host == NULL)
- return NULL;
-
- /* Fill structure */
- host->revmap_type = revmap_type;
- host->inval_irq = inval_irq;
- host->ops = ops;
+ if (irqs_disabled()) {
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ local_irq_enable();
+ } else
+ __hard_irq_enable();
+}
- spin_lock_irqsave(&irq_big_lock, flags);
+/*
+ * This is a helper to use when about to go into idle low-power
+ * when the latter has the side effect of re-enabling interrupts
+ * (such as calling H_CEDE under pHyp).
+ *
+ * You call this function with interrupts soft-disabled (this is
+ * already the case when ppc_md.power_save is called). The function
+ * will return whether to enter power save or just return.
+ *
+ * In the former case, it will have notified lockdep of interrupts
+ * being re-enabled and generally sanitized the lazy irq state,
+ * and in the latter case it will leave with interrupts hard
+ * disabled and marked as such, so the local_irq_enable() call
+ * in arch_cpu_idle() will properly re-enable everything.
+ */
+bool prep_irq_for_idle(void)
+{
+ /*
+ * First we need to hard disable to ensure no interrupt
+ * occurs before we effectively enter the low power state
+ */
+ hard_irq_disable();
- /* If it's a legacy controller, check for duplicates and
- * mark it as allocated (we use irq 0 host pointer for that
+ /*
+ * If anything happened while we were soft-disabled,
+ * we return now and do not enter the low power state.
*/
- if (revmap_type == IRQ_HOST_MAP_LEGACY) {
- if (irq_map[0].host != NULL) {
- spin_unlock_irqrestore(&irq_big_lock, flags);
- /* If we are early boot, we can't free the structure,
- * too bad...
- * this will be fixed once slab is made available early
- * instead of the current cruft
- */
- if (mem_init_done)
- kfree(host);
- return NULL;
- }
- irq_map[0].host = host;
- }
+ if (lazy_irq_pending())
+ return false;
- list_add(&host->link, &irq_hosts);
- spin_unlock_irqrestore(&irq_big_lock, flags);
-
- /* Additional setups per revmap type */
- switch(revmap_type) {
- case IRQ_HOST_MAP_LEGACY:
- /* 0 is always the invalid number for legacy */
- host->inval_irq = 0;
- /* setup us as the host for all legacy interrupts */
- for (i = 1; i < NUM_ISA_INTERRUPTS; i++) {
- irq_map[i].hwirq = 0;
- smp_wmb();
- irq_map[i].host = host;
- smp_wmb();
-
- /* Clear norequest flags */
- get_irq_desc(i)->status &= ~IRQ_NOREQUEST;
-
- /* Legacy flags are left to default at this point,
- * one can then use irq_create_mapping() to
- * explicitely change them
- */
- ops->map(host, i, i);
- }
- break;
- case IRQ_HOST_MAP_LINEAR:
- rmap = (unsigned int *)(host + 1);
- for (i = 0; i < revmap_arg; i++)
- rmap[i] = IRQ_NONE;
- host->revmap_data.linear.size = revmap_arg;
- smp_wmb();
- host->revmap_data.linear.revmap = rmap;
- break;
- default:
- break;
- }
+ /* Tell lockdep we are about to re-enable */
+ trace_hardirqs_on();
- pr_debug("irq: Allocated host of type %d @0x%p\n", revmap_type, host);
+ /*
+ * Mark interrupts as soft-enabled and clear the
+ * PACA_IRQ_HARD_DIS from the pending mask since we
+ * are about to hard enable as well as a side effect
+ * of entering the low power state.
+ */
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+ local_paca->soft_enabled = 1;
- return host;
+ /* Tell the caller to enter the low power state */
+ return true;
}
-struct irq_host *irq_find_host(struct device_node *node)
-{
- struct irq_host *h, *found = NULL;
- unsigned long flags;
-
- /* We might want to match the legacy controller last since
- * it might potentially be set to match all interrupts in
- * the absence of a device node. This isn't a problem so far
- * yet though...
- */
- spin_lock_irqsave(&irq_big_lock, flags);
- list_for_each_entry(h, &irq_hosts, link)
- if (h->ops->match == NULL || h->ops->match(h, node)) {
- found = h;
- break;
- }
- spin_unlock_irqrestore(&irq_big_lock, flags);
- return found;
-}
-EXPORT_SYMBOL_GPL(irq_find_host);
+#endif /* CONFIG_PPC64 */
-void irq_set_default_host(struct irq_host *host)
+int arch_show_interrupts(struct seq_file *p, int prec)
{
- pr_debug("irq: Default host set to @0x%p\n", host);
+ int j;
- irq_default_host = host;
-}
-
-void irq_set_virq_count(unsigned int count)
-{
- pr_debug("irq: Trying to set virq count to %d\n", count);
+#if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT)
+ if (tau_initialized) {
+ seq_printf(p, "%*s: ", prec, "TAU");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", tau_interrupts(j));
+ seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n");
+ }
+#endif /* CONFIG_PPC32 && CONFIG_TAU_INT */
+
+ seq_printf(p, "%*s: ", prec, "LOC");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event);
+ seq_printf(p, " Local timer interrupts for timer event device\n");
+
+ seq_printf(p, "%*s: ", prec, "LOC");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_others);
+ seq_printf(p, " Local timer interrupts for others\n");
+
+ seq_printf(p, "%*s: ", prec, "SPU");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs);
+ seq_printf(p, " Spurious interrupts\n");
+
+ seq_printf(p, "%*s: ", prec, "PMI");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs);
+ seq_printf(p, " Performance monitoring interrupts\n");
+
+ seq_printf(p, "%*s: ", prec, "MCE");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions);
+ seq_printf(p, " Machine check exceptions\n");
+
+#ifdef CONFIG_PPC_DOORBELL
+ if (cpu_has_feature(CPU_FTR_DBELL)) {
+ seq_printf(p, "%*s: ", prec, "DBL");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(irq_stat, j).doorbell_irqs);
+ seq_printf(p, " Doorbell interrupts\n");
+ }
+#endif
- BUG_ON(count < NUM_ISA_INTERRUPTS);
- if (count < NR_IRQS)
- irq_virq_count = count;
+ return 0;
}
-/* radix tree not lockless safe ! we use a brlock-type mecanism
- * for now, until we can use a lockless radix tree
+/*
+ * /proc/stat helpers
*/
-static void irq_radix_wrlock(unsigned long *flags)
+u64 arch_irq_stat_cpu(unsigned int cpu)
{
- unsigned int cpu, ok;
-
- spin_lock_irqsave(&irq_big_lock, *flags);
- irq_radix_writer = 1;
- smp_mb();
- do {
- barrier();
- ok = 1;
- for_each_possible_cpu(cpu) {
- if (per_cpu(irq_radix_reader, cpu)) {
- ok = 0;
- break;
- }
- }
- if (!ok)
- cpu_relax();
- } while(!ok);
-}
+ u64 sum = per_cpu(irq_stat, cpu).timer_irqs_event;
+
+ sum += per_cpu(irq_stat, cpu).pmu_irqs;
+ sum += per_cpu(irq_stat, cpu).mce_exceptions;
+ sum += per_cpu(irq_stat, cpu).spurious_irqs;
+ sum += per_cpu(irq_stat, cpu).timer_irqs_others;
+#ifdef CONFIG_PPC_DOORBELL
+ sum += per_cpu(irq_stat, cpu).doorbell_irqs;
+#endif
-static void irq_radix_wrunlock(unsigned long flags)
-{
- smp_wmb();
- irq_radix_writer = 0;
- spin_unlock_irqrestore(&irq_big_lock, flags);
+ return sum;
}
-static void irq_radix_rdlock(unsigned long *flags)
+#ifdef CONFIG_HOTPLUG_CPU
+void migrate_irqs(void)
{
- local_irq_save(*flags);
- __get_cpu_var(irq_radix_reader) = 1;
- smp_mb();
- if (likely(irq_radix_writer == 0))
- return;
- __get_cpu_var(irq_radix_reader) = 0;
- smp_wmb();
- spin_lock(&irq_big_lock);
- __get_cpu_var(irq_radix_reader) = 1;
- spin_unlock(&irq_big_lock);
-}
+ struct irq_desc *desc;
+ unsigned int irq;
+ static int warned;
+ cpumask_var_t mask;
+ const struct cpumask *map = cpu_online_mask;
-static void irq_radix_rdunlock(unsigned long flags)
-{
- __get_cpu_var(irq_radix_reader) = 0;
- local_irq_restore(flags);
-}
+ alloc_cpumask_var(&mask, GFP_KERNEL);
+ for_each_irq_desc(irq, desc) {
+ struct irq_data *data;
+ struct irq_chip *chip;
-unsigned int irq_create_mapping(struct irq_host *host,
- irq_hw_number_t hwirq)
-{
- unsigned int virq, hint;
-
- pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", host, hwirq);
-
- /* Look for default host if nececssary */
- if (host == NULL)
- host = irq_default_host;
- if (host == NULL) {
- printk(KERN_WARNING "irq_create_mapping called for"
- " NULL host, hwirq=%lx\n", hwirq);
- WARN_ON(1);
- return NO_IRQ;
- }
- pr_debug("irq: -> using host @%p\n", host);
+ data = irq_desc_get_irq_data(desc);
+ if (irqd_is_per_cpu(data))
+ continue;
- /* Check if mapping already exist, if it does, call
- * host->ops->map() to update the flags
- */
- virq = irq_find_mapping(host, hwirq);
- if (virq != IRQ_NONE) {
- pr_debug("irq: -> existing mapping on virq %d\n", virq);
- return virq;
- }
+ chip = irq_data_get_irq_chip(data);
- /* Get a virtual interrupt number */
- if (host->revmap_type == IRQ_HOST_MAP_LEGACY) {
- /* Handle legacy */
- virq = (unsigned int)hwirq;
- if (virq == 0 || virq >= NUM_ISA_INTERRUPTS)
- return NO_IRQ;
- return virq;
- } else {
- /* Allocate a virtual interrupt number */
- hint = hwirq % irq_virq_count;
- virq = irq_alloc_virt(host, 1, hint);
- if (virq == NO_IRQ) {
- pr_debug("irq: -> virq allocation failed\n");
- return NO_IRQ;
+ cpumask_and(mask, data->affinity, map);
+ if (cpumask_any(mask) >= nr_cpu_ids) {
+ printk("Breaking affinity for irq %i\n", irq);
+ cpumask_copy(mask, map);
}
+ if (chip->irq_set_affinity)
+ chip->irq_set_affinity(data, mask, true);
+ else if (desc->action && !(warned++))
+ printk("Cannot set affinity for irq %i\n", irq);
}
- pr_debug("irq: -> obtained virq %d\n", virq);
-
- /* Clear IRQ_NOREQUEST flag */
- get_irq_desc(virq)->status &= ~IRQ_NOREQUEST;
-
- /* map it */
- smp_wmb();
- irq_map[virq].hwirq = hwirq;
- smp_mb();
- if (host->ops->map(host, virq, hwirq)) {
- pr_debug("irq: -> mapping failed, freeing\n");
- irq_free_virt(virq, 1);
- return NO_IRQ;
- }
- return virq;
+
+ free_cpumask_var(mask);
+
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
}
-EXPORT_SYMBOL_GPL(irq_create_mapping);
+#endif
-extern unsigned int irq_create_of_mapping(struct device_node *controller,
- u32 *intspec, unsigned int intsize)
+static inline void check_stack_overflow(void)
{
- struct irq_host *host;
- irq_hw_number_t hwirq;
- unsigned int type = IRQ_TYPE_NONE;
- unsigned int virq;
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+ long sp;
- if (controller == NULL)
- host = irq_default_host;
- else
- host = irq_find_host(controller);
- if (host == NULL) {
- printk(KERN_WARNING "irq: no irq host found for %s !\n",
- controller->full_name);
- return NO_IRQ;
- }
+ sp = __get_SP() & (THREAD_SIZE-1);
- /* If host has no translation, then we assume interrupt line */
- if (host->ops->xlate == NULL)
- hwirq = intspec[0];
- else {
- if (host->ops->xlate(host, controller, intspec, intsize,
- &hwirq, &type))
- return NO_IRQ;
+ /* check for stack overflow: is there less than 2KB free? */
+ if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
+ printk("do_IRQ: stack overflow: %ld\n",
+ sp - sizeof(struct thread_info));
+ dump_stack();
}
-
- /* Create mapping */
- virq = irq_create_mapping(host, hwirq);
- if (virq == NO_IRQ)
- return virq;
-
- /* Set type if specified and different than the current one */
- if (type != IRQ_TYPE_NONE &&
- type != (get_irq_desc(virq)->status & IRQF_TRIGGER_MASK))
- set_irq_type(virq, type);
- return virq;
+#endif
}
-EXPORT_SYMBOL_GPL(irq_create_of_mapping);
-unsigned int irq_of_parse_and_map(struct device_node *dev, int index)
+void __do_irq(struct pt_regs *regs)
{
- struct of_irq oirq;
+ unsigned int irq;
- if (of_irq_map_one(dev, index, &oirq))
- return NO_IRQ;
+ irq_enter();
- return irq_create_of_mapping(oirq.controller, oirq.specifier,
- oirq.size);
-}
-EXPORT_SYMBOL_GPL(irq_of_parse_and_map);
+ trace_irq_entry(regs);
-void irq_dispose_mapping(unsigned int virq)
-{
- struct irq_host *host = irq_map[virq].host;
- irq_hw_number_t hwirq;
- unsigned long flags;
+ check_stack_overflow();
- WARN_ON (host == NULL);
- if (host == NULL)
- return;
-
- /* Never unmap legacy interrupts */
- if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
- return;
+ /*
+ * Query the platform PIC for the interrupt & ack it.
+ *
+ * This will typically lower the interrupt line to the CPU
+ */
+ irq = ppc_md.get_irq();
- /* remove chip and handler */
- set_irq_chip_and_handler(virq, NULL, NULL);
-
- /* Make sure it's completed */
- synchronize_irq(virq);
-
- /* Tell the PIC about it */
- if (host->ops->unmap)
- host->ops->unmap(host, virq);
- smp_mb();
-
- /* Clear reverse map */
- hwirq = irq_map[virq].hwirq;
- switch(host->revmap_type) {
- case IRQ_HOST_MAP_LINEAR:
- if (hwirq < host->revmap_data.linear.size)
- host->revmap_data.linear.revmap[hwirq] = IRQ_NONE;
- break;
- case IRQ_HOST_MAP_TREE:
- /* Check if radix tree allocated yet */
- if (host->revmap_data.tree.gfp_mask == 0)
- break;
- irq_radix_wrlock(&flags);
- radix_tree_delete(&host->revmap_data.tree, hwirq);
- irq_radix_wrunlock(flags);
- break;
- }
+ /* We can hard enable interrupts now to allow perf interrupts */
+ may_hard_irq_enable();
- /* Destroy map */
- smp_mb();
- irq_map[virq].hwirq = host->inval_irq;
+ /* And finally process it */
+ if (unlikely(irq == NO_IRQ))
+ __get_cpu_var(irq_stat).spurious_irqs++;
+ else
+ generic_handle_irq(irq);
- /* Set some flags */
- get_irq_desc(virq)->status |= IRQ_NOREQUEST;
+ trace_irq_exit(regs);
- /* Free it */
- irq_free_virt(virq, 1);
+ irq_exit();
}
-EXPORT_SYMBOL_GPL(irq_dispose_mapping);
-unsigned int irq_find_mapping(struct irq_host *host,
- irq_hw_number_t hwirq)
+void do_IRQ(struct pt_regs *regs)
{
- unsigned int i;
- unsigned int hint = hwirq % irq_virq_count;
-
- /* Look for default host if nececssary */
- if (host == NULL)
- host = irq_default_host;
- if (host == NULL)
- return NO_IRQ;
-
- /* legacy -> bail early */
- if (host->revmap_type == IRQ_HOST_MAP_LEGACY)
- return hwirq;
-
- /* Slow path does a linear search of the map */
- if (hint < NUM_ISA_INTERRUPTS)
- hint = NUM_ISA_INTERRUPTS;
- i = hint;
- do {
- if (irq_map[i].host == host &&
- irq_map[i].hwirq == hwirq)
- return i;
- i++;
- if (i >= irq_virq_count)
- i = NUM_ISA_INTERRUPTS;
- } while(i != hint);
- return NO_IRQ;
-}
-EXPORT_SYMBOL_GPL(irq_find_mapping);
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ struct thread_info *curtp, *irqtp, *sirqtp;
+ /* Switch to the irq stack to handle this */
+ curtp = current_thread_info();
+ irqtp = hardirq_ctx[raw_smp_processor_id()];
+ sirqtp = softirq_ctx[raw_smp_processor_id()];
-unsigned int irq_radix_revmap(struct irq_host *host,
- irq_hw_number_t hwirq)
-{
- struct radix_tree_root *tree;
- struct irq_map_entry *ptr;
- unsigned int virq;
- unsigned long flags;
+ /* Already there ? */
+ if (unlikely(curtp == irqtp || curtp == sirqtp)) {
+ __do_irq(regs);
+ set_irq_regs(old_regs);
+ return;
+ }
- WARN_ON(host->revmap_type != IRQ_HOST_MAP_TREE);
+ /* Prepare the thread_info in the irq stack */
+ irqtp->task = curtp->task;
+ irqtp->flags = 0;
- /* Check if the radix tree exist yet. We test the value of
- * the gfp_mask for that. Sneaky but saves another int in the
- * structure. If not, we fallback to slow mode
- */
- tree = &host->revmap_data.tree;
- if (tree->gfp_mask == 0)
- return irq_find_mapping(host, hwirq);
-
- /* Now try to resolve */
- irq_radix_rdlock(&flags);
- ptr = radix_tree_lookup(tree, hwirq);
- irq_radix_rdunlock(flags);
-
- /* Found it, return */
- if (ptr) {
- virq = ptr - irq_map;
- return virq;
- }
+ /* Copy the preempt_count so that the [soft]irq checks work. */
+ irqtp->preempt_count = curtp->preempt_count;
- /* If not there, try to insert it */
- virq = irq_find_mapping(host, hwirq);
- if (virq != NO_IRQ) {
- irq_radix_wrlock(&flags);
- radix_tree_insert(tree, hwirq, &irq_map[virq]);
- irq_radix_wrunlock(flags);
- }
- return virq;
-}
+ /* Switch stack and call */
+ call_do_irq(regs, irqtp);
-unsigned int irq_linear_revmap(struct irq_host *host,
- irq_hw_number_t hwirq)
-{
- unsigned int *revmap;
+ /* Restore stack limit */
+ irqtp->task = NULL;
- WARN_ON(host->revmap_type != IRQ_HOST_MAP_LINEAR);
+ /* Copy back updates to the thread_info */
+ if (irqtp->flags)
+ set_bits(irqtp->flags, &curtp->flags);
- /* Check revmap bounds */
- if (unlikely(hwirq >= host->revmap_data.linear.size))
- return irq_find_mapping(host, hwirq);
+ set_irq_regs(old_regs);
+}
- /* Check if revmap was allocated */
- revmap = host->revmap_data.linear.revmap;
- if (unlikely(revmap == NULL))
- return irq_find_mapping(host, hwirq);
+void __init init_IRQ(void)
+{
+ if (ppc_md.init_IRQ)
+ ppc_md.init_IRQ();
- /* Fill up revmap with slow path if no mapping found */
- if (unlikely(revmap[hwirq] == NO_IRQ))
- revmap[hwirq] = irq_find_mapping(host, hwirq);
+ exc_lvl_ctx_init();
- return revmap[hwirq];
+ irq_ctx_init();
}
-unsigned int irq_alloc_virt(struct irq_host *host,
- unsigned int count,
- unsigned int hint)
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+struct thread_info *critirq_ctx[NR_CPUS] __read_mostly;
+struct thread_info *dbgirq_ctx[NR_CPUS] __read_mostly;
+struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly;
+
+void exc_lvl_ctx_init(void)
{
- unsigned long flags;
- unsigned int i, j, found = NO_IRQ;
- unsigned int limit = irq_virq_count - count;
+ struct thread_info *tp;
+ int i, cpu_nr;
- if (count == 0 || count > (irq_virq_count - NUM_ISA_INTERRUPTS))
- return NO_IRQ;
+ for_each_possible_cpu(i) {
+#ifdef CONFIG_PPC64
+ cpu_nr = i;
+#else
+#ifdef CONFIG_SMP
+ cpu_nr = get_hard_smp_processor_id(i);
+#else
+ cpu_nr = 0;
+#endif
+#endif
- spin_lock_irqsave(&irq_big_lock, flags);
+ memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE);
+ tp = critirq_ctx[cpu_nr];
+ tp->cpu = cpu_nr;
+ tp->preempt_count = 0;
- /* Use hint for 1 interrupt if any */
- if (count == 1 && hint >= NUM_ISA_INTERRUPTS &&
- hint < irq_virq_count && irq_map[hint].host == NULL) {
- found = hint;
- goto hint_found;
- }
+#ifdef CONFIG_BOOKE
+ memset((void *)dbgirq_ctx[cpu_nr], 0, THREAD_SIZE);
+ tp = dbgirq_ctx[cpu_nr];
+ tp->cpu = cpu_nr;
+ tp->preempt_count = 0;
- /* Look for count consecutive numbers in the allocatable
- * (non-legacy) space
- */
- for (i = NUM_ISA_INTERRUPTS; i <= limit; ) {
- for (j = i; j < (i + count); j++)
- if (irq_map[j].host != NULL) {
- i = j + 1;
- continue;
- }
- found = i;
- break;
- }
- if (found == NO_IRQ) {
- spin_unlock_irqrestore(&irq_big_lock, flags);
- return NO_IRQ;
- }
- hint_found:
- for (i = found; i < (found + count); i++) {
- irq_map[i].hwirq = host->inval_irq;
- smp_wmb();
- irq_map[i].host = host;
+ memset((void *)mcheckirq_ctx[cpu_nr], 0, THREAD_SIZE);
+ tp = mcheckirq_ctx[cpu_nr];
+ tp->cpu = cpu_nr;
+ tp->preempt_count = HARDIRQ_OFFSET;
+#endif
}
- spin_unlock_irqrestore(&irq_big_lock, flags);
- return found;
}
+#endif
-void irq_free_virt(unsigned int virq, unsigned int count)
-{
- unsigned long flags;
- unsigned int i;
-
- WARN_ON (virq < NUM_ISA_INTERRUPTS);
- WARN_ON (count == 0 || (virq + count) > irq_virq_count);
+struct thread_info *softirq_ctx[NR_CPUS] __read_mostly;
+struct thread_info *hardirq_ctx[NR_CPUS] __read_mostly;
- spin_lock_irqsave(&irq_big_lock, flags);
- for (i = virq; i < (virq + count); i++) {
- struct irq_host *host;
+void irq_ctx_init(void)
+{
+ struct thread_info *tp;
+ int i;
- if (i < NUM_ISA_INTERRUPTS ||
- (virq + count) > irq_virq_count)
- continue;
+ for_each_possible_cpu(i) {
+ memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
+ tp = softirq_ctx[i];
+ tp->cpu = i;
- host = irq_map[i].host;
- irq_map[i].hwirq = host->inval_irq;
- smp_wmb();
- irq_map[i].host = NULL;
+ memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
+ tp = hardirq_ctx[i];
+ tp->cpu = i;
}
- spin_unlock_irqrestore(&irq_big_lock, flags);
}
-void irq_early_init(void)
+void do_softirq_own_stack(void)
{
- unsigned int i;
+ struct thread_info *curtp, *irqtp;
- for (i = 0; i < NR_IRQS; i++)
- get_irq_desc(i)->status |= IRQ_NOREQUEST;
+ curtp = current_thread_info();
+ irqtp = softirq_ctx[smp_processor_id()];
+ irqtp->task = curtp->task;
+ irqtp->flags = 0;
+ call_do_softirq(irqtp);
+ irqtp->task = NULL;
+
+ /* Set any flag that may have been set on the
+ * alternate stack
+ */
+ if (irqtp->flags)
+ set_bits(irqtp->flags, &curtp->flags);
}
-/* We need to create the radix trees late */
-static int irq_late_init(void)
+irq_hw_number_t virq_to_hw(unsigned int virq)
{
- struct irq_host *h;
- unsigned long flags;
+ struct irq_data *irq_data = irq_get_irq_data(virq);
+ return WARN_ON(!irq_data) ? 0 : irq_data->hwirq;
+}
+EXPORT_SYMBOL_GPL(virq_to_hw);
- irq_radix_wrlock(&flags);
- list_for_each_entry(h, &irq_hosts, link) {
- if (h->revmap_type == IRQ_HOST_MAP_TREE)
- INIT_RADIX_TREE(&h->revmap_data.tree, GFP_ATOMIC);
- }
- irq_radix_wrunlock(flags);
+#ifdef CONFIG_SMP
+int irq_choose_cpu(const struct cpumask *mask)
+{
+ int cpuid;
- return 0;
-}
-arch_initcall(irq_late_init);
+ if (cpumask_equal(mask, cpu_online_mask)) {
+ static int irq_rover;
+ static DEFINE_RAW_SPINLOCK(irq_rover_lock);
+ unsigned long flags;
+
+ /* Round-robin distribution... */
+do_round_robin:
+ raw_spin_lock_irqsave(&irq_rover_lock, flags);
-#endif /* CONFIG_PPC_MERGE */
+ irq_rover = cpumask_next(irq_rover, cpu_online_mask);
+ if (irq_rover >= nr_cpu_ids)
+ irq_rover = cpumask_first(cpu_online_mask);
+
+ cpuid = irq_rover;
+
+ raw_spin_unlock_irqrestore(&irq_rover_lock, flags);
+ } else {
+ cpuid = cpumask_first_and(mask, cpu_online_mask);
+ if (cpuid >= nr_cpu_ids)
+ goto do_round_robin;
+ }
-#ifdef CONFIG_PCI_MSI
-int pci_enable_msi(struct pci_dev * pdev)
+ return get_hard_smp_processor_id(cpuid);
+}
+#else
+int irq_choose_cpu(const struct cpumask *mask)
{
- if (ppc_md.enable_msi)
- return ppc_md.enable_msi(pdev);
- else
- return -1;
+ return hard_smp_processor_id();
}
-EXPORT_SYMBOL(pci_enable_msi);
+#endif
-void pci_disable_msi(struct pci_dev * pdev)
+int arch_early_irq_init(void)
{
- if (ppc_md.disable_msi)
- ppc_md.disable_msi(pdev);
+ return 0;
}
-EXPORT_SYMBOL(pci_disable_msi);
-
-void pci_scan_msi_device(struct pci_dev *dev) {}
-int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec) {return -1;}
-void pci_disable_msix(struct pci_dev *dev) {}
-void msi_remove_pci_irq_vectors(struct pci_dev *dev) {}
-void disable_msi_mode(struct pci_dev *dev, int pos, int type) {}
-void pci_no_msi(void) {}
-EXPORT_SYMBOL(pci_enable_msix);
-EXPORT_SYMBOL(pci_disable_msix);
-
-#endif
#ifdef CONFIG_PPC64
static int __init setup_noirqdistrib(char *str)
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
new file mode 100644
index 00000000000..0f199709796
--- /dev/null
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -0,0 +1,266 @@
+/*
+ * Routines for tracking a legacy ISA bridge
+ *
+ * Copyrigh 2007 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
+ *
+ * Some bits and pieces moved over from pci_64.c
+ *
+ * Copyrigh 2003 Anton Blanchard <anton@au.ibm.com>, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+
+unsigned long isa_io_base; /* NULL if no ISA bus */
+EXPORT_SYMBOL(isa_io_base);
+
+/* Cached ISA bridge dev. */
+static struct device_node *isa_bridge_devnode;
+struct pci_dev *isa_bridge_pcidev;
+EXPORT_SYMBOL_GPL(isa_bridge_pcidev);
+
+#define ISA_SPACE_MASK 0x1
+#define ISA_SPACE_IO 0x1
+
+static void pci_process_ISA_OF_ranges(struct device_node *isa_node,
+ unsigned long phb_io_base_phys)
+{
+ /* We should get some saner parsing here and remove these structs */
+ struct pci_address {
+ u32 a_hi;
+ u32 a_mid;
+ u32 a_lo;
+ };
+
+ struct isa_address {
+ u32 a_hi;
+ u32 a_lo;
+ };
+
+ struct isa_range {
+ struct isa_address isa_addr;
+ struct pci_address pci_addr;
+ unsigned int size;
+ };
+
+ const struct isa_range *range;
+ unsigned long pci_addr;
+ unsigned int isa_addr;
+ unsigned int size;
+ int rlen = 0;
+
+ range = of_get_property(isa_node, "ranges", &rlen);
+ if (range == NULL || (rlen < sizeof(struct isa_range)))
+ goto inval_range;
+
+ /* From "ISA Binding to 1275"
+ * The ranges property is laid out as an array of elements,
+ * each of which comprises:
+ * cells 0 - 1: an ISA address
+ * cells 2 - 4: a PCI address
+ * (size depending on dev->n_addr_cells)
+ * cell 5: the size of the range
+ */
+ if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) {
+ range++;
+ rlen -= sizeof(struct isa_range);
+ if (rlen < sizeof(struct isa_range))
+ goto inval_range;
+ }
+ if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO)
+ goto inval_range;
+
+ isa_addr = range->isa_addr.a_lo;
+ pci_addr = (unsigned long) range->pci_addr.a_mid << 32 |
+ range->pci_addr.a_lo;
+
+ /* Assume these are both zero. Note: We could fix that and
+ * do a proper parsing instead ... oh well, that will do for
+ * now as nobody uses fancy mappings for ISA bridges
+ */
+ if ((pci_addr != 0) || (isa_addr != 0)) {
+ printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
+ __func__);
+ return;
+ }
+
+ /* Align size and make sure it's cropped to 64K */
+ size = PAGE_ALIGN(range->size);
+ if (size > 0x10000)
+ size = 0x10000;
+
+ __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
+ size, _PAGE_NO_CACHE|_PAGE_GUARDED);
+ return;
+
+inval_range:
+ printk(KERN_ERR "no ISA IO ranges or unexpected isa range, "
+ "mapping 64k\n");
+ __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
+ 0x10000, _PAGE_NO_CACHE|_PAGE_GUARDED);
+}
+
+
+/**
+ * isa_bridge_find_early - Find and map the ISA IO space early before
+ * main PCI discovery. This is optionally called by
+ * the arch code when adding PCI PHBs to get early
+ * access to ISA IO ports
+ */
+void __init isa_bridge_find_early(struct pci_controller *hose)
+{
+ struct device_node *np, *parent = NULL, *tmp;
+
+ /* If we already have an ISA bridge, bail off */
+ if (isa_bridge_devnode != NULL)
+ return;
+
+ /* For each "isa" node in the system. Note : we do a search by
+ * type and not by name. It might be better to do by name but that's
+ * what the code used to do and I don't want to break too much at
+ * once. We can look into changing that separately
+ */
+ for_each_node_by_type(np, "isa") {
+ /* Look for our hose being a parent */
+ for (parent = of_get_parent(np); parent;) {
+ if (parent == hose->dn) {
+ of_node_put(parent);
+ break;
+ }
+ tmp = parent;
+ parent = of_get_parent(parent);
+ of_node_put(tmp);
+ }
+ if (parent != NULL)
+ break;
+ }
+ if (np == NULL)
+ return;
+ isa_bridge_devnode = np;
+
+ /* Now parse the "ranges" property and setup the ISA mapping */
+ pci_process_ISA_OF_ranges(np, hose->io_base_phys);
+
+ /* Set the global ISA io base to indicate we have an ISA bridge */
+ isa_io_base = ISA_IO_BASE;
+
+ pr_debug("ISA bridge (early) is %s\n", np->full_name);
+}
+
+/**
+ * isa_bridge_find_late - Find and map the ISA IO space upon discovery of
+ * a new ISA bridge
+ */
+static void isa_bridge_find_late(struct pci_dev *pdev,
+ struct device_node *devnode)
+{
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+
+ /* Store ISA device node and PCI device */
+ isa_bridge_devnode = of_node_get(devnode);
+ isa_bridge_pcidev = pdev;
+
+ /* Now parse the "ranges" property and setup the ISA mapping */
+ pci_process_ISA_OF_ranges(devnode, hose->io_base_phys);
+
+ /* Set the global ISA io base to indicate we have an ISA bridge */
+ isa_io_base = ISA_IO_BASE;
+
+ pr_debug("ISA bridge (late) is %s on %s\n",
+ devnode->full_name, pci_name(pdev));
+}
+
+/**
+ * isa_bridge_remove - Remove/unmap an ISA bridge
+ */
+static void isa_bridge_remove(void)
+{
+ pr_debug("ISA bridge removed !\n");
+
+ /* Clear the global ISA io base to indicate that we have no more
+ * ISA bridge. Note that drivers don't quite handle that, though
+ * we should probably do something about it. But do we ever really
+ * have ISA bridges being removed on machines using legacy devices ?
+ */
+ isa_io_base = ISA_IO_BASE;
+
+ /* Clear references to the bridge */
+ of_node_put(isa_bridge_devnode);
+ isa_bridge_devnode = NULL;
+ isa_bridge_pcidev = NULL;
+
+ /* Unmap the ISA area */
+ __iounmap_at((void *)ISA_IO_BASE, 0x10000);
+}
+
+/**
+ * isa_bridge_notify - Get notified of PCI devices addition/removal
+ */
+static int isa_bridge_notify(struct notifier_block *nb, unsigned long action,
+ void *data)
+{
+ struct device *dev = data;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct device_node *devnode = pci_device_to_OF_node(pdev);
+
+ switch(action) {
+ case BUS_NOTIFY_ADD_DEVICE:
+ /* Check if we have an early ISA device, without PCI dev */
+ if (isa_bridge_devnode && isa_bridge_devnode == devnode &&
+ !isa_bridge_pcidev) {
+ pr_debug("ISA bridge PCI attached: %s\n",
+ pci_name(pdev));
+ isa_bridge_pcidev = pdev;
+ }
+
+ /* Check if we have no ISA device, and this happens to be one,
+ * register it as such if it has an OF device
+ */
+ if (!isa_bridge_devnode && devnode && devnode->type &&
+ !strcmp(devnode->type, "isa"))
+ isa_bridge_find_late(pdev, devnode);
+
+ return 0;
+ case BUS_NOTIFY_DEL_DEVICE:
+ /* Check if this our existing ISA device */
+ if (pdev == isa_bridge_pcidev ||
+ (devnode && devnode == isa_bridge_devnode))
+ isa_bridge_remove();
+ return 0;
+ }
+ return 0;
+}
+
+static struct notifier_block isa_bridge_notifier = {
+ .notifier_call = isa_bridge_notify
+};
+
+/**
+ * isa_bridge_init - register to be notified of ISA bridge addition/removal
+ *
+ */
+static int __init isa_bridge_init(void)
+{
+ bus_register_notifier(&pci_bus_type, &isa_bridge_notifier);
+ return 0;
+}
+arch_initcall(isa_bridge_init);
diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c
new file mode 100644
index 00000000000..a1ed8a8c7cb
--- /dev/null
+++ b/arch/powerpc/kernel/jump_label.c
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2010 Michael Ellerman, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/jump_label.h>
+#include <asm/code-patching.h>
+
+#ifdef HAVE_JUMP_LABEL
+void arch_jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ u32 *addr = (u32 *)(unsigned long)entry->code;
+
+ if (type == JUMP_LABEL_ENABLE)
+ patch_branch(addr, entry->target, 0);
+ else
+ patch_instruction(addr, PPC_INST_NOP);
+}
+#endif
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
new file mode 100644
index 00000000000..8504657379f
--- /dev/null
+++ b/arch/powerpc/kernel/kgdb.c
@@ -0,0 +1,494 @@
+/*
+ * PowerPC backend to the KGDB stub.
+ *
+ * 1998 (c) Michael AK Tesch (tesch@cs.wisc.edu)
+ * Copyright (C) 2003 Timesys Corporation.
+ * Copyright (C) 2004-2006 MontaVista Software, Inc.
+ * PPC64 Mods (C) 2005 Frank Rowand (frowand@mvista.com)
+ * PPC32 support restored by Vitaly Wool <vwool@ru.mvista.com> and
+ * Sergei Shtylyov <sshtylyov@ru.mvista.com>
+ * Copyright (C) 2007-2008 Wind River Systems, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program as licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kgdb.h>
+#include <linux/smp.h>
+#include <linux/signal.h>
+#include <linux/ptrace.h>
+#include <linux/kdebug.h>
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/machdep.h>
+#include <asm/debug.h>
+#include <linux/slab.h>
+
+/*
+ * This table contains the mapping between PowerPC hardware trap types, and
+ * signals, which are primarily what GDB understands. GDB and the kernel
+ * don't always agree on values, so we use constants taken from gdb-6.2.
+ */
+static struct hard_trap_info
+{
+ unsigned int tt; /* Trap type code for powerpc */
+ unsigned char signo; /* Signal that we map this trap into */
+} hard_trap_info[] = {
+ { 0x0100, 0x02 /* SIGINT */ }, /* system reset */
+ { 0x0200, 0x0b /* SIGSEGV */ }, /* machine check */
+ { 0x0300, 0x0b /* SIGSEGV */ }, /* data access */
+ { 0x0400, 0x0b /* SIGSEGV */ }, /* instruction access */
+ { 0x0500, 0x02 /* SIGINT */ }, /* external interrupt */
+ { 0x0600, 0x0a /* SIGBUS */ }, /* alignment */
+ { 0x0700, 0x05 /* SIGTRAP */ }, /* program check */
+ { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */
+ { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */
+ { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */
+#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+ { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */
+#if defined(CONFIG_FSL_BOOKE)
+ { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */
+ { 0x2020, 0x08 /* SIGFPE */ }, /* spe unavailable */
+ { 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data */
+ { 0x2040, 0x08 /* SIGFPE */ }, /* spe fp data */
+ { 0x2050, 0x08 /* SIGFPE */ }, /* spe fp round */
+ { 0x2060, 0x0e /* SIGILL */ }, /* performance monitor */
+ { 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */
+ { 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */
+ { 0x3200, 0x02 /* SIGINT */ }, /* watchdog */
+#else /* ! CONFIG_FSL_BOOKE */
+ { 0x1000, 0x0e /* SIGALRM */ }, /* prog interval timer */
+ { 0x1010, 0x0e /* SIGALRM */ }, /* fixed interval timer */
+ { 0x1020, 0x02 /* SIGINT */ }, /* watchdog */
+ { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */
+ { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */
+#endif
+#else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */
+ { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */
+#if defined(CONFIG_8xx)
+ { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */
+#else /* ! CONFIG_8xx */
+ { 0x0f00, 0x04 /* SIGILL */ }, /* performance monitor */
+ { 0x0f20, 0x08 /* SIGFPE */ }, /* altivec unavailable */
+ { 0x1300, 0x05 /* SIGTRAP */ }, /* instruction address break */
+#if defined(CONFIG_PPC64)
+ { 0x1200, 0x05 /* SIGILL */ }, /* system error */
+ { 0x1500, 0x04 /* SIGILL */ }, /* soft patch */
+ { 0x1600, 0x04 /* SIGILL */ }, /* maintenance */
+ { 0x1700, 0x08 /* SIGFPE */ }, /* altivec assist */
+ { 0x1800, 0x04 /* SIGILL */ }, /* thermal */
+#else /* ! CONFIG_PPC64 */
+ { 0x1400, 0x02 /* SIGINT */ }, /* SMI */
+ { 0x1600, 0x08 /* SIGFPE */ }, /* altivec assist */
+ { 0x1700, 0x04 /* SIGILL */ }, /* TAU */
+ { 0x2000, 0x05 /* SIGTRAP */ }, /* run mode */
+#endif
+#endif
+#endif
+ { 0x0000, 0x00 } /* Must be last */
+};
+
+static int computeSignal(unsigned int tt)
+{
+ struct hard_trap_info *ht;
+
+ for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
+ if (ht->tt == tt)
+ return ht->signo;
+
+ return SIGHUP; /* default for things we don't know about */
+}
+
+/**
+ *
+ * kgdb_skipexception - Bail out of KGDB when we've been triggered.
+ * @exception: Exception vector number
+ * @regs: Current &struct pt_regs.
+ *
+ * On some architectures we need to skip a breakpoint exception when
+ * it occurs after a breakpoint has been removed.
+ *
+ */
+int kgdb_skipexception(int exception, struct pt_regs *regs)
+{
+ return kgdb_isremovedbreak(regs->nip);
+}
+
+static int kgdb_call_nmi_hook(struct pt_regs *regs)
+{
+ kgdb_nmicallback(raw_smp_processor_id(), regs);
+ return 0;
+}
+
+#ifdef CONFIG_SMP
+void kgdb_roundup_cpus(unsigned long flags)
+{
+ smp_send_debugger_break();
+}
+#endif
+
+/* KGDB functions to use existing PowerPC64 hooks. */
+static int kgdb_debugger(struct pt_regs *regs)
+{
+ return !kgdb_handle_exception(1, computeSignal(TRAP(regs)),
+ DIE_OOPS, regs);
+}
+
+static int kgdb_handle_breakpoint(struct pt_regs *regs)
+{
+ if (user_mode(regs))
+ return 0;
+
+ if (kgdb_handle_exception(1, SIGTRAP, 0, regs) != 0)
+ return 0;
+
+ if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr))
+ regs->nip += BREAK_INSTR_SIZE;
+
+ return 1;
+}
+
+static DEFINE_PER_CPU(struct thread_info, kgdb_thread_info);
+static int kgdb_singlestep(struct pt_regs *regs)
+{
+ struct thread_info *thread_info, *exception_thread_info;
+ struct thread_info *backup_current_thread_info =
+ &__get_cpu_var(kgdb_thread_info);
+
+ if (user_mode(regs))
+ return 0;
+
+ /*
+ * On Book E and perhaps other processors, singlestep is handled on
+ * the critical exception stack. This causes current_thread_info()
+ * to fail, since it it locates the thread_info by masking off
+ * the low bits of the current stack pointer. We work around
+ * this issue by copying the thread_info from the kernel stack
+ * before calling kgdb_handle_exception, and copying it back
+ * afterwards. On most processors the copy is avoided since
+ * exception_thread_info == thread_info.
+ */
+ thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1));
+ exception_thread_info = current_thread_info();
+
+ if (thread_info != exception_thread_info) {
+ /* Save the original current_thread_info. */
+ memcpy(backup_current_thread_info, exception_thread_info, sizeof *thread_info);
+ memcpy(exception_thread_info, thread_info, sizeof *thread_info);
+ }
+
+ kgdb_handle_exception(0, SIGTRAP, 0, regs);
+
+ if (thread_info != exception_thread_info)
+ /* Restore current_thread_info lastly. */
+ memcpy(exception_thread_info, backup_current_thread_info, sizeof *thread_info);
+
+ return 1;
+}
+
+static int kgdb_iabr_match(struct pt_regs *regs)
+{
+ if (user_mode(regs))
+ return 0;
+
+ if (kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs) != 0)
+ return 0;
+ return 1;
+}
+
+static int kgdb_break_match(struct pt_regs *regs)
+{
+ if (user_mode(regs))
+ return 0;
+
+ if (kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs) != 0)
+ return 0;
+ return 1;
+}
+
+#define PACK64(ptr, src) do { *(ptr++) = (src); } while (0)
+
+#define PACK32(ptr, src) do { \
+ u32 *ptr32; \
+ ptr32 = (u32 *)ptr; \
+ *(ptr32++) = (src); \
+ ptr = (unsigned long *)ptr32; \
+ } while (0)
+
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+ struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp +
+ STACK_FRAME_OVERHEAD);
+ unsigned long *ptr = gdb_regs;
+ int reg;
+
+ memset(gdb_regs, 0, NUMREGBYTES);
+
+ /* Regs GPR0-2 */
+ for (reg = 0; reg < 3; reg++)
+ PACK64(ptr, regs->gpr[reg]);
+
+ /* Regs GPR3-13 are caller saved, not in regs->gpr[] */
+ ptr += 11;
+
+ /* Regs GPR14-31 */
+ for (reg = 14; reg < 32; reg++)
+ PACK64(ptr, regs->gpr[reg]);
+
+#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_SPE
+ for (reg = 0; reg < 32; reg++)
+ PACK64(ptr, p->thread.evr[reg]);
+#else
+ ptr += 32;
+#endif
+#else
+ /* fp registers not used by kernel, leave zero */
+ ptr += 32 * 8 / sizeof(long);
+#endif
+
+ PACK64(ptr, regs->nip);
+ PACK64(ptr, regs->msr);
+ PACK32(ptr, regs->ccr);
+ PACK64(ptr, regs->link);
+ PACK64(ptr, regs->ctr);
+ PACK32(ptr, regs->xer);
+
+ BUG_ON((unsigned long)ptr >
+ (unsigned long)(((void *)gdb_regs) + NUMREGBYTES));
+}
+
+#define GDB_SIZEOF_REG sizeof(unsigned long)
+#define GDB_SIZEOF_REG_U32 sizeof(u32)
+
+#ifdef CONFIG_FSL_BOOKE
+#define GDB_SIZEOF_FLOAT_REG sizeof(unsigned long)
+#else
+#define GDB_SIZEOF_FLOAT_REG sizeof(u64)
+#endif
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
+{
+ { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[0]) },
+ { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[1]) },
+ { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[2]) },
+ { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[3]) },
+ { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[4]) },
+ { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[5]) },
+ { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[6]) },
+ { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[7]) },
+ { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[8]) },
+ { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[9]) },
+ { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[10]) },
+ { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[11]) },
+ { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[12]) },
+ { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[13]) },
+ { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[14]) },
+ { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[15]) },
+ { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[16]) },
+ { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[17]) },
+ { "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[18]) },
+ { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[19]) },
+ { "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[20]) },
+ { "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[21]) },
+ { "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[22]) },
+ { "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[23]) },
+ { "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[24]) },
+ { "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[25]) },
+ { "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[26]) },
+ { "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[27]) },
+ { "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[28]) },
+ { "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[29]) },
+ { "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[30]) },
+ { "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, gpr[31]) },
+
+ { "f0", GDB_SIZEOF_FLOAT_REG, 0 },
+ { "f1", GDB_SIZEOF_FLOAT_REG, 1 },
+ { "f2", GDB_SIZEOF_FLOAT_REG, 2 },
+ { "f3", GDB_SIZEOF_FLOAT_REG, 3 },
+ { "f4", GDB_SIZEOF_FLOAT_REG, 4 },
+ { "f5", GDB_SIZEOF_FLOAT_REG, 5 },
+ { "f6", GDB_SIZEOF_FLOAT_REG, 6 },
+ { "f7", GDB_SIZEOF_FLOAT_REG, 7 },
+ { "f8", GDB_SIZEOF_FLOAT_REG, 8 },
+ { "f9", GDB_SIZEOF_FLOAT_REG, 9 },
+ { "f10", GDB_SIZEOF_FLOAT_REG, 10 },
+ { "f11", GDB_SIZEOF_FLOAT_REG, 11 },
+ { "f12", GDB_SIZEOF_FLOAT_REG, 12 },
+ { "f13", GDB_SIZEOF_FLOAT_REG, 13 },
+ { "f14", GDB_SIZEOF_FLOAT_REG, 14 },
+ { "f15", GDB_SIZEOF_FLOAT_REG, 15 },
+ { "f16", GDB_SIZEOF_FLOAT_REG, 16 },
+ { "f17", GDB_SIZEOF_FLOAT_REG, 17 },
+ { "f18", GDB_SIZEOF_FLOAT_REG, 18 },
+ { "f19", GDB_SIZEOF_FLOAT_REG, 19 },
+ { "f20", GDB_SIZEOF_FLOAT_REG, 20 },
+ { "f21", GDB_SIZEOF_FLOAT_REG, 21 },
+ { "f22", GDB_SIZEOF_FLOAT_REG, 22 },
+ { "f23", GDB_SIZEOF_FLOAT_REG, 23 },
+ { "f24", GDB_SIZEOF_FLOAT_REG, 24 },
+ { "f25", GDB_SIZEOF_FLOAT_REG, 25 },
+ { "f26", GDB_SIZEOF_FLOAT_REG, 26 },
+ { "f27", GDB_SIZEOF_FLOAT_REG, 27 },
+ { "f28", GDB_SIZEOF_FLOAT_REG, 28 },
+ { "f29", GDB_SIZEOF_FLOAT_REG, 29 },
+ { "f30", GDB_SIZEOF_FLOAT_REG, 30 },
+ { "f31", GDB_SIZEOF_FLOAT_REG, 31 },
+
+ { "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, nip) },
+ { "msr", GDB_SIZEOF_REG, offsetof(struct pt_regs, msr) },
+ { "cr", GDB_SIZEOF_REG_U32, offsetof(struct pt_regs, ccr) },
+ { "lr", GDB_SIZEOF_REG, offsetof(struct pt_regs, link) },
+ { "ctr", GDB_SIZEOF_REG_U32, offsetof(struct pt_regs, ctr) },
+ { "xer", GDB_SIZEOF_REG, offsetof(struct pt_regs, xer) },
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+ if (regno >= DBG_MAX_REG_NUM || regno < 0)
+ return NULL;
+
+ if (regno < 32 || regno >= 64)
+ /* First 0 -> 31 gpr registers*/
+ /* pc, msr, ls... registers 64 -> 69 */
+ memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
+ dbg_reg_def[regno].size);
+
+ if (regno >= 32 && regno < 64) {
+ /* FP registers 32 -> 63 */
+#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE)
+ if (current)
+ memcpy(mem, &current->thread.evr[regno-32],
+ dbg_reg_def[regno].size);
+#else
+ /* fp registers not used by kernel, leave zero */
+ memset(mem, 0, dbg_reg_def[regno].size);
+#endif
+ }
+
+ return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+ if (regno >= DBG_MAX_REG_NUM || regno < 0)
+ return -EINVAL;
+
+ if (regno < 32 || regno >= 64)
+ /* First 0 -> 31 gpr registers*/
+ /* pc, msr, ls... registers 64 -> 69 */
+ memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
+ dbg_reg_def[regno].size);
+
+ if (regno >= 32 && regno < 64) {
+ /* FP registers 32 -> 63 */
+#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE)
+ memcpy(&current->thread.evr[regno-32], mem,
+ dbg_reg_def[regno].size);
+#else
+ /* fp registers not used by kernel, leave zero */
+ return 0;
+#endif
+ }
+
+ return 0;
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+ regs->nip = pc;
+}
+
+/*
+ * This function does PowerPC specific procesing for interfacing to gdb.
+ */
+int kgdb_arch_handle_exception(int vector, int signo, int err_code,
+ char *remcom_in_buffer, char *remcom_out_buffer,
+ struct pt_regs *linux_regs)
+{
+ char *ptr = &remcom_in_buffer[1];
+ unsigned long addr;
+
+ switch (remcom_in_buffer[0]) {
+ /*
+ * sAA..AA Step one instruction from AA..AA
+ * This will return an error to gdb ..
+ */
+ case 's':
+ case 'c':
+ /* handle the optional parameter */
+ if (kgdb_hex2long(&ptr, &addr))
+ linux_regs->nip = addr;
+
+ atomic_set(&kgdb_cpu_doing_single_step, -1);
+ /* set the trace bit if we're stepping */
+ if (remcom_in_buffer[0] == 's') {
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ mtspr(SPRN_DBCR0,
+ mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
+ linux_regs->msr |= MSR_DE;
+#else
+ linux_regs->msr |= MSR_SE;
+#endif
+ atomic_set(&kgdb_cpu_doing_single_step,
+ raw_smp_processor_id());
+ }
+ return 0;
+ }
+
+ return -1;
+}
+
+/*
+ * Global data
+ */
+struct kgdb_arch arch_kgdb_ops = {
+ .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08},
+};
+
+static int kgdb_not_implemented(struct pt_regs *regs)
+{
+ return 0;
+}
+
+static void *old__debugger_ipi;
+static void *old__debugger;
+static void *old__debugger_bpt;
+static void *old__debugger_sstep;
+static void *old__debugger_iabr_match;
+static void *old__debugger_break_match;
+static void *old__debugger_fault_handler;
+
+int kgdb_arch_init(void)
+{
+ old__debugger_ipi = __debugger_ipi;
+ old__debugger = __debugger;
+ old__debugger_bpt = __debugger_bpt;
+ old__debugger_sstep = __debugger_sstep;
+ old__debugger_iabr_match = __debugger_iabr_match;
+ old__debugger_break_match = __debugger_break_match;
+ old__debugger_fault_handler = __debugger_fault_handler;
+
+ __debugger_ipi = kgdb_call_nmi_hook;
+ __debugger = kgdb_debugger;
+ __debugger_bpt = kgdb_handle_breakpoint;
+ __debugger_sstep = kgdb_singlestep;
+ __debugger_iabr_match = kgdb_iabr_match;
+ __debugger_break_match = kgdb_break_match;
+ __debugger_fault_handler = kgdb_not_implemented;
+
+ return 0;
+}
+
+void kgdb_arch_exit(void)
+{
+ __debugger_ipi = old__debugger_ipi;
+ __debugger = old__debugger;
+ __debugger_bpt = old__debugger_bpt;
+ __debugger_sstep = old__debugger_sstep;
+ __debugger_iabr_match = old__debugger_iabr_match;
+ __debugger_break_match = old__debugger_break_match;
+ __debugger_fault_handler = old__debugger_fault_handler;
+}
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index cd65c367b8b..2f72af82513 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -30,14 +30,18 @@
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/slab.h>
+#include <asm/code-patching.h>
#include <asm/cacheflush.h>
-#include <asm/kdebug.h>
#include <asm/sstep.h>
#include <asm/uaccess.h>
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
+
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
int ret = 0;
@@ -46,12 +50,13 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
if ((unsigned long)p->addr & 0x03) {
printk("Attempt to register kprobe at an unaligned address\n");
ret = -EINVAL;
- } else if (IS_MTMSRD(insn) || IS_RFID(insn)) {
- printk("Cannot register a kprobe on rfid or mtmsrd\n");
+ } else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) {
+ printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n");
ret = -EINVAL;
}
- /* insn must be on a special executable page on ppc64 */
+ /* insn must be on a special executable page on ppc64. This is
+ * not explicitly required on ppc32 (right now), but it doesn't hurt */
if (!ret) {
p->ainsn.insn = get_insn_slot();
if (!p->ainsn.insn)
@@ -59,12 +64,14 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
}
if (!ret) {
- memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+ memcpy(p->ainsn.insn, p->addr,
+ MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
p->opcode = *p->addr;
flush_icache_range((unsigned long)p->ainsn.insn,
(unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
}
+ p->ainsn.boostable = 0;
return ret;
}
@@ -84,14 +91,15 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
- mutex_lock(&kprobe_mutex);
- free_insn_slot(p->ainsn.insn);
- mutex_unlock(&kprobe_mutex);
+ if (p->ainsn.insn) {
+ free_insn_slot(p->ainsn.insn, 0);
+ p->ainsn.insn = NULL;
+ }
}
static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
{
- regs->msr |= MSR_SE;
+ enable_single_step(regs);
/*
* On powerpc we should single step on the original
@@ -123,23 +131,13 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
kcb->kprobe_saved_msr = regs->msr;
}
-/* Called with kretprobe_lock held */
-void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
- struct kretprobe_instance *ri;
-
- if ((ri = get_free_rp_inst(rp)) != NULL) {
- ri->rp = rp;
- ri->task = current;
- ri->ret_addr = (kprobe_opcode_t *)regs->link;
-
- /* Replace the return addr with trampoline addr */
- regs->link = (unsigned long)kretprobe_trampoline;
- add_rp_inst(ri);
- } else {
- rp->nmissed++;
- }
+ ri->ret_addr = (kprobe_opcode_t *)regs->link;
+
+ /* Replace the return addr with trampoline addr */
+ regs->link = (unsigned long)kretprobe_trampoline;
}
static int __kprobes kprobe_handler(struct pt_regs *regs)
@@ -163,7 +161,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
kprobe_opcode_t insn = *p->ainsn.insn;
if (kcb->kprobe_status == KPROBE_HIT_SS &&
is_trap(insn)) {
- regs->msr &= ~MSR_SE;
+ /* Turn off 'trace' bits */
+ regs->msr &= ~MSR_SINGLESTEP;
regs->msr |= kcb->kprobe_saved_msr;
goto no_kprobe;
}
@@ -232,6 +231,38 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
return 1;
ss_probe:
+ if (p->ainsn.boostable >= 0) {
+ unsigned int insn = *p->ainsn.insn;
+
+ /* regs->nip is also adjusted if emulate_step returns 1 */
+ ret = emulate_step(regs, insn);
+ if (ret > 0) {
+ /*
+ * Once this instruction has been boosted
+ * successfully, set the boostable flag
+ */
+ if (unlikely(p->ainsn.boostable == 0))
+ p->ainsn.boostable = 1;
+
+ if (p->post_handler)
+ p->post_handler(p, regs, 0);
+
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ return 1;
+ } else if (ret < 0) {
+ /*
+ * We don't allow kprobes on mtmsr(d)/rfi(d), etc.
+ * So, we should never get here... but, its still
+ * good to catch them, just in case...
+ */
+ printk("Can't step on instruction %x\n", insn);
+ BUG();
+ } else if (ret == 0)
+ /* This instruction can't be boosted */
+ p->ainsn.boostable = -1;
+ }
prepare_singlestep(p, regs);
kcb->kprobe_status = KPROBE_HIT_SS;
return 1;
@@ -247,7 +278,7 @@ no_kprobe:
* - When the probed function returns, this probe
* causes the handlers to fire
*/
-void kretprobe_trampoline_holder(void)
+static void __used kretprobe_trampoline_holder(void)
{
asm volatile(".global kretprobe_trampoline\n"
"kretprobe_trampoline:\n"
@@ -257,40 +288,41 @@ void kretprobe_trampoline_holder(void)
/*
* Called when the probe at kretprobe trampoline is hit
*/
-int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+static int __kprobes trampoline_probe_handler(struct kprobe *p,
+ struct pt_regs *regs)
{
- struct kretprobe_instance *ri = NULL;
- struct hlist_head *head;
- struct hlist_node *node, *tmp;
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *tmp;
unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ INIT_HLIST_HEAD(&empty_rp);
+ kretprobe_hash_lock(current, &head, &flags);
/*
* It is possible to have multiple instances associated with a given
* task either because an multiple functions in the call path
- * have a return probe installed on them, and/or more then one return
+ * have a return probe installed on them, and/or more than one return
* return probe was registered for a target function.
*
* We can handle this because:
* - instances are always inserted at the head of the list
* - when multiple return probes are registered for the same
- * function, the first instance's ret_addr will point to the
+ * function, the first instance's ret_addr will point to the
* real return address, and all the rest will point to
* kretprobe_trampoline
*/
- hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
- if (ri->task != current)
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
/* another task is sharing our hash bucket */
- continue;
+ continue;
if (ri->rp && ri->rp->handler)
ri->rp->handler(ri, regs);
orig_ret_address = (unsigned long)ri->ret_addr;
- recycle_rp_inst(ri);
+ recycle_rp_inst(ri, &empty_rp);
if (orig_ret_address != trampoline_address)
/*
@@ -301,19 +333,23 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
break;
}
- BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
regs->nip = orig_ret_address;
reset_current_kprobe();
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ kretprobe_hash_unlock(current, &flags);
preempt_enable_no_resched();
- /*
- * By returning a non-zero value, we are telling
- * kprobe_handler() that we don't want the post_handler
- * to run (and have re-enabled preemption)
- */
- return 1;
+ hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we don't want the post_handler
+ * to run (and have re-enabled preemption)
+ */
+ return 1;
}
/*
@@ -324,17 +360,6 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
* single-stepped a copy of the instruction. The address of this
* copy is p->ainsn.insn.
*/
-static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
-{
- int ret;
- unsigned int insn = *p->ainsn.insn;
-
- regs->nip = (unsigned long)p->addr;
- ret = emulate_step(regs, insn);
- if (ret == 0)
- regs->nip = (unsigned long)p->addr + 4;
-}
-
static int __kprobes post_kprobe_handler(struct pt_regs *regs)
{
struct kprobe *cur = kprobe_running();
@@ -343,12 +368,17 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
if (!cur)
return 0;
+ /* make sure we got here for instruction we have a kprobe on */
+ if (((unsigned long)cur->ainsn.insn + 4) != regs->nip)
+ return 0;
+
if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
kcb->kprobe_status = KPROBE_HIT_SSDONE;
cur->post_handler(cur, regs, 0);
}
- resume_execution(cur, regs);
+ /* Adjust nip to after the single-stepped instruction */
+ regs->nip = (unsigned long)cur->addr + 4;
regs->msr |= kcb->kprobe_saved_msr;
/*Restore back the original saved kprobes variables and continue. */
@@ -362,16 +392,16 @@ out:
/*
* if somebody else is singlestepping across a probe point, msr
- * will have SE set, in which case, continue the remaining processing
+ * will have DE/SE set, in which case, continue the remaining processing
* of do_debug, as if this is not a probe hit.
*/
- if (regs->msr & MSR_SE)
+ if (regs->msr & MSR_SINGLESTEP)
return 0;
return 1;
}
-static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -388,7 +418,7 @@ static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
* normal page fault.
*/
regs->nip = (unsigned long)cur->addr;
- regs->msr &= ~MSR_SE;
+ regs->msr &= ~MSR_SINGLESTEP; /* Turn off 'trace' bits */
regs->msr |= kcb->kprobe_saved_msr;
if (kcb->kprobe_status == KPROBE_REENTER)
restore_previous_kprobe(kcb);
@@ -400,7 +430,7 @@ static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
case KPROBE_HIT_SSDONE:
/*
* We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accouting
+ * we can also use npre/npostfault count for accounting
* these specific fault cases.
*/
kprobes_inc_nmissed_count(cur);
@@ -456,20 +486,17 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
if (post_kprobe_handler(args->regs))
ret = NOTIFY_STOP;
break;
- case DIE_PAGE_FAULT:
- /* kprobe_running() needs smp_processor_id() */
- preempt_disable();
- if (kprobe_running() &&
- kprobe_fault_handler(args->regs, args->trapnr))
- ret = NOTIFY_STOP;
- preempt_enable();
- break;
default:
break;
}
return ret;
}
+unsigned long arch_deref_entry_point(void *entry)
+{
+ return ppc_global_function_entry(entry);
+}
+
int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
struct jprobe *jp = container_of(p, struct jprobe, kp);
@@ -478,18 +505,24 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
/* setup return addr to the jprobe handler routine */
- regs->nip = (unsigned long)(((func_descr_t *)jp->entry)->entry);
+ regs->nip = arch_deref_entry_point(jp->entry);
+#ifdef CONFIG_PPC64
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+ regs->gpr[12] = (unsigned long)jp->entry;
+#else
regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
+#endif
+#endif
return 1;
}
-void __kprobes jprobe_return(void)
+void __used __kprobes jprobe_return(void)
{
asm volatile("trap" ::: "memory");
}
-void __kprobes jprobe_return_end(void)
+static void __used __kprobes jprobe_return_end(void)
{
};
@@ -516,3 +549,11 @@ int __init arch_init_kprobes(void)
{
return register_kprobe(&trampoline_p);
}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+ if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
+ return 1;
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
new file mode 100644
index 00000000000..33aa4ddf597
--- /dev/null
+++ b/arch/powerpc/kernel/kvm.c
@@ -0,0 +1,742 @@
+/*
+ * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
+ * Copyright 2010-2011 Freescale Semiconductor, Inc.
+ *
+ * Authors:
+ * Alexander Graf <agraf@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/kvm_para.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+
+#include <asm/reg.h>
+#include <asm/sections.h>
+#include <asm/cacheflush.h>
+#include <asm/disassemble.h>
+#include <asm/ppc-opcode.h>
+#include <asm/epapr_hcalls.h>
+
+#define KVM_MAGIC_PAGE (-4096L)
+#define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x)
+
+#define KVM_INST_LWZ 0x80000000
+#define KVM_INST_STW 0x90000000
+#define KVM_INST_LD 0xe8000000
+#define KVM_INST_STD 0xf8000000
+#define KVM_INST_NOP 0x60000000
+#define KVM_INST_B 0x48000000
+#define KVM_INST_B_MASK 0x03ffffff
+#define KVM_INST_B_MAX 0x01ffffff
+#define KVM_INST_LI 0x38000000
+
+#define KVM_MASK_RT 0x03e00000
+#define KVM_RT_30 0x03c00000
+#define KVM_MASK_RB 0x0000f800
+#define KVM_INST_MFMSR 0x7c0000a6
+
+#define SPR_FROM 0
+#define SPR_TO 0x100
+
+#define KVM_INST_SPR(sprn, moveto) (0x7c0002a6 | \
+ (((sprn) & 0x1f) << 16) | \
+ (((sprn) & 0x3e0) << 6) | \
+ (moveto))
+
+#define KVM_INST_MFSPR(sprn) KVM_INST_SPR(sprn, SPR_FROM)
+#define KVM_INST_MTSPR(sprn) KVM_INST_SPR(sprn, SPR_TO)
+
+#define KVM_INST_TLBSYNC 0x7c00046c
+#define KVM_INST_MTMSRD_L0 0x7c000164
+#define KVM_INST_MTMSRD_L1 0x7c010164
+#define KVM_INST_MTMSR 0x7c000124
+
+#define KVM_INST_WRTEE 0x7c000106
+#define KVM_INST_WRTEEI_0 0x7c000146
+#define KVM_INST_WRTEEI_1 0x7c008146
+
+#define KVM_INST_MTSRIN 0x7c0001e4
+
+static bool kvm_patching_worked = true;
+char kvm_tmp[1024 * 1024];
+static int kvm_tmp_index;
+
+static inline void kvm_patch_ins(u32 *inst, u32 new_inst)
+{
+ *inst = new_inst;
+ flush_icache_range((ulong)inst, (ulong)inst + 4);
+}
+
+static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
+{
+#ifdef CONFIG_64BIT
+ kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
+#else
+ kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000fffc));
+#endif
+}
+
+static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
+{
+#ifdef CONFIG_64BIT
+ kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
+#else
+ kvm_patch_ins(inst, KVM_INST_LWZ | rt | ((addr + 4) & 0x0000fffc));
+#endif
+}
+
+static void kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
+{
+ kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000ffff));
+}
+
+static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
+{
+#ifdef CONFIG_64BIT
+ kvm_patch_ins(inst, KVM_INST_STD | rt | (addr & 0x0000fffc));
+#else
+ kvm_patch_ins(inst, KVM_INST_STW | rt | ((addr + 4) & 0x0000fffc));
+#endif
+}
+
+static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
+{
+ kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc));
+}
+
+static void kvm_patch_ins_nop(u32 *inst)
+{
+ kvm_patch_ins(inst, KVM_INST_NOP);
+}
+
+static void kvm_patch_ins_b(u32 *inst, int addr)
+{
+#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC_BOOK3S)
+ /* On relocatable kernels interrupts handlers and our code
+ can be in different regions, so we don't patch them */
+
+ if ((ulong)inst < (ulong)&__end_interrupts)
+ return;
+#endif
+
+ kvm_patch_ins(inst, KVM_INST_B | (addr & KVM_INST_B_MASK));
+}
+
+static u32 *kvm_alloc(int len)
+{
+ u32 *p;
+
+ if ((kvm_tmp_index + len) > ARRAY_SIZE(kvm_tmp)) {
+ printk(KERN_ERR "KVM: No more space (%d + %d)\n",
+ kvm_tmp_index, len);
+ kvm_patching_worked = false;
+ return NULL;
+ }
+
+ p = (void*)&kvm_tmp[kvm_tmp_index];
+ kvm_tmp_index += len;
+
+ return p;
+}
+
+extern u32 kvm_emulate_mtmsrd_branch_offs;
+extern u32 kvm_emulate_mtmsrd_reg_offs;
+extern u32 kvm_emulate_mtmsrd_orig_ins_offs;
+extern u32 kvm_emulate_mtmsrd_len;
+extern u32 kvm_emulate_mtmsrd[];
+
+static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
+{
+ u32 *p;
+ int distance_start;
+ int distance_end;
+ ulong next_inst;
+
+ p = kvm_alloc(kvm_emulate_mtmsrd_len * 4);
+ if (!p)
+ return;
+
+ /* Find out where we are and put everything there */
+ distance_start = (ulong)p - (ulong)inst;
+ next_inst = ((ulong)inst + 4);
+ distance_end = next_inst - (ulong)&p[kvm_emulate_mtmsrd_branch_offs];
+
+ /* Make sure we only write valid b instructions */
+ if (distance_start > KVM_INST_B_MAX) {
+ kvm_patching_worked = false;
+ return;
+ }
+
+ /* Modify the chunk to fit the invocation */
+ memcpy(p, kvm_emulate_mtmsrd, kvm_emulate_mtmsrd_len * 4);
+ p[kvm_emulate_mtmsrd_branch_offs] |= distance_end & KVM_INST_B_MASK;
+ switch (get_rt(rt)) {
+ case 30:
+ kvm_patch_ins_ll(&p[kvm_emulate_mtmsrd_reg_offs],
+ magic_var(scratch2), KVM_RT_30);
+ break;
+ case 31:
+ kvm_patch_ins_ll(&p[kvm_emulate_mtmsrd_reg_offs],
+ magic_var(scratch1), KVM_RT_30);
+ break;
+ default:
+ p[kvm_emulate_mtmsrd_reg_offs] |= rt;
+ break;
+ }
+
+ p[kvm_emulate_mtmsrd_orig_ins_offs] = *inst;
+ flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsrd_len * 4);
+
+ /* Patch the invocation */
+ kvm_patch_ins_b(inst, distance_start);
+}
+
+extern u32 kvm_emulate_mtmsr_branch_offs;
+extern u32 kvm_emulate_mtmsr_reg1_offs;
+extern u32 kvm_emulate_mtmsr_reg2_offs;
+extern u32 kvm_emulate_mtmsr_orig_ins_offs;
+extern u32 kvm_emulate_mtmsr_len;
+extern u32 kvm_emulate_mtmsr[];
+
+static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
+{
+ u32 *p;
+ int distance_start;
+ int distance_end;
+ ulong next_inst;
+
+ p = kvm_alloc(kvm_emulate_mtmsr_len * 4);
+ if (!p)
+ return;
+
+ /* Find out where we are and put everything there */
+ distance_start = (ulong)p - (ulong)inst;
+ next_inst = ((ulong)inst + 4);
+ distance_end = next_inst - (ulong)&p[kvm_emulate_mtmsr_branch_offs];
+
+ /* Make sure we only write valid b instructions */
+ if (distance_start > KVM_INST_B_MAX) {
+ kvm_patching_worked = false;
+ return;
+ }
+
+ /* Modify the chunk to fit the invocation */
+ memcpy(p, kvm_emulate_mtmsr, kvm_emulate_mtmsr_len * 4);
+ p[kvm_emulate_mtmsr_branch_offs] |= distance_end & KVM_INST_B_MASK;
+
+ /* Make clobbered registers work too */
+ switch (get_rt(rt)) {
+ case 30:
+ kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg1_offs],
+ magic_var(scratch2), KVM_RT_30);
+ kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg2_offs],
+ magic_var(scratch2), KVM_RT_30);
+ break;
+ case 31:
+ kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg1_offs],
+ magic_var(scratch1), KVM_RT_30);
+ kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg2_offs],
+ magic_var(scratch1), KVM_RT_30);
+ break;
+ default:
+ p[kvm_emulate_mtmsr_reg1_offs] |= rt;
+ p[kvm_emulate_mtmsr_reg2_offs] |= rt;
+ break;
+ }
+
+ p[kvm_emulate_mtmsr_orig_ins_offs] = *inst;
+ flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsr_len * 4);
+
+ /* Patch the invocation */
+ kvm_patch_ins_b(inst, distance_start);
+}
+
+#ifdef CONFIG_BOOKE
+
+extern u32 kvm_emulate_wrtee_branch_offs;
+extern u32 kvm_emulate_wrtee_reg_offs;
+extern u32 kvm_emulate_wrtee_orig_ins_offs;
+extern u32 kvm_emulate_wrtee_len;
+extern u32 kvm_emulate_wrtee[];
+
+static void kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
+{
+ u32 *p;
+ int distance_start;
+ int distance_end;
+ ulong next_inst;
+
+ p = kvm_alloc(kvm_emulate_wrtee_len * 4);
+ if (!p)
+ return;
+
+ /* Find out where we are and put everything there */
+ distance_start = (ulong)p - (ulong)inst;
+ next_inst = ((ulong)inst + 4);
+ distance_end = next_inst - (ulong)&p[kvm_emulate_wrtee_branch_offs];
+
+ /* Make sure we only write valid b instructions */
+ if (distance_start > KVM_INST_B_MAX) {
+ kvm_patching_worked = false;
+ return;
+ }
+
+ /* Modify the chunk to fit the invocation */
+ memcpy(p, kvm_emulate_wrtee, kvm_emulate_wrtee_len * 4);
+ p[kvm_emulate_wrtee_branch_offs] |= distance_end & KVM_INST_B_MASK;
+
+ if (imm_one) {
+ p[kvm_emulate_wrtee_reg_offs] =
+ KVM_INST_LI | __PPC_RT(R30) | MSR_EE;
+ } else {
+ /* Make clobbered registers work too */
+ switch (get_rt(rt)) {
+ case 30:
+ kvm_patch_ins_ll(&p[kvm_emulate_wrtee_reg_offs],
+ magic_var(scratch2), KVM_RT_30);
+ break;
+ case 31:
+ kvm_patch_ins_ll(&p[kvm_emulate_wrtee_reg_offs],
+ magic_var(scratch1), KVM_RT_30);
+ break;
+ default:
+ p[kvm_emulate_wrtee_reg_offs] |= rt;
+ break;
+ }
+ }
+
+ p[kvm_emulate_wrtee_orig_ins_offs] = *inst;
+ flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrtee_len * 4);
+
+ /* Patch the invocation */
+ kvm_patch_ins_b(inst, distance_start);
+}
+
+extern u32 kvm_emulate_wrteei_0_branch_offs;
+extern u32 kvm_emulate_wrteei_0_len;
+extern u32 kvm_emulate_wrteei_0[];
+
+static void kvm_patch_ins_wrteei_0(u32 *inst)
+{
+ u32 *p;
+ int distance_start;
+ int distance_end;
+ ulong next_inst;
+
+ p = kvm_alloc(kvm_emulate_wrteei_0_len * 4);
+ if (!p)
+ return;
+
+ /* Find out where we are and put everything there */
+ distance_start = (ulong)p - (ulong)inst;
+ next_inst = ((ulong)inst + 4);
+ distance_end = next_inst - (ulong)&p[kvm_emulate_wrteei_0_branch_offs];
+
+ /* Make sure we only write valid b instructions */
+ if (distance_start > KVM_INST_B_MAX) {
+ kvm_patching_worked = false;
+ return;
+ }
+
+ memcpy(p, kvm_emulate_wrteei_0, kvm_emulate_wrteei_0_len * 4);
+ p[kvm_emulate_wrteei_0_branch_offs] |= distance_end & KVM_INST_B_MASK;
+ flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrteei_0_len * 4);
+
+ /* Patch the invocation */
+ kvm_patch_ins_b(inst, distance_start);
+}
+
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+
+extern u32 kvm_emulate_mtsrin_branch_offs;
+extern u32 kvm_emulate_mtsrin_reg1_offs;
+extern u32 kvm_emulate_mtsrin_reg2_offs;
+extern u32 kvm_emulate_mtsrin_orig_ins_offs;
+extern u32 kvm_emulate_mtsrin_len;
+extern u32 kvm_emulate_mtsrin[];
+
+static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
+{
+ u32 *p;
+ int distance_start;
+ int distance_end;
+ ulong next_inst;
+
+ p = kvm_alloc(kvm_emulate_mtsrin_len * 4);
+ if (!p)
+ return;
+
+ /* Find out where we are and put everything there */
+ distance_start = (ulong)p - (ulong)inst;
+ next_inst = ((ulong)inst + 4);
+ distance_end = next_inst - (ulong)&p[kvm_emulate_mtsrin_branch_offs];
+
+ /* Make sure we only write valid b instructions */
+ if (distance_start > KVM_INST_B_MAX) {
+ kvm_patching_worked = false;
+ return;
+ }
+
+ /* Modify the chunk to fit the invocation */
+ memcpy(p, kvm_emulate_mtsrin, kvm_emulate_mtsrin_len * 4);
+ p[kvm_emulate_mtsrin_branch_offs] |= distance_end & KVM_INST_B_MASK;
+ p[kvm_emulate_mtsrin_reg1_offs] |= (rb << 10);
+ p[kvm_emulate_mtsrin_reg2_offs] |= rt;
+ p[kvm_emulate_mtsrin_orig_ins_offs] = *inst;
+ flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtsrin_len * 4);
+
+ /* Patch the invocation */
+ kvm_patch_ins_b(inst, distance_start);
+}
+
+#endif
+
+static void kvm_map_magic_page(void *data)
+{
+ u32 *features = data;
+
+ ulong in[8] = {0};
+ ulong out[8];
+
+ in[0] = KVM_MAGIC_PAGE;
+ in[1] = KVM_MAGIC_PAGE | MAGIC_PAGE_FLAG_NOT_MAPPED_NX;
+
+ epapr_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE));
+
+ *features = out[0];
+}
+
+static void kvm_check_ins(u32 *inst, u32 features)
+{
+ u32 _inst = *inst;
+ u32 inst_no_rt = _inst & ~KVM_MASK_RT;
+ u32 inst_rt = _inst & KVM_MASK_RT;
+
+ switch (inst_no_rt) {
+ /* Loads */
+ case KVM_INST_MFMSR:
+ kvm_patch_ins_ld(inst, magic_var(msr), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SPRG0):
+ kvm_patch_ins_ld(inst, magic_var(sprg0), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SPRG1):
+ kvm_patch_ins_ld(inst, magic_var(sprg1), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SPRG2):
+ kvm_patch_ins_ld(inst, magic_var(sprg2), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SPRG3):
+ kvm_patch_ins_ld(inst, magic_var(sprg3), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SRR0):
+ kvm_patch_ins_ld(inst, magic_var(srr0), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SRR1):
+ kvm_patch_ins_ld(inst, magic_var(srr1), inst_rt);
+ break;
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MFSPR(SPRN_DEAR):
+#else
+ case KVM_INST_MFSPR(SPRN_DAR):
+#endif
+ kvm_patch_ins_ld(inst, magic_var(dar), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_DSISR):
+ kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt);
+ break;
+
+#ifdef CONFIG_PPC_BOOK3E_MMU
+ case KVM_INST_MFSPR(SPRN_MAS0):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(mas0), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_MAS1):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(mas1), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_MAS2):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_ld(inst, magic_var(mas2), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_MAS3):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(mas7_3) + 4, inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_MAS4):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(mas4), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_MAS6):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(mas6), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_MAS7):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(mas7_3), inst_rt);
+ break;
+#endif /* CONFIG_PPC_BOOK3E_MMU */
+
+ case KVM_INST_MFSPR(SPRN_SPRG4):
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MFSPR(SPRN_SPRG4R):
+#endif
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_ld(inst, magic_var(sprg4), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SPRG5):
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MFSPR(SPRN_SPRG5R):
+#endif
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_ld(inst, magic_var(sprg5), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SPRG6):
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MFSPR(SPRN_SPRG6R):
+#endif
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_ld(inst, magic_var(sprg6), inst_rt);
+ break;
+ case KVM_INST_MFSPR(SPRN_SPRG7):
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MFSPR(SPRN_SPRG7R):
+#endif
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_ld(inst, magic_var(sprg7), inst_rt);
+ break;
+
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MFSPR(SPRN_ESR):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(esr), inst_rt);
+ break;
+#endif
+
+ case KVM_INST_MFSPR(SPRN_PIR):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_lwz(inst, magic_var(pir), inst_rt);
+ break;
+
+
+ /* Stores */
+ case KVM_INST_MTSPR(SPRN_SPRG0):
+ kvm_patch_ins_std(inst, magic_var(sprg0), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SPRG1):
+ kvm_patch_ins_std(inst, magic_var(sprg1), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SPRG2):
+ kvm_patch_ins_std(inst, magic_var(sprg2), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SPRG3):
+ kvm_patch_ins_std(inst, magic_var(sprg3), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SRR0):
+ kvm_patch_ins_std(inst, magic_var(srr0), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SRR1):
+ kvm_patch_ins_std(inst, magic_var(srr1), inst_rt);
+ break;
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MTSPR(SPRN_DEAR):
+#else
+ case KVM_INST_MTSPR(SPRN_DAR):
+#endif
+ kvm_patch_ins_std(inst, magic_var(dar), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_DSISR):
+ kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt);
+ break;
+#ifdef CONFIG_PPC_BOOK3E_MMU
+ case KVM_INST_MTSPR(SPRN_MAS0):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(mas0), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_MAS1):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(mas1), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_MAS2):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_std(inst, magic_var(mas2), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_MAS3):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(mas7_3) + 4, inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_MAS4):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(mas4), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_MAS6):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(mas6), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_MAS7):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(mas7_3), inst_rt);
+ break;
+#endif /* CONFIG_PPC_BOOK3E_MMU */
+
+ case KVM_INST_MTSPR(SPRN_SPRG4):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_std(inst, magic_var(sprg4), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SPRG5):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_std(inst, magic_var(sprg5), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SPRG6):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_std(inst, magic_var(sprg6), inst_rt);
+ break;
+ case KVM_INST_MTSPR(SPRN_SPRG7):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_std(inst, magic_var(sprg7), inst_rt);
+ break;
+
+#ifdef CONFIG_BOOKE
+ case KVM_INST_MTSPR(SPRN_ESR):
+ if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
+ kvm_patch_ins_stw(inst, magic_var(esr), inst_rt);
+ break;
+#endif
+
+ /* Nops */
+ case KVM_INST_TLBSYNC:
+ kvm_patch_ins_nop(inst);
+ break;
+
+ /* Rewrites */
+ case KVM_INST_MTMSRD_L1:
+ kvm_patch_ins_mtmsrd(inst, inst_rt);
+ break;
+ case KVM_INST_MTMSR:
+ case KVM_INST_MTMSRD_L0:
+ kvm_patch_ins_mtmsr(inst, inst_rt);
+ break;
+#ifdef CONFIG_BOOKE
+ case KVM_INST_WRTEE:
+ kvm_patch_ins_wrtee(inst, inst_rt, 0);
+ break;
+#endif
+ }
+
+ switch (inst_no_rt & ~KVM_MASK_RB) {
+#ifdef CONFIG_PPC_BOOK3S_32
+ case KVM_INST_MTSRIN:
+ if (features & KVM_MAGIC_FEAT_SR) {
+ u32 inst_rb = _inst & KVM_MASK_RB;
+ kvm_patch_ins_mtsrin(inst, inst_rt, inst_rb);
+ }
+ break;
+ break;
+#endif
+ }
+
+ switch (_inst) {
+#ifdef CONFIG_BOOKE
+ case KVM_INST_WRTEEI_0:
+ kvm_patch_ins_wrteei_0(inst);
+ break;
+
+ case KVM_INST_WRTEEI_1:
+ kvm_patch_ins_wrtee(inst, 0, 1);
+ break;
+#endif
+ }
+}
+
+extern u32 kvm_template_start[];
+extern u32 kvm_template_end[];
+
+static void kvm_use_magic_page(void)
+{
+ u32 *p;
+ u32 *start, *end;
+ u32 tmp;
+ u32 features;
+
+ /* Tell the host to map the magic page to -4096 on all CPUs */
+ on_each_cpu(kvm_map_magic_page, &features, 1);
+
+ /* Quick self-test to see if the mapping works */
+ if (__get_user(tmp, (u32*)KVM_MAGIC_PAGE)) {
+ kvm_patching_worked = false;
+ return;
+ }
+
+ /* Now loop through all code and find instructions */
+ start = (void*)_stext;
+ end = (void*)_etext;
+
+ /*
+ * Being interrupted in the middle of patching would
+ * be bad for SPRG4-7, which KVM can't keep in sync
+ * with emulated accesses because reads don't trap.
+ */
+ local_irq_disable();
+
+ for (p = start; p < end; p++) {
+ /* Avoid patching the template code */
+ if (p >= kvm_template_start && p < kvm_template_end) {
+ p = kvm_template_end - 1;
+ continue;
+ }
+ kvm_check_ins(p, features);
+ }
+
+ local_irq_enable();
+
+ printk(KERN_INFO "KVM: Live patching for a fast VM %s\n",
+ kvm_patching_worked ? "worked" : "failed");
+}
+
+static __init void kvm_free_tmp(void)
+{
+ free_reserved_area(&kvm_tmp[kvm_tmp_index],
+ &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL);
+}
+
+static int __init kvm_guest_init(void)
+{
+ if (!kvm_para_available())
+ goto free_tmp;
+
+ if (!epapr_paravirt_enabled)
+ goto free_tmp;
+
+ if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE))
+ kvm_use_magic_page();
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Enable napping */
+ powersave_nap = 1;
+#endif
+
+free_tmp:
+ kvm_free_tmp();
+
+ return 0;
+}
+
+postcore_initcall(kvm_guest_init);
diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S
new file mode 100644
index 00000000000..e100ff324a8
--- /dev/null
+++ b/arch/powerpc/kernel/kvm_emul.S
@@ -0,0 +1,348 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright SUSE Linux Products GmbH 2010
+ * Copyright 2010-2011 Freescale Semiconductor, Inc.
+ *
+ * Authors: Alexander Graf <agraf@suse.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+
+#define KVM_MAGIC_PAGE (-4096)
+
+#ifdef CONFIG_64BIT
+#define LL64(reg, offs, reg2) ld reg, (offs)(reg2)
+#define STL64(reg, offs, reg2) std reg, (offs)(reg2)
+#else
+#define LL64(reg, offs, reg2) lwz reg, (offs + 4)(reg2)
+#define STL64(reg, offs, reg2) stw reg, (offs + 4)(reg2)
+#endif
+
+#define SCRATCH_SAVE \
+ /* Enable critical section. We are critical if \
+ shared->critical == r1 */ \
+ STL64(r1, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0); \
+ \
+ /* Save state */ \
+ PPC_STL r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0); \
+ PPC_STL r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0); \
+ mfcr r31; \
+ stw r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0);
+
+#define SCRATCH_RESTORE \
+ /* Restore state */ \
+ PPC_LL r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0); \
+ lwz r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0); \
+ mtcr r30; \
+ PPC_LL r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0); \
+ \
+ /* Disable critical section. We are critical if \
+ shared->critical == r1 and r2 is always != r1 */ \
+ STL64(r2, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0);
+
+.global kvm_template_start
+kvm_template_start:
+
+.global kvm_emulate_mtmsrd
+kvm_emulate_mtmsrd:
+
+ SCRATCH_SAVE
+
+ /* Put MSR & ~(MSR_EE|MSR_RI) in r31 */
+ LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+ lis r30, (~(MSR_EE | MSR_RI))@h
+ ori r30, r30, (~(MSR_EE | MSR_RI))@l
+ and r31, r31, r30
+
+ /* OR the register's (MSR_EE|MSR_RI) on MSR */
+kvm_emulate_mtmsrd_reg:
+ ori r30, r0, 0
+ andi. r30, r30, (MSR_EE|MSR_RI)
+ or r31, r31, r30
+
+ /* Put MSR back into magic page */
+ STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ /* Check if we have to fetch an interrupt */
+ lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0)
+ cmpwi r31, 0
+ beq+ no_check
+
+ /* Check if we may trigger an interrupt */
+ andi. r30, r30, MSR_EE
+ beq no_check
+
+ SCRATCH_RESTORE
+
+ /* Nag hypervisor */
+kvm_emulate_mtmsrd_orig_ins:
+ tlbsync
+
+ b kvm_emulate_mtmsrd_branch
+
+no_check:
+
+ SCRATCH_RESTORE
+
+ /* Go back to caller */
+kvm_emulate_mtmsrd_branch:
+ b .
+kvm_emulate_mtmsrd_end:
+
+.global kvm_emulate_mtmsrd_branch_offs
+kvm_emulate_mtmsrd_branch_offs:
+ .long (kvm_emulate_mtmsrd_branch - kvm_emulate_mtmsrd) / 4
+
+.global kvm_emulate_mtmsrd_reg_offs
+kvm_emulate_mtmsrd_reg_offs:
+ .long (kvm_emulate_mtmsrd_reg - kvm_emulate_mtmsrd) / 4
+
+.global kvm_emulate_mtmsrd_orig_ins_offs
+kvm_emulate_mtmsrd_orig_ins_offs:
+ .long (kvm_emulate_mtmsrd_orig_ins - kvm_emulate_mtmsrd) / 4
+
+.global kvm_emulate_mtmsrd_len
+kvm_emulate_mtmsrd_len:
+ .long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4
+
+
+#define MSR_SAFE_BITS (MSR_EE | MSR_RI)
+#define MSR_CRITICAL_BITS ~MSR_SAFE_BITS
+
+.global kvm_emulate_mtmsr
+kvm_emulate_mtmsr:
+
+ SCRATCH_SAVE
+
+ /* Fetch old MSR in r31 */
+ LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ /* Find the changed bits between old and new MSR */
+kvm_emulate_mtmsr_reg1:
+ ori r30, r0, 0
+ xor r31, r30, r31
+
+ /* Check if we need to really do mtmsr */
+ LOAD_REG_IMMEDIATE(r30, MSR_CRITICAL_BITS)
+ and. r31, r31, r30
+
+ /* No critical bits changed? Maybe we can stay in the guest. */
+ beq maybe_stay_in_guest
+
+do_mtmsr:
+
+ SCRATCH_RESTORE
+
+ /* Just fire off the mtmsr if it's critical */
+kvm_emulate_mtmsr_orig_ins:
+ mtmsr r0
+
+ b kvm_emulate_mtmsr_branch
+
+maybe_stay_in_guest:
+
+ /* Get the target register in r30 */
+kvm_emulate_mtmsr_reg2:
+ ori r30, r0, 0
+
+ /* Put MSR into magic page because we don't call mtmsr */
+ STL64(r30, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ /* Check if we have to fetch an interrupt */
+ lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0)
+ cmpwi r31, 0
+ beq+ no_mtmsr
+
+ /* Check if we may trigger an interrupt */
+ andi. r31, r30, MSR_EE
+ bne do_mtmsr
+
+no_mtmsr:
+
+ SCRATCH_RESTORE
+
+ /* Go back to caller */
+kvm_emulate_mtmsr_branch:
+ b .
+kvm_emulate_mtmsr_end:
+
+.global kvm_emulate_mtmsr_branch_offs
+kvm_emulate_mtmsr_branch_offs:
+ .long (kvm_emulate_mtmsr_branch - kvm_emulate_mtmsr) / 4
+
+.global kvm_emulate_mtmsr_reg1_offs
+kvm_emulate_mtmsr_reg1_offs:
+ .long (kvm_emulate_mtmsr_reg1 - kvm_emulate_mtmsr) / 4
+
+.global kvm_emulate_mtmsr_reg2_offs
+kvm_emulate_mtmsr_reg2_offs:
+ .long (kvm_emulate_mtmsr_reg2 - kvm_emulate_mtmsr) / 4
+
+.global kvm_emulate_mtmsr_orig_ins_offs
+kvm_emulate_mtmsr_orig_ins_offs:
+ .long (kvm_emulate_mtmsr_orig_ins - kvm_emulate_mtmsr) / 4
+
+.global kvm_emulate_mtmsr_len
+kvm_emulate_mtmsr_len:
+ .long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4
+
+/* also used for wrteei 1 */
+.global kvm_emulate_wrtee
+kvm_emulate_wrtee:
+
+ SCRATCH_SAVE
+
+ /* Fetch old MSR in r31 */
+ LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ /* Insert new MSR[EE] */
+kvm_emulate_wrtee_reg:
+ ori r30, r0, 0
+ rlwimi r31, r30, 0, MSR_EE
+
+ /*
+ * If MSR[EE] is now set, check for a pending interrupt.
+ * We could skip this if MSR[EE] was already on, but that
+ * should be rare, so don't bother.
+ */
+ andi. r30, r30, MSR_EE
+
+ /* Put MSR into magic page because we don't call wrtee */
+ STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ beq no_wrtee
+
+ /* Check if we have to fetch an interrupt */
+ lwz r30, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0)
+ cmpwi r30, 0
+ bne do_wrtee
+
+no_wrtee:
+ SCRATCH_RESTORE
+
+ /* Go back to caller */
+kvm_emulate_wrtee_branch:
+ b .
+
+do_wrtee:
+ SCRATCH_RESTORE
+
+ /* Just fire off the wrtee if it's critical */
+kvm_emulate_wrtee_orig_ins:
+ wrtee r0
+
+ b kvm_emulate_wrtee_branch
+
+kvm_emulate_wrtee_end:
+
+.global kvm_emulate_wrtee_branch_offs
+kvm_emulate_wrtee_branch_offs:
+ .long (kvm_emulate_wrtee_branch - kvm_emulate_wrtee) / 4
+
+.global kvm_emulate_wrtee_reg_offs
+kvm_emulate_wrtee_reg_offs:
+ .long (kvm_emulate_wrtee_reg - kvm_emulate_wrtee) / 4
+
+.global kvm_emulate_wrtee_orig_ins_offs
+kvm_emulate_wrtee_orig_ins_offs:
+ .long (kvm_emulate_wrtee_orig_ins - kvm_emulate_wrtee) / 4
+
+.global kvm_emulate_wrtee_len
+kvm_emulate_wrtee_len:
+ .long (kvm_emulate_wrtee_end - kvm_emulate_wrtee) / 4
+
+.global kvm_emulate_wrteei_0
+kvm_emulate_wrteei_0:
+ SCRATCH_SAVE
+
+ /* Fetch old MSR in r31 */
+ LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ /* Remove MSR_EE from old MSR */
+ rlwinm r31, r31, 0, ~MSR_EE
+
+ /* Write new MSR value back */
+ STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+
+ SCRATCH_RESTORE
+
+ /* Go back to caller */
+kvm_emulate_wrteei_0_branch:
+ b .
+kvm_emulate_wrteei_0_end:
+
+.global kvm_emulate_wrteei_0_branch_offs
+kvm_emulate_wrteei_0_branch_offs:
+ .long (kvm_emulate_wrteei_0_branch - kvm_emulate_wrteei_0) / 4
+
+.global kvm_emulate_wrteei_0_len
+kvm_emulate_wrteei_0_len:
+ .long (kvm_emulate_wrteei_0_end - kvm_emulate_wrteei_0) / 4
+
+.global kvm_emulate_mtsrin
+kvm_emulate_mtsrin:
+
+ SCRATCH_SAVE
+
+ LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0)
+ andi. r31, r31, MSR_DR | MSR_IR
+ beq kvm_emulate_mtsrin_reg1
+
+ SCRATCH_RESTORE
+
+kvm_emulate_mtsrin_orig_ins:
+ nop
+ b kvm_emulate_mtsrin_branch
+
+kvm_emulate_mtsrin_reg1:
+ /* rX >> 26 */
+ rlwinm r30,r0,6,26,29
+
+kvm_emulate_mtsrin_reg2:
+ stw r0, (KVM_MAGIC_PAGE + KVM_MAGIC_SR)(r30)
+
+ SCRATCH_RESTORE
+
+ /* Go back to caller */
+kvm_emulate_mtsrin_branch:
+ b .
+kvm_emulate_mtsrin_end:
+
+.global kvm_emulate_mtsrin_branch_offs
+kvm_emulate_mtsrin_branch_offs:
+ .long (kvm_emulate_mtsrin_branch - kvm_emulate_mtsrin) / 4
+
+.global kvm_emulate_mtsrin_reg1_offs
+kvm_emulate_mtsrin_reg1_offs:
+ .long (kvm_emulate_mtsrin_reg1 - kvm_emulate_mtsrin) / 4
+
+.global kvm_emulate_mtsrin_reg2_offs
+kvm_emulate_mtsrin_reg2_offs:
+ .long (kvm_emulate_mtsrin_reg2 - kvm_emulate_mtsrin) / 4
+
+.global kvm_emulate_mtsrin_orig_ins_offs
+kvm_emulate_mtsrin_orig_ins_offs:
+ .long (kvm_emulate_mtsrin_orig_ins - kvm_emulate_mtsrin) / 4
+
+.global kvm_emulate_mtsrin_len
+kvm_emulate_mtsrin_len:
+ .long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4
+
+.global kvm_template_end
+kvm_template_end:
diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
index 858f28ac8a0..97ec8557f97 100644
--- a/arch/powerpc/kernel/l2cr_6xx.S
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -1,6 +1,6 @@
/*
L2CR functions
- Copyright © 1997-1998 by PowerLogix R & D, Inc.
+ Copyright © 1997-1998 by PowerLogix R & D, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -151,7 +151,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
/**** Might be a good idea to set L2DO here - to prevent instructions
from getting into the cache. But since we invalidate
the next time we enable the cache it doesn't really matter.
- Don't do this unless you accomodate all processor variations.
+ Don't do this unless you accommodate all processor variations.
The bit moved on the 7450.....
****/
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 5e6ddfa474c..936258881c9 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -4,6 +4,9 @@
#include <linux/serial_core.h>
#include <linux/console.h>
#include <linux/pci.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/serial_reg.h>
#include <asm/io.h>
#include <asm/mmu.h>
#include <asm/prom.h>
@@ -31,25 +34,63 @@ static struct legacy_serial_info {
int irq_check_parent;
phys_addr_t taddr;
} legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS];
+
+static struct of_device_id legacy_serial_parents[] __initdata = {
+ {.type = "soc",},
+ {.type = "tsi-bridge",},
+ {.type = "opb", },
+ {.compatible = "ibm,opb",},
+ {.compatible = "simple-bus",},
+ {.compatible = "wrs,epld-localbus",},
+ {},
+};
+
static unsigned int legacy_serial_count;
static int legacy_serial_console = -1;
+static const upf_t legacy_port_flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
+ UPF_SHARE_IRQ | UPF_FIXED_PORT;
+
+static unsigned int tsi_serial_in(struct uart_port *p, int offset)
+{
+ unsigned int tmp;
+ offset = offset << p->regshift;
+ if (offset == UART_IIR) {
+ tmp = readl(p->membase + (UART_IIR & ~3));
+ return (tmp >> 16) & 0xff; /* UART_IIR % 4 == 2 */
+ } else
+ return readb(p->membase + offset);
+}
+
+static void tsi_serial_out(struct uart_port *p, int offset, int value)
+{
+ offset = offset << p->regshift;
+ if (!((offset == UART_IER) && (value & UART_IER_UUE)))
+ writeb(value, p->membase + offset);
+}
+
static int __init add_legacy_port(struct device_node *np, int want_index,
int iotype, phys_addr_t base,
phys_addr_t taddr, unsigned long irq,
upf_t flags, int irq_check_parent)
{
- const u32 *clk, *spd;
+ const __be32 *clk, *spd, *rs;
u32 clock = BASE_BAUD * 16;
+ u32 shift = 0;
int index;
/* get clock freq. if present */
- clk = get_property(np, "clock-frequency", NULL);
+ clk = of_get_property(np, "clock-frequency", NULL);
if (clk && *clk)
- clock = *clk;
+ clock = be32_to_cpup(clk);
/* get default speed if present */
- spd = get_property(np, "current-speed", NULL);
+ spd = of_get_property(np, "current-speed", NULL);
+
+ /* get register shift if present */
+ rs = of_get_property(np, "reg-shift", NULL);
+ if (rs && *rs)
+ shift = be32_to_cpup(rs);
/* If we have a location index, then try to use it */
if (want_index >= 0 && want_index < MAX_LEGACY_SERIAL_PORTS)
@@ -67,7 +108,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
legacy_serial_count = index + 1;
/* Check if there is a port who already claimed our slot */
- if (legacy_serial_infos[index].np != 0) {
+ if (legacy_serial_infos[index].np != NULL) {
/* if we still have some room, move it, else override */
if (legacy_serial_count < MAX_LEGACY_SERIAL_PORTS) {
printk(KERN_DEBUG "Moved legacy port %d -> %d\n",
@@ -89,16 +130,23 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
legacy_serial_ports[index].iobase = base;
else
legacy_serial_ports[index].mapbase = base;
+
legacy_serial_ports[index].iotype = iotype;
legacy_serial_ports[index].uartclk = clock;
legacy_serial_ports[index].irq = irq;
legacy_serial_ports[index].flags = flags;
+ legacy_serial_ports[index].regshift = shift;
legacy_serial_infos[index].taddr = taddr;
legacy_serial_infos[index].np = of_node_get(np);
legacy_serial_infos[index].clock = clock;
- legacy_serial_infos[index].speed = spd ? *spd : 0;
+ legacy_serial_infos[index].speed = spd ? be32_to_cpup(spd) : 0;
legacy_serial_infos[index].irq_check_parent = irq_check_parent;
+ if (iotype == UPIO_TSI) {
+ legacy_serial_ports[index].serial_in = tsi_serial_in;
+ legacy_serial_ports[index].serial_out = tsi_serial_out;
+ }
+
printk(KERN_DEBUG "Found legacy serial port %d for %s\n",
index, np->full_name);
printk(KERN_DEBUG " %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n",
@@ -114,14 +162,21 @@ static int __init add_legacy_soc_port(struct device_node *np,
struct device_node *soc_dev)
{
u64 addr;
- const u32 *addrp;
- upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ;
+ const __be32 *addrp;
struct device_node *tsi = of_get_parent(np);
/* We only support ports that have a clock frequency properly
* encoded in the device-tree.
*/
- if (get_property(np, "clock-frequency", NULL) == NULL)
+ if (of_get_property(np, "clock-frequency", NULL) == NULL)
+ return -1;
+
+ /* if reg-offset don't try to use it */
+ if ((of_get_property(np, "reg-offset", NULL) != NULL))
+ return -1;
+
+ /* if rtas uses this device, don't try to use it as well */
+ if (of_get_property(np, "used-by-rtas", NULL) != NULL)
return -1;
/* Get the address */
@@ -137,15 +192,17 @@ static int __init add_legacy_soc_port(struct device_node *np,
* IO port value. It will be fixed up later along with the irq
*/
if (tsi && !strcmp(tsi->type, "tsi-bridge"))
- return add_legacy_port(np, -1, UPIO_TSI, addr, addr, NO_IRQ, flags, 0);
+ return add_legacy_port(np, -1, UPIO_TSI, addr, addr,
+ NO_IRQ, legacy_port_flags, 0);
else
- return add_legacy_port(np, -1, UPIO_MEM, addr, addr, NO_IRQ, flags, 0);
+ return add_legacy_port(np, -1, UPIO_MEM, addr, addr,
+ NO_IRQ, legacy_port_flags, 0);
}
static int __init add_legacy_isa_port(struct device_node *np,
struct device_node *isa_brg)
{
- const u32 *reg;
+ const __be32 *reg;
const char *typep;
int index = -1;
u64 taddr;
@@ -153,18 +210,18 @@ static int __init add_legacy_isa_port(struct device_node *np,
DBG(" -> add_legacy_isa_port(%s)\n", np->full_name);
/* Get the ISA port number */
- reg = get_property(np, "reg", NULL);
+ reg = of_get_property(np, "reg", NULL);
if (reg == NULL)
return -1;
/* Verify it's an IO port, we don't support anything else */
- if (!(reg[0] & 0x00000001))
+ if (!(be32_to_cpu(reg[0]) & 0x00000001))
return -1;
/* Now look for an "ibm,aix-loc" property that gives us ordering
* if any...
*/
- typep = get_property(np, "ibm,aix-loc", NULL);
+ typep = of_get_property(np, "ibm,aix-loc", NULL);
/* If we have a location index, then use it */
if (typep && *typep == 'S')
@@ -173,14 +230,19 @@ static int __init add_legacy_isa_port(struct device_node *np,
/* Translate ISA address. If it fails, we still register the port
* with no translated address so that it can be picked up as an IO
* port later by the serial driver
+ *
+ * Note: Don't even try on P8 lpc, we know it's not directly mapped
*/
- taddr = of_translate_address(np, reg);
- if (taddr == OF_BAD_ADDR)
+ if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc")) {
+ taddr = of_translate_address(np, reg);
+ if (taddr == OF_BAD_ADDR)
+ taddr = 0;
+ } else
taddr = 0;
/* Add port, irq will be dealt with later */
- return add_legacy_port(np, index, UPIO_PORT, reg[1], taddr,
- NO_IRQ, UPF_BOOT_AUTOCONF, 0);
+ return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]),
+ taddr, NO_IRQ, legacy_port_flags, 0);
}
@@ -189,7 +251,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
struct device_node *pci_dev)
{
u64 addr, base;
- const u32 *addrp;
+ const __be32 *addrp;
unsigned int flags;
int iotype, index = -1, lindex = 0;
@@ -202,7 +264,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
* compatible UARTs on PCI need all sort of quirks (port offsets
* etc...) that this code doesn't know about
*/
- if (get_property(np, "clock-frequency", NULL) == NULL)
+ if (of_get_property(np, "clock-frequency", NULL) == NULL)
return -1;
/* Get the PCI address. Assume BAR 0 */
@@ -222,15 +284,15 @@ static int __init add_legacy_pci_port(struct device_node *np,
if (iotype == UPIO_MEM)
base = addr;
else
- base = addrp[2];
+ base = of_read_number(&addrp[2], 1);
/* Try to guess an index... If we have subdevices of the pci dev,
* we get to their "reg" property
*/
if (np != pci_dev) {
- const u32 *reg = get_property(np, "reg", NULL);
- if (reg && (*reg < 4))
- index = lindex = *reg;
+ const __be32 *reg = of_get_property(np, "reg", NULL);
+ if (reg && (be32_to_cpup(reg) < 4))
+ index = lindex = be32_to_cpup(reg);
}
/* Local index means it's the Nth port in the PCI chip. Unfortunately
@@ -239,9 +301,9 @@ static int __init add_legacy_pci_port(struct device_node *np,
* doesn't work for these settings, you'll have to add your own special
* cases here
*/
- if (device_is_compatible(pci_dev, "pci13a8,152") ||
- device_is_compatible(pci_dev, "pci13a8,154") ||
- device_is_compatible(pci_dev, "pci13a8,158")) {
+ if (of_device_is_compatible(pci_dev, "pci13a8,152") ||
+ of_device_is_compatible(pci_dev, "pci13a8,154") ||
+ of_device_is_compatible(pci_dev, "pci13a8,158")) {
addr += 0x200 * lindex;
base += 0x200 * lindex;
} else {
@@ -253,25 +315,40 @@ static int __init add_legacy_pci_port(struct device_node *np,
* IO port value. It will be fixed up later along with the irq
*/
return add_legacy_port(np, index, iotype, base, addr, NO_IRQ,
- UPF_BOOT_AUTOCONF, np != pci_dev);
+ legacy_port_flags, np != pci_dev);
}
#endif
static void __init setup_legacy_serial_console(int console)
{
- struct legacy_serial_info *info =
- &legacy_serial_infos[console];
+ struct legacy_serial_info *info = &legacy_serial_infos[console];
+ struct plat_serial8250_port *port = &legacy_serial_ports[console];
void __iomem *addr;
+ unsigned int stride;
- if (info->taddr == 0)
- return;
- addr = ioremap(info->taddr, 0x1000);
- if (addr == NULL)
- return;
+ stride = 1 << port->regshift;
+
+ /* Check if a translated MMIO address has been found */
+ if (info->taddr) {
+ addr = ioremap(info->taddr, 0x1000);
+ if (addr == NULL)
+ return;
+ udbg_uart_init_mmio(addr, stride);
+ } else {
+ /* Check if it's PIO and we support untranslated PIO */
+ if (port->iotype == UPIO_PORT && isa_io_special)
+ udbg_uart_init_pio(port->iobase, stride);
+ else
+ return;
+ }
+
+ /* Try to query the current speed */
if (info->speed == 0)
- info->speed = udbg_probe_uart_speed(addr, info->clock);
+ info->speed = udbg_probe_uart_speed(info->clock);
+
+ /* Set it up */
DBG("default console speed = %d\n", info->speed);
- udbg_init_uart(addr, info->speed, info->clock);
+ udbg_uart_setup(info->speed, info->clock);
}
/*
@@ -292,7 +369,7 @@ void __init find_legacy_serial_ports(void)
DBG(" -> find_legacy_serial_port()\n");
/* Now find out if one of these is out firmware console */
- path = get_property(of_chosen, "linux,stdout-path", NULL);
+ path = of_get_property(of_chosen, "linux,stdout-path", NULL);
if (path != NULL) {
stdout = of_find_node_by_path(path);
if (stdout)
@@ -301,39 +378,35 @@ void __init find_legacy_serial_ports(void)
DBG(" no linux,stdout-path !\n");
}
- /* First fill our array with SOC ports */
- for (np = NULL; (np = of_find_compatible_node(np, "serial", "ns16550")) != NULL;) {
- struct device_node *soc = of_get_parent(np);
- if (soc && !strcmp(soc->type, "soc")) {
- index = add_legacy_soc_port(np, np);
- if (index >= 0 && np == stdout)
- legacy_serial_console = index;
+ /* Iterate over all the 16550 ports, looking for known parents */
+ for_each_compatible_node(np, "serial", "ns16550") {
+ struct device_node *parent = of_get_parent(np);
+ if (!parent)
+ continue;
+ if (of_match_node(legacy_serial_parents, parent) != NULL) {
+ if (of_device_is_available(np)) {
+ index = add_legacy_soc_port(np, np);
+ if (index >= 0 && np == stdout)
+ legacy_serial_console = index;
+ }
}
- of_node_put(soc);
+ of_node_put(parent);
}
- /* First fill our array with ISA ports */
- for (np = NULL; (np = of_find_node_by_type(np, "serial"));) {
+ /* Next, fill our array with ISA ports */
+ for_each_node_by_type(np, "serial") {
struct device_node *isa = of_get_parent(np);
- if (isa && !strcmp(isa->name, "isa")) {
- index = add_legacy_isa_port(np, isa);
- if (index >= 0 && np == stdout)
- legacy_serial_console = index;
+ if (isa && (!strcmp(isa->name, "isa") ||
+ !strcmp(isa->name, "lpc"))) {
+ if (of_device_is_available(np)) {
+ index = add_legacy_isa_port(np, isa);
+ if (index >= 0 && np == stdout)
+ legacy_serial_console = index;
+ }
}
of_node_put(isa);
}
- /* First fill our array with tsi-bridge ports */
- for (np = NULL; (np = of_find_compatible_node(np, "serial", "ns16550")) != NULL;) {
- struct device_node *tsi = of_get_parent(np);
- if (tsi && !strcmp(tsi->type, "tsi-bridge")) {
- index = add_legacy_soc_port(np, np);
- if (index >= 0 && np == stdout)
- legacy_serial_console = index;
- }
- of_node_put(tsi);
- }
-
#ifdef CONFIG_PCI
/* Next, try to locate PCI ports */
for (np = NULL; (np = of_find_all_nodes(np));) {
@@ -346,14 +419,14 @@ void __init find_legacy_serial_ports(void)
of_node_put(parent);
continue;
}
- /* Check for known pciclass, and also check wether we have
+ /* Check for known pciclass, and also check whether we have
* a device with child nodes for ports or not
*/
- if (device_is_compatible(np, "pciclass,0700") ||
- device_is_compatible(np, "pciclass,070002"))
+ if (of_device_is_compatible(np, "pciclass,0700") ||
+ of_device_is_compatible(np, "pciclass,070002"))
pci = np;
- else if (device_is_compatible(parent, "pciclass,0700") ||
- device_is_compatible(parent, "pciclass,070002"))
+ else if (of_device_is_compatible(parent, "pciclass,0700") ||
+ of_device_is_compatible(parent, "pciclass,070002"))
pci = parent;
else {
of_node_put(parent);
@@ -400,6 +473,11 @@ static void __init fixup_port_irq(int index,
return;
port->irq = virq;
+
+#ifdef CONFIG_SERIAL_8250_FSL
+ if (of_device_is_compatible(np, "fsl,ns16550"))
+ port->handle_irq = fsl8250_handle_irq;
+#endif
}
static void __init fixup_port_pio(int index,
@@ -456,8 +534,8 @@ static int __init serial_dev_init(void)
return -ENODEV;
/*
- * Before we register the platfrom serial devices, we need
- * to fixup their interrutps and their IO ports.
+ * Before we register the platform serial devices, we need
+ * to fixup their interrupts and their IO ports.
*/
DBG("Fixing serial ports interrupts and IO ports ...\n");
@@ -477,28 +555,28 @@ static int __init serial_dev_init(void)
return platform_device_register(&serial_device);
}
-arch_initcall(serial_dev_init);
+device_initcall(serial_dev_init);
+#ifdef CONFIG_SERIAL_8250_CONSOLE
/*
* This is called very early, as part of console_init() (typically just after
* time_init()). This function is respondible for trying to find a good
* default console on serial ports. It tries to match the open firmware
- * default output with one of the available serial console drivers, either
- * one of the platform serial ports that have been probed earlier by
- * find_legacy_serial_ports() or some more platform specific ones.
+ * default output with one of the available serial console drivers that have
+ * been probed earlier by find_legacy_serial_ports()
*/
static int __init check_legacy_serial_console(void)
{
struct device_node *prom_stdout = NULL;
- int speed = 0, offset = 0;
+ int i, speed = 0, offset = 0;
const char *name;
- const u32 *spd;
+ const __be32 *spd;
DBG(" -> check_legacy_serial_console()\n");
/* The user has requested a console so this is already set up. */
- if (strstr(saved_command_line, "console=")) {
+ if (strstr(boot_command_line, "console=")) {
DBG(" console was specified !\n");
return -EBUSY;
}
@@ -514,7 +592,7 @@ static int __init check_legacy_serial_console(void)
}
/* We are getting a weird phandle from OF ... */
/* ... So use the full path instead */
- name = get_property(of_chosen, "linux,stdout-path", NULL);
+ name = of_get_property(of_chosen, "linux,stdout-path", NULL);
if (name == NULL) {
DBG(" no linux,stdout-path !\n");
return -ENODEV;
@@ -526,40 +604,29 @@ static int __init check_legacy_serial_console(void)
}
DBG("stdout is %s\n", prom_stdout->full_name);
- name = get_property(prom_stdout, "name", NULL);
+ name = of_get_property(prom_stdout, "name", NULL);
if (!name) {
DBG(" stdout package has no name !\n");
goto not_found;
}
- spd = get_property(prom_stdout, "current-speed", NULL);
+ spd = of_get_property(prom_stdout, "current-speed", NULL);
if (spd)
- speed = *spd;
+ speed = be32_to_cpup(spd);
- if (0)
- ;
-#ifdef CONFIG_SERIAL_8250_CONSOLE
- else if (strcmp(name, "serial") == 0) {
- int i;
- /* Look for it in probed array */
- for (i = 0; i < legacy_serial_count; i++) {
- if (prom_stdout != legacy_serial_infos[i].np)
- continue;
- offset = i;
- speed = legacy_serial_infos[i].speed;
- break;
- }
- if (i >= legacy_serial_count)
- goto not_found;
+ if (strcmp(name, "serial") != 0)
+ goto not_found;
+
+ /* Look for it in probed array */
+ for (i = 0; i < legacy_serial_count; i++) {
+ if (prom_stdout != legacy_serial_infos[i].np)
+ continue;
+ offset = i;
+ speed = legacy_serial_infos[i].speed;
+ break;
}
-#endif /* CONFIG_SERIAL_8250_CONSOLE */
-#ifdef CONFIG_SERIAL_PMACZILOG_CONSOLE
- else if (strcmp(name, "ch-a") == 0)
- offset = 0;
- else if (strcmp(name, "ch-b") == 0)
- offset = 1;
-#endif /* CONFIG_SERIAL_PMACZILOG_CONSOLE */
- else
+ if (i >= legacy_serial_count)
goto not_found;
+
of_node_put(prom_stdout);
DBG("Found serial console at ttyS%d\n", offset);
@@ -578,3 +645,4 @@ static int __init check_legacy_serial_console(void)
}
console_initcall(check_legacy_serial_console);
+#endif /* CONFIG_SERIAL_8250_CONSOLE */
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
deleted file mode 100644
index 41c05dcd68f..00000000000
--- a/arch/powerpc/kernel/lparcfg.c
+++ /dev/null
@@ -1,620 +0,0 @@
-/*
- * PowerPC64 LPAR Configuration Information Driver
- *
- * Dave Engebretsen engebret@us.ibm.com
- * Copyright (c) 2003 Dave Engebretsen
- * Will Schmidt willschm@us.ibm.com
- * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation.
- * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation.
- * Nathan Lynch nathanl@austin.ibm.com
- * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * This driver creates a proc file at /proc/ppc64/lparcfg which contains
- * keyword - value pairs that specify the configuration of the partition.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-#include <linux/seq_file.h>
-#include <asm/uaccess.h>
-#include <asm/iseries/hv_lp_config.h>
-#include <asm/lppaca.h>
-#include <asm/hvcall.h>
-#include <asm/firmware.h>
-#include <asm/rtas.h>
-#include <asm/system.h>
-#include <asm/time.h>
-#include <asm/prom.h>
-#include <asm/vdso_datapage.h>
-
-#define MODULE_VERS "1.7"
-#define MODULE_NAME "lparcfg"
-
-/* #define LPARCFG_DEBUG */
-
-static struct proc_dir_entry *proc_ppc64_lparcfg;
-#define LPARCFG_BUFF_SIZE 4096
-
-/*
- * Track sum of all purrs across all processors. This is used to further
- * calculate usage values by different applications
- */
-static unsigned long get_purr(void)
-{
- unsigned long sum_purr = 0;
- int cpu;
-
- for_each_possible_cpu(cpu) {
- if (firmware_has_feature(FW_FEATURE_ISERIES))
- sum_purr += lppaca[cpu].emulated_time_base;
- else {
- struct cpu_usage *cu;
-
- cu = &per_cpu(cpu_usage_array, cpu);
- sum_purr += cu->current_tb;
- }
- }
- return sum_purr;
-}
-
-#ifdef CONFIG_PPC_ISERIES
-
-/*
- * Methods used to fetch LPAR data when running on an iSeries platform.
- */
-static int iseries_lparcfg_data(struct seq_file *m, void *v)
-{
- unsigned long pool_id;
- int shared, entitled_capacity, max_entitled_capacity;
- int processors, max_processors;
- unsigned long purr = get_purr();
-
- shared = (int)(get_lppaca()->shared_proc);
-
- seq_printf(m, "system_active_processors=%d\n",
- (int)HvLpConfig_getSystemPhysicalProcessors());
-
- seq_printf(m, "system_potential_processors=%d\n",
- (int)HvLpConfig_getSystemPhysicalProcessors());
-
- processors = (int)HvLpConfig_getPhysicalProcessors();
- seq_printf(m, "partition_active_processors=%d\n", processors);
-
- max_processors = (int)HvLpConfig_getMaxPhysicalProcessors();
- seq_printf(m, "partition_potential_processors=%d\n", max_processors);
-
- if (shared) {
- entitled_capacity = HvLpConfig_getSharedProcUnits();
- max_entitled_capacity = HvLpConfig_getMaxSharedProcUnits();
- } else {
- entitled_capacity = processors * 100;
- max_entitled_capacity = max_processors * 100;
- }
- seq_printf(m, "partition_entitled_capacity=%d\n", entitled_capacity);
-
- seq_printf(m, "partition_max_entitled_capacity=%d\n",
- max_entitled_capacity);
-
- if (shared) {
- pool_id = HvLpConfig_getSharedPoolIndex();
- seq_printf(m, "pool=%d\n", (int)pool_id);
- seq_printf(m, "pool_capacity=%d\n",
- (int)(HvLpConfig_getNumProcsInSharedPool(pool_id) *
- 100));
- seq_printf(m, "purr=%ld\n", purr);
- }
-
- seq_printf(m, "shared_processor_mode=%d\n", shared);
-
- return 0;
-}
-
-#else /* CONFIG_PPC_ISERIES */
-
-static int iseries_lparcfg_data(struct seq_file *m, void *v)
-{
- return 0;
-}
-
-#endif /* CONFIG_PPC_ISERIES */
-
-#ifdef CONFIG_PPC_PSERIES
-/*
- * Methods used to fetch LPAR data when running on a pSeries platform.
- */
-/* find a better place for this function... */
-static void log_plpar_hcall_return(unsigned long rc, char *tag)
-{
- if (rc == 0) /* success, return */
- return;
-/* check for null tag ? */
- if (rc == H_HARDWARE)
- printk(KERN_INFO
- "plpar-hcall (%s) failed with hardware fault\n", tag);
- else if (rc == H_FUNCTION)
- printk(KERN_INFO
- "plpar-hcall (%s) failed; function not allowed\n", tag);
- else if (rc == H_AUTHORITY)
- printk(KERN_INFO
- "plpar-hcall (%s) failed; not authorized to this"
- " function\n", tag);
- else if (rc == H_PARAMETER)
- printk(KERN_INFO "plpar-hcall (%s) failed; Bad parameter(s)\n",
- tag);
- else
- printk(KERN_INFO
- "plpar-hcall (%s) failed with unexpected rc(0x%lx)\n",
- tag, rc);
-
-}
-
-/*
- * H_GET_PPP hcall returns info in 4 parms.
- * entitled_capacity,unallocated_capacity,
- * aggregation, resource_capability).
- *
- * R4 = Entitled Processor Capacity Percentage.
- * R5 = Unallocated Processor Capacity Percentage.
- * R6 (AABBCCDDEEFFGGHH).
- * XXXX - reserved (0)
- * XXXX - reserved (0)
- * XXXX - Group Number
- * XXXX - Pool Number.
- * R7 (IIJJKKLLMMNNOOPP).
- * XX - reserved. (0)
- * XX - bit 0-6 reserved (0). bit 7 is Capped indicator.
- * XX - variable processor Capacity Weight
- * XX - Unallocated Variable Processor Capacity Weight.
- * XXXX - Active processors in Physical Processor Pool.
- * XXXX - Processors active on platform.
- */
-static unsigned int h_get_ppp(unsigned long *entitled,
- unsigned long *unallocated,
- unsigned long *aggregation,
- unsigned long *resource)
-{
- unsigned long rc;
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- rc = plpar_hcall(H_GET_PPP, retbuf);
-
- *entitled = retbuf[0];
- *unallocated = retbuf[1];
- *aggregation = retbuf[2];
- *resource = retbuf[3];
-
- log_plpar_hcall_return(rc, "H_GET_PPP");
-
- return rc;
-}
-
-static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
-{
- unsigned long rc;
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- rc = plpar_hcall(H_PIC, retbuf);
-
- *pool_idle_time = retbuf[0];
- *num_procs = retbuf[1];
-
- if (rc != H_AUTHORITY)
- log_plpar_hcall_return(rc, "H_PIC");
-}
-
-#define SPLPAR_CHARACTERISTICS_TOKEN 20
-#define SPLPAR_MAXLENGTH 1026*(sizeof(char))
-
-/*
- * parse_system_parameter_string()
- * Retrieve the potential_processors, max_entitled_capacity and friends
- * through the get-system-parameter rtas call. Replace keyword strings as
- * necessary.
- */
-static void parse_system_parameter_string(struct seq_file *m)
-{
- int call_status;
-
- unsigned char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
- if (!local_buffer) {
- printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
- __FILE__, __FUNCTION__, __LINE__);
- return;
- }
-
- spin_lock(&rtas_data_buf_lock);
- memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH);
- call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
- NULL,
- SPLPAR_CHARACTERISTICS_TOKEN,
- __pa(rtas_data_buf),
- RTAS_DATA_BUF_SIZE);
- memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH);
- spin_unlock(&rtas_data_buf_lock);
-
- if (call_status != 0) {
- printk(KERN_INFO
- "%s %s Error calling get-system-parameter (0x%x)\n",
- __FILE__, __FUNCTION__, call_status);
- } else {
- int splpar_strlen;
- int idx, w_idx;
- char *workbuffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
- if (!workbuffer) {
- printk(KERN_ERR "%s %s kmalloc failure at line %d \n",
- __FILE__, __FUNCTION__, __LINE__);
- kfree(local_buffer);
- return;
- }
-#ifdef LPARCFG_DEBUG
- printk(KERN_INFO "success calling get-system-parameter \n");
-#endif
- splpar_strlen = local_buffer[0] * 256 + local_buffer[1];
- local_buffer += 2; /* step over strlen value */
-
- memset(workbuffer, 0, SPLPAR_MAXLENGTH);
- w_idx = 0;
- idx = 0;
- while ((*local_buffer) && (idx < splpar_strlen)) {
- workbuffer[w_idx++] = local_buffer[idx++];
- if ((local_buffer[idx] == ',')
- || (local_buffer[idx] == '\0')) {
- workbuffer[w_idx] = '\0';
- if (w_idx) {
- /* avoid the empty string */
- seq_printf(m, "%s\n", workbuffer);
- }
- memset(workbuffer, 0, SPLPAR_MAXLENGTH);
- idx++; /* skip the comma */
- w_idx = 0;
- } else if (local_buffer[idx] == '=') {
- /* code here to replace workbuffer contents
- with different keyword strings */
- if (0 == strcmp(workbuffer, "MaxEntCap")) {
- strcpy(workbuffer,
- "partition_max_entitled_capacity");
- w_idx = strlen(workbuffer);
- }
- if (0 == strcmp(workbuffer, "MaxPlatProcs")) {
- strcpy(workbuffer,
- "system_potential_processors");
- w_idx = strlen(workbuffer);
- }
- }
- }
- kfree(workbuffer);
- local_buffer -= 2; /* back up over strlen value */
- }
- kfree(local_buffer);
-}
-
-/* Return the number of processors in the system.
- * This function reads through the device tree and counts
- * the virtual processors, this does not include threads.
- */
-static int lparcfg_count_active_processors(void)
-{
- struct device_node *cpus_dn = NULL;
- int count = 0;
-
- while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) {
-#ifdef LPARCFG_DEBUG
- printk(KERN_ERR "cpus_dn %p \n", cpus_dn);
-#endif
- count++;
- }
- return count;
-}
-
-static int pseries_lparcfg_data(struct seq_file *m, void *v)
-{
- int partition_potential_processors;
- int partition_active_processors;
- struct device_node *rtas_node;
- const int *lrdrp = NULL;
-
- rtas_node = find_path_device("/rtas");
- if (rtas_node)
- lrdrp = get_property(rtas_node, "ibm,lrdr-capacity", NULL);
-
- if (lrdrp == NULL) {
- partition_potential_processors = vdso_data->processorCount;
- } else {
- partition_potential_processors = *(lrdrp + 4);
- }
-
- partition_active_processors = lparcfg_count_active_processors();
-
- if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
- unsigned long h_entitled, h_unallocated;
- unsigned long h_aggregation, h_resource;
- unsigned long pool_idle_time, pool_procs;
- unsigned long purr;
-
- h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
- &h_resource);
-
- seq_printf(m, "R4=0x%lx\n", h_entitled);
- seq_printf(m, "R5=0x%lx\n", h_unallocated);
- seq_printf(m, "R6=0x%lx\n", h_aggregation);
- seq_printf(m, "R7=0x%lx\n", h_resource);
-
- purr = get_purr();
-
- /* this call handles the ibm,get-system-parameter contents */
- parse_system_parameter_string(m);
-
- seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled);
-
- seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
-
- seq_printf(m, "system_active_processors=%ld\n",
- (h_resource >> 0 * 8) & 0xffff);
-
- /* pool related entries are apropriate for shared configs */
- if (lppaca[0].shared_proc) {
-
- h_pic(&pool_idle_time, &pool_procs);
-
- seq_printf(m, "pool=%ld\n",
- (h_aggregation >> 0 * 8) & 0xffff);
-
- /* report pool_capacity in percentage */
- seq_printf(m, "pool_capacity=%ld\n",
- ((h_resource >> 2 * 8) & 0xffff) * 100);
-
- seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
-
- seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
- }
-
- seq_printf(m, "unallocated_capacity_weight=%ld\n",
- (h_resource >> 4 * 8) & 0xFF);
-
- seq_printf(m, "capacity_weight=%ld\n",
- (h_resource >> 5 * 8) & 0xFF);
-
- seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
-
- seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
-
- seq_printf(m, "purr=%ld\n", purr);
-
- } else { /* non SPLPAR case */
-
- seq_printf(m, "system_active_processors=%d\n",
- partition_potential_processors);
-
- seq_printf(m, "system_potential_processors=%d\n",
- partition_potential_processors);
-
- seq_printf(m, "partition_max_entitled_capacity=%d\n",
- partition_potential_processors * 100);
-
- seq_printf(m, "partition_entitled_capacity=%d\n",
- partition_active_processors * 100);
- }
-
- seq_printf(m, "partition_active_processors=%d\n",
- partition_active_processors);
-
- seq_printf(m, "partition_potential_processors=%d\n",
- partition_potential_processors);
-
- seq_printf(m, "shared_processor_mode=%d\n", lppaca[0].shared_proc);
-
- return 0;
-}
-
-/*
- * Interface for changing system parameters (variable capacity weight
- * and entitled capacity). Format of input is "param_name=value";
- * anything after value is ignored. Valid parameters at this time are
- * "partition_entitled_capacity" and "capacity_weight". We use
- * H_SET_PPP to alter parameters.
- *
- * This function should be invoked only on systems with
- * FW_FEATURE_SPLPAR.
- */
-static ssize_t lparcfg_write(struct file *file, const char __user * buf,
- size_t count, loff_t * off)
-{
- char *kbuf;
- char *tmp;
- u64 new_entitled, *new_entitled_ptr = &new_entitled;
- u8 new_weight, *new_weight_ptr = &new_weight;
-
- unsigned long current_entitled; /* parameters for h_get_ppp */
- unsigned long dummy;
- unsigned long resource;
- u8 current_weight;
-
- ssize_t retval = -ENOMEM;
-
- kbuf = kmalloc(count, GFP_KERNEL);
- if (!kbuf)
- goto out;
-
- retval = -EFAULT;
- if (copy_from_user(kbuf, buf, count))
- goto out;
-
- retval = -EINVAL;
- kbuf[count - 1] = '\0';
- tmp = strchr(kbuf, '=');
- if (!tmp)
- goto out;
-
- *tmp++ = '\0';
-
- if (!strcmp(kbuf, "partition_entitled_capacity")) {
- char *endp;
- *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
- if (endp == tmp)
- goto out;
- new_weight_ptr = &current_weight;
- } else if (!strcmp(kbuf, "capacity_weight")) {
- char *endp;
- *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
- if (endp == tmp)
- goto out;
- new_entitled_ptr = &current_entitled;
- } else
- goto out;
-
- /* Get our current parameters */
- retval = h_get_ppp(&current_entitled, &dummy, &dummy, &resource);
- if (retval) {
- retval = -EIO;
- goto out;
- }
-
- current_weight = (resource >> 5 * 8) & 0xFF;
-
- pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
- __FUNCTION__, current_entitled, current_weight);
-
- pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
- __FUNCTION__, *new_entitled_ptr, *new_weight_ptr);
-
- retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr,
- *new_weight_ptr);
-
- if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
- retval = count;
- } else if (retval == H_BUSY) {
- retval = -EBUSY;
- } else if (retval == H_HARDWARE) {
- retval = -EIO;
- } else if (retval == H_PARAMETER) {
- retval = -EINVAL;
- } else {
- printk(KERN_WARNING "%s: received unknown hv return code %ld",
- __FUNCTION__, retval);
- retval = -EIO;
- }
-
-out:
- kfree(kbuf);
- return retval;
-}
-
-#else /* CONFIG_PPC_PSERIES */
-
-static int pseries_lparcfg_data(struct seq_file *m, void *v)
-{
- return 0;
-}
-
-static ssize_t lparcfg_write(struct file *file, const char __user * buf,
- size_t count, loff_t * off)
-{
- return count;
-}
-
-#endif /* CONFIG_PPC_PSERIES */
-
-static int lparcfg_data(struct seq_file *m, void *v)
-{
- struct device_node *rootdn;
- const char *model = "";
- const char *system_id = "";
- const char *tmp;
- const unsigned int *lp_index_ptr;
- unsigned int lp_index = 0;
-
- seq_printf(m, "%s %s \n", MODULE_NAME, MODULE_VERS);
-
- rootdn = find_path_device("/");
- if (rootdn) {
- tmp = get_property(rootdn, "model", NULL);
- if (tmp) {
- model = tmp;
- /* Skip "IBM," - see platforms/iseries/dt.c */
- if (firmware_has_feature(FW_FEATURE_ISERIES))
- model += 4;
- }
- tmp = get_property(rootdn, "system-id", NULL);
- if (tmp) {
- system_id = tmp;
- /* Skip "IBM," - see platforms/iseries/dt.c */
- if (firmware_has_feature(FW_FEATURE_ISERIES))
- system_id += 4;
- }
- lp_index_ptr = get_property(rootdn, "ibm,partition-no", NULL);
- if (lp_index_ptr)
- lp_index = *lp_index_ptr;
- }
- seq_printf(m, "serial_number=%s\n", system_id);
- seq_printf(m, "system_type=%s\n", model);
- seq_printf(m, "partition_id=%d\n", (int)lp_index);
-
- if (firmware_has_feature(FW_FEATURE_ISERIES))
- return iseries_lparcfg_data(m, v);
- return pseries_lparcfg_data(m, v);
-}
-
-static int lparcfg_open(struct inode *inode, struct file *file)
-{
- return single_open(file, lparcfg_data, NULL);
-}
-
-struct file_operations lparcfg_fops = {
- .owner = THIS_MODULE,
- .read = seq_read,
- .open = lparcfg_open,
- .release = single_release,
-};
-
-int __init lparcfg_init(void)
-{
- struct proc_dir_entry *ent;
- mode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
-
- /* Allow writing if we have FW_FEATURE_SPLPAR */
- if (firmware_has_feature(FW_FEATURE_SPLPAR) &&
- !firmware_has_feature(FW_FEATURE_ISERIES)) {
- lparcfg_fops.write = lparcfg_write;
- mode |= S_IWUSR;
- }
-
- ent = create_proc_entry("ppc64/lparcfg", mode, NULL);
- if (ent) {
- ent->proc_fops = &lparcfg_fops;
- ent->data = kmalloc(LPARCFG_BUFF_SIZE, GFP_KERNEL);
- if (!ent->data) {
- printk(KERN_ERR
- "Failed to allocate buffer for lparcfg\n");
- remove_proc_entry("lparcfg", ent->parent);
- return -ENOMEM;
- }
- } else {
- printk(KERN_ERR "Failed to create ppc64/lparcfg\n");
- return -EIO;
- }
-
- proc_ppc64_lparcfg = ent;
- return 0;
-}
-
-void __exit lparcfg_cleanup(void)
-{
- if (proc_ppc64_lparcfg) {
- kfree(proc_ppc64_lparcfg->data);
- remove_proc_entry("lparcfg", proc_ppc64_lparcfg->parent);
- }
-}
-
-module_init(lparcfg_init);
-module_exit(lparcfg_cleanup);
-MODULE_DESCRIPTION("Interface for LPAR configuration data");
-MODULE_AUTHOR("Dave Engebretsen");
-MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/lparmap.c b/arch/powerpc/kernel/lparmap.c
deleted file mode 100644
index 584d1e3c013..00000000000
--- a/arch/powerpc/kernel/lparmap.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (C) 2005 Stephen Rothwell IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <asm/mmu.h>
-#include <asm/pgtable.h>
-#include <asm/iseries/lpar_map.h>
-
-const struct LparMap __attribute__((__section__(".text"))) xLparMap = {
- .xNumberEsids = HvEsidsToMap,
- .xNumberRanges = HvRangesToMap,
- .xSegmentTableOffs = STAB0_PAGE,
-
- .xEsids = {
- { .xKernelEsid = GET_ESID(PAGE_OFFSET),
- .xKernelVsid = KERNEL_VSID(PAGE_OFFSET), },
- { .xKernelEsid = GET_ESID(VMALLOC_START),
- .xKernelVsid = KERNEL_VSID(VMALLOC_START), },
- },
-
- .xRanges = {
- { .xPages = HvPagesToMap,
- .xOffset = 0,
- .xVPN = KERNEL_VSID(PAGE_OFFSET) << (SID_SHIFT - HW_PAGE_SHIFT),
- },
- },
-};
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index e60a0c544d6..015ae55c186 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -12,13 +12,41 @@
#include <linux/kexec.h>
#include <linux/reboot.h>
#include <linux/threads.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/irq.h>
+#include <linux/ftrace.h>
+
#include <asm/machdep.h>
-#include <asm/lmb.h>
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/sections.h>
+
+void machine_kexec_mask_interrupts(void) {
+ unsigned int i;
+ struct irq_desc *desc;
+
+ for_each_irq_desc(i, desc) {
+ struct irq_chip *chip;
+
+ chip = irq_desc_get_chip(desc);
+ if (!chip)
+ continue;
+
+ if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
+ chip->irq_eoi(&desc->irq_data);
+
+ if (chip->irq_mask)
+ chip->irq_mask(&desc->irq_data);
+
+ if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
+ chip->irq_disable(&desc->irq_data);
+ }
+}
void machine_crash_shutdown(struct pt_regs *regs)
{
- if (ppc_md.machine_crash_shutdown)
- ppc_md.machine_crash_shutdown(regs);
+ default_machine_crash_shutdown(regs);
}
/*
@@ -30,88 +58,224 @@ int machine_kexec_prepare(struct kimage *image)
{
if (ppc_md.machine_kexec_prepare)
return ppc_md.machine_kexec_prepare(image);
- /*
- * Fail if platform doesn't provide its own machine_kexec_prepare
- * implementation.
- */
- return -ENOSYS;
+ else
+ return default_machine_kexec_prepare(image);
}
void machine_kexec_cleanup(struct kimage *image)
{
- if (ppc_md.machine_kexec_cleanup)
- ppc_md.machine_kexec_cleanup(image);
+}
+
+void arch_crash_save_vmcoreinfo(void)
+{
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+ VMCOREINFO_SYMBOL(node_data);
+ VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#endif
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+ VMCOREINFO_SYMBOL(contig_page_data);
+#endif
+#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
+ VMCOREINFO_SYMBOL(vmemmap_list);
+ VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
+ VMCOREINFO_SYMBOL(mmu_psize_defs);
+ VMCOREINFO_STRUCT_SIZE(vmemmap_backing);
+ VMCOREINFO_OFFSET(vmemmap_backing, list);
+ VMCOREINFO_OFFSET(vmemmap_backing, phys);
+ VMCOREINFO_OFFSET(vmemmap_backing, virt_addr);
+ VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
+ VMCOREINFO_OFFSET(mmu_psize_def, shift);
+#endif
}
/*
* Do not allocate memory (or fail in any way) in machine_kexec().
* We are past the point of no return, committed to rebooting now.
*/
-NORET_TYPE void machine_kexec(struct kimage *image)
+void machine_kexec(struct kimage *image)
{
- if (ppc_md.machine_kexec)
- ppc_md.machine_kexec(image);
- else {
- /*
- * Fall back to normal restart if platform doesn't provide
- * its own kexec function, and user insist to kexec...
- */
- machine_restart(NULL);
- }
- for(;;);
-}
+ int save_ftrace_enabled;
-static int __init early_parse_crashk(char *p)
-{
- unsigned long size;
-
- if (!p)
- return 1;
+ save_ftrace_enabled = __ftrace_enabled_save();
- size = memparse(p, &p);
-
- if (*p == '@')
- crashk_res.start = memparse(p + 1, &p);
+ if (ppc_md.machine_kexec)
+ ppc_md.machine_kexec(image);
else
- crashk_res.start = KDUMP_KERNELBASE;
+ default_machine_kexec(image);
- crashk_res.end = crashk_res.start + size - 1;
+ __ftrace_enabled_restore(save_ftrace_enabled);
- return 0;
+ /* Fall back to normal restart if we're still alive. */
+ machine_restart(NULL);
+ for(;;);
}
-early_param("crashkernel", early_parse_crashk);
void __init reserve_crashkernel(void)
{
- unsigned long size;
+ unsigned long long crash_size, crash_base;
+ int ret;
- if (crashk_res.start == 0)
+ /* use common parsing */
+ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+ &crash_size, &crash_base);
+ if (ret == 0 && crash_size > 0) {
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
+ }
+
+ if (crashk_res.end == crashk_res.start) {
+ crashk_res.start = crashk_res.end = 0;
return;
+ }
/* We might have got these values via the command line or the
* device tree, either way sanitise them now. */
- size = crashk_res.end - crashk_res.start + 1;
+ crash_size = resource_size(&crashk_res);
+#ifndef CONFIG_NONSTATIC_KERNEL
if (crashk_res.start != KDUMP_KERNELBASE)
printk("Crash kernel location must be 0x%x\n",
KDUMP_KERNELBASE);
crashk_res.start = KDUMP_KERNELBASE;
- size = PAGE_ALIGN(size);
- crashk_res.end = crashk_res.start + size - 1;
+#else
+ if (!crashk_res.start) {
+#ifdef CONFIG_PPC64
+ /*
+ * On 64bit we split the RMO in half but cap it at half of
+ * a small SLB (128MB) since the crash kernel needs to place
+ * itself and some stacks to be in the first segment.
+ */
+ crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2));
+#else
+ crashk_res.start = KDUMP_KERNELBASE;
+#endif
+ }
+
+ crash_base = PAGE_ALIGN(crashk_res.start);
+ if (crash_base != crashk_res.start) {
+ printk("Crash kernel base must be aligned to 0x%lx\n",
+ PAGE_SIZE);
+ crashk_res.start = crash_base;
+ }
+
+#endif
+ crash_size = PAGE_ALIGN(crash_size);
+ crashk_res.end = crashk_res.start + crash_size - 1;
+
+ /* The crash region must not overlap the current kernel */
+ if (overlaps_crashkernel(__pa(_stext), _end - _stext)) {
+ printk(KERN_WARNING
+ "Crash kernel can not overlap current kernel\n");
+ crashk_res.start = crashk_res.end = 0;
+ return;
+ }
/* Crash kernel trumps memory limit */
if (memory_limit && memory_limit <= crashk_res.end) {
memory_limit = crashk_res.end + 1;
- printk("Adjusted memory limit for crashkernel, now 0x%lx\n",
- memory_limit);
+ printk("Adjusted memory limit for crashkernel, now 0x%llx\n",
+ memory_limit);
}
- lmb_reserve(crashk_res.start, size);
+ printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+ "for crashkernel (System RAM: %ldMB)\n",
+ (unsigned long)(crash_size >> 20),
+ (unsigned long)(crashk_res.start >> 20),
+ (unsigned long)(memblock_phys_mem_size() >> 20));
+
+ memblock_reserve(crashk_res.start, crash_size);
}
int overlaps_crashkernel(unsigned long start, unsigned long size)
{
return (start + size) > crashk_res.start && start <= crashk_res.end;
}
+
+/* Values we need to export to the second kernel via the device tree. */
+static phys_addr_t kernel_end;
+static phys_addr_t crashk_base;
+static phys_addr_t crashk_size;
+static unsigned long long mem_limit;
+
+static struct property kernel_end_prop = {
+ .name = "linux,kernel-end",
+ .length = sizeof(phys_addr_t),
+ .value = &kernel_end,
+};
+
+static struct property crashk_base_prop = {
+ .name = "linux,crashkernel-base",
+ .length = sizeof(phys_addr_t),
+ .value = &crashk_base
+};
+
+static struct property crashk_size_prop = {
+ .name = "linux,crashkernel-size",
+ .length = sizeof(phys_addr_t),
+ .value = &crashk_size,
+};
+
+static struct property memory_limit_prop = {
+ .name = "linux,memory-limit",
+ .length = sizeof(unsigned long long),
+ .value = &mem_limit,
+};
+
+#define cpu_to_be_ulong __PASTE(cpu_to_be, BITS_PER_LONG)
+
+static void __init export_crashk_values(struct device_node *node)
+{
+ struct property *prop;
+
+ /* There might be existing crash kernel properties, but we can't
+ * be sure what's in them, so remove them. */
+ prop = of_find_property(node, "linux,crashkernel-base", NULL);
+ if (prop)
+ of_remove_property(node, prop);
+
+ prop = of_find_property(node, "linux,crashkernel-size", NULL);
+ if (prop)
+ of_remove_property(node, prop);
+
+ if (crashk_res.start != 0) {
+ crashk_base = cpu_to_be_ulong(crashk_res.start),
+ of_add_property(node, &crashk_base_prop);
+ crashk_size = cpu_to_be_ulong(resource_size(&crashk_res));
+ of_add_property(node, &crashk_size_prop);
+ }
+
+ /*
+ * memory_limit is required by the kexec-tools to limit the
+ * crash regions to the actual memory used.
+ */
+ mem_limit = cpu_to_be_ulong(memory_limit);
+ of_update_property(node, &memory_limit_prop);
+}
+
+static int __init kexec_setup(void)
+{
+ struct device_node *node;
+ struct property *prop;
+
+ node = of_find_node_by_path("/chosen");
+ if (!node)
+ return -ENOENT;
+
+ /* remove any stale properties so ours can be found */
+ prop = of_find_property(node, kernel_end_prop.name, NULL);
+ if (prop)
+ of_remove_property(node, prop);
+
+ /* information needed by userspace when using default_machine_kexec */
+ kernel_end = cpu_to_be_ulong(__pa(_end));
+ of_add_property(node, &kernel_end_prop);
+
+ export_crashk_values(node);
+
+ of_node_put(node);
+ return 0;
+}
+late_initcall(kexec_setup);
diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c
index cbaa3419679..affe5dcce7f 100644
--- a/arch/powerpc/kernel/machine_kexec_32.c
+++ b/arch/powerpc/kernel/machine_kexec_32.c
@@ -16,10 +16,10 @@
#include <asm/hw_irq.h>
#include <asm/io.h>
-typedef NORET_TYPE void (*relocate_new_kernel_t)(
+typedef void (*relocate_new_kernel_t)(
unsigned long indirection_page,
unsigned long reboot_code_buffer,
- unsigned long start_address) ATTRIB_NORET;
+ unsigned long start_address) __noreturn;
/*
* This is a generic machine_kexec function suitable at least for
@@ -39,6 +39,10 @@ void default_machine_kexec(struct kimage *image)
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
+ /* mask each interrupt so we are in a more sane state for the
+ * kexec kernel */
+ machine_kexec_mask_interrupts();
+
page_list = image->head;
/* we need both effective and real address here */
@@ -51,7 +55,7 @@ void default_machine_kexec(struct kimage *image)
relocate_new_kernel_size);
flush_icache_range(reboot_code_buffer,
- reboot_code_buffer + KEXEC_CONTROL_CODE_SIZE);
+ reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
printk(KERN_INFO "Bye!\n");
/* now call it */
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index a24b09c2771..879b3aacac3 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -13,7 +13,11 @@
#include <linux/kexec.h>
#include <linux/smp.h>
#include <linux/thread_info.h>
+#include <linux/init_task.h>
#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/hardirq.h>
#include <asm/page.h>
#include <asm/current.h>
@@ -24,6 +28,7 @@
#include <asm/sections.h> /* _end */
#include <asm/prom.h>
#include <asm/smp.h>
+#include <asm/hw_breakpoint.h>
int default_machine_kexec_prepare(struct kimage *image)
{
@@ -70,10 +75,9 @@ int default_machine_kexec_prepare(struct kimage *image)
}
/* We also should not overwrite the tce tables */
- for (node = of_find_node_by_type(NULL, "pci"); node != NULL;
- node = of_find_node_by_type(node, "pci")) {
- basep = get_property(node, "linux,tce-base", NULL);
- sizep = get_property(node, "linux,tce-size", NULL);
+ for_each_node_by_type(node, "pci") {
+ basep = of_get_property(node, "linux,tce-base", NULL);
+ sizep = of_get_property(node, "linux,tce-size", NULL);
if (basep == NULL || sizep == NULL)
continue;
@@ -154,66 +158,115 @@ void kexec_copy_flush(struct kimage *image)
#ifdef CONFIG_SMP
-/* FIXME: we should schedule this function to be called on all cpus based
- * on calling the interrupts, but we would like to call it off irq level
- * so that the interrupt controller is clean.
- */
-void kexec_smp_down(void *arg)
+static int kexec_all_irq_disabled = 0;
+
+static void kexec_smp_down(void *arg)
{
+ local_irq_disable();
+ hard_irq_disable();
+
+ mb(); /* make sure our irqs are disabled before we say they are */
+ get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
+ while(kexec_all_irq_disabled == 0)
+ cpu_relax();
+ mb(); /* make sure all irqs are disabled before this */
+ hw_breakpoint_disable();
+ /*
+ * Now every CPU has IRQs off, we can clear out any pending
+ * IPIs and be sure that no more will come in after this.
+ */
if (ppc_md.kexec_cpu_down)
ppc_md.kexec_cpu_down(0, 1);
- local_irq_disable();
kexec_smp_wait();
/* NOTREACHED */
}
-static void kexec_prepare_cpus(void)
+static void kexec_prepare_cpus_wait(int wait_state)
{
int my_cpu, i, notified=-1;
- smp_call_function(kexec_smp_down, NULL, 0, /* wait */0);
+ hw_breakpoint_disable();
my_cpu = get_cpu();
-
- /* check the others cpus are now down (via paca hw cpu id == -1) */
- for (i=0; i < NR_CPUS; i++) {
+ /* Make sure each CPU has at least made it to the state we need.
+ *
+ * FIXME: There is a (slim) chance of a problem if not all of the CPUs
+ * are correctly onlined. If somehow we start a CPU on boot with RTAS
+ * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in
+ * time, the boot CPU will timeout. If it does eventually execute
+ * stuff, the secondary will start up (paca[].cpu_start was written) and
+ * get into a peculiar state. If the platform supports
+ * smp_ops->take_timebase(), the secondary CPU will probably be spinning
+ * in there. If not (i.e. pseries), the secondary will continue on and
+ * try to online itself/idle/etc. If it survives that, we need to find
+ * these possible-but-not-online-but-should-be CPUs and chaperone them
+ * into kexec_smp_wait().
+ */
+ for_each_online_cpu(i) {
if (i == my_cpu)
continue;
- while (paca[i].hw_cpu_id != -1) {
+ while (paca[i].kexec_state < wait_state) {
barrier();
- if (!cpu_possible(i)) {
- printk("kexec: cpu %d hw_cpu_id %d is not"
- " possible, ignoring\n",
- i, paca[i].hw_cpu_id);
- break;
- }
- if (!cpu_online(i)) {
- /* Fixme: this can be spinning in
- * pSeries_secondary_wait with a paca
- * waiting for it to go online.
- */
- printk("kexec: cpu %d hw_cpu_id %d is not"
- " online, ignoring\n",
- i, paca[i].hw_cpu_id);
- break;
- }
if (i != notified) {
- printk( "kexec: waiting for cpu %d (physical"
- " %d) to go down\n",
- i, paca[i].hw_cpu_id);
+ printk(KERN_INFO "kexec: waiting for cpu %d "
+ "(physical %d) to enter %i state\n",
+ i, paca[i].hw_cpu_id, wait_state);
notified = i;
}
}
}
+ mb();
+}
+
+/*
+ * We need to make sure each present CPU is online. The next kernel will scan
+ * the device tree and assume primary threads are online and query secondary
+ * threads via RTAS to online them if required. If we don't online primary
+ * threads, they will be stuck. However, we also online secondary threads as we
+ * may be using 'cede offline'. In this case RTAS doesn't see the secondary
+ * threads as offline -- and again, these CPUs will be stuck.
+ *
+ * So, we online all CPUs that should be running, including secondary threads.
+ */
+static void wake_offline_cpus(void)
+{
+ int cpu = 0;
+
+ for_each_present_cpu(cpu) {
+ if (!cpu_online(cpu)) {
+ printk(KERN_INFO "kexec: Waking offline cpu %d.\n",
+ cpu);
+ WARN_ON(cpu_up(cpu));
+ }
+ }
+}
+
+static void kexec_prepare_cpus(void)
+{
+ wake_offline_cpus();
+ smp_call_function(kexec_smp_down, NULL, /* wait */0);
+ local_irq_disable();
+ hard_irq_disable();
+
+ mb(); /* make sure IRQs are disabled before we say they are */
+ get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
+
+ kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF);
+ /* we are sure every CPU has IRQs off at this point */
+ kexec_all_irq_disabled = 1;
/* after we tell the others to go down */
if (ppc_md.kexec_cpu_down)
ppc_md.kexec_cpu_down(0, 0);
- put_cpu();
+ /*
+ * Before removing MMU mappings make sure all CPUs have entered real
+ * mode:
+ */
+ kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
- local_irq_disable();
+ put_cpu();
}
#else /* ! SMP */
@@ -233,6 +286,7 @@ static void kexec_prepare_cpus(void)
if (ppc_md.kexec_cpu_down)
ppc_md.kexec_cpu_down(0, 0);
local_irq_disable();
+ hard_irq_disable();
}
#endif /* SMP */
@@ -249,13 +303,19 @@ static void kexec_prepare_cpus(void)
* We could use a smaller stack if we don't care about anything using
* current, but that audit has not been performed.
*/
-union thread_union kexec_stack
- __attribute__((__section__(".data.init_task"))) = { };
+static union thread_union kexec_stack __init_task_data =
+ { };
+
+/*
+ * For similar reasons to the stack above, the kexecing CPU needs to be on a
+ * static PACA; we switch to kexec_paca.
+ */
+struct paca_struct kexec_paca;
-/* Our assembly helper, in kexec_stub.S */
-extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
- void *image, void *control,
- void (*clear_all)(void)) ATTRIB_NORET;
+/* Our assembly helper, in misc_64.S */
+extern void kexec_sequence(void *newstack, unsigned long start,
+ void *image, void *control,
+ void (*clear_all)(void)) __noreturn;
/* too late to fail here */
void default_machine_kexec(struct kimage *image)
@@ -270,14 +330,33 @@ void default_machine_kexec(struct kimage *image)
* using debugger IPI.
*/
- if (crashing_cpu == -1)
- kexec_prepare_cpus();
+ if (crashing_cpu == -1)
+ kexec_prepare_cpus();
+
+ pr_debug("kexec: Starting switchover sequence.\n");
/* switch to a staticly allocated stack. Based on irq stack code.
+ * We setup preempt_count to avoid using VMX in memcpy.
* XXX: the task struct will likely be invalid once we do the copy!
*/
kexec_stack.thread_info.task = current_thread_info()->task;
kexec_stack.thread_info.flags = 0;
+ kexec_stack.thread_info.preempt_count = HARDIRQ_OFFSET;
+ kexec_stack.thread_info.cpu = current_thread_info()->cpu;
+
+ /* We need a static PACA, too; copy this CPU's PACA over and switch to
+ * it. Also poison per_cpu_offset to catch anyone using non-static
+ * data.
+ */
+ memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct));
+ kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL;
+ paca = (struct paca_struct *)RELOC_HIDE(&kexec_paca, 0) -
+ kexec_paca.paca_index;
+ setup_paca(&kexec_paca);
+
+ /* XXX: If anyone does 'dynamic lppacas' this will also need to be
+ * switched to a static version!
+ */
/* Some things are best done in assembly. Finding globals with
* a toc is easier in C, so pass in what we can.
@@ -289,95 +368,48 @@ void default_machine_kexec(struct kimage *image)
}
/* Values we need to export to the second kernel via the device tree. */
-static unsigned long htab_base, kernel_end;
+static unsigned long htab_base;
+static unsigned long htab_size;
static struct property htab_base_prop = {
.name = "linux,htab-base",
.length = sizeof(unsigned long),
- .value = (unsigned char *)&htab_base,
+ .value = &htab_base,
};
static struct property htab_size_prop = {
.name = "linux,htab-size",
.length = sizeof(unsigned long),
- .value = (unsigned char *)&htab_size_bytes,
-};
-
-static struct property kernel_end_prop = {
- .name = "linux,kernel-end",
- .length = sizeof(unsigned long),
- .value = (unsigned char *)&kernel_end,
+ .value = &htab_size,
};
-static void __init export_htab_values(void)
+static int __init export_htab_values(void)
{
struct device_node *node;
-
- node = of_find_node_by_path("/chosen");
- if (!node)
- return;
-
- kernel_end = __pa(_end);
- prom_add_property(node, &kernel_end_prop);
+ struct property *prop;
/* On machines with no htab htab_address is NULL */
- if (NULL == htab_address)
- goto out;
-
- htab_base = __pa(htab_address);
- prom_add_property(node, &htab_base_prop);
- prom_add_property(node, &htab_size_prop);
-
- out:
- of_node_put(node);
-}
-
-static struct property crashk_base_prop = {
- .name = "linux,crashkernel-base",
- .length = sizeof(unsigned long),
- .value = (unsigned char *)&crashk_res.start,
-};
-
-static unsigned long crashk_size;
-
-static struct property crashk_size_prop = {
- .name = "linux,crashkernel-size",
- .length = sizeof(unsigned long),
- .value = (unsigned char *)&crashk_size,
-};
-
-static void __init export_crashk_values(void)
-{
- struct device_node *node;
- struct property *prop;
+ if (!htab_address)
+ return -ENODEV;
node = of_find_node_by_path("/chosen");
if (!node)
- return;
+ return -ENODEV;
- /* There might be existing crash kernel properties, but we can't
- * be sure what's in them, so remove them. */
- prop = of_find_property(node, "linux,crashkernel-base", NULL);
+ /* remove any stale propertys so ours can be found */
+ prop = of_find_property(node, htab_base_prop.name, NULL);
if (prop)
- prom_remove_property(node, prop);
-
- prop = of_find_property(node, "linux,crashkernel-size", NULL);
+ of_remove_property(node, prop);
+ prop = of_find_property(node, htab_size_prop.name, NULL);
if (prop)
- prom_remove_property(node, prop);
+ of_remove_property(node, prop);
- if (crashk_res.start != 0) {
- prom_add_property(node, &crashk_base_prop);
- crashk_size = crashk_res.end - crashk_res.start + 1;
- prom_add_property(node, &crashk_size_prop);
- }
+ htab_base = cpu_to_be64(__pa(htab_address));
+ of_add_property(node, &htab_base_prop);
+ htab_size = cpu_to_be64(htab_size_bytes);
+ of_add_property(node, &htab_size_prop);
of_node_put(node);
-}
-
-static int __init kexec_setup(void)
-{
- export_htab_values();
- export_crashk_values();
return 0;
}
-__initcall(kexec_setup);
+late_initcall(export_htab_values);
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
new file mode 100644
index 00000000000..a7fd4cb78b7
--- /dev/null
+++ b/arch/powerpc/kernel/mce.c
@@ -0,0 +1,352 @@
+/*
+ * Machine check exception handling.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "mce: " fmt
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+#include <linux/export.h>
+#include <linux/irq_work.h>
+#include <asm/mce.h>
+
+static DEFINE_PER_CPU(int, mce_nest_count);
+static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
+
+/* Queue for delayed MCE events. */
+static DEFINE_PER_CPU(int, mce_queue_count);
+static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
+
+static void machine_check_process_queued_event(struct irq_work *work);
+struct irq_work mce_event_process_work = {
+ .func = machine_check_process_queued_event,
+};
+
+static void mce_set_error_info(struct machine_check_event *mce,
+ struct mce_error_info *mce_err)
+{
+ mce->error_type = mce_err->error_type;
+ switch (mce_err->error_type) {
+ case MCE_ERROR_TYPE_UE:
+ mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
+ break;
+ case MCE_ERROR_TYPE_UNKNOWN:
+ default:
+ break;
+ }
+}
+
+/*
+ * Decode and save high level MCE information into per cpu buffer which
+ * is an array of machine_check_event structure.
+ */
+void save_mce_event(struct pt_regs *regs, long handled,
+ struct mce_error_info *mce_err,
+ uint64_t nip, uint64_t addr)
+{
+ uint64_t srr1;
+ int index = __get_cpu_var(mce_nest_count)++;
+ struct machine_check_event *mce = &__get_cpu_var(mce_event[index]);
+
+ /*
+ * Return if we don't have enough space to log mce event.
+ * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
+ * the check below will stop buffer overrun.
+ */
+ if (index >= MAX_MC_EVT)
+ return;
+
+ /* Populate generic machine check info */
+ mce->version = MCE_V1;
+ mce->srr0 = nip;
+ mce->srr1 = regs->msr;
+ mce->gpr3 = regs->gpr[3];
+ mce->in_use = 1;
+
+ mce->initiator = MCE_INITIATOR_CPU;
+ if (handled)
+ mce->disposition = MCE_DISPOSITION_RECOVERED;
+ else
+ mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
+ mce->severity = MCE_SEV_ERROR_SYNC;
+
+ srr1 = regs->msr;
+
+ /*
+ * Populate the mce error_type and type-specific error_type.
+ */
+ mce_set_error_info(mce, mce_err);
+
+ if (!addr)
+ return;
+
+ if (mce->error_type == MCE_ERROR_TYPE_TLB) {
+ mce->u.tlb_error.effective_address_provided = true;
+ mce->u.tlb_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
+ mce->u.slb_error.effective_address_provided = true;
+ mce->u.slb_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
+ mce->u.erat_error.effective_address_provided = true;
+ mce->u.erat_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
+ mce->u.ue_error.effective_address_provided = true;
+ mce->u.ue_error.effective_address = addr;
+ }
+ return;
+}
+
+/*
+ * get_mce_event:
+ * mce Pointer to machine_check_event structure to be filled.
+ * release Flag to indicate whether to free the event slot or not.
+ * 0 <= do not release the mce event. Caller will invoke
+ * release_mce_event() once event has been consumed.
+ * 1 <= release the slot.
+ *
+ * return 1 = success
+ * 0 = failure
+ *
+ * get_mce_event() will be called by platform specific machine check
+ * handle routine and in KVM.
+ * When we call get_mce_event(), we are still in interrupt context and
+ * preemption will not be scheduled until ret_from_expect() routine
+ * is called.
+ */
+int get_mce_event(struct machine_check_event *mce, bool release)
+{
+ int index = __get_cpu_var(mce_nest_count) - 1;
+ struct machine_check_event *mc_evt;
+ int ret = 0;
+
+ /* Sanity check */
+ if (index < 0)
+ return ret;
+
+ /* Check if we have MCE info to process. */
+ if (index < MAX_MC_EVT) {
+ mc_evt = &__get_cpu_var(mce_event[index]);
+ /* Copy the event structure and release the original */
+ if (mce)
+ *mce = *mc_evt;
+ if (release)
+ mc_evt->in_use = 0;
+ ret = 1;
+ }
+ /* Decrement the count to free the slot. */
+ if (release)
+ __get_cpu_var(mce_nest_count)--;
+
+ return ret;
+}
+
+void release_mce_event(void)
+{
+ get_mce_event(NULL, true);
+}
+
+/*
+ * Queue up the MCE event which then can be handled later.
+ */
+void machine_check_queue_event(void)
+{
+ int index;
+ struct machine_check_event evt;
+
+ if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
+ return;
+
+ index = __get_cpu_var(mce_queue_count)++;
+ /* If queue is full, just return for now. */
+ if (index >= MAX_MC_EVT) {
+ __get_cpu_var(mce_queue_count)--;
+ return;
+ }
+ __get_cpu_var(mce_event_queue[index]) = evt;
+
+ /* Queue irq work to process this event later. */
+ irq_work_queue(&mce_event_process_work);
+}
+
+/*
+ * process pending MCE event from the mce event queue. This function will be
+ * called during syscall exit.
+ */
+static void machine_check_process_queued_event(struct irq_work *work)
+{
+ int index;
+
+ /*
+ * For now just print it to console.
+ * TODO: log this error event to FSP or nvram.
+ */
+ while (__get_cpu_var(mce_queue_count) > 0) {
+ index = __get_cpu_var(mce_queue_count) - 1;
+ machine_check_print_event_info(
+ &__get_cpu_var(mce_event_queue[index]));
+ __get_cpu_var(mce_queue_count)--;
+ }
+}
+
+void machine_check_print_event_info(struct machine_check_event *evt)
+{
+ const char *level, *sevstr, *subtype;
+ static const char *mc_ue_types[] = {
+ "Indeterminate",
+ "Instruction fetch",
+ "Page table walk ifetch",
+ "Load/Store",
+ "Page table walk Load/Store",
+ };
+ static const char *mc_slb_types[] = {
+ "Indeterminate",
+ "Parity",
+ "Multihit",
+ };
+ static const char *mc_erat_types[] = {
+ "Indeterminate",
+ "Parity",
+ "Multihit",
+ };
+ static const char *mc_tlb_types[] = {
+ "Indeterminate",
+ "Parity",
+ "Multihit",
+ };
+
+ /* Print things out */
+ if (evt->version != MCE_V1) {
+ pr_err("Machine Check Exception, Unknown event version %d !\n",
+ evt->version);
+ return;
+ }
+ switch (evt->severity) {
+ case MCE_SEV_NO_ERROR:
+ level = KERN_INFO;
+ sevstr = "Harmless";
+ break;
+ case MCE_SEV_WARNING:
+ level = KERN_WARNING;
+ sevstr = "";
+ break;
+ case MCE_SEV_ERROR_SYNC:
+ level = KERN_ERR;
+ sevstr = "Severe";
+ break;
+ case MCE_SEV_FATAL:
+ default:
+ level = KERN_ERR;
+ sevstr = "Fatal";
+ break;
+ }
+
+ printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
+ evt->disposition == MCE_DISPOSITION_RECOVERED ?
+ "Recovered" : "[Not recovered");
+ printk("%s Initiator: %s\n", level,
+ evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
+ switch (evt->error_type) {
+ case MCE_ERROR_TYPE_UE:
+ subtype = evt->u.ue_error.ue_error_type <
+ ARRAY_SIZE(mc_ue_types) ?
+ mc_ue_types[evt->u.ue_error.ue_error_type]
+ : "Unknown";
+ printk("%s Error type: UE [%s]\n", level, subtype);
+ if (evt->u.ue_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.ue_error.effective_address);
+ if (evt->u.ue_error.physical_address_provided)
+ printk("%s Physial address: %016llx\n",
+ level, evt->u.ue_error.physical_address);
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ subtype = evt->u.slb_error.slb_error_type <
+ ARRAY_SIZE(mc_slb_types) ?
+ mc_slb_types[evt->u.slb_error.slb_error_type]
+ : "Unknown";
+ printk("%s Error type: SLB [%s]\n", level, subtype);
+ if (evt->u.slb_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.slb_error.effective_address);
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ subtype = evt->u.erat_error.erat_error_type <
+ ARRAY_SIZE(mc_erat_types) ?
+ mc_erat_types[evt->u.erat_error.erat_error_type]
+ : "Unknown";
+ printk("%s Error type: ERAT [%s]\n", level, subtype);
+ if (evt->u.erat_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.erat_error.effective_address);
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ subtype = evt->u.tlb_error.tlb_error_type <
+ ARRAY_SIZE(mc_tlb_types) ?
+ mc_tlb_types[evt->u.tlb_error.tlb_error_type]
+ : "Unknown";
+ printk("%s Error type: TLB [%s]\n", level, subtype);
+ if (evt->u.tlb_error.effective_address_provided)
+ printk("%s Effective address: %016llx\n",
+ level, evt->u.tlb_error.effective_address);
+ break;
+ default:
+ case MCE_ERROR_TYPE_UNKNOWN:
+ printk("%s Error type: Unknown\n", level);
+ break;
+ }
+}
+
+uint64_t get_mce_fault_addr(struct machine_check_event *evt)
+{
+ switch (evt->error_type) {
+ case MCE_ERROR_TYPE_UE:
+ if (evt->u.ue_error.effective_address_provided)
+ return evt->u.ue_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ if (evt->u.slb_error.effective_address_provided)
+ return evt->u.slb_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ if (evt->u.erat_error.effective_address_provided)
+ return evt->u.erat_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ if (evt->u.tlb_error.effective_address_provided)
+ return evt->u.tlb_error.effective_address;
+ break;
+ default:
+ case MCE_ERROR_TYPE_UNKNOWN:
+ break;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(get_mce_fault_addr);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
new file mode 100644
index 00000000000..aa9aff3d6ad
--- /dev/null
+++ b/arch/powerpc/kernel/mce_power.c
@@ -0,0 +1,313 @@
+/*
+ * Machine check exception handling CPU-side for power7 and power8
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "mce_power: " fmt
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <asm/mmu.h>
+#include <asm/mce.h>
+#include <asm/machdep.h>
+
+/* flush SLBs and reload */
+static void flush_and_reload_slb(void)
+{
+ struct slb_shadow *slb;
+ unsigned long i, n;
+
+ /* Invalidate all SLBs */
+ asm volatile("slbmte %0,%0; slbia" : : "r" (0));
+
+#ifdef CONFIG_KVM_BOOK3S_HANDLER
+ /*
+ * If machine check is hit when in guest or in transition, we will
+ * only flush the SLBs and continue.
+ */
+ if (get_paca()->kvm_hstate.in_guest)
+ return;
+#endif
+
+ /* For host kernel, reload the SLBs from shadow SLB buffer. */
+ slb = get_slb_shadow();
+ if (!slb)
+ return;
+
+ n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);
+
+ /* Load up the SLB entries from shadow SLB */
+ for (i = 0; i < n; i++) {
+ unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
+ unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);
+
+ rb = (rb & ~0xFFFul) | i;
+ asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
+ }
+}
+
+static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
+{
+ long handled = 1;
+
+ /*
+ * flush and reload SLBs for SLB errors and flush TLBs for TLB errors.
+ * reset the error bits whenever we handle them so that at the end
+ * we can check whether we handled all of them or not.
+ * */
+ if (dsisr & slb_error_bits) {
+ flush_and_reload_slb();
+ /* reset error bits */
+ dsisr &= ~(slb_error_bits);
+ }
+ if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
+ if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
+ cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
+ /* reset error bits */
+ dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
+ }
+ /* Any other errors we don't understand? */
+ if (dsisr & 0xffffffffUL)
+ handled = 0;
+
+ return handled;
+}
+
+static long mce_handle_derror_p7(uint64_t dsisr)
+{
+ return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
+}
+
+static long mce_handle_common_ierror(uint64_t srr1)
+{
+ long handled = 0;
+
+ switch (P7_SRR1_MC_IFETCH(srr1)) {
+ case 0:
+ break;
+ case P7_SRR1_MC_IFETCH_SLB_PARITY:
+ case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
+ /* flush and reload SLBs for SLB errors. */
+ flush_and_reload_slb();
+ handled = 1;
+ break;
+ case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
+ if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
+ cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
+ handled = 1;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return handled;
+}
+
+static long mce_handle_ierror_p7(uint64_t srr1)
+{
+ long handled = 0;
+
+ handled = mce_handle_common_ierror(srr1);
+
+ if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
+ flush_and_reload_slb();
+ handled = 1;
+ }
+ return handled;
+}
+
+static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
+{
+ switch (P7_SRR1_MC_IFETCH(srr1)) {
+ case P7_SRR1_MC_IFETCH_SLB_PARITY:
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+ break;
+ case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+ break;
+ case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
+ mce_err->error_type = MCE_ERROR_TYPE_TLB;
+ mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+ break;
+ case P7_SRR1_MC_IFETCH_UE:
+ case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
+ break;
+ case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type =
+ MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
+ break;
+ }
+}
+
+static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
+{
+ mce_get_common_ierror(mce_err, srr1);
+ if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
+ }
+}
+
+static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
+{
+ if (dsisr & P7_DSISR_MC_UE) {
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
+ } else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
+ mce_err->error_type = MCE_ERROR_TYPE_UE;
+ mce_err->u.ue_error_type =
+ MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+ } else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
+ mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+ mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+ } else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
+ } else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
+ mce_err->error_type = MCE_ERROR_TYPE_TLB;
+ mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+ } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
+ mce_err->error_type = MCE_ERROR_TYPE_SLB;
+ mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
+ }
+}
+
+static long mce_handle_ue_error(struct pt_regs *regs)
+{
+ long handled = 0;
+
+ /*
+ * On specific SCOM read via MMIO we may get a machine check
+ * exception with SRR0 pointing inside opal. If that is the
+ * case OPAL may have recovery address to re-read SCOM data in
+ * different way and hence we can recover from this MC.
+ */
+
+ if (ppc_md.mce_check_early_recovery) {
+ if (ppc_md.mce_check_early_recovery(regs))
+ handled = 1;
+ }
+ return handled;
+}
+
+long __machine_check_early_realmode_p7(struct pt_regs *regs)
+{
+ uint64_t srr1, nip, addr;
+ long handled = 1;
+ struct mce_error_info mce_error_info = { 0 };
+
+ srr1 = regs->msr;
+ nip = regs->nip;
+
+ /*
+ * Handle memory errors depending whether this was a load/store or
+ * ifetch exception. Also, populate the mce error_type and
+ * type-specific error_type from either SRR1 or DSISR, depending
+ * whether this was a load/store or ifetch exception
+ */
+ if (P7_SRR1_MC_LOADSTORE(srr1)) {
+ handled = mce_handle_derror_p7(regs->dsisr);
+ mce_get_derror_p7(&mce_error_info, regs->dsisr);
+ addr = regs->dar;
+ } else {
+ handled = mce_handle_ierror_p7(srr1);
+ mce_get_ierror_p7(&mce_error_info, srr1);
+ addr = regs->nip;
+ }
+
+ /* Handle UE error. */
+ if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
+ handled = mce_handle_ue_error(regs);
+
+ save_mce_event(regs, handled, &mce_error_info, nip, addr);
+ return handled;
+}
+
+static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
+{
+ mce_get_common_ierror(mce_err, srr1);
+ if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
+ mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+ mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ }
+}
+
+static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
+{
+ mce_get_derror_p7(mce_err, dsisr);
+ if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
+ mce_err->error_type = MCE_ERROR_TYPE_ERAT;
+ mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+ }
+}
+
+static long mce_handle_ierror_p8(uint64_t srr1)
+{
+ long handled = 0;
+
+ handled = mce_handle_common_ierror(srr1);
+
+ if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
+ flush_and_reload_slb();
+ handled = 1;
+ }
+ return handled;
+}
+
+static long mce_handle_derror_p8(uint64_t dsisr)
+{
+ return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS);
+}
+
+long __machine_check_early_realmode_p8(struct pt_regs *regs)
+{
+ uint64_t srr1, nip, addr;
+ long handled = 1;
+ struct mce_error_info mce_error_info = { 0 };
+
+ srr1 = regs->msr;
+ nip = regs->nip;
+
+ if (P7_SRR1_MC_LOADSTORE(srr1)) {
+ handled = mce_handle_derror_p8(regs->dsisr);
+ mce_get_derror_p8(&mce_error_info, regs->dsisr);
+ addr = regs->dar;
+ } else {
+ handled = mce_handle_ierror_p8(srr1);
+ mce_get_ierror_p8(&mce_error_info, srr1);
+ addr = regs->nip;
+ }
+
+ /* Handle UE error. */
+ if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
+ handled = mce_handle_ue_error(regs);
+
+ save_mce_event(regs, handled, &mce_error_info, nip, addr);
+ return handled;
+}
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 330c9dc7db8..7ce26d45777 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -5,15 +5,19 @@
* Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
* and Paul Mackerras.
*
- * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
* PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
*
+ * setjmp/longjmp code by Paul Mackerras.
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <asm/ppc_asm.h>
+#include <asm/unistd.h>
+#include <asm/asm-compat.h>
+#include <asm/asm-offsets.h>
.text
@@ -26,11 +30,14 @@ _GLOBAL(reloc_offset)
mflr r0
bl 1f
1: mflr r3
- LOAD_REG_IMMEDIATE(r4,1b)
+ PPC_LL r4,(2f-1b)(r3)
subf r3,r4,r3
mtlr r0
blr
+ .align 3
+2: PPC_LONG 1b
+
/*
* add_reloc_offset(x) returns x + reloc_offset().
*/
@@ -38,8 +45,72 @@ _GLOBAL(add_reloc_offset)
mflr r0
bl 1f
1: mflr r5
- LOAD_REG_IMMEDIATE(r4,1b)
+ PPC_LL r4,(2f-1b)(r5)
subf r5,r4,r5
add r3,r3,r5
mtlr r0
blr
+
+ .align 3
+2: PPC_LONG 1b
+
+_GLOBAL(setjmp)
+ mflr r0
+ PPC_STL r0,0(r3)
+ PPC_STL r1,SZL(r3)
+ PPC_STL r2,2*SZL(r3)
+ mfcr r0
+ PPC_STL r0,3*SZL(r3)
+ PPC_STL r13,4*SZL(r3)
+ PPC_STL r14,5*SZL(r3)
+ PPC_STL r15,6*SZL(r3)
+ PPC_STL r16,7*SZL(r3)
+ PPC_STL r17,8*SZL(r3)
+ PPC_STL r18,9*SZL(r3)
+ PPC_STL r19,10*SZL(r3)
+ PPC_STL r20,11*SZL(r3)
+ PPC_STL r21,12*SZL(r3)
+ PPC_STL r22,13*SZL(r3)
+ PPC_STL r23,14*SZL(r3)
+ PPC_STL r24,15*SZL(r3)
+ PPC_STL r25,16*SZL(r3)
+ PPC_STL r26,17*SZL(r3)
+ PPC_STL r27,18*SZL(r3)
+ PPC_STL r28,19*SZL(r3)
+ PPC_STL r29,20*SZL(r3)
+ PPC_STL r30,21*SZL(r3)
+ PPC_STL r31,22*SZL(r3)
+ li r3,0
+ blr
+
+_GLOBAL(longjmp)
+ PPC_LCMPI r4,0
+ bne 1f
+ li r4,1
+1: PPC_LL r13,4*SZL(r3)
+ PPC_LL r14,5*SZL(r3)
+ PPC_LL r15,6*SZL(r3)
+ PPC_LL r16,7*SZL(r3)
+ PPC_LL r17,8*SZL(r3)
+ PPC_LL r18,9*SZL(r3)
+ PPC_LL r19,10*SZL(r3)
+ PPC_LL r20,11*SZL(r3)
+ PPC_LL r21,12*SZL(r3)
+ PPC_LL r22,13*SZL(r3)
+ PPC_LL r23,14*SZL(r3)
+ PPC_LL r24,15*SZL(r3)
+ PPC_LL r25,16*SZL(r3)
+ PPC_LL r26,17*SZL(r3)
+ PPC_LL r27,18*SZL(r3)
+ PPC_LL r28,19*SZL(r3)
+ PPC_LL r29,20*SZL(r3)
+ PPC_LL r30,21*SZL(r3)
+ PPC_LL r31,22*SZL(r3)
+ PPC_LL r0,3*SZL(r3)
+ mtcrf 0x38,r0
+ PPC_LL r0,0(r3)
+ PPC_LL r1,SZL(r3)
+ PPC_LL r2,2*SZL(r3)
+ mtlr r0
+ mr r3,r4
+ blr
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 58758d88336..7c6bb4b17b4 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -8,6 +8,8 @@
* kexec bits:
* Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
* GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * PPC44x port. Copyright (C) 2011, IBM Corporation
+ * Author: Suzuki Poulose <suzuki@in.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -29,10 +31,53 @@
#include <asm/asm-offsets.h>
#include <asm/processor.h>
#include <asm/kexec.h>
+#include <asm/bug.h>
+#include <asm/ptrace.h>
.text
/*
+ * We store the saved ksp_limit in the unused part
+ * of the STACK_FRAME_OVERHEAD
+ */
+_GLOBAL(call_do_softirq)
+ mflr r0
+ stw r0,4(r1)
+ lwz r10,THREAD+KSP_LIMIT(r2)
+ addi r11,r3,THREAD_INFO_GAP
+ stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
+ mr r1,r3
+ stw r10,8(r1)
+ stw r11,THREAD+KSP_LIMIT(r2)
+ bl __do_softirq
+ lwz r10,8(r1)
+ lwz r1,0(r1)
+ lwz r0,4(r1)
+ stw r10,THREAD+KSP_LIMIT(r2)
+ mtlr r0
+ blr
+
+/*
+ * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp);
+ */
+_GLOBAL(call_do_irq)
+ mflr r0
+ stw r0,4(r1)
+ lwz r10,THREAD+KSP_LIMIT(r2)
+ addi r11,r4,THREAD_INFO_GAP
+ stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
+ mr r1,r4
+ stw r10,8(r1)
+ stw r11,THREAD+KSP_LIMIT(r2)
+ bl __do_irq
+ lwz r10,8(r1)
+ lwz r1,0(r1)
+ lwz r0,4(r1)
+ stw r10,THREAD+KSP_LIMIT(r2)
+ mtlr r0
+ blr
+
+/*
* This returns the high 64 bits of the product of two 64-bit numbers.
*/
_GLOBAL(mulhdu)
@@ -102,80 +147,6 @@ _GLOBAL(reloc_got2)
blr
/*
- * identify_cpu,
- * called with r3 = data offset and r4 = CPU number
- * doesn't change r3
- */
-_GLOBAL(identify_cpu)
- addis r8,r3,cpu_specs@ha
- addi r8,r8,cpu_specs@l
- mfpvr r7
-1:
- lwz r5,CPU_SPEC_PVR_MASK(r8)
- and r5,r5,r7
- lwz r6,CPU_SPEC_PVR_VALUE(r8)
- cmplw 0,r6,r5
- beq 1f
- addi r8,r8,CPU_SPEC_ENTRY_SIZE
- b 1b
-1:
- addis r6,r3,cur_cpu_spec@ha
- addi r6,r6,cur_cpu_spec@l
- sub r8,r8,r3
- stw r8,0(r6)
- blr
-
-/*
- * do_cpu_ftr_fixups - goes through the list of CPU feature fixups
- * and writes nop's over sections of code that don't apply for this cpu.
- * r3 = data offset (not changed)
- */
-_GLOBAL(do_cpu_ftr_fixups)
- /* Get CPU 0 features */
- addis r6,r3,cur_cpu_spec@ha
- addi r6,r6,cur_cpu_spec@l
- lwz r4,0(r6)
- add r4,r4,r3
- lwz r4,CPU_SPEC_FEATURES(r4)
-
- /* Get the fixup table */
- addis r6,r3,__start___ftr_fixup@ha
- addi r6,r6,__start___ftr_fixup@l
- addis r7,r3,__stop___ftr_fixup@ha
- addi r7,r7,__stop___ftr_fixup@l
-
- /* Do the fixup */
-1: cmplw 0,r6,r7
- bgelr
- addi r6,r6,16
- lwz r8,-16(r6) /* mask */
- and r8,r8,r4
- lwz r9,-12(r6) /* value */
- cmplw 0,r8,r9
- beq 1b
- lwz r8,-8(r6) /* section begin */
- lwz r9,-4(r6) /* section end */
- subf. r9,r8,r9
- beq 1b
- /* write nops over the section of code */
- /* todo: if large section, add a branch at the start of it */
- srwi r9,r9,2
- mtctr r9
- add r8,r8,r3
- lis r0,0x60000000@h /* nop */
-3: stw r0,0(r8)
- andi. r10,r4,CPU_FTR_SPLIT_ID_CACHE@l
- beq 2f
- dcbst 0,r8 /* suboptimal, but simpler */
- sync
- icbi 0,r8
-2: addi r8,r8,4
- bdnz 3b
- sync /* additional sync needed on g4 */
- isync
- b 1b
-
-/*
* call_setup_cpu - call the setup_cpu function for this cpu
* r3 = data offset, r24 = cpu number
*
@@ -226,7 +197,7 @@ _GLOBAL(low_choose_750fx_pll)
mtspr SPRN_HID1,r4
/* Store new HID1 image */
- rlwinm r6,r1,0,0,18
+ CURRENT_THREAD_INFO(r6, r1)
lwz r6,TI_CPU(r6)
slwi r6,r6,2
addis r6,r6,nap_save_hid1@ha
@@ -280,163 +251,47 @@ _GLOBAL(_nmask_and_or_msr)
isync
blr /* Done */
+#ifdef CONFIG_40x
/*
- * Flush MMU TLB
+ * Do an IO access in real mode
*/
-_GLOBAL(_tlbia)
-#if defined(CONFIG_40x)
- sync /* Flush to memory before changing mapping */
- tlbia
- isync /* Flush shadow TLB */
-#elif defined(CONFIG_44x)
- li r3,0
+_GLOBAL(real_readb)
+ mfmsr r7
+ ori r0,r7,MSR_DR
+ xori r0,r0,MSR_DR
sync
-
- /* Load high watermark */
- lis r4,tlb_44x_hwater@ha
- lwz r5,tlb_44x_hwater@l(r4)
-
-1: tlbwe r3,r3,PPC44x_TLB_PAGEID
- addi r3,r3,1
- cmpw 0,r3,r5
- ble 1b
-
- isync
-#elif defined(CONFIG_FSL_BOOKE)
- /* Invalidate all entries in TLB0 */
- li r3, 0x04
- tlbivax 0,3
- /* Invalidate all entries in TLB1 */
- li r3, 0x0c
- tlbivax 0,3
- /* Invalidate all entries in TLB2 */
- li r3, 0x14
- tlbivax 0,3
- /* Invalidate all entries in TLB3 */
- li r3, 0x1c
- tlbivax 0,3
- msync
-#ifdef CONFIG_SMP
- tlbsync
-#endif /* CONFIG_SMP */
-#else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */
-#if defined(CONFIG_SMP)
- rlwinm r8,r1,0,0,18
- lwz r8,TI_CPU(r8)
- oris r8,r8,10
- mfmsr r10
- SYNC
- rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
- rlwinm r0,r0,0,28,26 /* clear DR */
mtmsr r0
- SYNC_601
- isync
- lis r9,mmu_hash_lock@h
- ori r9,r9,mmu_hash_lock@l
- tophys(r9,r9)
-10: lwarx r7,0,r9
- cmpwi 0,r7,0
- bne- 10b
- stwcx. r8,0,r9
- bne- 10b
sync
- tlbia
- sync
- TLBSYNC
- li r0,0
- stw r0,0(r9) /* clear mmu_hash_lock */
- mtmsr r10
- SYNC_601
isync
-#else /* CONFIG_SMP */
+ lbz r3,0(r3)
sync
- tlbia
+ mtmsr r7
sync
-#endif /* CONFIG_SMP */
-#endif /* ! defined(CONFIG_40x) */
+ isync
blr
-/*
- * Flush MMU TLB for a particular address
+ /*
+ * Do an IO access in real mode
*/
-_GLOBAL(_tlbie)
-#if defined(CONFIG_40x)
- tlbsx. r3, 0, r3
- bne 10f
- sync
- /* There are only 64 TLB entries, so r3 < 64, which means bit 25 is clear.
- * Since 25 is the V bit in the TLB_TAG, loading this value will invalidate
- * the TLB entry. */
- tlbwe r3, r3, TLB_TAG
- isync
-10:
-#elif defined(CONFIG_44x)
- mfspr r4,SPRN_MMUCR
- mfspr r5,SPRN_PID /* Get PID */
- rlwimi r4,r5,0,24,31 /* Set TID */
- mtspr SPRN_MMUCR,r4
-
- tlbsx. r3, 0, r3
- bne 10f
+_GLOBAL(real_writeb)
+ mfmsr r7
+ ori r0,r7,MSR_DR
+ xori r0,r0,MSR_DR
sync
- /* There are only 64 TLB entries, so r3 < 64,
- * which means bit 22, is clear. Since 22 is
- * the V bit in the TLB_PAGEID, loading this
- * value will invalidate the TLB entry.
- */
- tlbwe r3, r3, PPC44x_TLB_PAGEID
- isync
-10:
-#elif defined(CONFIG_FSL_BOOKE)
- rlwinm r4, r3, 0, 0, 19
- ori r5, r4, 0x08 /* TLBSEL = 1 */
- ori r6, r4, 0x10 /* TLBSEL = 2 */
- ori r7, r4, 0x18 /* TLBSEL = 3 */
- tlbivax 0, r4
- tlbivax 0, r5
- tlbivax 0, r6
- tlbivax 0, r7
- msync
-#if defined(CONFIG_SMP)
- tlbsync
-#endif /* CONFIG_SMP */
-#else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */
-#if defined(CONFIG_SMP)
- rlwinm r8,r1,0,0,18
- lwz r8,TI_CPU(r8)
- oris r8,r8,11
- mfmsr r10
- SYNC
- rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
- rlwinm r0,r0,0,28,26 /* clear DR */
mtmsr r0
- SYNC_601
- isync
- lis r9,mmu_hash_lock@h
- ori r9,r9,mmu_hash_lock@l
- tophys(r9,r9)
-10: lwarx r7,0,r9
- cmpwi 0,r7,0
- bne- 10b
- stwcx. r8,0,r9
- bne- 10b
- eieio
- tlbie r3
sync
- TLBSYNC
- li r0,0
- stw r0,0(r9) /* clear mmu_hash_lock */
- mtmsr r10
- SYNC_601
isync
-#else /* CONFIG_SMP */
- tlbie r3
+ stb r3,0(r4)
sync
-#endif /* CONFIG_SMP */
-#endif /* ! CONFIG_40x */
+ mtmsr r7
+ sync
+ isync
blr
+#endif /* CONFIG_40x */
+
+
/*
* Flush instruction cache.
* This is a no-op on the 601.
@@ -466,7 +321,7 @@ BEGIN_FTR_SECTION
mtspr SPRN_L1CSR0,r3
isync
blr
-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
+END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
mfspr r3,SPRN_L1CSR1
ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
mtspr SPRN_L1CSR1,r3
@@ -490,10 +345,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
*
* flush_icache_range(unsigned long start, unsigned long stop)
*/
-_GLOBAL(__flush_icache_range)
+_KPROBE(flush_icache_range)
BEGIN_FTR_SECTION
+ PURGE_PREFETCHED_INS
blr /* for 601, do nothing */
-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
+END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
li r5,L1_CACHE_BYTES-1
andc r3,r3,r5
subf r4,r3,r4
@@ -506,10 +362,17 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
addi r3,r3,L1_CACHE_BYTES
bdnz 1b
sync /* wait for dcbst's to get to ram */
+#ifndef CONFIG_44x
mtctr r4
2: icbi 0,r6
addi r6,r6,L1_CACHE_BYTES
bdnz 2b
+#else
+ /* Flash invalidate on 44x because we are passed kmapped addresses and
+ this doesn't work for userspace pages due to the virtually tagged
+ icache. Sigh. */
+ iccci 0, r0
+#endif
sync /* additional sync needed on g4 */
isync
blr
@@ -588,16 +451,29 @@ _GLOBAL(invalidate_dcache_range)
*/
_GLOBAL(__flush_dcache_icache)
BEGIN_FTR_SECTION
- blr /* for 601, do nothing */
-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
- rlwinm r3,r3,0,0,19 /* Get page base address */
- li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */
+ PURGE_PREFETCHED_INS
+ blr
+END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */
+ li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */
mtctr r4
mr r6,r3
0: dcbst 0,r3 /* Write line to ram */
addi r3,r3,L1_CACHE_BYTES
bdnz 0b
sync
+#ifdef CONFIG_44x
+ /* We don't flush the icache on 44x. Those have a virtual icache
+ * and we don't have access to the virtual address here (it's
+ * not the page vaddr but where it's mapped in user space). The
+ * flushing of the icache on these is handled elsewhere, when
+ * a change in the address space occurs, before returning to
+ * user space
+ */
+BEGIN_MMU_FTR_SECTION
+ blr
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
+#endif /* CONFIG_44x */
mtctr r4
1: icbi 0,r6
addi r6,r6,L1_CACHE_BYTES
@@ -606,6 +482,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
isync
blr
+#ifndef CONFIG_BOOKE
/*
* Flush a particular page from the data cache to RAM, identified
* by its physical address. We turn off the MMU so we can just use
@@ -616,14 +493,15 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
*/
_GLOBAL(__flush_dcache_icache_phys)
BEGIN_FTR_SECTION
+ PURGE_PREFETCHED_INS
blr /* for 601, do nothing */
-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
+END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
mfmsr r10
rlwinm r0,r10,0,28,26 /* clear DR */
mtmsr r0
isync
- rlwinm r3,r3,0,0,19 /* Get page base address */
- li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */
+ rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */
+ li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */
mtctr r4
mr r6,r3
0: dcbst 0,r3 /* Write line to ram */
@@ -638,6 +516,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
mtmsr r10 /* restore DR */
isync
blr
+#endif /* CONFIG_BOOKE */
/*
* Clear pages using the dcbz instruction, which doesn't cause any
@@ -647,18 +526,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
* void clear_pages(void *page, int order) ;
*/
_GLOBAL(clear_pages)
- li r0,4096/L1_CACHE_BYTES
+ li r0,PAGE_SIZE/L1_CACHE_BYTES
slw r0,r0,r4
mtctr r0
-#ifdef CONFIG_8xx
- li r4, 0
-1: stw r4, 0(r3)
- stw r4, 4(r3)
- stw r4, 8(r3)
- stw r4, 12(r3)
-#else
1: dcbz 0,r3
-#endif
addi r3,r3,L1_CACHE_BYTES
bdnz 1b
blr
@@ -683,15 +554,6 @@ _GLOBAL(copy_page)
addi r3,r3,-4
addi r4,r4,-4
-#ifdef CONFIG_8xx
- /* don't use prefetch on 8xx */
- li r0,4096/L1_CACHE_BYTES
- mtctr r0
-1: COPY_16_BYTES
- bdnz 1b
- blr
-
-#else /* not 8xx, we can prefetch */
li r5,4
#if MAX_COPY_PREFETCH > 1
@@ -705,7 +567,7 @@ _GLOBAL(copy_page)
dcbt r5,r4
li r11,L1_CACHE_BYTES+4
#endif /* MAX_COPY_PREFETCH */
- li r0,4096/L1_CACHE_BYTES - MAX_COPY_PREFETCH
+ li r0,PAGE_SIZE/L1_CACHE_BYTES - MAX_COPY_PREFETCH
crclr 4*cr0+eq
2:
mtctr r0
@@ -732,7 +594,6 @@ _GLOBAL(copy_page)
li r0,MAX_COPY_PREFETCH
li r11,4
b 2b
-#endif /* CONFIG_8xx */
/*
* void atomic_clear_mask(atomic_t mask, atomic_t *addr)
@@ -802,54 +663,63 @@ _GLOBAL(__lshrdi3)
or r4,r4,r7 # LSW |= t2
blr
-_GLOBAL(abs)
- srawi r4,r3,31
- xor r3,r3,r4
- sub r3,r3,r4
- blr
-
-_GLOBAL(_get_SP)
- mr r3,r1 /* Close enough */
+/*
+ * 64-bit comparison: __cmpdi2(s64 a, s64 b)
+ * Returns 0 if a < b, 1 if a == b, 2 if a > b.
+ */
+_GLOBAL(__cmpdi2)
+ cmpw r3,r5
+ li r3,1
+ bne 1f
+ cmplw r4,r6
+ beqlr
+1: li r3,0
+ bltlr
+ li r3,2
blr
-
/*
- * Create a kernel thread
- * kernel_thread(fn, arg, flags)
+ * 64-bit comparison: __ucmpdi2(u64 a, u64 b)
+ * Returns 0 if a < b, 1 if a == b, 2 if a > b.
*/
-_GLOBAL(kernel_thread)
- stwu r1,-16(r1)
- stw r30,8(r1)
- stw r31,12(r1)
- mr r30,r3 /* function */
- mr r31,r4 /* argument */
- ori r3,r5,CLONE_VM /* flags */
- oris r3,r3,CLONE_UNTRACED>>16
- li r4,0 /* new sp (unused) */
- li r0,__NR_clone
- sc
- cmpwi 0,r3,0 /* parent or child? */
- bne 1f /* return if parent */
- li r0,0 /* make top-level stack frame */
- stwu r0,-16(r1)
- mtlr r30 /* fn addr in lr */
- mr r3,r31 /* load arg and call fn */
- PPC440EP_ERR42
- blrl
- li r0,__NR_exit /* exit if function returns */
- li r3,0
- sc
-1: lwz r30,8(r1)
- lwz r31,12(r1)
- addi r1,r1,16
+_GLOBAL(__ucmpdi2)
+ cmplw r3,r5
+ li r3,1
+ bne 1f
+ cmplw r4,r6
+ beqlr
+1: li r3,0
+ bltlr
+ li r3,2
blr
-_GLOBAL(execve)
- li r0,__NR_execve
- sc
- bnslr
- neg r3,r3
+_GLOBAL(__bswapdi2)
+ rotlwi r9,r4,8
+ rotlwi r10,r3,8
+ rlwimi r9,r4,24,0,7
+ rlwimi r10,r3,24,0,7
+ rlwimi r9,r4,24,16,23
+ rlwimi r10,r3,24,16,23
+ mr r3,r9
+ mr r4,r10
blr
+_GLOBAL(abs)
+ srawi r4,r3,31
+ xor r3,r3,r4
+ sub r3,r3,r4
+ blr
+
+#ifdef CONFIG_SMP
+_GLOBAL(start_secondary_resume)
+ /* Reset stack */
+ CURRENT_THREAD_INFO(r1, r1)
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ li r3,0
+ stw r3,0(r1) /* Zero the stack frame pointer */
+ bl start_secondary
+ b .
+#endif /* CONFIG_SMP */
+
/*
* This routine is just here to keep GCC happy - sigh...
*/
@@ -866,6 +736,378 @@ relocate_new_kernel:
/* r4 = reboot_code_buffer */
/* r5 = start_address */
+#ifdef CONFIG_FSL_BOOKE
+
+ mr r29, r3
+ mr r30, r4
+ mr r31, r5
+
+#define ENTRY_MAPPING_KEXEC_SETUP
+#include "fsl_booke_entry_mapping.S"
+#undef ENTRY_MAPPING_KEXEC_SETUP
+
+ mr r3, r29
+ mr r4, r30
+ mr r5, r31
+
+ li r0, 0
+#elif defined(CONFIG_44x)
+
+ /* Save our parameters */
+ mr r29, r3
+ mr r30, r4
+ mr r31, r5
+
+#ifdef CONFIG_PPC_47x
+ /* Check for 47x cores */
+ mfspr r3,SPRN_PVR
+ srwi r3,r3,16
+ cmplwi cr0,r3,PVR_476@h
+ beq setup_map_47x
+ cmplwi cr0,r3,PVR_476_ISS@h
+ beq setup_map_47x
+#endif /* CONFIG_PPC_47x */
+
+/*
+ * Code for setting up 1:1 mapping for PPC440x for KEXEC
+ *
+ * We cannot switch off the MMU on PPC44x.
+ * So we:
+ * 1) Invalidate all the mappings except the one we are running from.
+ * 2) Create a tmp mapping for our code in the other address space(TS) and
+ * jump to it. Invalidate the entry we started in.
+ * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS.
+ * 4) Jump to the 1:1 mapping in original TS.
+ * 5) Invalidate the tmp mapping.
+ *
+ * - Based on the kexec support code for FSL BookE
+ *
+ */
+
+ /*
+ * Load the PID with kernel PID (0).
+ * Also load our MSR_IS and TID to MMUCR for TLB search.
+ */
+ li r3, 0
+ mtspr SPRN_PID, r3
+ mfmsr r4
+ andi. r4,r4,MSR_IS@l
+ beq wmmucr
+ oris r3,r3,PPC44x_MMUCR_STS@h
+wmmucr:
+ mtspr SPRN_MMUCR,r3
+ sync
+
+ /*
+ * Invalidate all the TLB entries except the current entry
+ * where we are running from
+ */
+ bl 0f /* Find our address */
+0: mflr r5 /* Make it accessible */
+ tlbsx r23,0,r5 /* Find entry we are in */
+ li r4,0 /* Start at TLB entry 0 */
+ li r3,0 /* Set PAGEID inval value */
+1: cmpw r23,r4 /* Is this our entry? */
+ beq skip /* If so, skip the inval */
+ tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */
+skip:
+ addi r4,r4,1 /* Increment */
+ cmpwi r4,64 /* Are we done? */
+ bne 1b /* If not, repeat */
+ isync
+
+ /* Create a temp mapping and jump to it */
+ andi. r6, r23, 1 /* Find the index to use */
+ addi r24, r6, 1 /* r24 will contain 1 or 2 */
+
+ mfmsr r9 /* get the MSR */
+ rlwinm r5, r9, 27, 31, 31 /* Extract the MSR[IS] */
+ xori r7, r5, 1 /* Use the other address space */
+
+ /* Read the current mapping entries */
+ tlbre r3, r23, PPC44x_TLB_PAGEID
+ tlbre r4, r23, PPC44x_TLB_XLAT
+ tlbre r5, r23, PPC44x_TLB_ATTRIB
+
+ /* Save our current XLAT entry */
+ mr r25, r4
+
+ /* Extract the TLB PageSize */
+ li r10, 1 /* r10 will hold PageSize */
+ rlwinm r11, r3, 0, 24, 27 /* bits 24-27 */
+
+ /* XXX: As of now we use 256M, 4K pages */
+ cmpwi r11, PPC44x_TLB_256M
+ bne tlb_4k
+ rotlwi r10, r10, 28 /* r10 = 256M */
+ b write_out
+tlb_4k:
+ cmpwi r11, PPC44x_TLB_4K
+ bne default
+ rotlwi r10, r10, 12 /* r10 = 4K */
+ b write_out
+default:
+ rotlwi r10, r10, 10 /* r10 = 1K */
+
+write_out:
+ /*
+ * Write out the tmp 1:1 mapping for this code in other address space
+ * Fixup EPN = RPN , TS=other address space
+ */
+ insrwi r3, r7, 1, 23 /* Bit 23 is TS for PAGEID field */
+
+ /* Write out the tmp mapping entries */
+ tlbwe r3, r24, PPC44x_TLB_PAGEID
+ tlbwe r4, r24, PPC44x_TLB_XLAT
+ tlbwe r5, r24, PPC44x_TLB_ATTRIB
+
+ subi r11, r10, 1 /* PageOffset Mask = PageSize - 1 */
+ not r10, r11 /* Mask for PageNum */
+
+ /* Switch to other address space in MSR */
+ insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */
+
+ bl 1f
+1: mflr r8
+ addi r8, r8, (2f-1b) /* Find the target offset */
+
+ /* Jump to the tmp mapping */
+ mtspr SPRN_SRR0, r8
+ mtspr SPRN_SRR1, r9
+ rfi
+
+2:
+ /* Invalidate the entry we were executing from */
+ li r3, 0
+ tlbwe r3, r23, PPC44x_TLB_PAGEID
+
+ /* attribute fields. rwx for SUPERVISOR mode */
+ li r5, 0
+ ori r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
+
+ /* Create 1:1 mapping in 256M pages */
+ xori r7, r7, 1 /* Revert back to Original TS */
+
+ li r8, 0 /* PageNumber */
+ li r6, 3 /* TLB Index, start at 3 */
+
+next_tlb:
+ rotlwi r3, r8, 28 /* Create EPN (bits 0-3) */
+ mr r4, r3 /* RPN = EPN */
+ ori r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */
+ insrwi r3, r7, 1, 23 /* Set TS from r7 */
+
+ tlbwe r3, r6, PPC44x_TLB_PAGEID /* PageID field : EPN, V, SIZE */
+ tlbwe r4, r6, PPC44x_TLB_XLAT /* Address translation : RPN */
+ tlbwe r5, r6, PPC44x_TLB_ATTRIB /* Attributes */
+
+ addi r8, r8, 1 /* Increment PN */
+ addi r6, r6, 1 /* Increment TLB Index */
+ cmpwi r8, 8 /* Are we done ? */
+ bne next_tlb
+ isync
+
+ /* Jump to the new mapping 1:1 */
+ li r9,0
+ insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */
+
+ bl 1f
+1: mflr r8
+ and r8, r8, r11 /* Get our offset within page */
+ addi r8, r8, (2f-1b)
+
+ and r5, r25, r10 /* Get our target PageNum */
+ or r8, r8, r5 /* Target jump address */
+
+ mtspr SPRN_SRR0, r8
+ mtspr SPRN_SRR1, r9
+ rfi
+2:
+ /* Invalidate the tmp entry we used */
+ li r3, 0
+ tlbwe r3, r24, PPC44x_TLB_PAGEID
+ sync
+ b ppc44x_map_done
+
+#ifdef CONFIG_PPC_47x
+
+ /* 1:1 mapping for 47x */
+
+setup_map_47x:
+
+ /*
+ * Load the kernel pid (0) to PID and also to MMUCR[TID].
+ * Also set the MSR IS->MMUCR STS
+ */
+ li r3, 0
+ mtspr SPRN_PID, r3 /* Set PID */
+ mfmsr r4 /* Get MSR */
+ andi. r4, r4, MSR_IS@l /* TS=1? */
+ beq 1f /* If not, leave STS=0 */
+ oris r3, r3, PPC47x_MMUCR_STS@h /* Set STS=1 */
+1: mtspr SPRN_MMUCR, r3 /* Put MMUCR */
+ sync
+
+ /* Find the entry we are running from */
+ bl 2f
+2: mflr r23
+ tlbsx r23, 0, r23
+ tlbre r24, r23, 0 /* TLB Word 0 */
+ tlbre r25, r23, 1 /* TLB Word 1 */
+ tlbre r26, r23, 2 /* TLB Word 2 */
+
+
+ /*
+ * Invalidates all the tlb entries by writing to 256 RPNs(r4)
+ * of 4k page size in all 4 ways (0-3 in r3).
+ * This would invalidate the entire UTLB including the one we are
+ * running from. However the shadow TLB entries would help us
+ * to continue the execution, until we flush them (rfi/isync).
+ */
+ addis r3, 0, 0x8000 /* specify the way */
+ addi r4, 0, 0 /* TLB Word0 = (EPN=0, VALID = 0) */
+ addi r5, 0, 0
+ b clear_utlb_entry
+
+ /* Align the loop to speed things up. from head_44x.S */
+ .align 6
+
+clear_utlb_entry:
+
+ tlbwe r4, r3, 0
+ tlbwe r5, r3, 1
+ tlbwe r5, r3, 2
+ addis r3, r3, 0x2000 /* Increment the way */
+ cmpwi r3, 0
+ bne clear_utlb_entry
+ addis r3, 0, 0x8000
+ addis r4, r4, 0x100 /* Increment the EPN */
+ cmpwi r4, 0
+ bne clear_utlb_entry
+
+ /* Create the entries in the other address space */
+ mfmsr r5
+ rlwinm r7, r5, 27, 31, 31 /* Get the TS (Bit 26) from MSR */
+ xori r7, r7, 1 /* r7 = !TS */
+
+ insrwi r24, r7, 1, 21 /* Change the TS in the saved TLB word 0 */
+
+ /*
+ * write out the TLB entries for the tmp mapping
+ * Use way '0' so that we could easily invalidate it later.
+ */
+ lis r3, 0x8000 /* Way '0' */
+
+ tlbwe r24, r3, 0
+ tlbwe r25, r3, 1
+ tlbwe r26, r3, 2
+
+ /* Update the msr to the new TS */
+ insrwi r5, r7, 1, 26
+
+ bl 1f
+1: mflr r6
+ addi r6, r6, (2f-1b)
+
+ mtspr SPRN_SRR0, r6
+ mtspr SPRN_SRR1, r5
+ rfi
+
+ /*
+ * Now we are in the tmp address space.
+ * Create a 1:1 mapping for 0-2GiB in the original TS.
+ */
+2:
+ li r3, 0
+ li r4, 0 /* TLB Word 0 */
+ li r5, 0 /* TLB Word 1 */
+ li r6, 0
+ ori r6, r6, PPC47x_TLB2_S_RWX /* TLB word 2 */
+
+ li r8, 0 /* PageIndex */
+
+ xori r7, r7, 1 /* revert back to original TS */
+
+write_utlb:
+ rotlwi r5, r8, 28 /* RPN = PageIndex * 256M */
+ /* ERPN = 0 as we don't use memory above 2G */
+
+ mr r4, r5 /* EPN = RPN */
+ ori r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M)
+ insrwi r4, r7, 1, 21 /* Insert the TS to Word 0 */
+
+ tlbwe r4, r3, 0 /* Write out the entries */
+ tlbwe r5, r3, 1
+ tlbwe r6, r3, 2
+ addi r8, r8, 1
+ cmpwi r8, 8 /* Have we completed ? */
+ bne write_utlb
+
+ /* make sure we complete the TLB write up */
+ isync
+
+ /*
+ * Prepare to jump to the 1:1 mapping.
+ * 1) Extract page size of the tmp mapping
+ * DSIZ = TLB_Word0[22:27]
+ * 2) Calculate the physical address of the address
+ * to jump to.
+ */
+ rlwinm r10, r24, 0, 22, 27
+
+ cmpwi r10, PPC47x_TLB0_4K
+ bne 0f
+ li r10, 0x1000 /* r10 = 4k */
+ bl 1f
+
+0:
+ /* Defaults to 256M */
+ lis r10, 0x1000
+
+ bl 1f
+1: mflr r4
+ addi r4, r4, (2f-1b) /* virtual address of 2f */
+
+ subi r11, r10, 1 /* offsetmask = Pagesize - 1 */
+ not r10, r11 /* Pagemask = ~(offsetmask) */
+
+ and r5, r25, r10 /* Physical page */
+ and r6, r4, r11 /* offset within the current page */
+
+ or r5, r5, r6 /* Physical address for 2f */
+
+ /* Switch the TS in MSR to the original one */
+ mfmsr r8
+ insrwi r8, r7, 1, 26
+
+ mtspr SPRN_SRR1, r8
+ mtspr SPRN_SRR0, r5
+ rfi
+
+2:
+ /* Invalidate the tmp mapping */
+ lis r3, 0x8000 /* Way '0' */
+
+ clrrwi r24, r24, 12 /* Clear the valid bit */
+ tlbwe r24, r3, 0
+ tlbwe r25, r3, 1
+ tlbwe r26, r3, 2
+
+ /* Make sure we complete the TLB write and flush the shadow TLB */
+ isync
+
+#endif
+
+ppc44x_map_done:
+
+
+ /* Restore the parameters */
+ mr r3, r29
+ mr r4, r30
+ mr r5, r31
+
+ li r0, 0
+#else
li r0, 0
/*
@@ -882,12 +1124,13 @@ relocate_new_kernel:
rfi
1:
+#endif
/* from this point address translation is turned off */
/* and interrupts are disabled */
/* set a new stack at the bottom of our page... */
/* (not really needed now) */
- addi r1, r4, KEXEC_CONTROL_CODE_SIZE - 8 /* for LR Save+Back Chain */
+ addi r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */
stw r0, 0(r1)
/* Do the copies */
@@ -950,6 +1193,9 @@ relocate_new_kernel:
isync
sync
+ mfspr r3, SPRN_PIR /* current core we are running on */
+ mr r4, r5 /* load physical address of chunk called */
+
/* jump to the entry point, usually the setup routine */
mtlr r5
blrl
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index e3ed21cd3d9..4e314b90c75 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -24,46 +24,32 @@
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
+#include <asm/kexec.h>
+#include <asm/ptrace.h>
.text
-_GLOBAL(get_msr)
- mfmsr r3
- blr
-
-_GLOBAL(get_srr0)
- mfsrr0 r3
- blr
-
-_GLOBAL(get_srr1)
- mfsrr1 r3
- blr
-
-#ifdef CONFIG_IRQSTACKS
_GLOBAL(call_do_softirq)
mflr r0
std r0,16(r1)
- stdu r1,THREAD_SIZE-112(r3)
+ stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
mr r1,r3
- bl .__do_softirq
+ bl __do_softirq
ld r1,0(r1)
ld r0,16(r1)
mtlr r0
blr
-_GLOBAL(call_handle_irq)
- ld r8,0(r7)
+_GLOBAL(call_do_irq)
mflr r0
std r0,16(r1)
- mtctr r8
- stdu r1,THREAD_SIZE-112(r6)
- mr r1,r6
- bctrl
+ stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
+ mr r1,r4
+ bl __do_irq
ld r1,0(r1)
ld r0,16(r1)
mtlr r0
blr
-#endif /* CONFIG_IRQSTACKS */
.section ".toc","aw"
PPC64_CACHES:
@@ -79,8 +65,11 @@ PPC64_CACHES:
* flush all bytes from start through stop-1 inclusive
*/
-_KPROBE(__flush_icache_range)
-
+_KPROBE(flush_icache_range)
+BEGIN_FTR_SECTION
+ PURGE_PREFETCHED_INS
+ blr
+END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
/*
* Flush the data cache to memory
*
@@ -223,6 +212,11 @@ _GLOBAL(__flush_dcache_icache)
* Different systems have different cache line sizes
*/
+BEGIN_FTR_SECTION
+ PURGE_PREFETCHED_INS
+ blr
+END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+
/* Flush the dcache */
ld r7,PPC64_CACHES@toc(r2)
clrrdi r3,r3,PAGE_SHIFT /* Page align */
@@ -246,86 +240,53 @@ _GLOBAL(__flush_dcache_icache)
isync
blr
-/*
- * identify_cpu and calls setup_cpu
- * In: r3 = base of the cpu_specs array
- * r4 = address of cur_cpu_spec
- * r5 = relocation offset
- */
-_GLOBAL(identify_cpu)
- mfpvr r7
-1:
- lwz r8,CPU_SPEC_PVR_MASK(r3)
- and r8,r8,r7
- lwz r9,CPU_SPEC_PVR_VALUE(r3)
- cmplw 0,r9,r8
- beq 1f
- addi r3,r3,CPU_SPEC_ENTRY_SIZE
- b 1b
-1:
- sub r0,r3,r5
- std r0,0(r4)
- ld r4,CPU_SPEC_SETUP(r3)
- cmpdi 0,r4,0
- add r4,r4,r5
- beqlr
- ld r4,0(r4)
- add r4,r4,r5
- mtctr r4
- /* Calling convention for cpu setup is r3=offset, r4=cur_cpu_spec */
- mr r4,r3
- mr r3,r5
- bctr
+_GLOBAL(__bswapdi2)
+ srdi r8,r3,32
+ rlwinm r7,r3,8,0xffffffff
+ rlwimi r7,r3,24,0,7
+ rlwinm r9,r8,8,0xffffffff
+ rlwimi r7,r3,24,16,23
+ rlwimi r9,r8,24,0,7
+ rlwimi r9,r8,24,16,23
+ sldi r7,r7,32
+ or r3,r7,r9
+ blr
-/*
- * do_cpu_ftr_fixups - goes through the list of CPU feature fixups
- * and writes nop's over sections of code that don't apply for this cpu.
- * r3 = data offset (not changed)
- */
-_GLOBAL(do_cpu_ftr_fixups)
- /* Get CPU 0 features */
- LOAD_REG_IMMEDIATE(r6,cur_cpu_spec)
- sub r6,r6,r3
- ld r4,0(r6)
- sub r4,r4,r3
- ld r4,CPU_SPEC_FEATURES(r4)
- /* Get the fixup table */
- LOAD_REG_IMMEDIATE(r6,__start___ftr_fixup)
- sub r6,r6,r3
- LOAD_REG_IMMEDIATE(r7,__stop___ftr_fixup)
- sub r7,r7,r3
- /* Do the fixup */
-1: cmpld r6,r7
- bgelr
- addi r6,r6,32
- ld r8,-32(r6) /* mask */
- and r8,r8,r4
- ld r9,-24(r6) /* value */
- cmpld r8,r9
- beq 1b
- ld r8,-16(r6) /* section begin */
- ld r9,-8(r6) /* section end */
- subf. r9,r8,r9
- beq 1b
- /* write nops over the section of code */
- /* todo: if large section, add a branch at the start of it */
- srwi r9,r9,2
- mtctr r9
- sub r8,r8,r3
- lis r0,0x60000000@h /* nop */
-3: stw r0,0(r8)
- andi. r10,r4,CPU_FTR_SPLIT_ID_CACHE@l
- beq 2f
- dcbst 0,r8 /* suboptimal, but simpler */
- sync
- icbi 0,r8
-2: addi r8,r8,4
- bdnz 3b
- sync /* additional sync needed on g4 */
- isync
- b 1b
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
+_GLOBAL(rmci_on)
+ sync
+ isync
+ li r3,0x100
+ rldicl r3,r3,32,0
+ mfspr r5,SPRN_HID4
+ or r5,r5,r3
+ sync
+ mtspr SPRN_HID4,r5
+ isync
+ slbia
+ isync
+ sync
+ blr
+
+_GLOBAL(rmci_off)
+ sync
+ isync
+ li r3,0x100
+ rldicl r3,r3,32,0
+ mfspr r5,SPRN_HID4
+ andc r5,r5,r3
+ sync
+ mtspr SPRN_HID4,r5
+ isync
+ slbia
+ isync
+ sync
+ blr
+#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE)
+
/*
* Do an IO access in real mode
*/
@@ -389,7 +350,42 @@ _GLOBAL(real_writeb)
blr
#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */
-#ifdef CONFIG_CPU_FREQ_PMAC64
+#ifdef CONFIG_PPC_PASEMI
+
+_GLOBAL(real_205_readb)
+ mfmsr r7
+ ori r0,r7,MSR_DR
+ xori r0,r0,MSR_DR
+ sync
+ mtmsrd r0
+ sync
+ isync
+ LBZCIX(R3,R0,R3)
+ isync
+ mtmsrd r7
+ sync
+ isync
+ blr
+
+_GLOBAL(real_205_writeb)
+ mfmsr r7
+ ori r0,r7,MSR_DR
+ xori r0,r0,MSR_DR
+ sync
+ mtmsrd r0
+ sync
+ isync
+ STBCIX(R3,R0,R4)
+ isync
+ mtmsrd r7
+ sync
+ isync
+ blr
+
+#endif /* CONFIG_PPC_PASEMI */
+
+
+#if defined(CONFIG_CPU_FREQ_PMAC64) || defined(CONFIG_CPU_FREQ_MAPLE)
/*
* SCOM access functions for 970 (FX only for now)
*
@@ -458,117 +454,15 @@ _GLOBAL(scom970_write)
/* restore interrupts */
mtmsrd r5,1
blr
-#endif /* CONFIG_CPU_FREQ_PMAC64 */
-
-
-/*
- * Create a kernel thread
- * kernel_thread(fn, arg, flags)
- */
-_GLOBAL(kernel_thread)
- std r29,-24(r1)
- std r30,-16(r1)
- stdu r1,-STACK_FRAME_OVERHEAD(r1)
- mr r29,r3
- mr r30,r4
- ori r3,r5,CLONE_VM /* flags */
- oris r3,r3,(CLONE_UNTRACED>>16)
- li r4,0 /* new sp (unused) */
- li r0,__NR_clone
- sc
- cmpdi 0,r3,0 /* parent or child? */
- bne 1f /* return if parent */
- li r0,0
- stdu r0,-STACK_FRAME_OVERHEAD(r1)
- ld r2,8(r29)
- ld r29,0(r29)
- mtlr r29 /* fn addr in lr */
- mr r3,r30 /* load arg and call fn */
- blrl
- li r0,__NR_exit /* exit after child exits */
- li r3,0
- sc
-1: addi r1,r1,STACK_FRAME_OVERHEAD
- ld r29,-24(r1)
- ld r30,-16(r1)
- blr
-
-/*
- * disable_kernel_fp()
- * Disable the FPU.
- */
-_GLOBAL(disable_kernel_fp)
- mfmsr r3
- rldicl r0,r3,(63-MSR_FP_LG),1
- rldicl r3,r0,(MSR_FP_LG+1),0
- mtmsrd r3 /* disable use of fpu now */
- isync
- blr
-
-#ifdef CONFIG_ALTIVEC
-
-#if 0 /* this has no callers for now */
-/*
- * disable_kernel_altivec()
- * Disable the VMX.
- */
-_GLOBAL(disable_kernel_altivec)
- mfmsr r3
- rldicl r0,r3,(63-MSR_VEC_LG),1
- rldicl r3,r0,(MSR_VEC_LG+1),0
- mtmsrd r3 /* disable use of VMX now */
- isync
- blr
-#endif /* 0 */
-
-/*
- * giveup_altivec(tsk)
- * Disable VMX for the task given as the argument,
- * and save the vector registers in its thread_struct.
- * Enables the VMX for use in the kernel on return.
- */
-_GLOBAL(giveup_altivec)
- mfmsr r5
- oris r5,r5,MSR_VEC@h
- mtmsrd r5 /* enable use of VMX now */
- isync
- cmpdi 0,r3,0
- beqlr- /* if no previous owner, done */
- addi r3,r3,THREAD /* want THREAD of task */
- ld r5,PT_REGS(r3)
- cmpdi 0,r5,0
- SAVE_32VRS(0,r4,r3)
- mfvscr vr0
- li r4,THREAD_VSCR
- stvx vr0,r4,r3
- beq 1f
- ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
- lis r3,MSR_VEC@h
- andc r4,r4,r3 /* disable FP for previous task */
- std r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#ifndef CONFIG_SMP
- li r5,0
- ld r4,last_task_used_altivec@got(r2)
- std r5,0(r4)
-#endif /* CONFIG_SMP */
- blr
-
-#endif /* CONFIG_ALTIVEC */
-
-_GLOBAL(execve)
- li r0,__NR_execve
- sc
- bnslr
- neg r3,r3
- blr
+#endif /* CONFIG_CPU_FREQ_PMAC64 || CONFIG_CPU_FREQ_MAPLE */
/* kexec_wait(phys_cpu)
*
* wait for the flag to change, indicating this kernel is going away but
* the slave code for the next one is at addresses 0 to 100.
*
- * This is used by all slaves.
+ * This is used by all slaves, even those that did not find a matching
+ * paca in the secondary startup code.
*
* Physical (hardware) cpu id should be in r3.
*/
@@ -600,16 +494,19 @@ kexec_flag:
* note: this is a terminal routine, it does not save lr
*
* get phys id from paca
- * set paca id to -1 to say we got here
* switch to real mode
+ * mark the paca as no longer used
* join other cpus in kexec_wait(phys_id)
*/
_GLOBAL(kexec_smp_wait)
lhz r3,PACAHWCPUID(r13)
- li r4,-1
- sth r4,PACAHWCPUID(r13) /* let others know we left */
bl real_mode
- b .kexec_wait
+
+ li r4,KEXEC_STATE_REAL_MODE
+ stb r4,PACAKEXECSTATE(r13)
+ SYNC
+
+ b kexec_wait
/*
* switch to real mode (turn mmu off)
@@ -644,7 +541,7 @@ _GLOBAL(kexec_sequence)
std r0,16(r1)
/* switch stacks to newstack -- &kexec_stack.stack */
- stdu r1,THREAD_SIZE-112(r3)
+ stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
mr r1,r3
li r0,0
@@ -661,7 +558,7 @@ _GLOBAL(kexec_sequence)
std r26,-48(r1)
std r25,-56(r1)
- stdu r1,-112-64(r1)
+ stdu r1,-STACK_FRAME_OVERHEAD-64(r1)
/* save args into preserved regs */
mr r31,r3 /* newstack (both) */
@@ -679,14 +576,31 @@ _GLOBAL(kexec_sequence)
/* copy dest pages, flush whole dest image */
mr r3,r29
- bl .kexec_copy_flush /* (image) */
+ bl kexec_copy_flush /* (image) */
/* turn off mmu */
bl real_mode
+ /* copy 0x100 bytes starting at start to 0 */
+ li r3,0
+ mr r4,r30 /* start, aka phys mem offset */
+ li r5,0x100
+ li r6,0
+ bl copy_and_flush /* (dest, src, copy limit, start offset) */
+1: /* assume normal blr return */
+
+ /* release other cpus to the new kernel secondary start at 0x60 */
+ mflr r5
+ li r6,1
+ stw r6,kexec_flag-1b(5)
+
/* clear out hardware hash page table and tlb */
- ld r5,0(r27) /* deref function descriptor */
- mtctr r5
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
+ ld r12,0(r27) /* deref function descriptor */
+#else
+ mr r12,r27
+#endif
+ mtctr r12
bctrl /* ppc_md.hpte_clear_all(void); */
/*
@@ -714,22 +628,37 @@ _GLOBAL(kexec_sequence)
* are the boot cpu ?????
* other device tree differences (prop sizes, va vs pa, etc)...
*/
-
- /* copy 0x100 bytes starting at start to 0 */
- li r3,0
- mr r4,r30
- li r5,0x100
- li r6,0
- bl .copy_and_flush /* (dest, src, copy limit, start offset) */
-1: /* assume normal blr return */
-
- /* release other cpus to the new kernel secondary start at 0x60 */
- mflr r5
- li r6,1
- stw r6,kexec_flag-1b(5)
mr r3,r25 # my phys cpu
mr r4,r30 # start, aka phys mem offset
mtlr 4
li r5,0
blr /* image->start(physid, image->start, 0); */
#endif /* CONFIG_KEXEC */
+
+#ifdef CONFIG_MODULES
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+
+#ifdef CONFIG_MODVERSIONS
+.weak __crc_TOC.
+.section "___kcrctab+TOC.","a"
+.globl __kcrctab_TOC.
+__kcrctab_TOC.:
+ .llong __crc_TOC.
+#endif
+
+/*
+ * Export a fake .TOC. since both modpost and depmod will complain otherwise.
+ * Both modpost and depmod strip the leading . so we do the same here.
+ */
+.section "__ksymtab_strings","a"
+__kstrtab_TOC.:
+ .asciz "TOC."
+
+.section "___ksymtab+TOC.","a"
+/* This symbol name is important: it's used by modpost to find exported syms */
+.globl __ksymtab_TOC.
+__ksymtab_TOC.:
+ .llong 0 /* .value */
+ .llong __kstrtab_TOC.
+#endif /* ELFv2 */
+#endif /* MODULES */
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
new file mode 100644
index 00000000000..9547381b631
--- /dev/null
+++ b/arch/powerpc/kernel/module.c
@@ -0,0 +1,79 @@
+/* Kernel module help for powerpc.
+ Copyright (C) 2001, 2003 Rusty Russell IBM Corporation.
+ Copyright (C) 2008 Freescale Semiconductor, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#include <linux/elf.h>
+#include <linux/moduleloader.h>
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+#include <linux/bug.h>
+#include <asm/module.h>
+#include <asm/uaccess.h>
+#include <asm/firmware.h>
+#include <linux/sort.h>
+#include <asm/setup.h>
+
+LIST_HEAD(module_bug_list);
+
+static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ const char *name)
+{
+ char *secstrings;
+ unsigned int i;
+
+ secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+ for (i = 1; i < hdr->e_shnum; i++)
+ if (strcmp(secstrings+sechdrs[i].sh_name, name) == 0)
+ return &sechdrs[i];
+ return NULL;
+}
+
+int module_finalize(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs, struct module *me)
+{
+ const Elf_Shdr *sect;
+
+ /* Apply feature fixups */
+ sect = find_section(hdr, sechdrs, "__ftr_fixup");
+ if (sect != NULL)
+ do_feature_fixups(cur_cpu_spec->cpu_features,
+ (void *)sect->sh_addr,
+ (void *)sect->sh_addr + sect->sh_size);
+
+ sect = find_section(hdr, sechdrs, "__mmu_ftr_fixup");
+ if (sect != NULL)
+ do_feature_fixups(cur_cpu_spec->mmu_features,
+ (void *)sect->sh_addr,
+ (void *)sect->sh_addr + sect->sh_size);
+
+#ifdef CONFIG_PPC64
+ sect = find_section(hdr, sechdrs, "__fw_ftr_fixup");
+ if (sect != NULL)
+ do_feature_fixups(powerpc_firmware_features,
+ (void *)sect->sh_addr,
+ (void *)sect->sh_addr + sect->sh_size);
+#endif
+
+ sect = find_section(hdr, sechdrs, "__lwsync_fixup");
+ if (sect != NULL)
+ do_lwsync_fixups(cur_cpu_spec->cpu_features,
+ (void *)sect->sh_addr,
+ (void *)sect->sh_addr + sect->sh_size);
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index 92f4e5f64f0..6cff040bf45 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -22,7 +22,11 @@
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/kernel.h>
+#include <linux/ftrace.h>
#include <linux/cache.h>
+#include <linux/bug.h>
+#include <linux/sort.h>
+#include <asm/setup.h>
#if 0
#define DEBUGP printk
@@ -30,43 +34,67 @@
#define DEBUGP(fmt , ...)
#endif
-LIST_HEAD(module_bug_list);
-
-void *module_alloc(unsigned long size)
+/* Count how many different relocations (different symbol, different
+ addend) */
+static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num)
{
- if (size == 0)
- return NULL;
- return vmalloc(size);
+ unsigned int i, r_info, r_addend, _count_relocs;
+
+ _count_relocs = 0;
+ r_info = 0;
+ r_addend = 0;
+ for (i = 0; i < num; i++)
+ /* Only count 24-bit relocs, others don't need stubs */
+ if (ELF32_R_TYPE(rela[i].r_info) == R_PPC_REL24 &&
+ (r_info != ELF32_R_SYM(rela[i].r_info) ||
+ r_addend != rela[i].r_addend)) {
+ _count_relocs++;
+ r_info = ELF32_R_SYM(rela[i].r_info);
+ r_addend = rela[i].r_addend;
+ }
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+ _count_relocs++; /* add one for ftrace_caller */
+#endif
+ return _count_relocs;
}
-/* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
+static int relacmp(const void *_x, const void *_y)
{
- vfree(module_region);
- /* FIXME: If module_region == mod->init_region, trim exception
- table entries. */
+ const Elf32_Rela *x, *y;
+
+ y = (Elf32_Rela *)_x;
+ x = (Elf32_Rela *)_y;
+
+ /* Compare the entire r_info (as opposed to ELF32_R_SYM(r_info) only) to
+ * make the comparison cheaper/faster. It won't affect the sorting or
+ * the counting algorithms' performance
+ */
+ if (x->r_info < y->r_info)
+ return -1;
+ else if (x->r_info > y->r_info)
+ return 1;
+ else if (x->r_addend < y->r_addend)
+ return -1;
+ else if (x->r_addend > y->r_addend)
+ return 1;
+ else
+ return 0;
}
-/* Count how many different relocations (different symbol, different
- addend) */
-static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num)
+static void relaswap(void *_x, void *_y, int size)
{
- unsigned int i, j, ret = 0;
-
- /* Sure, this is order(n^2), but it's usually short, and not
- time critical */
- for (i = 0; i < num; i++) {
- for (j = 0; j < i; j++) {
- /* If this addend appeared before, it's
- already been counted */
- if (ELF32_R_SYM(rela[i].r_info)
- == ELF32_R_SYM(rela[j].r_info)
- && rela[i].r_addend == rela[j].r_addend)
- break;
- }
- if (j == i) ret++;
+ uint32_t *x, *y, tmp;
+ int i;
+
+ y = (uint32_t *)_x;
+ x = (uint32_t *)_y;
+
+ for (i = 0; i < sizeof(Elf32_Rela) / sizeof(uint32_t); i++) {
+ tmp = x[i];
+ x[i] = y[i];
+ y[i] = tmp;
}
- return ret;
}
/* Get the potential trampolines size required of the init and
@@ -97,6 +125,16 @@ static unsigned long get_plt_size(const Elf32_Ehdr *hdr,
DEBUGP("Ptr: %p. Number: %u\n",
(void *)hdr + sechdrs[i].sh_offset,
sechdrs[i].sh_size / sizeof(Elf32_Rela));
+
+ /* Sort the relocation information based on a symbol and
+ * addend key. This is a stable O(n*log n) complexity
+ * alogrithm but it will reduce the complexity of
+ * count_relocs() to linear complexity O(n)
+ */
+ sort((void *)hdr + sechdrs[i].sh_offset,
+ sechdrs[i].sh_size / sizeof(Elf32_Rela),
+ sizeof(Elf32_Rela), relacmp, relaswap);
+
ret += count_relocs((void *)hdr
+ sechdrs[i].sh_offset,
sechdrs[i].sh_size
@@ -135,21 +173,10 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr,
return 0;
}
-int apply_relocate(Elf32_Shdr *sechdrs,
- const char *strtab,
- unsigned int symindex,
- unsigned int relsec,
- struct module *module)
-{
- printk(KERN_ERR "%s: Non-ADD RELOCATION unsupported\n",
- module->name);
- return -ENOEXEC;
-}
-
static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val)
{
- if (entry->jump[0] == 0x3d600000 + ((val + 0x8000) >> 16)
- && entry->jump[1] == 0x396b0000 + (val & 0xffff))
+ if (entry->jump[0] == 0x3d800000 + ((val + 0x8000) >> 16)
+ && entry->jump[1] == 0x398c0000 + (val & 0xffff))
return 1;
return 0;
}
@@ -176,10 +203,9 @@ static uint32_t do_plt_call(void *location,
entry++;
}
- /* Stolen from Paul Mackerras as well... */
- entry->jump[0] = 0x3d600000+((val+0x8000)>>16); /* lis r11,sym@ha */
- entry->jump[1] = 0x396b0000 + (val&0xffff); /* addi r11,r11,sym@l*/
- entry->jump[2] = 0x7d6903a6; /* mtctr r11 */
+ entry->jump[0] = 0x3d800000+((val+0x8000)>>16); /* lis r12,sym@ha */
+ entry->jump[1] = 0x398c0000 + (val&0xffff); /* addi r12,r12,sym@l*/
+ entry->jump[2] = 0x7d8903a6; /* mtctr r12 */
entry->jump[3] = 0x4e800420; /* bctr */
DEBUGP("Initialized plt for 0x%x at %p\n", val, entry);
@@ -221,7 +247,12 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
/* Low half of the symbol */
*(uint16_t *)location = value;
break;
-
+
+ case R_PPC_ADDR16_HI:
+ /* Higher half of the symbol */
+ *(uint16_t *)location = (value >> 16);
+ break;
+
case R_PPC_ADDR16_HA:
/* Sign-adjusted lower 16 bits: PPC ELF ABI says:
(((x >> 16) + ((x & 0x8000) ? 1 : 0))) & 0xFFFF.
@@ -266,55 +297,11 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
return -ENOEXEC;
}
}
+#ifdef CONFIG_DYNAMIC_FTRACE
+ module->arch.tramp =
+ do_plt_call(module->module_core,
+ (unsigned long)ftrace_caller,
+ sechdrs, module);
+#endif
return 0;
}
-
-int module_finalize(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs,
- struct module *me)
-{
- char *secstrings;
- unsigned int i;
-
- me->arch.bug_table = NULL;
- me->arch.num_bugs = 0;
-
- /* Find the __bug_table section, if present */
- secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
- for (i = 1; i < hdr->e_shnum; i++) {
- if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table"))
- continue;
- me->arch.bug_table = (void *) sechdrs[i].sh_addr;
- me->arch.num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry);
- break;
- }
-
- /*
- * Strictly speaking this should have a spinlock to protect against
- * traversals, but since we only traverse on BUG()s, a spinlock
- * could potentially lead to deadlock and thus be counter-productive.
- */
- list_add(&me->arch.bug_list, &module_bug_list);
-
- return 0;
-}
-
-void module_arch_cleanup(struct module *mod)
-{
- list_del(&mod->arch.bug_list);
-}
-
-struct bug_entry *module_find_bug(unsigned long bugaddr)
-{
- struct mod_arch_specific *mod;
- unsigned int i;
- struct bug_entry *bug;
-
- list_for_each_entry(mod, &module_bug_list, bug_list) {
- bug = mod->bug_table;
- for (i = 0; i < mod->num_bugs; ++i, ++bug)
- if (bugaddr == bug->bug_addr)
- return bug;
- }
- return NULL;
-}
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index ba34001fca8..d807ee626af 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -20,8 +20,14 @@
#include <linux/moduleloader.h>
#include <linux/err.h>
#include <linux/vmalloc.h>
+#include <linux/ftrace.h>
+#include <linux/bug.h>
+#include <linux/uaccess.h>
#include <asm/module.h>
-#include <asm/uaccess.h>
+#include <asm/firmware.h>
+#include <asm/code-patching.h>
+#include <linux/sort.h>
+#include <asm/setup.h>
/* FIXME: We don't do .init separately. To do this, we'd need to have
a separate r2 value in the init and core section, and stub between
@@ -36,82 +42,230 @@
#define DEBUGP(fmt , ...)
#endif
-/* There's actually a third entry here, but it's unused */
-struct ppc64_opd_entry
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define R2_STACK_OFFSET 24
+
+/* An address is simply the address of the function. */
+typedef unsigned long func_desc_t;
+
+static func_desc_t func_desc(unsigned long addr)
{
- unsigned long funcaddr;
- unsigned long r2;
-};
+ return addr;
+}
+static unsigned long func_addr(unsigned long addr)
+{
+ return addr;
+}
+static unsigned long stub_func_addr(func_desc_t func)
+{
+ return func;
+}
+
+/* PowerPC64 specific values for the Elf64_Sym st_other field. */
+#define STO_PPC64_LOCAL_BIT 5
+#define STO_PPC64_LOCAL_MASK (7 << STO_PPC64_LOCAL_BIT)
+#define PPC64_LOCAL_ENTRY_OFFSET(other) \
+ (((1 << (((other) & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT)) >> 2) << 2)
+
+static unsigned int local_entry_offset(const Elf64_Sym *sym)
+{
+ /* sym->st_other indicates offset to local entry point
+ * (otherwise it will assume r12 is the address of the start
+ * of function and try to derive r2 from it). */
+ return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
+}
+#else
+#define R2_STACK_OFFSET 40
+
+/* An address is address of the OPD entry, which contains address of fn. */
+typedef struct ppc64_opd_entry func_desc_t;
+
+static func_desc_t func_desc(unsigned long addr)
+{
+ return *(struct ppc64_opd_entry *)addr;
+}
+static unsigned long func_addr(unsigned long addr)
+{
+ return func_desc(addr).funcaddr;
+}
+static unsigned long stub_func_addr(func_desc_t func)
+{
+ return func.funcaddr;
+}
+static unsigned int local_entry_offset(const Elf64_Sym *sym)
+{
+ return 0;
+}
+#endif
/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
the kernel itself). But on PPC64, these need to be used for every
jump, actually, to reset r2 (TOC+0x8000). */
struct ppc64_stub_entry
{
- /* 28 byte jump instruction sequence (7 instructions) */
- unsigned char jump[28];
- unsigned char unused[4];
+ /* 28 byte jump instruction sequence (7 instructions). We only
+ * need 6 instructions on ABIv2 but we always allocate 7 so
+ * so we don't have to modify the trampoline load instruction. */
+ u32 jump[7];
+ u32 unused;
/* Data for the above code */
- struct ppc64_opd_entry opd;
+ func_desc_t funcdata;
};
-/* We use a stub to fix up r2 (TOC ptr) and to jump to the (external)
- function which may be more than 24-bits away. We could simply
- patch the new r2 value and function pointer into the stub, but it's
- significantly shorter to put these values at the end of the stub
- code, and patch the stub address (32-bits relative to the TOC ptr,
- r2) into the stub. */
-static struct ppc64_stub_entry ppc64_stub =
-{ .jump = {
- 0x3d, 0x82, 0x00, 0x00, /* addis r12,r2, <high> */
- 0x39, 0x8c, 0x00, 0x00, /* addi r12,r12, <low> */
+/*
+ * PPC64 uses 24 bit jumps, but we need to jump into other modules or
+ * the kernel which may be further. So we jump to a stub.
+ *
+ * For ELFv1 we need to use this to set up the new r2 value (aka TOC
+ * pointer). For ELFv2 it's the callee's responsibility to set up the
+ * new r2, but for both we need to save the old r2.
+ *
+ * We could simply patch the new r2 value and function pointer into
+ * the stub, but it's significantly shorter to put these values at the
+ * end of the stub code, and patch the stub address (32-bits relative
+ * to the TOC ptr, r2) into the stub.
+ */
+
+static u32 ppc64_stub_insns[] = {
+ 0x3d620000, /* addis r11,r2, <high> */
+ 0x396b0000, /* addi r11,r11, <low> */
/* Save current r2 value in magic place on the stack. */
- 0xf8, 0x41, 0x00, 0x28, /* std r2,40(r1) */
- 0xe9, 0x6c, 0x00, 0x20, /* ld r11,32(r12) */
- 0xe8, 0x4c, 0x00, 0x28, /* ld r2,40(r12) */
- 0x7d, 0x69, 0x03, 0xa6, /* mtctr r11 */
- 0x4e, 0x80, 0x04, 0x20 /* bctr */
-} };
+ 0xf8410000|R2_STACK_OFFSET, /* std r2,R2_STACK_OFFSET(r1) */
+ 0xe98b0020, /* ld r12,32(r11) */
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
+ /* Set up new r2 from function descriptor */
+ 0xe84b0028, /* ld r2,40(r11) */
+#endif
+ 0x7d8903a6, /* mtctr r12 */
+ 0x4e800420 /* bctr */
+};
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+static u32 ppc64_stub_mask[] = {
+ 0xffff0000,
+ 0xffff0000,
+ 0xffffffff,
+ 0xffffffff,
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
+ 0xffffffff,
+#endif
+ 0xffffffff,
+ 0xffffffff
+};
+
+bool is_module_trampoline(u32 *p)
+{
+ unsigned int i;
+ u32 insns[ARRAY_SIZE(ppc64_stub_insns)];
+
+ BUILD_BUG_ON(sizeof(ppc64_stub_insns) != sizeof(ppc64_stub_mask));
+
+ if (probe_kernel_read(insns, p, sizeof(insns)))
+ return -EFAULT;
+
+ for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) {
+ u32 insna = insns[i];
+ u32 insnb = ppc64_stub_insns[i];
+ u32 mask = ppc64_stub_mask[i];
+
+ if ((insna & mask) != (insnb & mask))
+ return false;
+ }
+
+ return true;
+}
+
+int module_trampoline_target(struct module *mod, u32 *trampoline,
+ unsigned long *target)
+{
+ u32 buf[2];
+ u16 upper, lower;
+ long offset;
+ void *toc_entry;
+
+ if (probe_kernel_read(buf, trampoline, sizeof(buf)))
+ return -EFAULT;
+
+ upper = buf[0] & 0xffff;
+ lower = buf[1] & 0xffff;
+
+ /* perform the addis/addi, both signed */
+ offset = ((short)upper << 16) + (short)lower;
+
+ /*
+ * Now get the address this trampoline jumps to. This
+ * is always 32 bytes into our trampoline stub.
+ */
+ toc_entry = (void *)mod->arch.toc + offset + 32;
+
+ if (probe_kernel_read(target, toc_entry, sizeof(*target)))
+ return -EFAULT;
+
+ return 0;
+}
+
+#endif
/* Count how many different 24-bit relocations (different symbol,
different addend) */
static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
{
- unsigned int i, j, ret = 0;
+ unsigned int i, r_info, r_addend, _count_relocs;
/* FIXME: Only count external ones --RR */
- /* Sure, this is order(n^2), but it's usually short, and not
- time critical */
- for (i = 0; i < num; i++) {
+ _count_relocs = 0;
+ r_info = 0;
+ r_addend = 0;
+ for (i = 0; i < num; i++)
/* Only count 24-bit relocs, others don't need stubs */
- if (ELF64_R_TYPE(rela[i].r_info) != R_PPC_REL24)
- continue;
- for (j = 0; j < i; j++) {
- /* If this addend appeared before, it's
- already been counted */
- if (rela[i].r_info == rela[j].r_info
- && rela[i].r_addend == rela[j].r_addend)
- break;
+ if (ELF64_R_TYPE(rela[i].r_info) == R_PPC_REL24 &&
+ (r_info != ELF64_R_SYM(rela[i].r_info) ||
+ r_addend != rela[i].r_addend)) {
+ _count_relocs++;
+ r_info = ELF64_R_SYM(rela[i].r_info);
+ r_addend = rela[i].r_addend;
}
- if (j == i) ret++;
- }
- return ret;
+
+ return _count_relocs;
}
-void *module_alloc(unsigned long size)
+static int relacmp(const void *_x, const void *_y)
{
- if (size == 0)
- return NULL;
+ const Elf64_Rela *x, *y;
- return vmalloc_exec(size);
+ y = (Elf64_Rela *)_x;
+ x = (Elf64_Rela *)_y;
+
+ /* Compare the entire r_info (as opposed to ELF64_R_SYM(r_info) only) to
+ * make the comparison cheaper/faster. It won't affect the sorting or
+ * the counting algorithms' performance
+ */
+ if (x->r_info < y->r_info)
+ return -1;
+ else if (x->r_info > y->r_info)
+ return 1;
+ else if (x->r_addend < y->r_addend)
+ return -1;
+ else if (x->r_addend > y->r_addend)
+ return 1;
+ else
+ return 0;
}
-/* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
+static void relaswap(void *_x, void *_y, int size)
{
- vfree(module_region);
- /* FIXME: If module_region == mod->init_region, trim exception
- table entries. */
+ uint64_t *x, *y, tmp;
+ int i;
+
+ y = (uint64_t *)_x;
+ x = (uint64_t *)_y;
+
+ for (i = 0; i < sizeof(Elf64_Rela) / sizeof(uint64_t); i++) {
+ tmp = x[i];
+ x[i] = y[i];
+ y[i] = tmp;
+ }
}
/* Get size of potential trampolines required. */
@@ -129,27 +283,52 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
DEBUGP("Ptr: %p. Number: %lu\n",
(void *)sechdrs[i].sh_addr,
sechdrs[i].sh_size / sizeof(Elf64_Rela));
+
+ /* Sort the relocation information based on a symbol and
+ * addend key. This is a stable O(n*log n) complexity
+ * alogrithm but it will reduce the complexity of
+ * count_relocs() to linear complexity O(n)
+ */
+ sort((void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size / sizeof(Elf64_Rela),
+ sizeof(Elf64_Rela), relacmp, relaswap);
+
relocs += count_relocs((void *)sechdrs[i].sh_addr,
sechdrs[i].sh_size
/ sizeof(Elf64_Rela));
}
}
+#ifdef CONFIG_DYNAMIC_FTRACE
+ /* make the trampoline to the ftrace_caller */
+ relocs++;
+#endif
+
DEBUGP("Looks like a total of %lu stubs, max\n", relocs);
return relocs * sizeof(struct ppc64_stub_entry);
}
+/* Still needed for ELFv2, for .TOC. */
static void dedotify_versions(struct modversion_info *vers,
unsigned long size)
{
struct modversion_info *end;
for (end = (void *)vers + size; vers < end; vers++)
- if (vers->name[0] == '.')
+ if (vers->name[0] == '.') {
memmove(vers->name, vers->name+1, strlen(vers->name));
+#ifdef ARCH_RELOCATES_KCRCTAB
+ /* The TOC symbol has no CRC computed. To avoid CRC
+ * check failing, we must force it to the expected
+ * value (see CRC check in module.c).
+ */
+ if (!strcmp(vers->name, "TOC."))
+ vers->crc = -(unsigned long)reloc_start;
+#endif
+ }
}
-/* Undefined symbols which refer to .funcname, hack to funcname */
+/* Undefined symbols which refer to .funcname, hack to funcname (or .TOC.) */
static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
{
unsigned int i;
@@ -163,6 +342,24 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
}
}
+static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs,
+ const char *strtab,
+ unsigned int symindex)
+{
+ unsigned int i, numsyms;
+ Elf64_Sym *syms;
+
+ syms = (Elf64_Sym *)sechdrs[symindex].sh_addr;
+ numsyms = sechdrs[symindex].sh_size / sizeof(Elf64_Sym);
+
+ for (i = 1; i < numsyms; i++) {
+ if (syms[i].st_shndx == SHN_UNDEF
+ && strcmp(strtab + syms[i].st_name, "TOC.") == 0)
+ return &syms[i];
+ }
+ return NULL;
+}
+
int module_frob_arch_sections(Elf64_Ehdr *hdr,
Elf64_Shdr *sechdrs,
char *secstrings,
@@ -209,16 +406,6 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
return 0;
}
-int apply_relocate(Elf64_Shdr *sechdrs,
- const char *strtab,
- unsigned int symindex,
- unsigned int relsec,
- struct module *me)
-{
- printk(KERN_ERR "%s: Non-ADD RELOCATION unsupported\n", me->name);
- return -ENOEXEC;
-}
-
/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
gives the value maximum span in an instruction which uses a signed
offset) */
@@ -237,16 +424,12 @@ static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me)
/* Patch stub to reference function and correct r2 value. */
static inline int create_stub(Elf64_Shdr *sechdrs,
struct ppc64_stub_entry *entry,
- struct ppc64_opd_entry *opd,
+ unsigned long addr,
struct module *me)
{
- Elf64_Half *loc1, *loc2;
long reladdr;
- *entry = ppc64_stub;
-
- loc1 = (Elf64_Half *)&entry->jump[2];
- loc2 = (Elf64_Half *)&entry->jump[6];
+ memcpy(entry->jump, ppc64_stub_insns, sizeof(ppc64_stub_insns));
/* Stub uses address relative to r2. */
reladdr = (unsigned long)entry - my_r2(sechdrs, me);
@@ -257,35 +440,33 @@ static inline int create_stub(Elf64_Shdr *sechdrs,
}
DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr);
- *loc1 = PPC_HA(reladdr);
- *loc2 = PPC_LO(reladdr);
- entry->opd.funcaddr = opd->funcaddr;
- entry->opd.r2 = opd->r2;
+ entry->jump[0] |= PPC_HA(reladdr);
+ entry->jump[1] |= PPC_LO(reladdr);
+ entry->funcdata = func_desc(addr);
return 1;
}
-/* Create stub to jump to function described in this OPD: we need the
+/* Create stub to jump to function described in this OPD/ptr: we need the
stub to set up the TOC ptr (r2) for the function. */
static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
- unsigned long opdaddr,
+ unsigned long addr,
struct module *me)
{
struct ppc64_stub_entry *stubs;
- struct ppc64_opd_entry *opd = (void *)opdaddr;
unsigned int i, num_stubs;
num_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stubs);
/* Find this stub, or if that fails, the next avail. entry */
stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr;
- for (i = 0; stubs[i].opd.funcaddr; i++) {
+ for (i = 0; stub_func_addr(stubs[i].funcdata); i++) {
BUG_ON(i >= num_stubs);
- if (stubs[i].opd.funcaddr == opd->funcaddr)
+ if (stub_func_addr(stubs[i].funcdata) == func_addr(addr))
return (unsigned long)&stubs[i];
}
- if (!create_stub(sechdrs, &stubs[i], opd, me))
+ if (!create_stub(sechdrs, &stubs[i], addr, me))
return 0;
return (unsigned long)&stubs[i];
@@ -295,12 +476,13 @@ static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
restore r2. */
static int restore_r2(u32 *instruction, struct module *me)
{
- if (*instruction != 0x60000000) {
+ if (*instruction != PPC_INST_NOP) {
printk("%s: Expect noop after relocate, got %08x\n",
me->name, *instruction);
return 0;
}
- *instruction = 0xe8410028; /* ld r2,40(r1) */
+ /* ld r2,R2_STACK_OFFSET(r1) */
+ *instruction = 0xe8410000 | R2_STACK_OFFSET;
return 1;
}
@@ -318,6 +500,17 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
DEBUGP("Applying ADD relocate section %u to %u\n", relsec,
sechdrs[relsec].sh_info);
+
+ /* First time we're called, we can fix up .TOC. */
+ if (!me->arch.toc_fixed) {
+ sym = find_dot_toc(sechdrs, strtab, symindex);
+ /* It's theoretically possible that a module doesn't want a
+ * .TOC. so don't fail it just for that. */
+ if (sym)
+ sym->st_value = my_r2(sechdrs, me);
+ me->arch.toc_fixed = true;
+ }
+
for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
/* This is where to make the change */
location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
@@ -339,7 +532,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
/* Simply set it */
*(u32 *)location = value;
break;
-
+
case R_PPC64_ADDR64:
/* Simply set it */
*(unsigned long *)location = value;
@@ -362,6 +555,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
| (value & 0xffff);
break;
+ case R_PPC64_TOC16_LO:
+ /* Subtract TOC pointer */
+ value -= my_r2(sechdrs, me);
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xffff)
+ | (value & 0xffff);
+ break;
+
case R_PPC64_TOC16_DS:
/* Subtract TOC pointer */
value -= my_r2(sechdrs, me);
@@ -375,6 +576,28 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
| (value & 0xfffc);
break;
+ case R_PPC64_TOC16_LO_DS:
+ /* Subtract TOC pointer */
+ value -= my_r2(sechdrs, me);
+ if ((value & 3) != 0) {
+ printk("%s: bad TOC16_LO_DS relocation (%lu)\n",
+ me->name, value);
+ return -ENOEXEC;
+ }
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xfffc)
+ | (value & 0xfffc);
+ break;
+
+ case R_PPC64_TOC16_HA:
+ /* Subtract TOC pointer */
+ value -= my_r2(sechdrs, me);
+ value = ((value + 0x8000) >> 16);
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xffff)
+ | (value & 0xffff);
+ break;
+
case R_PPC_REL24:
/* FIXME: Handle weak symbols here --RR */
if (sym->st_shndx == SHN_UNDEF) {
@@ -384,7 +607,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
return -ENOENT;
if (!restore_r2((u32 *)location + 1, me))
return -ENOEXEC;
- }
+ } else
+ value += local_entry_offset(sym);
/* Convert value to relative */
value -= (unsigned long)location;
@@ -395,11 +619,41 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
}
/* Only replace bits 2 through 26 */
- *(uint32_t *)location
+ *(uint32_t *)location
= (*(uint32_t *)location & ~0x03fffffc)
| (value & 0x03fffffc);
break;
+ case R_PPC64_REL64:
+ /* 64 bits relative (used by features fixups) */
+ *location = value - (unsigned long)location;
+ break;
+
+ case R_PPC64_TOCSAVE:
+ /*
+ * Marker reloc indicates we don't have to save r2.
+ * That would only save us one instruction, so ignore
+ * it.
+ */
+ break;
+
+ case R_PPC64_REL16_HA:
+ /* Subtract location pointer */
+ value -= (unsigned long)location;
+ value = ((value + 0x8000) >> 16);
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xffff)
+ | (value & 0xffff);
+ break;
+
+ case R_PPC64_REL16_LO:
+ /* Subtract location pointer */
+ value -= (unsigned long)location;
+ *((uint16_t *) location)
+ = (*((uint16_t *) location) & ~0xffff)
+ | (value & 0xffff);
+ break;
+
default:
printk("%s: Unknown ADD relocation: %lu\n",
me->name,
@@ -408,56 +662,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
}
}
- return 0;
-}
-
-LIST_HEAD(module_bug_list);
-
-int module_finalize(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs, struct module *me)
-{
- char *secstrings;
- unsigned int i;
-
- me->arch.bug_table = NULL;
- me->arch.num_bugs = 0;
-
- /* Find the __bug_table section, if present */
- secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
- for (i = 1; i < hdr->e_shnum; i++) {
- if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table"))
- continue;
- me->arch.bug_table = (void *) sechdrs[i].sh_addr;
- me->arch.num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry);
- break;
- }
-
- /*
- * Strictly speaking this should have a spinlock to protect against
- * traversals, but since we only traverse on BUG()s, a spinlock
- * could potentially lead to deadlock and thus be counter-productive.
- */
- list_add(&me->arch.bug_list, &module_bug_list);
+#ifdef CONFIG_DYNAMIC_FTRACE
+ me->arch.toc = my_r2(sechdrs, me);
+ me->arch.tramp = stub_for_addr(sechdrs,
+ (unsigned long)ftrace_caller,
+ me);
+#endif
return 0;
}
-
-void module_arch_cleanup(struct module *mod)
-{
- list_del(&mod->arch.bug_list);
-}
-
-struct bug_entry *module_find_bug(unsigned long bugaddr)
-{
- struct mod_arch_specific *mod;
- unsigned int i;
- struct bug_entry *bug;
-
- list_for_each_entry(mod, &module_bug_list, bug_list) {
- bug = mod->bug_table;
- for (i = 0; i < mod->num_bugs; ++i, ++bug)
- if (bugaddr == bug->bug_addr)
- return bug;
- }
- return NULL;
-}
diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c
new file mode 100644
index 00000000000..8bbc12d20f5
--- /dev/null
+++ b/arch/powerpc/kernel/msi.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2006-2007, Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
+
+#include <asm/machdep.h>
+
+int arch_msi_check_device(struct pci_dev* dev, int nvec, int type)
+{
+ if (!ppc_md.setup_msi_irqs || !ppc_md.teardown_msi_irqs) {
+ pr_debug("msi: Platform doesn't provide MSI callbacks.\n");
+ return -ENOSYS;
+ }
+
+ /* PowerPC doesn't support multiple MSI yet */
+ if (type == PCI_CAP_ID_MSI && nvec > 1)
+ return 1;
+
+ if (ppc_md.msi_check_device) {
+ pr_debug("msi: Using platform check routine.\n");
+ return ppc_md.msi_check_device(dev, nvec, type);
+ }
+
+ return 0;
+}
+
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+ return ppc_md.setup_msi_irqs(dev, nvec, type);
+}
+
+void arch_teardown_msi_irqs(struct pci_dev *dev)
+{
+ ppc_md.teardown_msi_irqs(dev);
+}
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 6960f090991..28b898e6818 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -34,26 +34,26 @@
#undef DEBUG_NVRAM
-static int nvram_scan_partitions(void);
-static int nvram_setup_partition(void);
-static int nvram_create_os_partition(void);
-static int nvram_remove_os_partition(void);
-
-static struct nvram_partition * nvram_part;
-static long nvram_error_log_index = -1;
-static long nvram_error_log_size = 0;
-
-int no_logging = 1; /* Until we initialize everything,
- * make sure we don't try logging
- * anything */
-
-extern volatile int error_log_cnt;
+#define NVRAM_HEADER_LEN sizeof(struct nvram_header)
+#define NVRAM_BLOCK_LEN NVRAM_HEADER_LEN
+
+/* If change this size, then change the size of NVNAME_LEN */
+struct nvram_header {
+ unsigned char signature;
+ unsigned char checksum;
+ unsigned short length;
+ /* Terminating null required only for names < 12 chars. */
+ char name[12];
+};
-struct err_log_info {
- int error_type;
- unsigned int seq_num;
+struct nvram_partition {
+ struct list_head partition;
+ struct nvram_header header;
+ unsigned int index;
};
+static LIST_HEAD(nvram_partitions);
+
static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin)
{
int size;
@@ -84,22 +84,30 @@ static ssize_t dev_nvram_read(struct file *file, char __user *buf,
char *tmp = NULL;
ssize_t size;
- ret = -ENODEV;
- if (!ppc_md.nvram_size)
+ if (!ppc_md.nvram_size) {
+ ret = -ENODEV;
goto out;
+ }
- ret = 0;
size = ppc_md.nvram_size();
- if (*ppos >= size || size < 0)
+ if (size < 0) {
+ ret = size;
goto out;
+ }
+
+ if (*ppos >= size) {
+ ret = 0;
+ goto out;
+ }
count = min_t(size_t, count, size - *ppos);
count = min(count, PAGE_SIZE);
- ret = -ENOMEM;
tmp = kmalloc(count, GFP_KERNEL);
- if (!tmp)
+ if (!tmp) {
+ ret = -ENOMEM;
goto out;
+ }
ret = ppc_md.nvram_read(tmp, count, ppos);
if (ret <= 0)
@@ -150,8 +158,8 @@ out:
}
-static int dev_nvram_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
+static long dev_nvram_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
{
switch(cmd) {
#ifdef CONFIG_PPC_PMAC
@@ -179,12 +187,12 @@ static int dev_nvram_ioctl(struct inode *inode, struct file *file,
}
}
-struct file_operations nvram_fops = {
- .owner = THIS_MODULE,
- .llseek = dev_nvram_llseek,
- .read = dev_nvram_read,
- .write = dev_nvram_write,
- .ioctl = dev_nvram_ioctl,
+const struct file_operations nvram_fops = {
+ .owner = THIS_MODULE,
+ .llseek = dev_nvram_llseek,
+ .read = dev_nvram_read,
+ .write = dev_nvram_write,
+ .unlocked_ioctl = dev_nvram_ioctl,
};
static struct miscdevice nvram_dev = {
@@ -195,16 +203,14 @@ static struct miscdevice nvram_dev = {
#ifdef DEBUG_NVRAM
-static void nvram_print_partitions(char * label)
+static void __init nvram_print_partitions(char * label)
{
- struct list_head * p;
struct nvram_partition * tmp_part;
printk(KERN_WARNING "--------%s---------\n", label);
printk(KERN_WARNING "indx\t\tsig\tchks\tlen\tname\n");
- list_for_each(p, &nvram_part->partition) {
- tmp_part = list_entry(p, struct nvram_partition, partition);
- printk(KERN_WARNING "%4d \t%02x\t%02x\t%d\t%s\n",
+ list_for_each_entry(tmp_part, &nvram_partitions, partition) {
+ printk(KERN_WARNING "%4d \t%02x\t%02x\t%d\t%12.12s\n",
tmp_part->index, tmp_part->header.signature,
tmp_part->header.checksum, tmp_part->header.length,
tmp_part->header.name);
@@ -213,19 +219,23 @@ static void nvram_print_partitions(char * label)
#endif
-static int nvram_write_header(struct nvram_partition * part)
+static int __init nvram_write_header(struct nvram_partition * part)
{
loff_t tmp_index;
int rc;
-
+ struct nvram_header phead;
+
+ memcpy(&phead, &part->header, NVRAM_HEADER_LEN);
+ phead.length = cpu_to_be16(phead.length);
+
tmp_index = part->index;
- rc = ppc_md.nvram_write((char *)&part->header, NVRAM_HEADER_LEN, &tmp_index);
+ rc = ppc_md.nvram_write((char *)&phead, NVRAM_HEADER_LEN, &tmp_index);
return rc;
}
-static unsigned char nvram_checksum(struct nvram_header *p)
+static unsigned char __init nvram_checksum(struct nvram_header *p)
{
unsigned int c_sum, c_sum2;
unsigned short *sp = (unsigned short *)p->name; /* assume 6 shorts */
@@ -239,120 +249,136 @@ static unsigned char nvram_checksum(struct nvram_header *p)
return c_sum;
}
-
/*
- * Find an nvram partition, sig can be 0 for any
- * partition or name can be NULL for any name, else
- * tries to match both
+ * Per the criteria passed via nvram_remove_partition(), should this
+ * partition be removed? 1=remove, 0=keep
*/
-struct nvram_partition *nvram_find_partition(int sig, const char *name)
+static int nvram_can_remove_partition(struct nvram_partition *part,
+ const char *name, int sig, const char *exceptions[])
{
- struct nvram_partition * part;
- struct list_head * p;
-
- list_for_each(p, &nvram_part->partition) {
- part = list_entry(p, struct nvram_partition, partition);
-
- if (sig && part->header.signature != sig)
- continue;
- if (name && 0 != strncmp(name, part->header.name, 12))
- continue;
- return part;
+ if (part->header.signature != sig)
+ return 0;
+ if (name) {
+ if (strncmp(name, part->header.name, 12))
+ return 0;
+ } else if (exceptions) {
+ const char **except;
+ for (except = exceptions; *except; except++) {
+ if (!strncmp(*except, part->header.name, 12))
+ return 0;
+ }
}
- return NULL;
+ return 1;
}
-EXPORT_SYMBOL(nvram_find_partition);
+/**
+ * nvram_remove_partition - Remove one or more partitions in nvram
+ * @name: name of the partition to remove, or NULL for a
+ * signature only match
+ * @sig: signature of the partition(s) to remove
+ * @exceptions: When removing all partitions with a matching signature,
+ * leave these alone.
+ */
-static int nvram_remove_os_partition(void)
+int __init nvram_remove_partition(const char *name, int sig,
+ const char *exceptions[])
{
- struct list_head *i;
- struct list_head *j;
- struct nvram_partition * part;
- struct nvram_partition * cur_part;
+ struct nvram_partition *part, *prev, *tmp;
int rc;
- list_for_each(i, &nvram_part->partition) {
- part = list_entry(i, struct nvram_partition, partition);
- if (part->header.signature != NVRAM_SIG_OS)
+ list_for_each_entry(part, &nvram_partitions, partition) {
+ if (!nvram_can_remove_partition(part, name, sig, exceptions))
continue;
-
- /* Make os partition a free partition */
+
+ /* Make partition a free partition */
part->header.signature = NVRAM_SIG_FREE;
- sprintf(part->header.name, "wwwwwwwwwwww");
+ strncpy(part->header.name, "wwwwwwwwwwww", 12);
part->header.checksum = nvram_checksum(&part->header);
-
- /* Merge contiguous free partitions backwards */
- list_for_each_prev(j, &part->partition) {
- cur_part = list_entry(j, struct nvram_partition, partition);
- if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) {
- break;
- }
-
- part->header.length += cur_part->header.length;
- part->header.checksum = nvram_checksum(&part->header);
- part->index = cur_part->index;
-
- list_del(&cur_part->partition);
- kfree(cur_part);
- j = &part->partition; /* fixup our loop */
- }
-
- /* Merge contiguous free partitions forwards */
- list_for_each(j, &part->partition) {
- cur_part = list_entry(j, struct nvram_partition, partition);
- if (cur_part == nvram_part || cur_part->header.signature != NVRAM_SIG_FREE) {
- break;
- }
-
- part->header.length += cur_part->header.length;
- part->header.checksum = nvram_checksum(&part->header);
-
- list_del(&cur_part->partition);
- kfree(cur_part);
- j = &part->partition; /* fixup our loop */
- }
-
rc = nvram_write_header(part);
if (rc <= 0) {
- printk(KERN_ERR "nvram_remove_os_partition: nvram_write failed (%d)\n", rc);
+ printk(KERN_ERR "nvram_remove_partition: nvram_write failed (%d)\n", rc);
return rc;
}
+ }
+ /* Merge contiguous ones */
+ prev = NULL;
+ list_for_each_entry_safe(part, tmp, &nvram_partitions, partition) {
+ if (part->header.signature != NVRAM_SIG_FREE) {
+ prev = NULL;
+ continue;
+ }
+ if (prev) {
+ prev->header.length += part->header.length;
+ prev->header.checksum = nvram_checksum(&part->header);
+ rc = nvram_write_header(part);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_remove_partition: nvram_write failed (%d)\n", rc);
+ return rc;
+ }
+ list_del(&part->partition);
+ kfree(part);
+ } else
+ prev = part;
}
return 0;
}
-/* nvram_create_os_partition
+/**
+ * nvram_create_partition - Create a partition in nvram
+ * @name: name of the partition to create
+ * @sig: signature of the partition to create
+ * @req_size: size of data to allocate in bytes
+ * @min_size: minimum acceptable size (0 means req_size)
*
- * Create a OS linux partition to buffer error logs.
- * Will create a partition starting at the first free
- * space found if space has enough room.
+ * Returns a negative error code or a positive nvram index
+ * of the beginning of the data area of the newly created
+ * partition. If you provided a min_size smaller than req_size
+ * you need to query for the actual size yourself after the
+ * call using nvram_partition_get_size().
*/
-static int nvram_create_os_partition(void)
+loff_t __init nvram_create_partition(const char *name, int sig,
+ int req_size, int min_size)
{
struct nvram_partition *part;
struct nvram_partition *new_part;
struct nvram_partition *free_part = NULL;
- int seq_init[2] = { 0, 0 };
+ static char nv_init_vals[16];
loff_t tmp_index;
long size = 0;
int rc;
-
+
+ /* Convert sizes from bytes to blocks */
+ req_size = _ALIGN_UP(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
+ min_size = _ALIGN_UP(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
+
+ /* If no minimum size specified, make it the same as the
+ * requested size
+ */
+ if (min_size == 0)
+ min_size = req_size;
+ if (min_size > req_size)
+ return -EINVAL;
+
+ /* Now add one block to each for the header */
+ req_size += 1;
+ min_size += 1;
+
/* Find a free partition that will give us the maximum needed size
If can't find one that will give us the minimum size needed */
- list_for_each_entry(part, &nvram_part->partition, partition) {
+ list_for_each_entry(part, &nvram_partitions, partition) {
if (part->header.signature != NVRAM_SIG_FREE)
continue;
- if (part->header.length >= NVRAM_MAX_REQ) {
- size = NVRAM_MAX_REQ;
+ if (part->header.length >= req_size) {
+ size = req_size;
free_part = part;
break;
}
- if (!size && part->header.length >= NVRAM_MIN_REQ) {
- size = NVRAM_MIN_REQ;
+ if (part->header.length > size &&
+ part->header.length >= min_size) {
+ size = part->header.length;
free_part = part;
}
}
@@ -362,136 +388,95 @@ static int nvram_create_os_partition(void)
/* Create our OS partition */
new_part = kmalloc(sizeof(*new_part), GFP_KERNEL);
if (!new_part) {
- printk(KERN_ERR "nvram_create_os_partition: kmalloc failed\n");
+ pr_err("nvram_create_os_partition: kmalloc failed\n");
return -ENOMEM;
}
new_part->index = free_part->index;
- new_part->header.signature = NVRAM_SIG_OS;
+ new_part->header.signature = sig;
new_part->header.length = size;
- strcpy(new_part->header.name, "ppc64,linux");
+ strncpy(new_part->header.name, name, 12);
new_part->header.checksum = nvram_checksum(&new_part->header);
rc = nvram_write_header(new_part);
if (rc <= 0) {
- printk(KERN_ERR "nvram_create_os_partition: nvram_write_header \
- failed (%d)\n", rc);
- return rc;
- }
-
- /* make sure and initialize to zero the sequence number and the error
- type logged */
- tmp_index = new_part->index + NVRAM_HEADER_LEN;
- rc = ppc_md.nvram_write((char *)&seq_init, sizeof(seq_init), &tmp_index);
- if (rc <= 0) {
- printk(KERN_ERR "nvram_create_os_partition: nvram_write "
- "failed (%d)\n", rc);
+ pr_err("nvram_create_os_partition: nvram_write_header "
+ "failed (%d)\n", rc);
return rc;
}
-
- nvram_error_log_index = new_part->index + NVRAM_HEADER_LEN;
- nvram_error_log_size = ((part->header.length - 1) *
- NVRAM_BLOCK_LEN) - sizeof(struct err_log_info);
-
list_add_tail(&new_part->partition, &free_part->partition);
- if (free_part->header.length <= size) {
+ /* Adjust or remove the partition we stole the space from */
+ if (free_part->header.length > size) {
+ free_part->index += size * NVRAM_BLOCK_LEN;
+ free_part->header.length -= size;
+ free_part->header.checksum = nvram_checksum(&free_part->header);
+ rc = nvram_write_header(free_part);
+ if (rc <= 0) {
+ pr_err("nvram_create_os_partition: nvram_write_header "
+ "failed (%d)\n", rc);
+ return rc;
+ }
+ } else {
list_del(&free_part->partition);
kfree(free_part);
- return 0;
}
- /* Adjust the partition we stole the space from */
- free_part->index += size * NVRAM_BLOCK_LEN;
- free_part->header.length -= size;
- free_part->header.checksum = nvram_checksum(&free_part->header);
-
- rc = nvram_write_header(free_part);
- if (rc <= 0) {
- printk(KERN_ERR "nvram_create_os_partition: nvram_write_header "
- "failed (%d)\n", rc);
- return rc;
+ /* Clear the new partition */
+ for (tmp_index = new_part->index + NVRAM_HEADER_LEN;
+ tmp_index < ((size - 1) * NVRAM_BLOCK_LEN);
+ tmp_index += NVRAM_BLOCK_LEN) {
+ rc = ppc_md.nvram_write(nv_init_vals, NVRAM_BLOCK_LEN, &tmp_index);
+ if (rc <= 0) {
+ pr_err("nvram_create_partition: nvram_write failed (%d)\n", rc);
+ return rc;
+ }
}
-
- return 0;
+
+ return new_part->index + NVRAM_HEADER_LEN;
}
-
-/* nvram_setup_partition
- *
- * This will setup the partition we need for buffering the
- * error logs and cleanup partitions if needed.
- *
- * The general strategy is the following:
- * 1.) If there is ppc64,linux partition large enough then use it.
- * 2.) If there is not a ppc64,linux partition large enough, search
- * for a free partition that is large enough.
- * 3.) If there is not a free partition large enough remove
- * _all_ OS partitions and consolidate the space.
- * 4.) Will first try getting a chunk that will satisfy the maximum
- * error log size (NVRAM_MAX_REQ).
- * 5.) If the max chunk cannot be allocated then try finding a chunk
- * that will satisfy the minum needed (NVRAM_MIN_REQ).
+/**
+ * nvram_get_partition_size - Get the data size of an nvram partition
+ * @data_index: This is the offset of the start of the data of
+ * the partition. The same value that is returned by
+ * nvram_create_partition().
*/
-static int nvram_setup_partition(void)
+int nvram_get_partition_size(loff_t data_index)
{
- struct list_head * p;
- struct nvram_partition * part;
- int rc;
-
- /* For now, we don't do any of this on pmac, until I
- * have figured out if it's worth killing some unused stuffs
- * in our nvram, as Apple defined partitions use pretty much
- * all of the space
- */
- if (machine_is(powermac))
- return -ENOSPC;
-
- /* see if we have an OS partition that meets our needs.
- will try getting the max we need. If not we'll delete
- partitions and try again. */
- list_for_each(p, &nvram_part->partition) {
- part = list_entry(p, struct nvram_partition, partition);
- if (part->header.signature != NVRAM_SIG_OS)
- continue;
+ struct nvram_partition *part;
+
+ list_for_each_entry(part, &nvram_partitions, partition) {
+ if (part->index + NVRAM_HEADER_LEN == data_index)
+ return (part->header.length - 1) * NVRAM_BLOCK_LEN;
+ }
+ return -1;
+}
- if (strcmp(part->header.name, "ppc64,linux"))
- continue;
- if (part->header.length >= NVRAM_MIN_REQ) {
- /* found our partition */
- nvram_error_log_index = part->index + NVRAM_HEADER_LEN;
- nvram_error_log_size = ((part->header.length - 1) *
- NVRAM_BLOCK_LEN) - sizeof(struct err_log_info);
- return 0;
+/**
+ * nvram_find_partition - Find an nvram partition by signature and name
+ * @name: Name of the partition or NULL for any name
+ * @sig: Signature to test against
+ * @out_size: if non-NULL, returns the size of the data part of the partition
+ */
+loff_t nvram_find_partition(const char *name, int sig, int *out_size)
+{
+ struct nvram_partition *p;
+
+ list_for_each_entry(p, &nvram_partitions, partition) {
+ if (p->header.signature == sig &&
+ (!name || !strncmp(p->header.name, name, 12))) {
+ if (out_size)
+ *out_size = (p->header.length - 1) *
+ NVRAM_BLOCK_LEN;
+ return p->index + NVRAM_HEADER_LEN;
}
}
-
- /* try creating a partition with the free space we have */
- rc = nvram_create_os_partition();
- if (!rc) {
- return 0;
- }
-
- /* need to free up some space */
- rc = nvram_remove_os_partition();
- if (rc) {
- return rc;
- }
-
- /* create a partition in this new space */
- rc = nvram_create_os_partition();
- if (rc) {
- printk(KERN_ERR "nvram_create_os_partition: Could not find a "
- "NVRAM partition large enough\n");
- return rc;
- }
-
return 0;
}
-
-static int nvram_scan_partitions(void)
+int __init nvram_scan_partitions(void)
{
loff_t cur_index = 0;
struct nvram_header phead;
@@ -501,11 +486,11 @@ static int nvram_scan_partitions(void)
int total_size;
int err;
- if (ppc_md.nvram_size == NULL)
+ if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0)
return -ENODEV;
total_size = ppc_md.nvram_size();
- header = (char *) kmalloc(NVRAM_HEADER_LEN, GFP_KERNEL);
+ header = kmalloc(NVRAM_HEADER_LEN, GFP_KERNEL);
if (!header) {
printk(KERN_ERR "nvram_scan_partitions: Failed kmalloc\n");
return -ENOMEM;
@@ -524,6 +509,8 @@ static int nvram_scan_partitions(void)
memcpy(&phead, header, NVRAM_HEADER_LEN);
+ phead.length = be16_to_cpu(phead.length);
+
err = 0;
c_sum = nvram_checksum(&phead);
if (c_sum != phead.checksum) {
@@ -538,8 +525,7 @@ static int nvram_scan_partitions(void)
"detected: 0-length partition\n");
goto out;
}
- tmp_part = (struct nvram_partition *)
- kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
+ tmp_part = kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
err = -ENOMEM;
if (!tmp_part) {
printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n");
@@ -548,12 +534,16 @@ static int nvram_scan_partitions(void)
memcpy(&tmp_part->header, &phead, NVRAM_HEADER_LEN);
tmp_part->index = cur_index;
- list_add_tail(&tmp_part->partition, &nvram_part->partition);
+ list_add_tail(&tmp_part->partition, &nvram_partitions);
cur_index += phead.length * NVRAM_BLOCK_LEN;
}
err = 0;
+#ifdef DEBUG_NVRAM
+ nvram_print_partitions("NVRAM Partitions");
+#endif
+
out:
kfree(header);
return err;
@@ -561,9 +551,10 @@ static int nvram_scan_partitions(void)
static int __init nvram_init(void)
{
- int error;
int rc;
+ BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16);
+
if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0)
return -ENODEV;
@@ -573,29 +564,6 @@ static int __init nvram_init(void)
return rc;
}
- /* initialize our anchor for the nvram partition list */
- nvram_part = (struct nvram_partition *) kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
- if (!nvram_part) {
- printk(KERN_ERR "nvram_init: Failed kmalloc\n");
- return -ENOMEM;
- }
- INIT_LIST_HEAD(&nvram_part->partition);
-
- /* Get all the NVRAM partitions */
- error = nvram_scan_partitions();
- if (error) {
- printk(KERN_ERR "nvram_init: Failed nvram_scan_partitions\n");
- return error;
- }
-
- if(nvram_setup_partition())
- printk(KERN_WARNING "nvram_init: Could not find nvram partition"
- " for nvram buffered error logging.\n");
-
-#ifdef DEBUG_NVRAM
- nvram_print_partitions("NVRAM Partitions");
-#endif
-
return rc;
}
@@ -604,134 +572,6 @@ void __exit nvram_cleanup(void)
misc_deregister( &nvram_dev );
}
-
-#ifdef CONFIG_PPC_PSERIES
-
-/* nvram_write_error_log
- *
- * We need to buffer the error logs into nvram to ensure that we have
- * the failure information to decode. If we have a severe error there
- * is no way to guarantee that the OS or the machine is in a state to
- * get back to user land and write the error to disk. For example if
- * the SCSI device driver causes a Machine Check by writing to a bad
- * IO address, there is no way of guaranteeing that the device driver
- * is in any state that is would also be able to write the error data
- * captured to disk, thus we buffer it in NVRAM for analysis on the
- * next boot.
- *
- * In NVRAM the partition containing the error log buffer will looks like:
- * Header (in bytes):
- * +-----------+----------+--------+------------+------------------+
- * | signature | checksum | length | name | data |
- * |0 |1 |2 3|4 15|16 length-1|
- * +-----------+----------+--------+------------+------------------+
- *
- * The 'data' section would look like (in bytes):
- * +--------------+------------+-----------------------------------+
- * | event_logged | sequence # | error log |
- * |0 3|4 7|8 nvram_error_log_size-1|
- * +--------------+------------+-----------------------------------+
- *
- * event_logged: 0 if event has not been logged to syslog, 1 if it has
- * sequence #: The unique sequence # for each event. (until it wraps)
- * error log: The error log from event_scan
- */
-int nvram_write_error_log(char * buff, int length, unsigned int err_type)
-{
- int rc;
- loff_t tmp_index;
- struct err_log_info info;
-
- if (no_logging) {
- return -EPERM;
- }
-
- if (nvram_error_log_index == -1) {
- return -ESPIPE;
- }
-
- if (length > nvram_error_log_size) {
- length = nvram_error_log_size;
- }
-
- info.error_type = err_type;
- info.seq_num = error_log_cnt;
-
- tmp_index = nvram_error_log_index;
-
- rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
- if (rc <= 0) {
- printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
- return rc;
- }
-
- rc = ppc_md.nvram_write(buff, length, &tmp_index);
- if (rc <= 0) {
- printk(KERN_ERR "nvram_write_error_log: Failed nvram_write (%d)\n", rc);
- return rc;
- }
-
- return 0;
-}
-
-/* nvram_read_error_log
- *
- * Reads nvram for error log for at most 'length'
- */
-int nvram_read_error_log(char * buff, int length, unsigned int * err_type)
-{
- int rc;
- loff_t tmp_index;
- struct err_log_info info;
-
- if (nvram_error_log_index == -1)
- return -1;
-
- if (length > nvram_error_log_size)
- length = nvram_error_log_size;
-
- tmp_index = nvram_error_log_index;
-
- rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
- if (rc <= 0) {
- printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
- return rc;
- }
-
- rc = ppc_md.nvram_read(buff, length, &tmp_index);
- if (rc <= 0) {
- printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
- return rc;
- }
-
- error_log_cnt = info.seq_num;
- *err_type = info.error_type;
-
- return 0;
-}
-
-/* This doesn't actually zero anything, but it sets the event_logged
- * word to tell that this event is safely in syslog.
- */
-int nvram_clear_error_log(void)
-{
- loff_t tmp_index;
- int clear_word = ERR_FLAG_ALREADY_LOGGED;
- int rc;
-
- tmp_index = nvram_error_log_index;
-
- rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
- if (rc <= 0) {
- printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
- return rc;
- }
-
- return 0;
-}
-
-#endif /* CONFIG_PPC_PSERIES */
-
module_init(nvram_init);
module_exit(nvram_cleanup);
MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c
deleted file mode 100644
index 397c83eda20..00000000000
--- a/arch/powerpc/kernel/of_device.c
+++ /dev/null
@@ -1,248 +0,0 @@
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mod_devicetable.h>
-#include <linux/slab.h>
-
-#include <asm/errno.h>
-#include <asm/of_device.h>
-
-/**
- * of_match_device - Tell if an of_device structure has a matching
- * of_match structure
- * @ids: array of of device match structures to search in
- * @dev: the of device structure to match against
- *
- * Used by a driver to check whether an of_device present in the
- * system is in its list of supported devices.
- */
-const struct of_device_id *of_match_device(const struct of_device_id *matches,
- const struct of_device *dev)
-{
- if (!dev->node)
- return NULL;
- while (matches->name[0] || matches->type[0] || matches->compatible[0]) {
- int match = 1;
- if (matches->name[0])
- match &= dev->node->name
- && !strcmp(matches->name, dev->node->name);
- if (matches->type[0])
- match &= dev->node->type
- && !strcmp(matches->type, dev->node->type);
- if (matches->compatible[0])
- match &= device_is_compatible(dev->node,
- matches->compatible);
- if (match)
- return matches;
- matches++;
- }
- return NULL;
-}
-
-static int of_platform_bus_match(struct device *dev, struct device_driver *drv)
-{
- struct of_device * of_dev = to_of_device(dev);
- struct of_platform_driver * of_drv = to_of_platform_driver(drv);
- const struct of_device_id * matches = of_drv->match_table;
-
- if (!matches)
- return 0;
-
- return of_match_device(matches, of_dev) != NULL;
-}
-
-struct of_device *of_dev_get(struct of_device *dev)
-{
- struct device *tmp;
-
- if (!dev)
- return NULL;
- tmp = get_device(&dev->dev);
- if (tmp)
- return to_of_device(tmp);
- else
- return NULL;
-}
-
-void of_dev_put(struct of_device *dev)
-{
- if (dev)
- put_device(&dev->dev);
-}
-
-
-static int of_device_probe(struct device *dev)
-{
- int error = -ENODEV;
- struct of_platform_driver *drv;
- struct of_device *of_dev;
- const struct of_device_id *match;
-
- drv = to_of_platform_driver(dev->driver);
- of_dev = to_of_device(dev);
-
- if (!drv->probe)
- return error;
-
- of_dev_get(of_dev);
-
- match = of_match_device(drv->match_table, of_dev);
- if (match)
- error = drv->probe(of_dev, match);
- if (error)
- of_dev_put(of_dev);
-
- return error;
-}
-
-static int of_device_remove(struct device *dev)
-{
- struct of_device * of_dev = to_of_device(dev);
- struct of_platform_driver * drv = to_of_platform_driver(dev->driver);
-
- if (dev->driver && drv->remove)
- drv->remove(of_dev);
- return 0;
-}
-
-static int of_device_suspend(struct device *dev, pm_message_t state)
-{
- struct of_device * of_dev = to_of_device(dev);
- struct of_platform_driver * drv = to_of_platform_driver(dev->driver);
- int error = 0;
-
- if (dev->driver && drv->suspend)
- error = drv->suspend(of_dev, state);
- return error;
-}
-
-static int of_device_resume(struct device * dev)
-{
- struct of_device * of_dev = to_of_device(dev);
- struct of_platform_driver * drv = to_of_platform_driver(dev->driver);
- int error = 0;
-
- if (dev->driver && drv->resume)
- error = drv->resume(of_dev);
- return error;
-}
-
-struct bus_type of_platform_bus_type = {
- .name = "of_platform",
- .match = of_platform_bus_match,
- .probe = of_device_probe,
- .remove = of_device_remove,
- .suspend = of_device_suspend,
- .resume = of_device_resume,
-};
-
-static int __init of_bus_driver_init(void)
-{
- return bus_register(&of_platform_bus_type);
-}
-
-postcore_initcall(of_bus_driver_init);
-
-int of_register_driver(struct of_platform_driver *drv)
-{
- /* initialize common driver fields */
- drv->driver.name = drv->name;
- drv->driver.bus = &of_platform_bus_type;
-
- /* register with core */
- return driver_register(&drv->driver);
-}
-
-void of_unregister_driver(struct of_platform_driver *drv)
-{
- driver_unregister(&drv->driver);
-}
-
-
-static ssize_t dev_show_devspec(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct of_device *ofdev;
-
- ofdev = to_of_device(dev);
- return sprintf(buf, "%s", ofdev->node->full_name);
-}
-
-static DEVICE_ATTR(devspec, S_IRUGO, dev_show_devspec, NULL);
-
-/**
- * of_release_dev - free an of device structure when all users of it are finished.
- * @dev: device that's been disconnected
- *
- * Will be called only by the device core when all users of this of device are
- * done.
- */
-void of_release_dev(struct device *dev)
-{
- struct of_device *ofdev;
-
- ofdev = to_of_device(dev);
- of_node_put(ofdev->node);
- kfree(ofdev);
-}
-
-int of_device_register(struct of_device *ofdev)
-{
- int rc;
-
- BUG_ON(ofdev->node == NULL);
-
- rc = device_register(&ofdev->dev);
- if (rc)
- return rc;
-
- device_create_file(&ofdev->dev, &dev_attr_devspec);
-
- return 0;
-}
-
-void of_device_unregister(struct of_device *ofdev)
-{
- device_remove_file(&ofdev->dev, &dev_attr_devspec);
-
- device_unregister(&ofdev->dev);
-}
-
-struct of_device* of_platform_device_create(struct device_node *np,
- const char *bus_id,
- struct device *parent)
-{
- struct of_device *dev;
-
- dev = kmalloc(sizeof(*dev), GFP_KERNEL);
- if (!dev)
- return NULL;
- memset(dev, 0, sizeof(*dev));
-
- dev->node = of_node_get(np);
- dev->dma_mask = 0xffffffffUL;
- dev->dev.dma_mask = &dev->dma_mask;
- dev->dev.parent = parent;
- dev->dev.bus = &of_platform_bus_type;
- dev->dev.release = of_release_dev;
-
- strlcpy(dev->dev.bus_id, bus_id, BUS_ID_SIZE);
-
- if (of_device_register(dev) != 0) {
- kfree(dev);
- return NULL;
- }
-
- return dev;
-}
-
-EXPORT_SYMBOL(of_match_device);
-EXPORT_SYMBOL(of_platform_bus_type);
-EXPORT_SYMBOL(of_register_driver);
-EXPORT_SYMBOL(of_unregister_driver);
-EXPORT_SYMBOL(of_device_register);
-EXPORT_SYMBOL(of_device_unregister);
-EXPORT_SYMBOL(of_dev_get);
-EXPORT_SYMBOL(of_dev_put);
-EXPORT_SYMBOL(of_platform_device_create);
-EXPORT_SYMBOL(of_release_dev);
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
new file mode 100644
index 00000000000..a7b74307672
--- /dev/null
+++ b/arch/powerpc/kernel/of_platform.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corp.
+ * <benh@kernel.crashing.org>
+ * and Arnd Bergmann, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#undef DEBUG
+
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/mod_devicetable.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/atomic.h>
+
+#include <asm/errno.h>
+#include <asm/topology.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/eeh.h>
+
+#ifdef CONFIG_PPC_OF_PLATFORM_PCI
+
+/* The probing of PCI controllers from of_platform is currently
+ * 64 bits only, mostly due to gratuitous differences between
+ * the 32 and 64 bits PCI code on PowerPC and the 32 bits one
+ * lacking some bits needed here.
+ */
+
+static int of_pci_phb_probe(struct platform_device *dev)
+{
+ struct pci_controller *phb;
+
+ /* Check if we can do that ... */
+ if (ppc_md.pci_setup_phb == NULL)
+ return -ENODEV;
+
+ pr_info("Setting up PCI bus %s\n", dev->dev.of_node->full_name);
+
+ /* Alloc and setup PHB data structure */
+ phb = pcibios_alloc_controller(dev->dev.of_node);
+ if (!phb)
+ return -ENODEV;
+
+ /* Setup parent in sysfs */
+ phb->parent = &dev->dev;
+
+ /* Setup the PHB using arch provided callback */
+ if (ppc_md.pci_setup_phb(phb)) {
+ pcibios_free_controller(phb);
+ return -ENODEV;
+ }
+
+ /* Process "ranges" property */
+ pci_process_bridge_OF_ranges(phb, dev->dev.of_node, 0);
+
+ /* Init pci_dn data structures */
+ pci_devs_phb_init_dynamic(phb);
+
+ /* Create EEH devices for the PHB */
+ eeh_dev_phb_init_dynamic(phb);
+
+ /* Register devices with EEH */
+ if (dev->dev.of_node->child)
+ eeh_add_device_tree_early(dev->dev.of_node);
+
+ /* Scan the bus */
+ pcibios_scan_phb(phb);
+ if (phb->bus == NULL)
+ return -ENXIO;
+
+ /* Claim resources. This might need some rework as well depending
+ * whether we are doing probe-only or not, like assigning unassigned
+ * resources etc...
+ */
+ pcibios_claim_one_bus(phb->bus);
+
+ /* Finish EEH setup */
+ eeh_add_device_tree_late(phb->bus);
+
+ /* Add probed PCI devices to the device model */
+ pci_bus_add_devices(phb->bus);
+
+ /* sysfs files should only be added after devices are added */
+ eeh_add_sysfs_files(phb->bus);
+
+ return 0;
+}
+
+static struct of_device_id of_pci_phb_ids[] = {
+ { .type = "pci", },
+ { .type = "pcix", },
+ { .type = "pcie", },
+ { .type = "pciex", },
+ { .type = "ht", },
+ {}
+};
+
+static struct platform_driver of_pci_phb_driver = {
+ .probe = of_pci_phb_probe,
+ .driver = {
+ .name = "of-pci",
+ .owner = THIS_MODULE,
+ .of_match_table = of_pci_phb_ids,
+ },
+};
+
+static __init int of_pci_phb_init(void)
+{
+ return platform_driver_register(&of_pci_phb_driver);
+}
+
+device_initcall(of_pci_phb_init);
+
+#endif /* CONFIG_PPC_OF_PLATFORM_PCI */
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 55f1a25085c..d6e195e8cd4 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -7,55 +7,125 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/types.h>
-#include <linux/threads.h>
-#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/export.h>
+#include <linux/memblock.h>
-#include <asm/processor.h>
-#include <asm/ptrace.h>
-#include <asm/page.h>
#include <asm/lppaca.h>
-#include <asm/iseries/it_lp_reg_save.h>
#include <asm/paca.h>
-#include <asm/mmu.h>
-
+#include <asm/sections.h>
+#include <asm/pgtable.h>
+#include <asm/kexec.h>
/* This symbol is provided by the linker - let it fill in the paca
* field correctly */
extern unsigned long __toc_start;
+#ifdef CONFIG_PPC_BOOK3S
+
/*
- * iSeries structure which the hypervisor knows about - this structure
+ * The structure which the hypervisor knows about - this structure
* should not cross a page boundary. The vpa_init/register_vpa call
* is now known to fail if the lppaca structure crosses a page
- * boundary. The lppaca is also used on POWER5 pSeries boxes. The
- * lppaca is 640 bytes long, and cannot readily change since the
- * hypervisor knows its layout, so a 1kB alignment will suffice to
- * ensure that it doesn't cross a page boundary.
+ * boundary. The lppaca is also used on POWER5 pSeries boxes.
+ * The lppaca is 640 bytes long, and cannot readily
+ * change since the hypervisor knows its layout, so a 1kB alignment
+ * will suffice to ensure that it doesn't cross a page boundary.
*/
struct lppaca lppaca[] = {
- [0 ... (NR_CPUS-1)] = {
- .desc = 0xd397d781, /* "LpPa" */
- .size = sizeof(struct lppaca),
- .dyn_proc_status = 2,
- .decr_val = 0x00ff0000,
+ [0 ... (NR_LPPACAS-1)] = {
+ .desc = cpu_to_be32(0xd397d781), /* "LpPa" */
+ .size = cpu_to_be16(sizeof(struct lppaca)),
.fpregs_in_use = 1,
- .end_of_quantum = 0xfffffffffffffffful,
- .slb_count = 64,
+ .slb_count = cpu_to_be16(64),
.vmxregs_in_use = 0,
+ .page_ins = 0,
},
};
+static struct lppaca *extra_lppacas;
+static long __initdata lppaca_size;
+
+static void __init allocate_lppacas(int nr_cpus, unsigned long limit)
+{
+ if (nr_cpus <= NR_LPPACAS)
+ return;
+
+ lppaca_size = PAGE_ALIGN(sizeof(struct lppaca) *
+ (nr_cpus - NR_LPPACAS));
+ extra_lppacas = __va(memblock_alloc_base(lppaca_size,
+ PAGE_SIZE, limit));
+}
+
+static struct lppaca * __init new_lppaca(int cpu)
+{
+ struct lppaca *lp;
+
+ if (cpu < NR_LPPACAS)
+ return &lppaca[cpu];
+
+ lp = extra_lppacas + (cpu - NR_LPPACAS);
+ *lp = lppaca[0];
+
+ return lp;
+}
+
+static void __init free_lppacas(void)
+{
+ long new_size = 0, nr;
+
+ if (!lppaca_size)
+ return;
+ nr = num_possible_cpus() - NR_LPPACAS;
+ if (nr > 0)
+ new_size = PAGE_ALIGN(nr * sizeof(struct lppaca));
+ if (new_size >= lppaca_size)
+ return;
+
+ memblock_free(__pa(extra_lppacas) + new_size, lppaca_size - new_size);
+ lppaca_size = new_size;
+}
+
+#else
+
+static inline void allocate_lppacas(int nr_cpus, unsigned long limit) { }
+static inline void free_lppacas(void) { }
+
+#endif /* CONFIG_PPC_BOOK3S */
+
+#ifdef CONFIG_PPC_STD_MMU_64
+
/*
* 3 persistent SLBs are registered here. The buffer will be zero
* initially, hence will all be invaild until we actually write them.
+ *
+ * If you make the number of persistent SLB entries dynamic, please also
+ * update PR KVM to flush and restore them accordingly.
*/
-struct slb_shadow slb_shadow[] __cacheline_aligned = {
- [0 ... (NR_CPUS-1)] = {
- .persistent = SLB_NUM_BOLTED,
- .buffer_length = sizeof(struct slb_shadow),
- },
-};
+static struct slb_shadow *slb_shadow;
+
+static void __init allocate_slb_shadows(int nr_cpus, int limit)
+{
+ int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus);
+ slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit));
+ memset(slb_shadow, 0, size);
+}
+
+static struct slb_shadow * __init init_slb_shadow(int cpu)
+{
+ struct slb_shadow *s = &slb_shadow[cpu];
+
+ s->persistent = cpu_to_be32(SLB_NUM_BOLTED);
+ s->buffer_length = cpu_to_be32(sizeof(*s));
+
+ return s;
+}
+
+#else /* CONFIG_PPC_STD_MMU_64 */
+
+static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
+
+#endif /* CONFIG_PPC_STD_MMU_64 */
/* The Paca is an array with one entry per processor. Each contains an
* lppaca, which contains the information shared between the
@@ -66,75 +136,107 @@ struct slb_shadow slb_shadow[] __cacheline_aligned = {
* processors. The processor VPD array needs one entry per physical
* processor (not thread).
*/
-#define PACA_INIT_COMMON(number) \
- .lppaca_ptr = &lppaca[number], \
- .lock_token = 0x8000, \
- .paca_index = (number), /* Paca Index */ \
- .kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL, \
- .hw_cpu_id = 0xffff, \
- .slb_shadow_ptr = &slb_shadow[number],
-
-#ifdef CONFIG_PPC_ISERIES
-#define PACA_INIT_ISERIES(number) \
- .reg_save_ptr = &iseries_reg_save[number],
-
-#define PACA_INIT(number) \
-{ \
- PACA_INIT_COMMON(number) \
- PACA_INIT_ISERIES(number) \
-}
+struct paca_struct *paca;
+EXPORT_SYMBOL(paca);
-#else
-#define PACA_INIT(number) \
-{ \
- PACA_INIT_COMMON(number) \
-}
+void __init initialise_paca(struct paca_struct *new_paca, int cpu)
+{
+ /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB
+ * of the TOC can be addressed using a single machine instruction.
+ */
+ unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL;
+#ifdef CONFIG_PPC_BOOK3S
+ new_paca->lppaca_ptr = new_lppaca(cpu);
+#else
+ new_paca->kernel_pgd = swapper_pg_dir;
#endif
+ new_paca->lock_token = 0x8000;
+ new_paca->paca_index = cpu;
+ new_paca->kernel_toc = kernel_toc;
+ new_paca->kernelbase = (unsigned long) _stext;
+ /* Only set MSR:IR/DR when MMU is initialized */
+ new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR);
+ new_paca->hw_cpu_id = 0xffff;
+ new_paca->kexec_state = KEXEC_STATE_NONE;
+ new_paca->__current = &init_task;
+ new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
+#ifdef CONFIG_PPC_STD_MMU_64
+ new_paca->slb_shadow_ptr = init_slb_shadow(cpu);
+#endif /* CONFIG_PPC_STD_MMU_64 */
-struct paca_struct paca[] = {
- PACA_INIT(0),
-#if NR_CPUS > 1
- PACA_INIT( 1), PACA_INIT( 2), PACA_INIT( 3),
-#if NR_CPUS > 4
- PACA_INIT( 4), PACA_INIT( 5), PACA_INIT( 6), PACA_INIT( 7),
-#if NR_CPUS > 8
- PACA_INIT( 8), PACA_INIT( 9), PACA_INIT( 10), PACA_INIT( 11),
- PACA_INIT( 12), PACA_INIT( 13), PACA_INIT( 14), PACA_INIT( 15),
- PACA_INIT( 16), PACA_INIT( 17), PACA_INIT( 18), PACA_INIT( 19),
- PACA_INIT( 20), PACA_INIT( 21), PACA_INIT( 22), PACA_INIT( 23),
- PACA_INIT( 24), PACA_INIT( 25), PACA_INIT( 26), PACA_INIT( 27),
- PACA_INIT( 28), PACA_INIT( 29), PACA_INIT( 30), PACA_INIT( 31),
-#if NR_CPUS > 32
- PACA_INIT( 32), PACA_INIT( 33), PACA_INIT( 34), PACA_INIT( 35),
- PACA_INIT( 36), PACA_INIT( 37), PACA_INIT( 38), PACA_INIT( 39),
- PACA_INIT( 40), PACA_INIT( 41), PACA_INIT( 42), PACA_INIT( 43),
- PACA_INIT( 44), PACA_INIT( 45), PACA_INIT( 46), PACA_INIT( 47),
- PACA_INIT( 48), PACA_INIT( 49), PACA_INIT( 50), PACA_INIT( 51),
- PACA_INIT( 52), PACA_INIT( 53), PACA_INIT( 54), PACA_INIT( 55),
- PACA_INIT( 56), PACA_INIT( 57), PACA_INIT( 58), PACA_INIT( 59),
- PACA_INIT( 60), PACA_INIT( 61), PACA_INIT( 62), PACA_INIT( 63),
-#if NR_CPUS > 64
- PACA_INIT( 64), PACA_INIT( 65), PACA_INIT( 66), PACA_INIT( 67),
- PACA_INIT( 68), PACA_INIT( 69), PACA_INIT( 70), PACA_INIT( 71),
- PACA_INIT( 72), PACA_INIT( 73), PACA_INIT( 74), PACA_INIT( 75),
- PACA_INIT( 76), PACA_INIT( 77), PACA_INIT( 78), PACA_INIT( 79),
- PACA_INIT( 80), PACA_INIT( 81), PACA_INIT( 82), PACA_INIT( 83),
- PACA_INIT( 84), PACA_INIT( 85), PACA_INIT( 86), PACA_INIT( 87),
- PACA_INIT( 88), PACA_INIT( 89), PACA_INIT( 90), PACA_INIT( 91),
- PACA_INIT( 92), PACA_INIT( 93), PACA_INIT( 94), PACA_INIT( 95),
- PACA_INIT( 96), PACA_INIT( 97), PACA_INIT( 98), PACA_INIT( 99),
- PACA_INIT(100), PACA_INIT(101), PACA_INIT(102), PACA_INIT(103),
- PACA_INIT(104), PACA_INIT(105), PACA_INIT(106), PACA_INIT(107),
- PACA_INIT(108), PACA_INIT(109), PACA_INIT(110), PACA_INIT(111),
- PACA_INIT(112), PACA_INIT(113), PACA_INIT(114), PACA_INIT(115),
- PACA_INIT(116), PACA_INIT(117), PACA_INIT(118), PACA_INIT(119),
- PACA_INIT(120), PACA_INIT(121), PACA_INIT(122), PACA_INIT(123),
- PACA_INIT(124), PACA_INIT(125), PACA_INIT(126), PACA_INIT(127),
-#endif
-#endif
-#endif
+#ifdef CONFIG_PPC_BOOK3E
+ /* For now -- if we have threads this will be adjusted later */
+ new_paca->tcd_ptr = &new_paca->tcd;
#endif
+}
+
+/* Put the paca pointer into r13 and SPRG_PACA */
+void setup_paca(struct paca_struct *new_paca)
+{
+ /* Setup r13 */
+ local_paca = new_paca;
+
+#ifdef CONFIG_PPC_BOOK3E
+ /* On Book3E, initialize the TLB miss exception frames */
+ mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb);
+#else
+ /* In HV mode, we setup both HPACA and PACA to avoid problems
+ * if we do a GET_PACA() before the feature fixups have been
+ * applied
+ */
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ mtspr(SPRN_SPRG_HPACA, local_paca);
#endif
-};
-EXPORT_SYMBOL(paca);
+ mtspr(SPRN_SPRG_PACA, local_paca);
+
+}
+
+static int __initdata paca_size;
+
+void __init allocate_pacas(void)
+{
+ int cpu, limit;
+
+ /*
+ * We can't take SLB misses on the paca, and we want to access them
+ * in real mode, so allocate them within the RMA and also within
+ * the first segment.
+ */
+ limit = min(0x10000000ULL, ppc64_rma_size);
+
+ paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
+
+ paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit));
+ memset(paca, 0, paca_size);
+
+ printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n",
+ paca_size, nr_cpu_ids, paca);
+
+ allocate_lppacas(nr_cpu_ids, limit);
+
+ allocate_slb_shadows(nr_cpu_ids, limit);
+
+ /* Can't use for_each_*_cpu, as they aren't functional yet */
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+ initialise_paca(&paca[cpu], cpu);
+}
+
+void __init free_unused_pacas(void)
+{
+ int new_size;
+
+ new_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
+
+ if (new_size >= paca_size)
+ return;
+
+ memblock_free(__pa(paca) + new_size, paca_size - new_size);
+
+ printk(KERN_DEBUG "Freed %u bytes for unused pacas\n",
+ paca_size - new_size);
+
+ paca_size = new_size;
+
+ free_lppacas();
+}
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
new file mode 100644
index 00000000000..b49c72fd7f1
--- /dev/null
+++ b/arch/powerpc/kernel/pci-common.c
@@ -0,0 +1,1681 @@
+/*
+ * Contains common pci routines for ALL ppc platform
+ * (based on pci_32.c and pci_64.c)
+ *
+ * Port for PPC64 David Engebretsen, IBM Corp.
+ * Contains common pci routines for ppc64 platform, pSeries and iSeries brands.
+ *
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Rework, based on alpha PCI code.
+ *
+ * Common pmac/prep/chrp pci routines. -- Cort
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+#include <linux/syscalls.h>
+#include <linux/irq.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/vgaarb.h>
+
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/byteorder.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+#include <asm/eeh.h>
+
+static DEFINE_SPINLOCK(hose_spinlock);
+LIST_HEAD(hose_list);
+
+/* XXX kill that some day ... */
+static int global_phb_number; /* Global phb counter */
+
+/* ISA Memory physical address */
+resource_size_t isa_mem_base;
+
+
+static struct dma_map_ops *pci_dma_ops = &dma_direct_ops;
+
+void set_pci_dma_ops(struct dma_map_ops *dma_ops)
+{
+ pci_dma_ops = dma_ops;
+}
+
+struct dma_map_ops *get_pci_dma_ops(void)
+{
+ return pci_dma_ops;
+}
+EXPORT_SYMBOL(get_pci_dma_ops);
+
+struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
+{
+ struct pci_controller *phb;
+
+ phb = zalloc_maybe_bootmem(sizeof(struct pci_controller), GFP_KERNEL);
+ if (phb == NULL)
+ return NULL;
+ spin_lock(&hose_spinlock);
+ phb->global_number = global_phb_number++;
+ list_add_tail(&phb->list_node, &hose_list);
+ spin_unlock(&hose_spinlock);
+ phb->dn = dev;
+ phb->is_dynamic = mem_init_done;
+#ifdef CONFIG_PPC64
+ if (dev) {
+ int nid = of_node_to_nid(dev);
+
+ if (nid < 0 || !node_online(nid))
+ nid = -1;
+
+ PHB_SET_NODE(phb, nid);
+ }
+#endif
+ return phb;
+}
+
+void pcibios_free_controller(struct pci_controller *phb)
+{
+ spin_lock(&hose_spinlock);
+ list_del(&phb->list_node);
+ spin_unlock(&hose_spinlock);
+
+ if (phb->is_dynamic)
+ kfree(phb);
+}
+
+/*
+ * The function is used to return the minimal alignment
+ * for memory or I/O windows of the associated P2P bridge.
+ * By default, 4KiB alignment for I/O windows and 1MiB for
+ * memory windows.
+ */
+resource_size_t pcibios_window_alignment(struct pci_bus *bus,
+ unsigned long type)
+{
+ if (ppc_md.pcibios_window_alignment)
+ return ppc_md.pcibios_window_alignment(bus, type);
+
+ /*
+ * PCI core will figure out the default
+ * alignment: 4KiB for I/O and 1MiB for
+ * memory window.
+ */
+ return 1;
+}
+
+void pcibios_reset_secondary_bus(struct pci_dev *dev)
+{
+ u16 ctrl;
+
+ if (ppc_md.pcibios_reset_secondary_bus) {
+ ppc_md.pcibios_reset_secondary_bus(dev);
+ return;
+ }
+
+ pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
+ ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
+ pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+ msleep(2);
+
+ ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+ pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+ ssleep(1);
+}
+
+static resource_size_t pcibios_io_size(const struct pci_controller *hose)
+{
+#ifdef CONFIG_PPC64
+ return hose->pci_io_size;
+#else
+ return resource_size(&hose->io_resource);
+#endif
+}
+
+int pcibios_vaddr_is_ioport(void __iomem *address)
+{
+ int ret = 0;
+ struct pci_controller *hose;
+ resource_size_t size;
+
+ spin_lock(&hose_spinlock);
+ list_for_each_entry(hose, &hose_list, list_node) {
+ size = pcibios_io_size(hose);
+ if (address >= hose->io_base_virt &&
+ address < (hose->io_base_virt + size)) {
+ ret = 1;
+ break;
+ }
+ }
+ spin_unlock(&hose_spinlock);
+ return ret;
+}
+
+unsigned long pci_address_to_pio(phys_addr_t address)
+{
+ struct pci_controller *hose;
+ resource_size_t size;
+ unsigned long ret = ~0;
+
+ spin_lock(&hose_spinlock);
+ list_for_each_entry(hose, &hose_list, list_node) {
+ size = pcibios_io_size(hose);
+ if (address >= hose->io_base_phys &&
+ address < (hose->io_base_phys + size)) {
+ unsigned long base =
+ (unsigned long)hose->io_base_virt - _IO_BASE;
+ ret = base + (address - hose->io_base_phys);
+ break;
+ }
+ }
+ spin_unlock(&hose_spinlock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(pci_address_to_pio);
+
+/*
+ * Return the domain number for this bus.
+ */
+int pci_domain_nr(struct pci_bus *bus)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+
+ return hose->global_number;
+}
+EXPORT_SYMBOL(pci_domain_nr);
+
+/* This routine is meant to be used early during boot, when the
+ * PCI bus numbers have not yet been assigned, and you need to
+ * issue PCI config cycles to an OF device.
+ * It could also be used to "fix" RTAS config cycles if you want
+ * to set pci_assign_all_buses to 1 and still use RTAS for PCI
+ * config cycles.
+ */
+struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node)
+{
+ while(node) {
+ struct pci_controller *hose, *tmp;
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ if (hose->dn == node)
+ return hose;
+ node = node->parent;
+ }
+ return NULL;
+}
+
+/*
+ * Reads the interrupt pin to determine if interrupt is use by card.
+ * If the interrupt is used, then gets the interrupt line from the
+ * openfirmware and sets it in the pci_dev and pci_config line.
+ */
+static int pci_read_irq_line(struct pci_dev *pci_dev)
+{
+ struct of_phandle_args oirq;
+ unsigned int virq;
+
+ pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev));
+
+#ifdef DEBUG
+ memset(&oirq, 0xff, sizeof(oirq));
+#endif
+ /* Try to get a mapping from the device-tree */
+ if (of_irq_parse_pci(pci_dev, &oirq)) {
+ u8 line, pin;
+
+ /* If that fails, lets fallback to what is in the config
+ * space and map that through the default controller. We
+ * also set the type to level low since that's what PCI
+ * interrupts are. If your platform does differently, then
+ * either provide a proper interrupt tree or don't use this
+ * function.
+ */
+ if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin))
+ return -1;
+ if (pin == 0)
+ return -1;
+ if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) ||
+ line == 0xff || line == 0) {
+ return -1;
+ }
+ pr_debug(" No map ! Using line %d (pin %d) from PCI config\n",
+ line, pin);
+
+ virq = irq_create_mapping(NULL, line);
+ if (virq != NO_IRQ)
+ irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW);
+ } else {
+ pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n",
+ oirq.args_count, oirq.args[0], oirq.args[1],
+ of_node_full_name(oirq.np));
+
+ virq = irq_create_of_mapping(&oirq);
+ }
+ if(virq == NO_IRQ) {
+ pr_debug(" Failed to map !\n");
+ return -1;
+ }
+
+ pr_debug(" Mapped to linux irq %d\n", virq);
+
+ pci_dev->irq = virq;
+
+ return 0;
+}
+
+/*
+ * Platform support for /proc/bus/pci/X/Y mmap()s,
+ * modelled on the sparc64 implementation by Dave Miller.
+ * -- paulus.
+ */
+
+/*
+ * Adjust vm_pgoff of VMA such that it is the physical page offset
+ * corresponding to the 32-bit pci bus offset for DEV requested by the user.
+ *
+ * Basically, the user finds the base address for his device which he wishes
+ * to mmap. They read the 32-bit value from the config space base register,
+ * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
+ * offset parameter of mmap on /proc/bus/pci/XXX for that device.
+ *
+ * Returns negative error code on failure, zero on success.
+ */
+static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
+ resource_size_t *offset,
+ enum pci_mmap_state mmap_state)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ unsigned long io_offset = 0;
+ int i, res_bit;
+
+ if (hose == NULL)
+ return NULL; /* should never happen */
+
+ /* If memory, add on the PCI bridge address offset */
+ if (mmap_state == pci_mmap_mem) {
+#if 0 /* See comment in pci_resource_to_user() for why this is disabled */
+ *offset += hose->pci_mem_offset;
+#endif
+ res_bit = IORESOURCE_MEM;
+ } else {
+ io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ *offset += io_offset;
+ res_bit = IORESOURCE_IO;
+ }
+
+ /*
+ * Check that the offset requested corresponds to one of the
+ * resources of the device.
+ */
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+ struct resource *rp = &dev->resource[i];
+ int flags = rp->flags;
+
+ /* treat ROM as memory (should be already) */
+ if (i == PCI_ROM_RESOURCE)
+ flags |= IORESOURCE_MEM;
+
+ /* Active and same type? */
+ if ((flags & res_bit) == 0)
+ continue;
+
+ /* In the range of this resource? */
+ if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
+ continue;
+
+ /* found it! construct the final physical address */
+ if (mmap_state == pci_mmap_io)
+ *offset += hose->io_base_phys - io_offset;
+ return rp;
+ }
+
+ return NULL;
+}
+
+/*
+ * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
+ * device mapping.
+ */
+static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
+ pgprot_t protection,
+ enum pci_mmap_state mmap_state,
+ int write_combine)
+{
+
+ /* Write combine is always 0 on non-memory space mappings. On
+ * memory space, if the user didn't pass 1, we check for a
+ * "prefetchable" resource. This is a bit hackish, but we use
+ * this to workaround the inability of /sysfs to provide a write
+ * combine bit
+ */
+ if (mmap_state != pci_mmap_mem)
+ write_combine = 0;
+ else if (write_combine == 0) {
+ if (rp->flags & IORESOURCE_PREFETCH)
+ write_combine = 1;
+ }
+
+ /* XXX would be nice to have a way to ask for write-through */
+ if (write_combine)
+ return pgprot_noncached_wc(protection);
+ else
+ return pgprot_noncached(protection);
+}
+
+/*
+ * This one is used by /dev/mem and fbdev who have no clue about the
+ * PCI device, it tries to find the PCI device first and calls the
+ * above routine
+ */
+pgprot_t pci_phys_mem_access_prot(struct file *file,
+ unsigned long pfn,
+ unsigned long size,
+ pgprot_t prot)
+{
+ struct pci_dev *pdev = NULL;
+ struct resource *found = NULL;
+ resource_size_t offset = ((resource_size_t)pfn) << PAGE_SHIFT;
+ int i;
+
+ if (page_is_ram(pfn))
+ return prot;
+
+ prot = pgprot_noncached(prot);
+ for_each_pci_dev(pdev) {
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+ struct resource *rp = &pdev->resource[i];
+ int flags = rp->flags;
+
+ /* Active and same type? */
+ if ((flags & IORESOURCE_MEM) == 0)
+ continue;
+ /* In the range of this resource? */
+ if (offset < (rp->start & PAGE_MASK) ||
+ offset > rp->end)
+ continue;
+ found = rp;
+ break;
+ }
+ if (found)
+ break;
+ }
+ if (found) {
+ if (found->flags & IORESOURCE_PREFETCH)
+ prot = pgprot_noncached_wc(prot);
+ pci_dev_put(pdev);
+ }
+
+ pr_debug("PCI: Non-PCI map for %llx, prot: %lx\n",
+ (unsigned long long)offset, pgprot_val(prot));
+
+ return prot;
+}
+
+
+/*
+ * Perform the actual remap of the pages for a PCI device mapping, as
+ * appropriate for this architecture. The region in the process to map
+ * is described by vm_start and vm_end members of VMA, the base physical
+ * address is found in vm_pgoff.
+ * The pci device structure is provided so that architectures may make mapping
+ * decisions on a per-device or per-bus basis.
+ *
+ * Returns a negative error code on failure, zero on success.
+ */
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state, int write_combine)
+{
+ resource_size_t offset =
+ ((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
+ struct resource *rp;
+ int ret;
+
+ rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
+ if (rp == NULL)
+ return -EINVAL;
+
+ vma->vm_pgoff = offset >> PAGE_SHIFT;
+ vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp,
+ vma->vm_page_prot,
+ mmap_state, write_combine);
+
+ ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ vma->vm_end - vma->vm_start, vma->vm_page_prot);
+
+ return ret;
+}
+
+/* This provides legacy IO read access on a bus */
+int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
+{
+ unsigned long offset;
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ struct resource *rp = &hose->io_resource;
+ void __iomem *addr;
+
+ /* Check if port can be supported by that bus. We only check
+ * the ranges of the PHB though, not the bus itself as the rules
+ * for forwarding legacy cycles down bridges are not our problem
+ * here. So if the host bridge supports it, we do it.
+ */
+ offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ offset += port;
+
+ if (!(rp->flags & IORESOURCE_IO))
+ return -ENXIO;
+ if (offset < rp->start || (offset + size) > rp->end)
+ return -ENXIO;
+ addr = hose->io_base_virt + port;
+
+ switch(size) {
+ case 1:
+ *((u8 *)val) = in_8(addr);
+ return 1;
+ case 2:
+ if (port & 1)
+ return -EINVAL;
+ *((u16 *)val) = in_le16(addr);
+ return 2;
+ case 4:
+ if (port & 3)
+ return -EINVAL;
+ *((u32 *)val) = in_le32(addr);
+ return 4;
+ }
+ return -EINVAL;
+}
+
+/* This provides legacy IO write access on a bus */
+int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size)
+{
+ unsigned long offset;
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ struct resource *rp = &hose->io_resource;
+ void __iomem *addr;
+
+ /* Check if port can be supported by that bus. We only check
+ * the ranges of the PHB though, not the bus itself as the rules
+ * for forwarding legacy cycles down bridges are not our problem
+ * here. So if the host bridge supports it, we do it.
+ */
+ offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ offset += port;
+
+ if (!(rp->flags & IORESOURCE_IO))
+ return -ENXIO;
+ if (offset < rp->start || (offset + size) > rp->end)
+ return -ENXIO;
+ addr = hose->io_base_virt + port;
+
+ /* WARNING: The generic code is idiotic. It gets passed a pointer
+ * to what can be a 1, 2 or 4 byte quantity and always reads that
+ * as a u32, which means that we have to correct the location of
+ * the data read within those 32 bits for size 1 and 2
+ */
+ switch(size) {
+ case 1:
+ out_8(addr, val >> 24);
+ return 1;
+ case 2:
+ if (port & 1)
+ return -EINVAL;
+ out_le16(addr, val >> 16);
+ return 2;
+ case 4:
+ if (port & 3)
+ return -EINVAL;
+ out_le32(addr, val);
+ return 4;
+ }
+ return -EINVAL;
+}
+
+/* This provides legacy IO or memory mmap access on a bus */
+int pci_mmap_legacy_page_range(struct pci_bus *bus,
+ struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ resource_size_t offset =
+ ((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
+ resource_size_t size = vma->vm_end - vma->vm_start;
+ struct resource *rp;
+
+ pr_debug("pci_mmap_legacy_page_range(%04x:%02x, %s @%llx..%llx)\n",
+ pci_domain_nr(bus), bus->number,
+ mmap_state == pci_mmap_mem ? "MEM" : "IO",
+ (unsigned long long)offset,
+ (unsigned long long)(offset + size - 1));
+
+ if (mmap_state == pci_mmap_mem) {
+ /* Hack alert !
+ *
+ * Because X is lame and can fail starting if it gets an error trying
+ * to mmap legacy_mem (instead of just moving on without legacy memory
+ * access) we fake it here by giving it anonymous memory, effectively
+ * behaving just like /dev/zero
+ */
+ if ((offset + size) > hose->isa_mem_size) {
+ printk(KERN_DEBUG
+ "Process %s (pid:%d) mapped non-existing PCI legacy memory for 0%04x:%02x\n",
+ current->comm, current->pid, pci_domain_nr(bus), bus->number);
+ if (vma->vm_flags & VM_SHARED)
+ return shmem_zero_setup(vma);
+ return 0;
+ }
+ offset += hose->isa_mem_phys;
+ } else {
+ unsigned long io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ unsigned long roffset = offset + io_offset;
+ rp = &hose->io_resource;
+ if (!(rp->flags & IORESOURCE_IO))
+ return -ENXIO;
+ if (roffset < rp->start || (roffset + size) > rp->end)
+ return -ENXIO;
+ offset += hose->io_base_phys;
+ }
+ pr_debug(" -> mapping phys %llx\n", (unsigned long long)offset);
+
+ vma->vm_pgoff = offset >> PAGE_SHIFT;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+}
+
+void pci_resource_to_user(const struct pci_dev *dev, int bar,
+ const struct resource *rsrc,
+ resource_size_t *start, resource_size_t *end)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ resource_size_t offset = 0;
+
+ if (hose == NULL)
+ return;
+
+ if (rsrc->flags & IORESOURCE_IO)
+ offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+
+ /* We pass a fully fixed up address to userland for MMIO instead of
+ * a BAR value because X is lame and expects to be able to use that
+ * to pass to /dev/mem !
+ *
+ * That means that we'll have potentially 64 bits values where some
+ * userland apps only expect 32 (like X itself since it thinks only
+ * Sparc has 64 bits MMIO) but if we don't do that, we break it on
+ * 32 bits CHRPs :-(
+ *
+ * Hopefully, the sysfs insterface is immune to that gunk. Once X
+ * has been fixed (and the fix spread enough), we can re-enable the
+ * 2 lines below and pass down a BAR value to userland. In that case
+ * we'll also have to re-enable the matching code in
+ * __pci_mmap_make_offset().
+ *
+ * BenH.
+ */
+#if 0
+ else if (rsrc->flags & IORESOURCE_MEM)
+ offset = hose->pci_mem_offset;
+#endif
+
+ *start = rsrc->start - offset;
+ *end = rsrc->end - offset;
+}
+
+/**
+ * pci_process_bridge_OF_ranges - Parse PCI bridge resources from device tree
+ * @hose: newly allocated pci_controller to be setup
+ * @dev: device node of the host bridge
+ * @primary: set if primary bus (32 bits only, soon to be deprecated)
+ *
+ * This function will parse the "ranges" property of a PCI host bridge device
+ * node and setup the resource mapping of a pci controller based on its
+ * content.
+ *
+ * Life would be boring if it wasn't for a few issues that we have to deal
+ * with here:
+ *
+ * - We can only cope with one IO space range and up to 3 Memory space
+ * ranges. However, some machines (thanks Apple !) tend to split their
+ * space into lots of small contiguous ranges. So we have to coalesce.
+ *
+ * - Some busses have IO space not starting at 0, which causes trouble with
+ * the way we do our IO resource renumbering. The code somewhat deals with
+ * it for 64 bits but I would expect problems on 32 bits.
+ *
+ * - Some 32 bits platforms such as 4xx can have physical space larger than
+ * 32 bits so we need to use 64 bits values for the parsing
+ */
+void pci_process_bridge_OF_ranges(struct pci_controller *hose,
+ struct device_node *dev, int primary)
+{
+ int memno = 0;
+ struct resource *res;
+ struct of_pci_range range;
+ struct of_pci_range_parser parser;
+
+ printk(KERN_INFO "PCI host bridge %s %s ranges:\n",
+ dev->full_name, primary ? "(primary)" : "");
+
+ /* Check for ranges property */
+ if (of_pci_range_parser_init(&parser, dev))
+ return;
+
+ /* Parse it */
+ for_each_of_pci_range(&parser, &range) {
+ /* If we failed translation or got a zero-sized region
+ * (some FW try to feed us with non sensical zero sized regions
+ * such as power3 which look like some kind of attempt at exposing
+ * the VGA memory hole)
+ */
+ if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
+ continue;
+
+ /* Act based on address space type */
+ res = NULL;
+ switch (range.flags & IORESOURCE_TYPE_BITS) {
+ case IORESOURCE_IO:
+ printk(KERN_INFO
+ " IO 0x%016llx..0x%016llx -> 0x%016llx\n",
+ range.cpu_addr, range.cpu_addr + range.size - 1,
+ range.pci_addr);
+
+ /* We support only one IO range */
+ if (hose->pci_io_size) {
+ printk(KERN_INFO
+ " \\--> Skipped (too many) !\n");
+ continue;
+ }
+#ifdef CONFIG_PPC32
+ /* On 32 bits, limit I/O space to 16MB */
+ if (range.size > 0x01000000)
+ range.size = 0x01000000;
+
+ /* 32 bits needs to map IOs here */
+ hose->io_base_virt = ioremap(range.cpu_addr,
+ range.size);
+
+ /* Expect trouble if pci_addr is not 0 */
+ if (primary)
+ isa_io_base =
+ (unsigned long)hose->io_base_virt;
+#endif /* CONFIG_PPC32 */
+ /* pci_io_size and io_base_phys always represent IO
+ * space starting at 0 so we factor in pci_addr
+ */
+ hose->pci_io_size = range.pci_addr + range.size;
+ hose->io_base_phys = range.cpu_addr - range.pci_addr;
+
+ /* Build resource */
+ res = &hose->io_resource;
+ range.cpu_addr = range.pci_addr;
+ break;
+ case IORESOURCE_MEM:
+ printk(KERN_INFO
+ " MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n",
+ range.cpu_addr, range.cpu_addr + range.size - 1,
+ range.pci_addr,
+ (range.pci_space & 0x40000000) ?
+ "Prefetch" : "");
+
+ /* We support only 3 memory ranges */
+ if (memno >= 3) {
+ printk(KERN_INFO
+ " \\--> Skipped (too many) !\n");
+ continue;
+ }
+ /* Handles ISA memory hole space here */
+ if (range.pci_addr == 0) {
+ if (primary || isa_mem_base == 0)
+ isa_mem_base = range.cpu_addr;
+ hose->isa_mem_phys = range.cpu_addr;
+ hose->isa_mem_size = range.size;
+ }
+
+ /* Build resource */
+ hose->mem_offset[memno] = range.cpu_addr -
+ range.pci_addr;
+ res = &hose->mem_resources[memno++];
+ break;
+ }
+ if (res != NULL) {
+ of_pci_range_to_resource(&range, dev, res);
+ }
+ }
+}
+
+/* Decide whether to display the domain number in /proc */
+int pci_proc_domain(struct pci_bus *bus)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+
+ if (!pci_has_flag(PCI_ENABLE_PROC_DOMAINS))
+ return 0;
+ if (pci_has_flag(PCI_COMPAT_DOMAIN_0))
+ return hose->global_number != 0;
+ return 1;
+}
+
+int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+ if (ppc_md.pcibios_root_bridge_prepare)
+ return ppc_md.pcibios_root_bridge_prepare(bridge);
+
+ return 0;
+}
+
+/* This header fixup will do the resource fixup for all devices as they are
+ * probed, but not for bridge ranges
+ */
+static void pcibios_fixup_resources(struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ int i;
+
+ if (!hose) {
+ printk(KERN_ERR "No host bridge for PCI dev %s !\n",
+ pci_name(dev));
+ return;
+ }
+ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+ struct resource *res = dev->resource + i;
+ struct pci_bus_region reg;
+ if (!res->flags)
+ continue;
+
+ /* If we're going to re-assign everything, we mark all resources
+ * as unset (and 0-base them). In addition, we mark BARs starting
+ * at 0 as unset as well, except if PCI_PROBE_ONLY is also set
+ * since in that case, we don't want to re-assign anything
+ */
+ pcibios_resource_to_bus(dev->bus, &reg, res);
+ if (pci_has_flag(PCI_REASSIGN_ALL_RSRC) ||
+ (reg.start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) {
+ /* Only print message if not re-assigning */
+ if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC))
+ pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] "
+ "is unassigned\n",
+ pci_name(dev), i,
+ (unsigned long long)res->start,
+ (unsigned long long)res->end,
+ (unsigned int)res->flags);
+ res->end -= res->start;
+ res->start = 0;
+ res->flags |= IORESOURCE_UNSET;
+ continue;
+ }
+
+ pr_debug("PCI:%s Resource %d %016llx-%016llx [%x]\n",
+ pci_name(dev), i,
+ (unsigned long long)res->start,\
+ (unsigned long long)res->end,
+ (unsigned int)res->flags);
+ }
+
+ /* Call machine specific resource fixup */
+ if (ppc_md.pcibios_fixup_resources)
+ ppc_md.pcibios_fixup_resources(dev);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources);
+
+/* This function tries to figure out if a bridge resource has been initialized
+ * by the firmware or not. It doesn't have to be absolutely bullet proof, but
+ * things go more smoothly when it gets it right. It should covers cases such
+ * as Apple "closed" bridge resources and bare-metal pSeries unassigned bridges
+ */
+static int pcibios_uninitialized_bridge_resource(struct pci_bus *bus,
+ struct resource *res)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ struct pci_dev *dev = bus->self;
+ resource_size_t offset;
+ struct pci_bus_region region;
+ u16 command;
+ int i;
+
+ /* We don't do anything if PCI_PROBE_ONLY is set */
+ if (pci_has_flag(PCI_PROBE_ONLY))
+ return 0;
+
+ /* Job is a bit different between memory and IO */
+ if (res->flags & IORESOURCE_MEM) {
+ pcibios_resource_to_bus(dev->bus, &region, res);
+
+ /* If the BAR is non-0 then it's probably been initialized */
+ if (region.start != 0)
+ return 0;
+
+ /* The BAR is 0, let's check if memory decoding is enabled on
+ * the bridge. If not, we consider it unassigned
+ */
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ if ((command & PCI_COMMAND_MEMORY) == 0)
+ return 1;
+
+ /* Memory decoding is enabled and the BAR is 0. If any of the bridge
+ * resources covers that starting address (0 then it's good enough for
+ * us for memory space)
+ */
+ for (i = 0; i < 3; i++) {
+ if ((hose->mem_resources[i].flags & IORESOURCE_MEM) &&
+ hose->mem_resources[i].start == hose->mem_offset[i])
+ return 0;
+ }
+
+ /* Well, it starts at 0 and we know it will collide so we may as
+ * well consider it as unassigned. That covers the Apple case.
+ */
+ return 1;
+ } else {
+ /* If the BAR is non-0, then we consider it assigned */
+ offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ if (((res->start - offset) & 0xfffffffful) != 0)
+ return 0;
+
+ /* Here, we are a bit different than memory as typically IO space
+ * starting at low addresses -is- valid. What we do instead if that
+ * we consider as unassigned anything that doesn't have IO enabled
+ * in the PCI command register, and that's it.
+ */
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ if (command & PCI_COMMAND_IO)
+ return 0;
+
+ /* It's starting at 0 and IO is disabled in the bridge, consider
+ * it unassigned
+ */
+ return 1;
+ }
+}
+
+/* Fixup resources of a PCI<->PCI bridge */
+static void pcibios_fixup_bridge(struct pci_bus *bus)
+{
+ struct resource *res;
+ int i;
+
+ struct pci_dev *dev = bus->self;
+
+ pci_bus_for_each_resource(bus, res, i) {
+ if (!res || !res->flags)
+ continue;
+ if (i >= 3 && bus->self->transparent)
+ continue;
+
+ /* If we're going to reassign everything, we can
+ * shrink the P2P resource to have size as being
+ * of 0 in order to save space.
+ */
+ if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) {
+ res->flags |= IORESOURCE_UNSET;
+ res->start = 0;
+ res->end = -1;
+ continue;
+ }
+
+ pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x]\n",
+ pci_name(dev), i,
+ (unsigned long long)res->start,\
+ (unsigned long long)res->end,
+ (unsigned int)res->flags);
+
+ /* Try to detect uninitialized P2P bridge resources,
+ * and clear them out so they get re-assigned later
+ */
+ if (pcibios_uninitialized_bridge_resource(bus, res)) {
+ res->flags = 0;
+ pr_debug("PCI:%s (unassigned)\n", pci_name(dev));
+ }
+ }
+}
+
+void pcibios_setup_bus_self(struct pci_bus *bus)
+{
+ /* Fix up the bus resources for P2P bridges */
+ if (bus->self != NULL)
+ pcibios_fixup_bridge(bus);
+
+ /* Platform specific bus fixups. This is currently only used
+ * by fsl_pci and I'm hoping to get rid of it at some point
+ */
+ if (ppc_md.pcibios_fixup_bus)
+ ppc_md.pcibios_fixup_bus(bus);
+
+ /* Setup bus DMA mappings */
+ if (ppc_md.pci_dma_bus_setup)
+ ppc_md.pci_dma_bus_setup(bus);
+}
+
+static void pcibios_setup_device(struct pci_dev *dev)
+{
+ /* Fixup NUMA node as it may not be setup yet by the generic
+ * code and is needed by the DMA init
+ */
+ set_dev_node(&dev->dev, pcibus_to_node(dev->bus));
+
+ /* Hook up default DMA ops */
+ set_dma_ops(&dev->dev, pci_dma_ops);
+ set_dma_offset(&dev->dev, PCI_DRAM_OFFSET);
+
+ /* Additional platform DMA/iommu setup */
+ if (ppc_md.pci_dma_dev_setup)
+ ppc_md.pci_dma_dev_setup(dev);
+
+ /* Read default IRQs and fixup if necessary */
+ pci_read_irq_line(dev);
+ if (ppc_md.pci_irq_fixup)
+ ppc_md.pci_irq_fixup(dev);
+}
+
+int pcibios_add_device(struct pci_dev *dev)
+{
+ /*
+ * We can only call pcibios_setup_device() after bus setup is complete,
+ * since some of the platform specific DMA setup code depends on it.
+ */
+ if (dev->bus->is_added)
+ pcibios_setup_device(dev);
+ return 0;
+}
+
+void pcibios_setup_bus_devices(struct pci_bus *bus)
+{
+ struct pci_dev *dev;
+
+ pr_debug("PCI: Fixup bus devices %d (%s)\n",
+ bus->number, bus->self ? pci_name(bus->self) : "PHB");
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ /* Cardbus can call us to add new devices to a bus, so ignore
+ * those who are already fully discovered
+ */
+ if (dev->is_added)
+ continue;
+
+ pcibios_setup_device(dev);
+ }
+}
+
+void pcibios_set_master(struct pci_dev *dev)
+{
+ /* No special bus mastering setup handling */
+}
+
+void pcibios_fixup_bus(struct pci_bus *bus)
+{
+ /* When called from the generic PCI probe, read PCI<->PCI bridge
+ * bases. This is -not- called when generating the PCI tree from
+ * the OF device-tree.
+ */
+ pci_read_bridge_bases(bus);
+
+ /* Now fixup the bus bus */
+ pcibios_setup_bus_self(bus);
+
+ /* Now fixup devices on that bus */
+ pcibios_setup_bus_devices(bus);
+}
+EXPORT_SYMBOL(pcibios_fixup_bus);
+
+void pci_fixup_cardbus(struct pci_bus *bus)
+{
+ /* Now fixup devices on that bus */
+ pcibios_setup_bus_devices(bus);
+}
+
+
+static int skip_isa_ioresource_align(struct pci_dev *dev)
+{
+ if (pci_has_flag(PCI_CAN_SKIP_ISA_ALIGN) &&
+ !(dev->bus->bridge_ctl & PCI_BRIDGE_CTL_ISA))
+ return 1;
+ return 0;
+}
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ *
+ * Why? Because some silly external IO cards only decode
+ * the low 10 bits of the IO address. The 0x00-0xff region
+ * is reserved for motherboard devices that decode all 16
+ * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
+ * but we want to try to avoid allocating at 0x2900-0x2bff
+ * which might have be mirrored at 0x0100-0x03ff..
+ */
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+ resource_size_t size, resource_size_t align)
+{
+ struct pci_dev *dev = data;
+ resource_size_t start = res->start;
+
+ if (res->flags & IORESOURCE_IO) {
+ if (skip_isa_ioresource_align(dev))
+ return start;
+ if (start & 0x300)
+ start = (start + 0x3ff) & ~0x3ff;
+ }
+
+ return start;
+}
+EXPORT_SYMBOL(pcibios_align_resource);
+
+/*
+ * Reparent resource children of pr that conflict with res
+ * under res, and make res replace those children.
+ */
+static int reparent_resources(struct resource *parent,
+ struct resource *res)
+{
+ struct resource *p, **pp;
+ struct resource **firstpp = NULL;
+
+ for (pp = &parent->child; (p = *pp) != NULL; pp = &p->sibling) {
+ if (p->end < res->start)
+ continue;
+ if (res->end < p->start)
+ break;
+ if (p->start < res->start || p->end > res->end)
+ return -1; /* not completely contained */
+ if (firstpp == NULL)
+ firstpp = pp;
+ }
+ if (firstpp == NULL)
+ return -1; /* didn't find any conflicting entries? */
+ res->parent = parent;
+ res->child = *firstpp;
+ res->sibling = *pp;
+ *firstpp = res;
+ *pp = NULL;
+ for (p = res->child; p != NULL; p = p->sibling) {
+ p->parent = res;
+ pr_debug("PCI: Reparented %s [%llx..%llx] under %s\n",
+ p->name,
+ (unsigned long long)p->start,
+ (unsigned long long)p->end, res->name);
+ }
+ return 0;
+}
+
+/*
+ * Handle resources of PCI devices. If the world were perfect, we could
+ * just allocate all the resource regions and do nothing more. It isn't.
+ * On the other hand, we cannot just re-allocate all devices, as it would
+ * require us to know lots of host bridge internals. So we attempt to
+ * keep as much of the original configuration as possible, but tweak it
+ * when it's found to be wrong.
+ *
+ * Known BIOS problems we have to work around:
+ * - I/O or memory regions not configured
+ * - regions configured, but not enabled in the command register
+ * - bogus I/O addresses above 64K used
+ * - expansion ROMs left enabled (this may sound harmless, but given
+ * the fact the PCI specs explicitly allow address decoders to be
+ * shared between expansion ROMs and other resource regions, it's
+ * at least dangerous)
+ *
+ * Our solution:
+ * (1) Allocate resources for all buses behind PCI-to-PCI bridges.
+ * This gives us fixed barriers on where we can allocate.
+ * (2) Allocate resources for all enabled devices. If there is
+ * a collision, just mark the resource as unallocated. Also
+ * disable expansion ROMs during this step.
+ * (3) Try to allocate resources for disabled devices. If the
+ * resources were assigned correctly, everything goes well,
+ * if they weren't, they won't disturb allocation of other
+ * resources.
+ * (4) Assign new addresses to resources which were either
+ * not configured at all or misconfigured. If explicitly
+ * requested by the user, configure expansion ROM address
+ * as well.
+ */
+
+void pcibios_allocate_bus_resources(struct pci_bus *bus)
+{
+ struct pci_bus *b;
+ int i;
+ struct resource *res, *pr;
+
+ pr_debug("PCI: Allocating bus resources for %04x:%02x...\n",
+ pci_domain_nr(bus), bus->number);
+
+ pci_bus_for_each_resource(bus, res, i) {
+ if (!res || !res->flags || res->start > res->end || res->parent)
+ continue;
+
+ /* If the resource was left unset at this point, we clear it */
+ if (res->flags & IORESOURCE_UNSET)
+ goto clear_resource;
+
+ if (bus->parent == NULL)
+ pr = (res->flags & IORESOURCE_IO) ?
+ &ioport_resource : &iomem_resource;
+ else {
+ pr = pci_find_parent_resource(bus->self, res);
+ if (pr == res) {
+ /* this happens when the generic PCI
+ * code (wrongly) decides that this
+ * bridge is transparent -- paulus
+ */
+ continue;
+ }
+ }
+
+ pr_debug("PCI: %s (bus %d) bridge rsrc %d: %016llx-%016llx "
+ "[0x%x], parent %p (%s)\n",
+ bus->self ? pci_name(bus->self) : "PHB",
+ bus->number, i,
+ (unsigned long long)res->start,
+ (unsigned long long)res->end,
+ (unsigned int)res->flags,
+ pr, (pr && pr->name) ? pr->name : "nil");
+
+ if (pr && !(pr->flags & IORESOURCE_UNSET)) {
+ if (request_resource(pr, res) == 0)
+ continue;
+ /*
+ * Must be a conflict with an existing entry.
+ * Move that entry (or entries) under the
+ * bridge resource and try again.
+ */
+ if (reparent_resources(pr, res) == 0)
+ continue;
+ }
+ pr_warning("PCI: Cannot allocate resource region "
+ "%d of PCI bridge %d, will remap\n", i, bus->number);
+ clear_resource:
+ /* The resource might be figured out when doing
+ * reassignment based on the resources required
+ * by the downstream PCI devices. Here we set
+ * the size of the resource to be 0 in order to
+ * save more space.
+ */
+ res->start = 0;
+ res->end = -1;
+ res->flags = 0;
+ }
+
+ list_for_each_entry(b, &bus->children, node)
+ pcibios_allocate_bus_resources(b);
+}
+
+static inline void alloc_resource(struct pci_dev *dev, int idx)
+{
+ struct resource *pr, *r = &dev->resource[idx];
+
+ pr_debug("PCI: Allocating %s: Resource %d: %016llx..%016llx [%x]\n",
+ pci_name(dev), idx,
+ (unsigned long long)r->start,
+ (unsigned long long)r->end,
+ (unsigned int)r->flags);
+
+ pr = pci_find_parent_resource(dev, r);
+ if (!pr || (pr->flags & IORESOURCE_UNSET) ||
+ request_resource(pr, r) < 0) {
+ printk(KERN_WARNING "PCI: Cannot allocate resource region %d"
+ " of device %s, will remap\n", idx, pci_name(dev));
+ if (pr)
+ pr_debug("PCI: parent is %p: %016llx-%016llx [%x]\n",
+ pr,
+ (unsigned long long)pr->start,
+ (unsigned long long)pr->end,
+ (unsigned int)pr->flags);
+ /* We'll assign a new address later */
+ r->flags |= IORESOURCE_UNSET;
+ r->end -= r->start;
+ r->start = 0;
+ }
+}
+
+static void __init pcibios_allocate_resources(int pass)
+{
+ struct pci_dev *dev = NULL;
+ int idx, disabled;
+ u16 command;
+ struct resource *r;
+
+ for_each_pci_dev(dev) {
+ pci_read_config_word(dev, PCI_COMMAND, &command);
+ for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) {
+ r = &dev->resource[idx];
+ if (r->parent) /* Already allocated */
+ continue;
+ if (!r->flags || (r->flags & IORESOURCE_UNSET))
+ continue; /* Not assigned at all */
+ /* We only allocate ROMs on pass 1 just in case they
+ * have been screwed up by firmware
+ */
+ if (idx == PCI_ROM_RESOURCE )
+ disabled = 1;
+ if (r->flags & IORESOURCE_IO)
+ disabled = !(command & PCI_COMMAND_IO);
+ else
+ disabled = !(command & PCI_COMMAND_MEMORY);
+ if (pass == disabled)
+ alloc_resource(dev, idx);
+ }
+ if (pass)
+ continue;
+ r = &dev->resource[PCI_ROM_RESOURCE];
+ if (r->flags) {
+ /* Turn the ROM off, leave the resource region,
+ * but keep it unregistered.
+ */
+ u32 reg;
+ pci_read_config_dword(dev, dev->rom_base_reg, &reg);
+ if (reg & PCI_ROM_ADDRESS_ENABLE) {
+ pr_debug("PCI: Switching off ROM of %s\n",
+ pci_name(dev));
+ r->flags &= ~IORESOURCE_ROM_ENABLE;
+ pci_write_config_dword(dev, dev->rom_base_reg,
+ reg & ~PCI_ROM_ADDRESS_ENABLE);
+ }
+ }
+ }
+}
+
+static void __init pcibios_reserve_legacy_regions(struct pci_bus *bus)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ resource_size_t offset;
+ struct resource *res, *pres;
+ int i;
+
+ pr_debug("Reserving legacy ranges for domain %04x\n", pci_domain_nr(bus));
+
+ /* Check for IO */
+ if (!(hose->io_resource.flags & IORESOURCE_IO))
+ goto no_io;
+ offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+ BUG_ON(res == NULL);
+ res->name = "Legacy IO";
+ res->flags = IORESOURCE_IO;
+ res->start = offset;
+ res->end = (offset + 0xfff) & 0xfffffffful;
+ pr_debug("Candidate legacy IO: %pR\n", res);
+ if (request_resource(&hose->io_resource, res)) {
+ printk(KERN_DEBUG
+ "PCI %04x:%02x Cannot reserve Legacy IO %pR\n",
+ pci_domain_nr(bus), bus->number, res);
+ kfree(res);
+ }
+
+ no_io:
+ /* Check for memory */
+ for (i = 0; i < 3; i++) {
+ pres = &hose->mem_resources[i];
+ offset = hose->mem_offset[i];
+ if (!(pres->flags & IORESOURCE_MEM))
+ continue;
+ pr_debug("hose mem res: %pR\n", pres);
+ if ((pres->start - offset) <= 0xa0000 &&
+ (pres->end - offset) >= 0xbffff)
+ break;
+ }
+ if (i >= 3)
+ return;
+ res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+ BUG_ON(res == NULL);
+ res->name = "Legacy VGA memory";
+ res->flags = IORESOURCE_MEM;
+ res->start = 0xa0000 + offset;
+ res->end = 0xbffff + offset;
+ pr_debug("Candidate VGA memory: %pR\n", res);
+ if (request_resource(pres, res)) {
+ printk(KERN_DEBUG
+ "PCI %04x:%02x Cannot reserve VGA memory %pR\n",
+ pci_domain_nr(bus), bus->number, res);
+ kfree(res);
+ }
+}
+
+void __init pcibios_resource_survey(void)
+{
+ struct pci_bus *b;
+
+ /* Allocate and assign resources */
+ list_for_each_entry(b, &pci_root_buses, node)
+ pcibios_allocate_bus_resources(b);
+ pcibios_allocate_resources(0);
+ pcibios_allocate_resources(1);
+
+ /* Before we start assigning unassigned resource, we try to reserve
+ * the low IO area and the VGA memory area if they intersect the
+ * bus available resources to avoid allocating things on top of them
+ */
+ if (!pci_has_flag(PCI_PROBE_ONLY)) {
+ list_for_each_entry(b, &pci_root_buses, node)
+ pcibios_reserve_legacy_regions(b);
+ }
+
+ /* Now, if the platform didn't decide to blindly trust the firmware,
+ * we proceed to assigning things that were left unassigned
+ */
+ if (!pci_has_flag(PCI_PROBE_ONLY)) {
+ pr_debug("PCI: Assigning unassigned resources...\n");
+ pci_assign_unassigned_resources();
+ }
+
+ /* Call machine dependent fixup */
+ if (ppc_md.pcibios_fixup)
+ ppc_md.pcibios_fixup();
+}
+
+/* This is used by the PCI hotplug driver to allocate resource
+ * of newly plugged busses. We can try to consolidate with the
+ * rest of the code later, for now, keep it as-is as our main
+ * resource allocation function doesn't deal with sub-trees yet.
+ */
+void pcibios_claim_one_bus(struct pci_bus *bus)
+{
+ struct pci_dev *dev;
+ struct pci_bus *child_bus;
+
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ int i;
+
+ for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+ struct resource *r = &dev->resource[i];
+
+ if (r->parent || !r->start || !r->flags)
+ continue;
+
+ pr_debug("PCI: Claiming %s: "
+ "Resource %d: %016llx..%016llx [%x]\n",
+ pci_name(dev), i,
+ (unsigned long long)r->start,
+ (unsigned long long)r->end,
+ (unsigned int)r->flags);
+
+ pci_claim_resource(dev, i);
+ }
+ }
+
+ list_for_each_entry(child_bus, &bus->children, node)
+ pcibios_claim_one_bus(child_bus);
+}
+
+
+/* pcibios_finish_adding_to_bus
+ *
+ * This is to be called by the hotplug code after devices have been
+ * added to a bus, this include calling it for a PHB that is just
+ * being added
+ */
+void pcibios_finish_adding_to_bus(struct pci_bus *bus)
+{
+ pr_debug("PCI: Finishing adding to hotplug bus %04x:%02x\n",
+ pci_domain_nr(bus), bus->number);
+
+ /* Allocate bus and devices resources */
+ pcibios_allocate_bus_resources(bus);
+ pcibios_claim_one_bus(bus);
+ if (!pci_has_flag(PCI_PROBE_ONLY))
+ pci_assign_unassigned_bus_resources(bus);
+
+ /* Fixup EEH */
+ eeh_add_device_tree_late(bus);
+
+ /* Add new devices to global lists. Register in proc, sysfs. */
+ pci_bus_add_devices(bus);
+
+ /* sysfs files should only be added after devices are added */
+ eeh_add_sysfs_files(bus);
+}
+EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus);
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+ if (ppc_md.pcibios_enable_device_hook)
+ if (ppc_md.pcibios_enable_device_hook(dev))
+ return -EINVAL;
+
+ return pci_enable_resources(dev, mask);
+}
+
+resource_size_t pcibios_io_space_offset(struct pci_controller *hose)
+{
+ return (unsigned long) hose->io_base_virt - _IO_BASE;
+}
+
+static void pcibios_setup_phb_resources(struct pci_controller *hose,
+ struct list_head *resources)
+{
+ struct resource *res;
+ resource_size_t offset;
+ int i;
+
+ /* Hookup PHB IO resource */
+ res = &hose->io_resource;
+
+ if (!res->flags) {
+ printk(KERN_WARNING "PCI: I/O resource not set for host"
+ " bridge %s (domain %d)\n",
+ hose->dn->full_name, hose->global_number);
+ } else {
+ offset = pcibios_io_space_offset(hose);
+
+ pr_debug("PCI: PHB IO resource = %08llx-%08llx [%lx] off 0x%08llx\n",
+ (unsigned long long)res->start,
+ (unsigned long long)res->end,
+ (unsigned long)res->flags,
+ (unsigned long long)offset);
+ pci_add_resource_offset(resources, res, offset);
+ }
+
+ /* Hookup PHB Memory resources */
+ for (i = 0; i < 3; ++i) {
+ res = &hose->mem_resources[i];
+ if (!res->flags) {
+ if (i == 0)
+ printk(KERN_ERR "PCI: Memory resource 0 not set for "
+ "host bridge %s (domain %d)\n",
+ hose->dn->full_name, hose->global_number);
+ continue;
+ }
+ offset = hose->mem_offset[i];
+
+
+ pr_debug("PCI: PHB MEM resource %d = %08llx-%08llx [%lx] off 0x%08llx\n", i,
+ (unsigned long long)res->start,
+ (unsigned long long)res->end,
+ (unsigned long)res->flags,
+ (unsigned long long)offset);
+
+ pci_add_resource_offset(resources, res, offset);
+ }
+}
+
+/*
+ * Null PCI config access functions, for the case when we can't
+ * find a hose.
+ */
+#define NULL_PCI_OP(rw, size, type) \
+static int \
+null_##rw##_config_##size(struct pci_dev *dev, int offset, type val) \
+{ \
+ return PCIBIOS_DEVICE_NOT_FOUND; \
+}
+
+static int
+null_read_config(struct pci_bus *bus, unsigned int devfn, int offset,
+ int len, u32 *val)
+{
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+static int
+null_write_config(struct pci_bus *bus, unsigned int devfn, int offset,
+ int len, u32 val)
+{
+ return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+static struct pci_ops null_pci_ops =
+{
+ .read = null_read_config,
+ .write = null_write_config,
+};
+
+/*
+ * These functions are used early on before PCI scanning is done
+ * and all of the pci_dev and pci_bus structures have been created.
+ */
+static struct pci_bus *
+fake_pci_bus(struct pci_controller *hose, int busnr)
+{
+ static struct pci_bus bus;
+
+ if (hose == NULL) {
+ printk(KERN_ERR "Can't find hose for PCI bus %d!\n", busnr);
+ }
+ bus.number = busnr;
+ bus.sysdata = hose;
+ bus.ops = hose? hose->ops: &null_pci_ops;
+ return &bus;
+}
+
+#define EARLY_PCI_OP(rw, size, type) \
+int early_##rw##_config_##size(struct pci_controller *hose, int bus, \
+ int devfn, int offset, type value) \
+{ \
+ return pci_bus_##rw##_config_##size(fake_pci_bus(hose, bus), \
+ devfn, offset, value); \
+}
+
+EARLY_PCI_OP(read, byte, u8 *)
+EARLY_PCI_OP(read, word, u16 *)
+EARLY_PCI_OP(read, dword, u32 *)
+EARLY_PCI_OP(write, byte, u8)
+EARLY_PCI_OP(write, word, u16)
+EARLY_PCI_OP(write, dword, u32)
+
+extern int pci_bus_find_capability (struct pci_bus *bus, unsigned int devfn, int cap);
+int early_find_capability(struct pci_controller *hose, int bus, int devfn,
+ int cap)
+{
+ return pci_bus_find_capability(fake_pci_bus(hose, bus), devfn, cap);
+}
+
+struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus)
+{
+ struct pci_controller *hose = bus->sysdata;
+
+ return of_node_get(hose->dn);
+}
+
+/**
+ * pci_scan_phb - Given a pci_controller, setup and scan the PCI bus
+ * @hose: Pointer to the PCI host controller instance structure
+ */
+void pcibios_scan_phb(struct pci_controller *hose)
+{
+ LIST_HEAD(resources);
+ struct pci_bus *bus;
+ struct device_node *node = hose->dn;
+ int mode;
+
+ pr_debug("PCI: Scanning PHB %s\n", of_node_full_name(node));
+
+ /* Get some IO space for the new PHB */
+ pcibios_setup_phb_io_space(hose);
+
+ /* Wire up PHB bus resources */
+ pcibios_setup_phb_resources(hose, &resources);
+
+ hose->busn.start = hose->first_busno;
+ hose->busn.end = hose->last_busno;
+ hose->busn.flags = IORESOURCE_BUS;
+ pci_add_resource(&resources, &hose->busn);
+
+ /* Create an empty bus for the toplevel */
+ bus = pci_create_root_bus(hose->parent, hose->first_busno,
+ hose->ops, hose, &resources);
+ if (bus == NULL) {
+ pr_err("Failed to create bus for PCI domain %04x\n",
+ hose->global_number);
+ pci_free_resource_list(&resources);
+ return;
+ }
+ hose->bus = bus;
+
+ /* Get probe mode and perform scan */
+ mode = PCI_PROBE_NORMAL;
+ if (node && ppc_md.pci_probe_mode)
+ mode = ppc_md.pci_probe_mode(bus);
+ pr_debug(" probe mode: %d\n", mode);
+ if (mode == PCI_PROBE_DEVTREE)
+ of_scan_bus(node, bus);
+
+ if (mode == PCI_PROBE_NORMAL) {
+ pci_bus_update_busn_res_end(bus, 255);
+ hose->last_busno = pci_scan_child_bus(bus);
+ pci_bus_update_busn_res_end(bus, hose->last_busno);
+ }
+
+ /* Platform gets a chance to do some global fixups before
+ * we proceed to resource allocation
+ */
+ if (ppc_md.pcibios_fixup_phb)
+ ppc_md.pcibios_fixup_phb(hose);
+
+ /* Configure PCI Express settings */
+ if (bus && !pci_has_flag(PCI_PROBE_ONLY)) {
+ struct pci_bus *child;
+ list_for_each_entry(child, &bus->children, node)
+ pcie_bus_configure_settings(child);
+ }
+}
+
+static void fixup_hide_host_resource_fsl(struct pci_dev *dev)
+{
+ int i, class = dev->class >> 8;
+ /* When configured as agent, programing interface = 1 */
+ int prog_if = dev->class & 0xf;
+
+ if ((class == PCI_CLASS_PROCESSOR_POWERPC ||
+ class == PCI_CLASS_BRIDGE_OTHER) &&
+ (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) &&
+ (prog_if == 0) &&
+ (dev->bus->parent == NULL)) {
+ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+ dev->resource[i].start = 0;
+ dev->resource[i].end = 0;
+ dev->resource[i].flags = 0;
+ }
+ }
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl);
+
+static void fixup_vga(struct pci_dev *pdev)
+{
+ u16 cmd;
+
+ pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+ if ((cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) || !vga_default_device())
+ vga_set_default_device(pdev);
+
+}
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
+ PCI_CLASS_DISPLAY_VGA, 8, fixup_vga);
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
new file mode 100644
index 00000000000..5b789177aa2
--- /dev/null
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -0,0 +1,109 @@
+/*
+ * Derived from "arch/powerpc/platforms/pseries/pci_dlpar.c"
+ *
+ * Copyright (C) 2003 Linda Xie <lxie@us.ibm.com>
+ * Copyright (C) 2005 International Business Machines
+ *
+ * Updates, 2005, John Rose <johnrose@austin.ibm.com>
+ * Updates, 2005, Linas Vepstas <linas@austin.ibm.com>
+ * Updates, 2013, Gavin Shan <shangw@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/pci.h>
+#include <linux/export.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/firmware.h>
+#include <asm/eeh.h>
+
+/**
+ * pcibios_release_device - release PCI device
+ * @dev: PCI device
+ *
+ * The function is called before releasing the indicated PCI device.
+ */
+void pcibios_release_device(struct pci_dev *dev)
+{
+ eeh_remove_device(dev);
+}
+
+/**
+ * pcibios_remove_pci_devices - remove all devices under this bus
+ * @bus: the indicated PCI bus
+ *
+ * Remove all of the PCI devices under this bus both from the
+ * linux pci device tree, and from the powerpc EEH address cache.
+ */
+void pcibios_remove_pci_devices(struct pci_bus *bus)
+{
+ struct pci_dev *dev, *tmp;
+ struct pci_bus *child_bus;
+
+ /* First go down child busses */
+ list_for_each_entry(child_bus, &bus->children, node)
+ pcibios_remove_pci_devices(child_bus);
+
+ pr_debug("PCI: Removing devices on bus %04x:%02x\n",
+ pci_domain_nr(bus), bus->number);
+ list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
+ pr_debug(" Removing %s...\n", pci_name(dev));
+ pci_stop_and_remove_bus_device(dev);
+ }
+}
+
+EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
+
+/**
+ * pcibios_add_pci_devices - adds new pci devices to bus
+ * @bus: the indicated PCI bus
+ *
+ * This routine will find and fixup new pci devices under
+ * the indicated bus. This routine presumes that there
+ * might already be some devices under this bridge, so
+ * it carefully tries to add only new devices. (And that
+ * is how this routine differs from other, similar pcibios
+ * routines.)
+ */
+void pcibios_add_pci_devices(struct pci_bus * bus)
+{
+ int slotno, mode, pass, max;
+ struct pci_dev *dev;
+ struct device_node *dn = pci_bus_to_OF_node(bus);
+
+ eeh_add_device_tree_early(dn);
+
+ mode = PCI_PROBE_NORMAL;
+ if (ppc_md.pci_probe_mode)
+ mode = ppc_md.pci_probe_mode(bus);
+
+ if (mode == PCI_PROBE_DEVTREE) {
+ /* use ofdt-based probe */
+ of_rescan_bus(dn, bus);
+ } else if (mode == PCI_PROBE_NORMAL) {
+ /*
+ * Use legacy probe. In the partial hotplug case, we
+ * probably have grandchildren devices unplugged. So
+ * we don't check the return value from pci_scan_slot() in
+ * order for fully rescan all the way down to pick them up.
+ * They can have been removed during partial hotplug.
+ */
+ slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
+ pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
+ pcibios_setup_bus_devices(bus);
+ max = bus->busn_res.start;
+ for (pass = 0; pass < 2; pass++) {
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ if (pci_is_bridge(dev))
+ max = pci_scan_bridge(bus, dev,
+ max, pass);
+ }
+ }
+ }
+ pcibios_finish_adding_to_bus(bus);
+}
+EXPORT_SYMBOL_GPL(pcibios_add_pci_devices);
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 9b49f8691d2..432459c817f 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -12,62 +12,44 @@
#include <linux/errno.h>
#include <linux/bootmem.h>
#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/export.h>
#include <asm/processor.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/sections.h>
#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
#include <asm/byteorder.h>
#include <asm/uaccess.h>
#include <asm/machdep.h>
#undef DEBUG
-#ifdef DEBUG
-#define DBG(x...) printk(x)
-#else
-#define DBG(x...)
-#endif
-
unsigned long isa_io_base = 0;
-unsigned long isa_mem_base = 0;
unsigned long pci_dram_offset = 0;
int pcibios_assign_bus_offset = 1;
void pcibios_make_OF_bus_map(void);
-static int pci_relocate_bridge_resource(struct pci_bus *bus, int i);
-static int probe_resource(struct pci_bus *parent, struct resource *pr,
- struct resource *res, struct resource **conflict);
-static void update_bridge_base(struct pci_bus *bus, int i);
-static void pcibios_fixup_resources(struct pci_dev* dev);
-static void fixup_broken_pcnet32(struct pci_dev* dev);
-static int reparent_resources(struct resource *parent, struct resource *res);
static void fixup_cpc710_pci64(struct pci_dev* dev);
-#ifdef CONFIG_PPC_OF
static u8* pci_to_OF_bus_map;
-#endif
/* By default, we don't re-assign bus numbers. We do this only on
* some pmacs
*/
-int pci_assign_all_buses;
-
-struct pci_controller* hose_head;
-struct pci_controller** hose_tail = &hose_head;
+static int pci_assign_all_buses;
static int pci_bus_count;
-static void
-fixup_broken_pcnet32(struct pci_dev* dev)
-{
- if ((dev->class>>8 == PCI_CLASS_NETWORK_ETHERNET)) {
- dev->vendor = PCI_VENDOR_ID_AMD;
- pci_write_config_word(dev, PCI_VENDOR_ID, PCI_VENDOR_ID_AMD);
- }
-}
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32);
+/* This will remain NULL for now, until isa-bridge.c is made common
+ * to both 32-bit and 64-bit.
+ */
+struct pci_dev *isa_bridge_pcidev;
+EXPORT_SYMBOL_GPL(isa_bridge_pcidev);
static void
fixup_cpc710_pci64(struct pci_dev* dev)
@@ -82,551 +64,6 @@ fixup_cpc710_pci64(struct pci_dev* dev)
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CPC710_PCI64, fixup_cpc710_pci64);
-static void
-pcibios_fixup_resources(struct pci_dev *dev)
-{
- struct pci_controller* hose = (struct pci_controller *)dev->sysdata;
- int i;
- unsigned long offset;
-
- if (!hose) {
- printk(KERN_ERR "No hose for PCI dev %s!\n", pci_name(dev));
- return;
- }
- for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
- struct resource *res = dev->resource + i;
- if (!res->flags)
- continue;
- if (res->end == 0xffffffff) {
- DBG("PCI:%s Resource %d [%016llx-%016llx] is unassigned\n",
- pci_name(dev), i, res->start, res->end);
- res->end -= res->start;
- res->start = 0;
- res->flags |= IORESOURCE_UNSET;
- continue;
- }
- offset = 0;
- if (res->flags & IORESOURCE_MEM) {
- offset = hose->pci_mem_offset;
- } else if (res->flags & IORESOURCE_IO) {
- offset = (unsigned long) hose->io_base_virt
- - isa_io_base;
- }
- if (offset != 0) {
- res->start += offset;
- res->end += offset;
-#ifdef DEBUG
- printk("Fixup res %d (%lx) of dev %s: %llx -> %llx\n",
- i, res->flags, pci_name(dev),
- res->start - offset, res->start);
-#endif
- }
- }
-
- /* Call machine specific resource fixup */
- if (ppc_md.pcibios_fixup_resources)
- ppc_md.pcibios_fixup_resources(dev);
-}
-DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources);
-
-void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
- struct resource *res)
-{
- unsigned long offset = 0;
- struct pci_controller *hose = dev->sysdata;
-
- if (hose && res->flags & IORESOURCE_IO)
- offset = (unsigned long)hose->io_base_virt - isa_io_base;
- else if (hose && res->flags & IORESOURCE_MEM)
- offset = hose->pci_mem_offset;
- region->start = res->start - offset;
- region->end = res->end - offset;
-}
-EXPORT_SYMBOL(pcibios_resource_to_bus);
-
-void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
- struct pci_bus_region *region)
-{
- unsigned long offset = 0;
- struct pci_controller *hose = dev->sysdata;
-
- if (hose && res->flags & IORESOURCE_IO)
- offset = (unsigned long)hose->io_base_virt - isa_io_base;
- else if (hose && res->flags & IORESOURCE_MEM)
- offset = hose->pci_mem_offset;
- res->start = region->start + offset;
- res->end = region->end + offset;
-}
-EXPORT_SYMBOL(pcibios_bus_to_resource);
-
-/*
- * We need to avoid collisions with `mirrored' VGA ports
- * and other strange ISA hardware, so we always want the
- * addresses to be allocated in the 0x000-0x0ff region
- * modulo 0x400.
- *
- * Why? Because some silly external IO cards only decode
- * the low 10 bits of the IO address. The 0x00-0xff region
- * is reserved for motherboard devices that decode all 16
- * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
- * but we want to try to avoid allocating at 0x2900-0x2bff
- * which might have be mirrored at 0x0100-0x03ff..
- */
-void pcibios_align_resource(void *data, struct resource *res,
- resource_size_t size, resource_size_t align)
-{
- struct pci_dev *dev = data;
-
- if (res->flags & IORESOURCE_IO) {
- resource_size_t start = res->start;
-
- if (size > 0x100) {
- printk(KERN_ERR "PCI: I/O Region %s/%d too large"
- " (%lld bytes)\n", pci_name(dev),
- dev->resource - res, (unsigned long long)size);
- }
-
- if (start & 0x300) {
- start = (start + 0x3ff) & ~0x3ff;
- res->start = start;
- }
- }
-}
-EXPORT_SYMBOL(pcibios_align_resource);
-
-/*
- * Handle resources of PCI devices. If the world were perfect, we could
- * just allocate all the resource regions and do nothing more. It isn't.
- * On the other hand, we cannot just re-allocate all devices, as it would
- * require us to know lots of host bridge internals. So we attempt to
- * keep as much of the original configuration as possible, but tweak it
- * when it's found to be wrong.
- *
- * Known BIOS problems we have to work around:
- * - I/O or memory regions not configured
- * - regions configured, but not enabled in the command register
- * - bogus I/O addresses above 64K used
- * - expansion ROMs left enabled (this may sound harmless, but given
- * the fact the PCI specs explicitly allow address decoders to be
- * shared between expansion ROMs and other resource regions, it's
- * at least dangerous)
- *
- * Our solution:
- * (1) Allocate resources for all buses behind PCI-to-PCI bridges.
- * This gives us fixed barriers on where we can allocate.
- * (2) Allocate resources for all enabled devices. If there is
- * a collision, just mark the resource as unallocated. Also
- * disable expansion ROMs during this step.
- * (3) Try to allocate resources for disabled devices. If the
- * resources were assigned correctly, everything goes well,
- * if they weren't, they won't disturb allocation of other
- * resources.
- * (4) Assign new addresses to resources which were either
- * not configured at all or misconfigured. If explicitly
- * requested by the user, configure expansion ROM address
- * as well.
- */
-
-static void __init
-pcibios_allocate_bus_resources(struct list_head *bus_list)
-{
- struct pci_bus *bus;
- int i;
- struct resource *res, *pr;
-
- /* Depth-First Search on bus tree */
- list_for_each_entry(bus, bus_list, node) {
- for (i = 0; i < 4; ++i) {
- if ((res = bus->resource[i]) == NULL || !res->flags
- || res->start > res->end)
- continue;
- if (bus->parent == NULL)
- pr = (res->flags & IORESOURCE_IO)?
- &ioport_resource: &iomem_resource;
- else {
- pr = pci_find_parent_resource(bus->self, res);
- if (pr == res) {
- /* this happens when the generic PCI
- * code (wrongly) decides that this
- * bridge is transparent -- paulus
- */
- continue;
- }
- }
-
- DBG("PCI: bridge rsrc %llx..%llx (%lx), parent %p\n",
- res->start, res->end, res->flags, pr);
- if (pr) {
- if (request_resource(pr, res) == 0)
- continue;
- /*
- * Must be a conflict with an existing entry.
- * Move that entry (or entries) under the
- * bridge resource and try again.
- */
- if (reparent_resources(pr, res) == 0)
- continue;
- }
- printk(KERN_ERR "PCI: Cannot allocate resource region "
- "%d of PCI bridge %d\n", i, bus->number);
- if (pci_relocate_bridge_resource(bus, i))
- bus->resource[i] = NULL;
- }
- pcibios_allocate_bus_resources(&bus->children);
- }
-}
-
-/*
- * Reparent resource children of pr that conflict with res
- * under res, and make res replace those children.
- */
-static int __init
-reparent_resources(struct resource *parent, struct resource *res)
-{
- struct resource *p, **pp;
- struct resource **firstpp = NULL;
-
- for (pp = &parent->child; (p = *pp) != NULL; pp = &p->sibling) {
- if (p->end < res->start)
- continue;
- if (res->end < p->start)
- break;
- if (p->start < res->start || p->end > res->end)
- return -1; /* not completely contained */
- if (firstpp == NULL)
- firstpp = pp;
- }
- if (firstpp == NULL)
- return -1; /* didn't find any conflicting entries? */
- res->parent = parent;
- res->child = *firstpp;
- res->sibling = *pp;
- *firstpp = res;
- *pp = NULL;
- for (p = res->child; p != NULL; p = p->sibling) {
- p->parent = res;
- DBG(KERN_INFO "PCI: reparented %s [%llx..%llx] under %s\n",
- p->name, p->start, p->end, res->name);
- }
- return 0;
-}
-
-/*
- * A bridge has been allocated a range which is outside the range
- * of its parent bridge, so it needs to be moved.
- */
-static int __init
-pci_relocate_bridge_resource(struct pci_bus *bus, int i)
-{
- struct resource *res, *pr, *conflict;
- unsigned long try, size;
- int j;
- struct pci_bus *parent = bus->parent;
-
- if (parent == NULL) {
- /* shouldn't ever happen */
- printk(KERN_ERR "PCI: can't move host bridge resource\n");
- return -1;
- }
- res = bus->resource[i];
- if (res == NULL)
- return -1;
- pr = NULL;
- for (j = 0; j < 4; j++) {
- struct resource *r = parent->resource[j];
- if (!r)
- continue;
- if ((res->flags ^ r->flags) & (IORESOURCE_IO | IORESOURCE_MEM))
- continue;
- if (!((res->flags ^ r->flags) & IORESOURCE_PREFETCH)) {
- pr = r;
- break;
- }
- if (res->flags & IORESOURCE_PREFETCH)
- pr = r;
- }
- if (pr == NULL)
- return -1;
- size = res->end - res->start;
- if (pr->start > pr->end || size > pr->end - pr->start)
- return -1;
- try = pr->end;
- for (;;) {
- res->start = try - size;
- res->end = try;
- if (probe_resource(bus->parent, pr, res, &conflict) == 0)
- break;
- if (conflict->start <= pr->start + size)
- return -1;
- try = conflict->start - 1;
- }
- if (request_resource(pr, res)) {
- DBG(KERN_ERR "PCI: huh? couldn't move to %llx..%llx\n",
- res->start, res->end);
- return -1; /* "can't happen" */
- }
- update_bridge_base(bus, i);
- printk(KERN_INFO "PCI: bridge %d resource %d moved to %llx..%llx\n",
- bus->number, i, (unsigned long long)res->start,
- (unsigned long long)res->end);
- return 0;
-}
-
-static int __init
-probe_resource(struct pci_bus *parent, struct resource *pr,
- struct resource *res, struct resource **conflict)
-{
- struct pci_bus *bus;
- struct pci_dev *dev;
- struct resource *r;
- int i;
-
- for (r = pr->child; r != NULL; r = r->sibling) {
- if (r->end >= res->start && res->end >= r->start) {
- *conflict = r;
- return 1;
- }
- }
- list_for_each_entry(bus, &parent->children, node) {
- for (i = 0; i < 4; ++i) {
- if ((r = bus->resource[i]) == NULL)
- continue;
- if (!r->flags || r->start > r->end || r == res)
- continue;
- if (pci_find_parent_resource(bus->self, r) != pr)
- continue;
- if (r->end >= res->start && res->end >= r->start) {
- *conflict = r;
- return 1;
- }
- }
- }
- list_for_each_entry(dev, &parent->devices, bus_list) {
- for (i = 0; i < 6; ++i) {
- r = &dev->resource[i];
- if (!r->flags || (r->flags & IORESOURCE_UNSET))
- continue;
- if (pci_find_parent_resource(dev, r) != pr)
- continue;
- if (r->end >= res->start && res->end >= r->start) {
- *conflict = r;
- return 1;
- }
- }
- }
- return 0;
-}
-
-static void __init
-update_bridge_base(struct pci_bus *bus, int i)
-{
- struct resource *res = bus->resource[i];
- u8 io_base_lo, io_limit_lo;
- u16 mem_base, mem_limit;
- u16 cmd;
- unsigned long start, end, off;
- struct pci_dev *dev = bus->self;
- struct pci_controller *hose = dev->sysdata;
-
- if (!hose) {
- printk("update_bridge_base: no hose?\n");
- return;
- }
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
- pci_write_config_word(dev, PCI_COMMAND,
- cmd & ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY));
- if (res->flags & IORESOURCE_IO) {
- off = (unsigned long) hose->io_base_virt - isa_io_base;
- start = res->start - off;
- end = res->end - off;
- io_base_lo = (start >> 8) & PCI_IO_RANGE_MASK;
- io_limit_lo = (end >> 8) & PCI_IO_RANGE_MASK;
- if (end > 0xffff) {
- pci_write_config_word(dev, PCI_IO_BASE_UPPER16,
- start >> 16);
- pci_write_config_word(dev, PCI_IO_LIMIT_UPPER16,
- end >> 16);
- io_base_lo |= PCI_IO_RANGE_TYPE_32;
- } else
- io_base_lo |= PCI_IO_RANGE_TYPE_16;
- pci_write_config_byte(dev, PCI_IO_BASE, io_base_lo);
- pci_write_config_byte(dev, PCI_IO_LIMIT, io_limit_lo);
-
- } else if ((res->flags & (IORESOURCE_MEM | IORESOURCE_PREFETCH))
- == IORESOURCE_MEM) {
- off = hose->pci_mem_offset;
- mem_base = ((res->start - off) >> 16) & PCI_MEMORY_RANGE_MASK;
- mem_limit = ((res->end - off) >> 16) & PCI_MEMORY_RANGE_MASK;
- pci_write_config_word(dev, PCI_MEMORY_BASE, mem_base);
- pci_write_config_word(dev, PCI_MEMORY_LIMIT, mem_limit);
-
- } else if ((res->flags & (IORESOURCE_MEM | IORESOURCE_PREFETCH))
- == (IORESOURCE_MEM | IORESOURCE_PREFETCH)) {
- off = hose->pci_mem_offset;
- mem_base = ((res->start - off) >> 16) & PCI_PREF_RANGE_MASK;
- mem_limit = ((res->end - off) >> 16) & PCI_PREF_RANGE_MASK;
- pci_write_config_word(dev, PCI_PREF_MEMORY_BASE, mem_base);
- pci_write_config_word(dev, PCI_PREF_MEMORY_LIMIT, mem_limit);
-
- } else {
- DBG(KERN_ERR "PCI: ugh, bridge %s res %d has flags=%lx\n",
- pci_name(dev), i, res->flags);
- }
- pci_write_config_word(dev, PCI_COMMAND, cmd);
-}
-
-static inline void alloc_resource(struct pci_dev *dev, int idx)
-{
- struct resource *pr, *r = &dev->resource[idx];
-
- DBG("PCI:%s: Resource %d: %016llx-%016llx (f=%lx)\n",
- pci_name(dev), idx, r->start, r->end, r->flags);
- pr = pci_find_parent_resource(dev, r);
- if (!pr || request_resource(pr, r) < 0) {
- printk(KERN_ERR "PCI: Cannot allocate resource region %d"
- " of device %s\n", idx, pci_name(dev));
- if (pr)
- DBG("PCI: parent is %p: %016llx-%016llx (f=%lx)\n",
- pr, pr->start, pr->end, pr->flags);
- /* We'll assign a new address later */
- r->flags |= IORESOURCE_UNSET;
- r->end -= r->start;
- r->start = 0;
- }
-}
-
-static void __init
-pcibios_allocate_resources(int pass)
-{
- struct pci_dev *dev = NULL;
- int idx, disabled;
- u16 command;
- struct resource *r;
-
- for_each_pci_dev(dev) {
- pci_read_config_word(dev, PCI_COMMAND, &command);
- for (idx = 0; idx < 6; idx++) {
- r = &dev->resource[idx];
- if (r->parent) /* Already allocated */
- continue;
- if (!r->flags || (r->flags & IORESOURCE_UNSET))
- continue; /* Not assigned at all */
- if (r->flags & IORESOURCE_IO)
- disabled = !(command & PCI_COMMAND_IO);
- else
- disabled = !(command & PCI_COMMAND_MEMORY);
- if (pass == disabled)
- alloc_resource(dev, idx);
- }
- if (pass)
- continue;
- r = &dev->resource[PCI_ROM_RESOURCE];
- if (r->flags & IORESOURCE_ROM_ENABLE) {
- /* Turn the ROM off, leave the resource region, but keep it unregistered. */
- u32 reg;
- DBG("PCI: Switching off ROM of %s\n", pci_name(dev));
- r->flags &= ~IORESOURCE_ROM_ENABLE;
- pci_read_config_dword(dev, dev->rom_base_reg, &reg);
- pci_write_config_dword(dev, dev->rom_base_reg,
- reg & ~PCI_ROM_ADDRESS_ENABLE);
- }
- }
-}
-
-static void __init
-pcibios_assign_resources(void)
-{
- struct pci_dev *dev = NULL;
- int idx;
- struct resource *r;
-
- for_each_pci_dev(dev) {
- int class = dev->class >> 8;
-
- /* Don't touch classless devices and host bridges */
- if (!class || class == PCI_CLASS_BRIDGE_HOST)
- continue;
-
- for (idx = 0; idx < 6; idx++) {
- r = &dev->resource[idx];
-
- /*
- * We shall assign a new address to this resource,
- * either because the BIOS (sic) forgot to do so
- * or because we have decided the old address was
- * unusable for some reason.
- */
- if ((r->flags & IORESOURCE_UNSET) && r->end &&
- (!ppc_md.pcibios_enable_device_hook ||
- !ppc_md.pcibios_enable_device_hook(dev, 1))) {
- r->flags &= ~IORESOURCE_UNSET;
- pci_assign_resource(dev, idx);
- }
- }
-
-#if 0 /* don't assign ROMs */
- r = &dev->resource[PCI_ROM_RESOURCE];
- r->end -= r->start;
- r->start = 0;
- if (r->end)
- pci_assign_resource(dev, PCI_ROM_RESOURCE);
-#endif
- }
-}
-
-
-int
-pcibios_enable_resources(struct pci_dev *dev, int mask)
-{
- u16 cmd, old_cmd;
- int idx;
- struct resource *r;
-
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
- old_cmd = cmd;
- for (idx=0; idx<6; idx++) {
- /* Only set up the requested stuff */
- if (!(mask & (1<<idx)))
- continue;
-
- r = &dev->resource[idx];
- if (r->flags & IORESOURCE_UNSET) {
- printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", pci_name(dev));
- return -EINVAL;
- }
- if (r->flags & IORESOURCE_IO)
- cmd |= PCI_COMMAND_IO;
- if (r->flags & IORESOURCE_MEM)
- cmd |= PCI_COMMAND_MEMORY;
- }
- if (dev->resource[PCI_ROM_RESOURCE].start)
- cmd |= PCI_COMMAND_MEMORY;
- if (cmd != old_cmd) {
- printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd);
- pci_write_config_word(dev, PCI_COMMAND, cmd);
- }
- return 0;
-}
-
-static int next_controller_index;
-
-struct pci_controller * __init
-pcibios_alloc_controller(void)
-{
- struct pci_controller *hose;
-
- hose = (struct pci_controller *)alloc_bootmem(sizeof(*hose));
- memset(hose, 0, sizeof(struct pci_controller));
-
- *hose_tail = hose;
- hose_tail = &hose->next;
-
- hose->index = next_controller_index++;
-
- return hose;
-}
-
-#ifdef CONFIG_PPC_OF
/*
* Functions below are used on OpenFirmware machines.
*/
@@ -638,7 +75,7 @@ make_one_node_map(struct device_node* node, u8 pci_bus)
if (pci_bus >= pci_bus_count)
return;
- bus_range = get_property(node, "bus-range", &len);
+ bus_range = of_get_property(node, "bus-range", &len);
if (bus_range == NULL || len < 2 * sizeof(int)) {
printk(KERN_WARNING "Can't get bus-range for %s, "
"assuming it starts at 0\n", node->full_name);
@@ -646,21 +83,24 @@ make_one_node_map(struct device_node* node, u8 pci_bus)
} else
pci_to_OF_bus_map[pci_bus] = bus_range[0];
- for (node=node->child; node != 0;node = node->sibling) {
+ for_each_child_of_node(node, node) {
struct pci_dev* dev;
const unsigned int *class_code, *reg;
- class_code = get_property(node, "class-code", NULL);
+ class_code = of_get_property(node, "class-code", NULL);
if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
(*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS))
continue;
- reg = get_property(node, "reg", NULL);
+ reg = of_get_property(node, "reg", NULL);
if (!reg)
continue;
- dev = pci_find_slot(pci_bus, ((reg[0] >> 8) & 0xff));
- if (!dev || !dev->subordinate)
+ dev = pci_get_bus_and_slot(pci_bus, ((reg[0] >> 8) & 0xff));
+ if (!dev || !dev->subordinate) {
+ pci_dev_put(dev);
continue;
+ }
make_one_node_map(node, dev->subordinate->number);
+ pci_dev_put(dev);
}
}
@@ -668,10 +108,11 @@ void
pcibios_make_OF_bus_map(void)
{
int i;
- struct pci_controller* hose;
+ struct pci_controller *hose, *tmp;
struct property *map_prop;
+ struct device_node *dn;
- pci_to_OF_bus_map = (u8*)kmalloc(pci_bus_count, GFP_KERNEL);
+ pci_to_OF_bus_map = kmalloc(pci_bus_count, GFP_KERNEL);
if (!pci_to_OF_bus_map) {
printk(KERN_ERR "Can't allocate OF bus map !\n");
return;
@@ -684,19 +125,20 @@ pcibios_make_OF_bus_map(void)
pci_to_OF_bus_map[i] = 0xff;
/* For each hose, we begin searching bridges */
- for(hose=hose_head; hose; hose=hose->next) {
- struct device_node* node;
- node = (struct device_node *)hose->arch_data;
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ struct device_node* node = hose->dn;
+
if (!node)
continue;
make_one_node_map(node, hose->first_busno);
}
- map_prop = of_find_property(find_path_device("/"),
- "pci-OF-bus-map", NULL);
+ dn = of_find_node_by_path("/");
+ map_prop = of_find_property(dn, "pci-OF-bus-map", NULL);
if (map_prop) {
BUG_ON(pci_bus_count > map_prop->length);
memcpy(map_prop->value, pci_to_OF_bus_map, pci_bus_count);
}
+ of_node_put(dn);
#ifdef DEBUG
printk("PCI->OF bus map:\n");
for (i=0; i<pci_bus_count; i++) {
@@ -707,161 +149,26 @@ pcibios_make_OF_bus_map(void)
#endif
}
-typedef int (*pci_OF_scan_iterator)(struct device_node* node, void* data);
-
-static struct device_node*
-scan_OF_pci_childs(struct device_node* node, pci_OF_scan_iterator filter, void* data)
-{
- struct device_node* sub_node;
-
- for (; node != 0;node = node->sibling) {
- const unsigned int *class_code;
-
- if (filter(node, data))
- return node;
-
- /* For PCI<->PCI bridges or CardBus bridges, we go down
- * Note: some OFs create a parent node "multifunc-device" as
- * a fake root for all functions of a multi-function device,
- * we go down them as well.
- */
- class_code = get_property(node, "class-code", NULL);
- if ((!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
- (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) &&
- strcmp(node->name, "multifunc-device"))
- continue;
- sub_node = scan_OF_pci_childs(node->child, filter, data);
- if (sub_node)
- return sub_node;
- }
- return NULL;
-}
-
-static int
-scan_OF_pci_childs_iterator(struct device_node* node, void* data)
-{
- const unsigned int *reg;
- u8* fdata = (u8*)data;
-
- reg = get_property(node, "reg", NULL);
- if (reg && ((reg[0] >> 8) & 0xff) == fdata[1]
- && ((reg[0] >> 16) & 0xff) == fdata[0])
- return 1;
- return 0;
-}
-
-static struct device_node*
-scan_OF_childs_for_device(struct device_node* node, u8 bus, u8 dev_fn)
-{
- u8 filter_data[2] = {bus, dev_fn};
-
- return scan_OF_pci_childs(node, scan_OF_pci_childs_iterator, filter_data);
-}
-
-/*
- * Scans the OF tree for a device node matching a PCI device
- */
-struct device_node *
-pci_busdev_to_OF_node(struct pci_bus *bus, int devfn)
-{
- struct pci_controller *hose;
- struct device_node *node;
- int busnr;
-
- if (!have_of)
- return NULL;
-
- /* Lookup the hose */
- busnr = bus->number;
- hose = pci_bus_to_hose(busnr);
- if (!hose)
- return NULL;
-
- /* Check it has an OF node associated */
- node = (struct device_node *) hose->arch_data;
- if (!node)
- return NULL;
-
- /* Fixup bus number according to what OF think it is. */
-#ifdef CONFIG_PPC_PMAC
- /* The G5 need a special case here. Basically, we don't remap all
- * busses on it so we don't create the pci-OF-map. However, we do
- * remap the AGP bus and so have to deal with it. A future better
- * fix has to be done by making the remapping per-host and always
- * filling the pci_to_OF map. --BenH
- */
- if (machine_is(powermac) && busnr >= 0xf0)
- busnr -= 0xf0;
- else
-#endif
- if (pci_to_OF_bus_map)
- busnr = pci_to_OF_bus_map[busnr];
- if (busnr == 0xff)
- return NULL;
-
- /* Now, lookup childs of the hose */
- return scan_OF_childs_for_device(node->child, busnr, devfn);
-}
-EXPORT_SYMBOL(pci_busdev_to_OF_node);
-
-struct device_node*
-pci_device_to_OF_node(struct pci_dev *dev)
-{
- return pci_busdev_to_OF_node(dev->bus, dev->devfn);
-}
-EXPORT_SYMBOL(pci_device_to_OF_node);
-
-/* This routine is meant to be used early during boot, when the
- * PCI bus numbers have not yet been assigned, and you need to
- * issue PCI config cycles to an OF device.
- * It could also be used to "fix" RTAS config cycles if you want
- * to set pci_assign_all_buses to 1 and still use RTAS for PCI
- * config cycles.
- */
-struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node)
-{
- if (!have_of)
- return NULL;
- while(node) {
- struct pci_controller* hose;
- for (hose=hose_head;hose;hose=hose->next)
- if (hose->arch_data == node)
- return hose;
- node=node->parent;
- }
- return NULL;
-}
-
-static int
-find_OF_pci_device_filter(struct device_node* node, void* data)
-{
- return ((void *)node == data);
-}
/*
* Returns the PCI device matching a given OF node
*/
-int
-pci_device_from_OF_node(struct device_node* node, u8* bus, u8* devfn)
+int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn)
{
- const unsigned int *reg;
- struct pci_controller* hose;
- struct pci_dev* dev = NULL;
-
- if (!have_of)
- return -ENODEV;
- /* Make sure it's really a PCI device */
- hose = pci_find_hose_for_OF_device(node);
- if (!hose || !hose->arch_data)
- return -ENODEV;
- if (!scan_OF_pci_childs(((struct device_node*)hose->arch_data)->child,
- find_OF_pci_device_filter, (void *)node))
+ struct pci_dev *dev = NULL;
+ const __be32 *reg;
+ int size;
+
+ /* Check if it might have a chance to be a PCI device */
+ if (!pci_find_hose_for_OF_device(node))
return -ENODEV;
- reg = get_property(node, "reg", NULL);
- if (!reg)
+
+ reg = of_get_property(node, "reg", &size);
+ if (!reg || size < 5 * sizeof(u32))
return -ENODEV;
- *bus = (reg[0] >> 16) & 0xff;
- *devfn = ((reg[0] >> 8) & 0xff);
+
+ *bus = (be32_to_cpup(&reg[0]) >> 16) & 0xff;
+ *devfn = (be32_to_cpup(&reg[0]) >> 8) & 0xff;
/* Ok, here we need some tweak. If we have already renumbered
* all busses, we can't rely on the OF bus number any more.
@@ -883,120 +190,6 @@ pci_device_from_OF_node(struct device_node* node, u8* bus, u8* devfn)
}
EXPORT_SYMBOL(pci_device_from_OF_node);
-void __init
-pci_process_bridge_OF_ranges(struct pci_controller *hose,
- struct device_node *dev, int primary)
-{
- static unsigned int static_lc_ranges[256] __initdata;
- const unsigned int *dt_ranges;
- unsigned int *lc_ranges, *ranges, *prev, size;
- int rlen = 0, orig_rlen;
- int memno = 0;
- struct resource *res;
- int np, na = prom_n_addr_cells(dev);
- np = na + 5;
-
- /* First we try to merge ranges to fix a problem with some pmacs
- * that can have more than 3 ranges, fortunately using contiguous
- * addresses -- BenH
- */
- dt_ranges = get_property(dev, "ranges", &rlen);
- if (!dt_ranges)
- return;
- /* Sanity check, though hopefully that never happens */
- if (rlen > sizeof(static_lc_ranges)) {
- printk(KERN_WARNING "OF ranges property too large !\n");
- rlen = sizeof(static_lc_ranges);
- }
- lc_ranges = static_lc_ranges;
- memcpy(lc_ranges, dt_ranges, rlen);
- orig_rlen = rlen;
-
- /* Let's work on a copy of the "ranges" property instead of damaging
- * the device-tree image in memory
- */
- ranges = lc_ranges;
- prev = NULL;
- while ((rlen -= np * sizeof(unsigned int)) >= 0) {
- if (prev) {
- if (prev[0] == ranges[0] && prev[1] == ranges[1] &&
- (prev[2] + prev[na+4]) == ranges[2] &&
- (prev[na+2] + prev[na+4]) == ranges[na+2]) {
- prev[na+4] += ranges[na+4];
- ranges[0] = 0;
- ranges += np;
- continue;
- }
- }
- prev = ranges;
- ranges += np;
- }
-
- /*
- * The ranges property is laid out as an array of elements,
- * each of which comprises:
- * cells 0 - 2: a PCI address
- * cells 3 or 3+4: a CPU physical address
- * (size depending on dev->n_addr_cells)
- * cells 4+5 or 5+6: the size of the range
- */
- ranges = lc_ranges;
- rlen = orig_rlen;
- while (ranges && (rlen -= np * sizeof(unsigned int)) >= 0) {
- res = NULL;
- size = ranges[na+4];
- switch ((ranges[0] >> 24) & 0x3) {
- case 1: /* I/O space */
- if (ranges[2] != 0)
- break;
- hose->io_base_phys = ranges[na+2];
- /* limit I/O space to 16MB */
- if (size > 0x01000000)
- size = 0x01000000;
- hose->io_base_virt = ioremap(ranges[na+2], size);
- if (primary)
- isa_io_base = (unsigned long) hose->io_base_virt;
- res = &hose->io_resource;
- res->flags = IORESOURCE_IO;
- res->start = ranges[2];
- DBG("PCI: IO 0x%llx -> 0x%llx\n",
- res->start, res->start + size - 1);
- break;
- case 2: /* memory space */
- memno = 0;
- if (ranges[1] == 0 && ranges[2] == 0
- && ranges[na+4] <= (16 << 20)) {
- /* 1st 16MB, i.e. ISA memory area */
- if (primary)
- isa_mem_base = ranges[na+2];
- memno = 1;
- }
- while (memno < 3 && hose->mem_resources[memno].flags)
- ++memno;
- if (memno == 0)
- hose->pci_mem_offset = ranges[na+2] - ranges[2];
- if (memno < 3) {
- res = &hose->mem_resources[memno];
- res->flags = IORESOURCE_MEM;
- if(ranges[0] & 0x40000000)
- res->flags |= IORESOURCE_PREFETCH;
- res->start = ranges[na+2];
- DBG("PCI: MEM[%d] 0x%llx -> 0x%llx\n", memno,
- res->start, res->start + size - 1);
- }
- break;
- }
- if (res != NULL) {
- res->name = dev->full_name;
- res->end = res->start + size - 1;
- res->parent = NULL;
- res->sibling = NULL;
- res->child = NULL;
- }
- ranges += np;
- }
-}
-
/* We create the "pci-OF-bus-map" property now so it appears in the
* /proc device tree
*/
@@ -1004,272 +197,50 @@ void __init
pci_create_OF_bus_map(void)
{
struct property* of_prop;
-
+ struct device_node *dn;
+
of_prop = (struct property*) alloc_bootmem(sizeof(struct property) + 256);
- if (of_prop && find_path_device("/")) {
+ if (!of_prop)
+ return;
+ dn = of_find_node_by_path("/");
+ if (dn) {
memset(of_prop, -1, sizeof(struct property) + 256);
of_prop->name = "pci-OF-bus-map";
of_prop->length = 256;
- of_prop->value = (unsigned char *)&of_prop[1];
- prom_add_property(find_path_device("/"), of_prop);
- }
-}
-
-static ssize_t pci_show_devspec(struct device *dev, struct device_attribute *attr, char *buf)
-{
- struct pci_dev *pdev;
- struct device_node *np;
-
- pdev = to_pci_dev (dev);
- np = pci_device_to_OF_node(pdev);
- if (np == NULL || np->full_name == NULL)
- return 0;
- return sprintf(buf, "%s", np->full_name);
-}
-static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL);
-
-#else /* CONFIG_PPC_OF */
-void pcibios_make_OF_bus_map(void)
-{
-}
-#endif /* CONFIG_PPC_OF */
-
-/* Add sysfs properties */
-void pcibios_add_platform_entries(struct pci_dev *pdev)
-{
-#ifdef CONFIG_PPC_OF
- device_create_file(&pdev->dev, &dev_attr_devspec);
-#endif /* CONFIG_PPC_OF */
-}
-
-
-#ifdef CONFIG_PPC_PMAC
-/*
- * This set of routines checks for PCI<->PCI bridges that have closed
- * IO resources and have child devices. It tries to re-open an IO
- * window on them.
- *
- * This is a _temporary_ fix to workaround a problem with Apple's OF
- * closing IO windows on P2P bridges when the OF drivers of cards
- * below this bridge don't claim any IO range (typically ATI or
- * Adaptec).
- *
- * A more complete fix would be to use drivers/pci/setup-bus.c, which
- * involves a working pcibios_fixup_pbus_ranges(), some more care about
- * ordering when creating the host bus resources, and maybe a few more
- * minor tweaks
- */
-
-/* Initialize bridges with base/limit values we have collected */
-static void __init
-do_update_p2p_io_resource(struct pci_bus *bus, int enable_vga)
-{
- struct pci_dev *bridge = bus->self;
- struct pci_controller* hose = (struct pci_controller *)bridge->sysdata;
- u32 l;
- u16 w;
- struct resource res;
-
- if (bus->resource[0] == NULL)
- return;
- res = *(bus->resource[0]);
-
- DBG("Remapping Bus %d, bridge: %s\n", bus->number, pci_name(bridge));
- res.start -= ((unsigned long) hose->io_base_virt - isa_io_base);
- res.end -= ((unsigned long) hose->io_base_virt - isa_io_base);
- DBG(" IO window: %016llx-%016llx\n", res.start, res.end);
-
- /* Set up the top and bottom of the PCI I/O segment for this bus. */
- pci_read_config_dword(bridge, PCI_IO_BASE, &l);
- l &= 0xffff000f;
- l |= (res.start >> 8) & 0x00f0;
- l |= res.end & 0xf000;
- pci_write_config_dword(bridge, PCI_IO_BASE, l);
-
- if ((l & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) {
- l = (res.start >> 16) | (res.end & 0xffff0000);
- pci_write_config_dword(bridge, PCI_IO_BASE_UPPER16, l);
- }
-
- pci_read_config_word(bridge, PCI_COMMAND, &w);
- w |= PCI_COMMAND_IO;
- pci_write_config_word(bridge, PCI_COMMAND, w);
-
-#if 0 /* Enabling this causes XFree 4.2.0 to hang during PCI probe */
- if (enable_vga) {
- pci_read_config_word(bridge, PCI_BRIDGE_CONTROL, &w);
- w |= PCI_BRIDGE_CTL_VGA;
- pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, w);
+ of_prop->value = &of_prop[1];
+ of_add_property(dn, of_prop);
+ of_node_put(dn);
}
-#endif
}
-/* This function is pretty basic and actually quite broken for the
- * general case, it's enough for us right now though. It's supposed
- * to tell us if we need to open an IO range at all or not and what
- * size.
- */
-static int __init
-check_for_io_childs(struct pci_bus *bus, struct resource* res, int *found_vga)
+void pcibios_setup_phb_io_space(struct pci_controller *hose)
{
- struct pci_dev *dev;
- int i;
- int rc = 0;
-
-#define push_end(res, mask) do { \
- BUG_ON((mask+1) & mask); \
- res->end = (res->end + mask) | mask; \
-} while (0)
-
- list_for_each_entry(dev, &bus->devices, bus_list) {
- u16 class = dev->class >> 8;
-
- if (class == PCI_CLASS_DISPLAY_VGA ||
- class == PCI_CLASS_NOT_DEFINED_VGA)
- *found_vga = 1;
- if (class >> 8 == PCI_BASE_CLASS_BRIDGE && dev->subordinate)
- rc |= check_for_io_childs(dev->subordinate, res, found_vga);
- if (class == PCI_CLASS_BRIDGE_CARDBUS)
- push_end(res, 0xfff);
-
- for (i=0; i<PCI_NUM_RESOURCES; i++) {
- struct resource *r;
- unsigned long r_size;
-
- if (dev->class >> 8 == PCI_CLASS_BRIDGE_PCI
- && i >= PCI_BRIDGE_RESOURCES)
- continue;
- r = &dev->resource[i];
- r_size = r->end - r->start;
- if (r_size < 0xfff)
- r_size = 0xfff;
- if (r->flags & IORESOURCE_IO && (r_size) != 0) {
- rc = 1;
- push_end(res, r_size);
- }
- }
- }
-
- return rc;
-}
-
-/* Here we scan all P2P bridges of a given level that have a closed
- * IO window. Note that the test for the presence of a VGA card should
- * be improved to take into account already configured P2P bridges,
- * currently, we don't see them and might end up configuring 2 bridges
- * with VGA pass through enabled
- */
-static void __init
-do_fixup_p2p_level(struct pci_bus *bus)
-{
- struct pci_bus *b;
- int i, parent_io;
- int has_vga = 0;
-
- for (parent_io=0; parent_io<4; parent_io++)
- if (bus->resource[parent_io]
- && bus->resource[parent_io]->flags & IORESOURCE_IO)
- break;
- if (parent_io >= 4)
- return;
-
- list_for_each_entry(b, &bus->children, node) {
- struct pci_dev *d = b->self;
- struct pci_controller* hose = (struct pci_controller *)d->sysdata;
- struct resource *res = b->resource[0];
- struct resource tmp_res;
- unsigned long max;
- int found_vga = 0;
-
- memset(&tmp_res, 0, sizeof(tmp_res));
- tmp_res.start = bus->resource[parent_io]->start;
-
- /* We don't let low addresses go through that closed P2P bridge, well,
- * that may not be necessary but I feel safer that way
- */
- if (tmp_res.start == 0)
- tmp_res.start = 0x1000;
-
- if (!list_empty(&b->devices) && res && res->flags == 0 &&
- res != bus->resource[parent_io] &&
- (d->class >> 8) == PCI_CLASS_BRIDGE_PCI &&
- check_for_io_childs(b, &tmp_res, &found_vga)) {
- u8 io_base_lo;
-
- printk(KERN_INFO "Fixing up IO bus %s\n", b->name);
-
- if (found_vga) {
- if (has_vga) {
- printk(KERN_WARNING "Skipping VGA, already active"
- " on bus segment\n");
- found_vga = 0;
- } else
- has_vga = 1;
- }
- pci_read_config_byte(d, PCI_IO_BASE, &io_base_lo);
-
- if ((io_base_lo & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32)
- max = ((unsigned long) hose->io_base_virt
- - isa_io_base) + 0xffffffff;
- else
- max = ((unsigned long) hose->io_base_virt
- - isa_io_base) + 0xffff;
-
- *res = tmp_res;
- res->flags = IORESOURCE_IO;
- res->name = b->name;
-
- /* Find a resource in the parent where we can allocate */
- for (i = 0 ; i < 4; i++) {
- struct resource *r = bus->resource[i];
- if (!r)
- continue;
- if ((r->flags & IORESOURCE_IO) == 0)
- continue;
- DBG("Trying to allocate from %016llx, size %016llx from parent"
- " res %d: %016llx -> %016llx\n",
- res->start, res->end, i, r->start, r->end);
-
- if (allocate_resource(r, res, res->end + 1, res->start, max,
- res->end + 1, NULL, NULL) < 0) {
- DBG("Failed !\n");
- continue;
- }
- do_update_p2p_io_resource(b, found_vga);
- break;
- }
- }
- do_fixup_p2p_level(b);
- }
-}
-
-static void
-pcibios_fixup_p2p_bridges(void)
-{
- struct pci_bus *b;
+ unsigned long io_offset;
+ struct resource *res = &hose->io_resource;
- list_for_each_entry(b, &pci_root_buses, node)
- do_fixup_p2p_level(b);
+ /* Fixup IO space offset */
+ io_offset = pcibios_io_space_offset(hose);
+ res->start += io_offset;
+ res->end += io_offset;
}
-#endif /* CONFIG_PPC_PMAC */
-
-static int __init
-pcibios_init(void)
+static int __init pcibios_init(void)
{
- struct pci_controller *hose;
- struct pci_bus *bus;
- int next_busno;
+ struct pci_controller *hose, *tmp;
+ int next_busno = 0;
printk(KERN_INFO "PCI: Probing PCI hardware\n");
+ if (pci_has_flag(PCI_REASSIGN_ALL_BUS))
+ pci_assign_all_buses = 1;
+
/* Scan all of the recorded PCI controllers. */
- for (next_busno = 0, hose = hose_head; hose; hose = hose->next) {
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
if (pci_assign_all_buses)
hose->first_busno = next_busno;
hose->last_busno = 0xff;
- bus = pci_scan_bus(hose->first_busno, hose->ops, hose);
- hose->last_busno = bus->subordinate;
+ pcibios_scan_phb(hose);
+ pci_bus_add_devices(hose->bus);
if (pci_assign_all_buses || next_busno <= hose->last_busno)
next_busno = hose->last_busno + pcibios_assign_bus_offset;
}
@@ -1279,25 +250,11 @@ pcibios_init(void)
* numbers vs. kernel bus numbers since we may have to
* remap them.
*/
- if (pci_assign_all_buses && have_of)
+ if (pci_assign_all_buses)
pcibios_make_OF_bus_map();
- /* Do machine dependent PCI interrupt routing */
- if (ppc_md.pci_swizzle && ppc_md.pci_map_irq)
- pci_fixup_irqs(ppc_md.pci_swizzle, ppc_md.pci_map_irq);
-
- /* Call machine dependent fixup */
- if (ppc_md.pcibios_fixup)
- ppc_md.pcibios_fixup();
-
- /* Allocate and assign resources */
- pcibios_allocate_bus_resources(&pci_root_buses);
- pcibios_allocate_resources(0);
- pcibios_allocate_resources(1);
-#ifdef CONFIG_PPC_PMAC
- pcibios_fixup_p2p_bridges();
-#endif /* CONFIG_PPC_PMAC */
- pcibios_assign_resources();
+ /* Call common code to handle resource allocation */
+ pcibios_resource_survey();
/* Call machine dependent post-init code */
if (ppc_md.pcibios_after_init)
@@ -1308,471 +265,17 @@ pcibios_init(void)
subsys_initcall(pcibios_init);
-unsigned char __init
-common_swizzle(struct pci_dev *dev, unsigned char *pinp)
-{
- struct pci_controller *hose = dev->sysdata;
-
- if (dev->bus->number != hose->first_busno) {
- u8 pin = *pinp;
- do {
- pin = bridge_swizzle(pin, PCI_SLOT(dev->devfn));
- /* Move up the chain of bridges. */
- dev = dev->bus->self;
- } while (dev->bus->self);
- *pinp = pin;
-
- /* The slot is the idsel of the last bridge. */
- }
- return PCI_SLOT(dev->devfn);
-}
-
-unsigned long resource_fixup(struct pci_dev * dev, struct resource * res,
- unsigned long start, unsigned long size)
-{
- return start;
-}
-
-void __init pcibios_fixup_bus(struct pci_bus *bus)
-{
- struct pci_controller *hose = (struct pci_controller *) bus->sysdata;
- unsigned long io_offset;
- struct resource *res;
- int i;
-
- io_offset = (unsigned long)hose->io_base_virt - isa_io_base;
- if (bus->parent == NULL) {
- /* This is a host bridge - fill in its resources */
- hose->bus = bus;
-
- bus->resource[0] = res = &hose->io_resource;
- if (!res->flags) {
- if (io_offset)
- printk(KERN_ERR "I/O resource not set for host"
- " bridge %d\n", hose->index);
- res->start = 0;
- res->end = IO_SPACE_LIMIT;
- res->flags = IORESOURCE_IO;
- }
- res->start += io_offset;
- res->end += io_offset;
-
- for (i = 0; i < 3; ++i) {
- res = &hose->mem_resources[i];
- if (!res->flags) {
- if (i > 0)
- continue;
- printk(KERN_ERR "Memory resource not set for "
- "host bridge %d\n", hose->index);
- res->start = hose->pci_mem_offset;
- res->end = ~0U;
- res->flags = IORESOURCE_MEM;
- }
- bus->resource[i+1] = res;
- }
- } else {
- /* This is a subordinate bridge */
- pci_read_bridge_bases(bus);
-
- for (i = 0; i < 4; ++i) {
- if ((res = bus->resource[i]) == NULL)
- continue;
- if (!res->flags)
- continue;
- if (io_offset && (res->flags & IORESOURCE_IO)) {
- res->start += io_offset;
- res->end += io_offset;
- } else if (hose->pci_mem_offset
- && (res->flags & IORESOURCE_MEM)) {
- res->start += hose->pci_mem_offset;
- res->end += hose->pci_mem_offset;
- }
- }
- }
-
- if (ppc_md.pcibios_fixup_bus)
- ppc_md.pcibios_fixup_bus(bus);
-}
-
-char __init *pcibios_setup(char *str)
-{
- return str;
-}
-
-/* the next one is stolen from the alpha port... */
-void __init
-pcibios_update_irq(struct pci_dev *dev, int irq)
-{
- pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
- /* XXX FIXME - update OF device tree node interrupt property */
-}
-
-#ifdef CONFIG_PPC_MERGE
-/* XXX This is a copy of the ppc64 version. This is temporary until we start
- * merging the 2 PCI layers
- */
-/*
- * Reads the interrupt pin to determine if interrupt is use by card.
- * If the interrupt is used, then gets the interrupt line from the
- * openfirmware and sets it in the pci_dev and pci_config line.
- */
-int pci_read_irq_line(struct pci_dev *pci_dev)
-{
- struct of_irq oirq;
- unsigned int virq;
-
- DBG("Try to map irq for %s...\n", pci_name(pci_dev));
-
- /* Try to get a mapping from the device-tree */
- if (of_irq_map_pci(pci_dev, &oirq)) {
- u8 line, pin;
-
- /* If that fails, lets fallback to what is in the config
- * space and map that through the default controller. We
- * also set the type to level low since that's what PCI
- * interrupts are. If your platform does differently, then
- * either provide a proper interrupt tree or don't use this
- * function.
- */
- if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin))
- return -1;
- if (pin == 0)
- return -1;
- if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) ||
- line == 0xff) {
- return -1;
- }
- DBG(" -> no map ! Using irq line %d from PCI config\n", line);
-
- virq = irq_create_mapping(NULL, line);
- if (virq != NO_IRQ)
- set_irq_type(virq, IRQ_TYPE_LEVEL_LOW);
- } else {
- DBG(" -> got one, spec %d cells (0x%08x...) on %s\n",
- oirq.size, oirq.specifier[0], oirq.controller->full_name);
-
- virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
- oirq.size);
- }
- if(virq == NO_IRQ) {
- DBG(" -> failed to map !\n");
- return -1;
- }
- pci_dev->irq = virq;
- pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, virq);
-
- return 0;
-}
-EXPORT_SYMBOL(pci_read_irq_line);
-#endif /* CONFIG_PPC_MERGE */
-
-int pcibios_enable_device(struct pci_dev *dev, int mask)
-{
- u16 cmd, old_cmd;
- int idx;
- struct resource *r;
-
- if (ppc_md.pcibios_enable_device_hook)
- if (ppc_md.pcibios_enable_device_hook(dev, 0))
- return -EINVAL;
-
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
- old_cmd = cmd;
- for (idx=0; idx<6; idx++) {
- r = &dev->resource[idx];
- if (r->flags & IORESOURCE_UNSET) {
- printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", pci_name(dev));
- return -EINVAL;
- }
- if (r->flags & IORESOURCE_IO)
- cmd |= PCI_COMMAND_IO;
- if (r->flags & IORESOURCE_MEM)
- cmd |= PCI_COMMAND_MEMORY;
- }
- if (cmd != old_cmd) {
- printk("PCI: Enabling device %s (%04x -> %04x)\n",
- pci_name(dev), old_cmd, cmd);
- pci_write_config_word(dev, PCI_COMMAND, cmd);
- }
- return 0;
-}
-
-struct pci_controller*
+static struct pci_controller*
pci_bus_to_hose(int bus)
{
- struct pci_controller* hose = hose_head;
+ struct pci_controller *hose, *tmp;
- for (; hose; hose = hose->next)
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
if (bus >= hose->first_busno && bus <= hose->last_busno)
return hose;
return NULL;
}
-void __iomem *
-pci_bus_io_base(unsigned int bus)
-{
- struct pci_controller *hose;
-
- hose = pci_bus_to_hose(bus);
- if (!hose)
- return NULL;
- return hose->io_base_virt;
-}
-
-unsigned long
-pci_bus_io_base_phys(unsigned int bus)
-{
- struct pci_controller *hose;
-
- hose = pci_bus_to_hose(bus);
- if (!hose)
- return 0;
- return hose->io_base_phys;
-}
-
-unsigned long
-pci_bus_mem_base_phys(unsigned int bus)
-{
- struct pci_controller *hose;
-
- hose = pci_bus_to_hose(bus);
- if (!hose)
- return 0;
- return hose->pci_mem_offset;
-}
-
-unsigned long
-pci_resource_to_bus(struct pci_dev *pdev, struct resource *res)
-{
- /* Hack alert again ! See comments in chrp_pci.c
- */
- struct pci_controller* hose =
- (struct pci_controller *)pdev->sysdata;
- if (hose && res->flags & IORESOURCE_MEM)
- return res->start - hose->pci_mem_offset;
- /* We may want to do something with IOs here... */
- return res->start;
-}
-
-
-static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
- unsigned long *offset,
- enum pci_mmap_state mmap_state)
-{
- struct pci_controller *hose = pci_bus_to_hose(dev->bus->number);
- unsigned long io_offset = 0;
- int i, res_bit;
-
- if (hose == 0)
- return NULL; /* should never happen */
-
- /* If memory, add on the PCI bridge address offset */
- if (mmap_state == pci_mmap_mem) {
- *offset += hose->pci_mem_offset;
- res_bit = IORESOURCE_MEM;
- } else {
- io_offset = hose->io_base_virt - ___IO_BASE;
- *offset += io_offset;
- res_bit = IORESOURCE_IO;
- }
-
- /*
- * Check that the offset requested corresponds to one of the
- * resources of the device.
- */
- for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
- struct resource *rp = &dev->resource[i];
- int flags = rp->flags;
-
- /* treat ROM as memory (should be already) */
- if (i == PCI_ROM_RESOURCE)
- flags |= IORESOURCE_MEM;
-
- /* Active and same type? */
- if ((flags & res_bit) == 0)
- continue;
-
- /* In the range of this resource? */
- if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
- continue;
-
- /* found it! construct the final physical address */
- if (mmap_state == pci_mmap_io)
- *offset += hose->io_base_phys - io_offset;
- return rp;
- }
-
- return NULL;
-}
-
-/*
- * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
- * device mapping.
- */
-static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
- pgprot_t protection,
- enum pci_mmap_state mmap_state,
- int write_combine)
-{
- unsigned long prot = pgprot_val(protection);
-
- /* Write combine is always 0 on non-memory space mappings. On
- * memory space, if the user didn't pass 1, we check for a
- * "prefetchable" resource. This is a bit hackish, but we use
- * this to workaround the inability of /sysfs to provide a write
- * combine bit
- */
- if (mmap_state != pci_mmap_mem)
- write_combine = 0;
- else if (write_combine == 0) {
- if (rp->flags & IORESOURCE_PREFETCH)
- write_combine = 1;
- }
-
- /* XXX would be nice to have a way to ask for write-through */
- prot |= _PAGE_NO_CACHE;
- if (write_combine)
- prot &= ~_PAGE_GUARDED;
- else
- prot |= _PAGE_GUARDED;
-
- printk("PCI map for %s:%llx, prot: %lx\n", pci_name(dev),
- (unsigned long long)rp->start, prot);
-
- return __pgprot(prot);
-}
-
-/*
- * This one is used by /dev/mem and fbdev who have no clue about the
- * PCI device, it tries to find the PCI device first and calls the
- * above routine
- */
-pgprot_t pci_phys_mem_access_prot(struct file *file,
- unsigned long pfn,
- unsigned long size,
- pgprot_t protection)
-{
- struct pci_dev *pdev = NULL;
- struct resource *found = NULL;
- unsigned long prot = pgprot_val(protection);
- unsigned long offset = pfn << PAGE_SHIFT;
- int i;
-
- if (page_is_ram(pfn))
- return prot;
-
- prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
-
- for_each_pci_dev(pdev) {
- for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
- struct resource *rp = &pdev->resource[i];
- int flags = rp->flags;
-
- /* Active and same type? */
- if ((flags & IORESOURCE_MEM) == 0)
- continue;
- /* In the range of this resource? */
- if (offset < (rp->start & PAGE_MASK) ||
- offset > rp->end)
- continue;
- found = rp;
- break;
- }
- if (found)
- break;
- }
- if (found) {
- if (found->flags & IORESOURCE_PREFETCH)
- prot &= ~_PAGE_GUARDED;
- pci_dev_put(pdev);
- }
-
- DBG("non-PCI map for %lx, prot: %lx\n", offset, prot);
-
- return __pgprot(prot);
-}
-
-
-/*
- * Perform the actual remap of the pages for a PCI device mapping, as
- * appropriate for this architecture. The region in the process to map
- * is described by vm_start and vm_end members of VMA, the base physical
- * address is found in vm_pgoff.
- * The pci device structure is provided so that architectures may make mapping
- * decisions on a per-device or per-bus basis.
- *
- * Returns a negative error code on failure, zero on success.
- */
-int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
- enum pci_mmap_state mmap_state,
- int write_combine)
-{
- unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
- struct resource *rp;
- int ret;
-
- rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
- if (rp == NULL)
- return -EINVAL;
-
- vma->vm_pgoff = offset >> PAGE_SHIFT;
- vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp,
- vma->vm_page_prot,
- mmap_state, write_combine);
-
- ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
- vma->vm_end - vma->vm_start, vma->vm_page_prot);
-
- return ret;
-}
-
-/* Obsolete functions. Should be removed once the symbios driver
- * is fixed
- */
-unsigned long
-phys_to_bus(unsigned long pa)
-{
- struct pci_controller *hose;
- int i;
-
- for (hose = hose_head; hose; hose = hose->next) {
- for (i = 0; i < 3; ++i) {
- if (pa >= hose->mem_resources[i].start
- && pa <= hose->mem_resources[i].end) {
- /*
- * XXX the hose->pci_mem_offset really
- * only applies to mem_resources[0].
- * We need a way to store an offset for
- * the others. -- paulus
- */
- if (i == 0)
- pa -= hose->pci_mem_offset;
- return pa;
- }
- }
- }
- /* hmmm, didn't find it */
- return 0;
-}
-
-unsigned long
-pci_phys_to_bus(unsigned long pa, int busnr)
-{
- struct pci_controller* hose = pci_bus_to_hose(busnr);
- if (!hose)
- return pa;
- return pa - hose->pci_mem_offset;
-}
-
-unsigned long
-pci_bus_to_phys(unsigned int ba, int busnr)
-{
- struct pci_controller* hose = pci_bus_to_hose(busnr);
- if (!hose)
- return ba;
- return ba + hose->pci_mem_offset;
-}
-
/* Provide information on locations of various I/O regions in physical
* memory. Do this on a per-card basis so that we choose the right
* root bridge.
@@ -1784,17 +287,6 @@ long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn)
struct pci_controller* hose;
long result = -EOPNOTSUPP;
- /* Argh ! Please forgive me for that hack, but that's the
- * simplest way to get existing XFree to not lockup on some
- * G5 machines... So when something asks for bus 0 io base
- * (bus 0 is HT root), we return the AGP one instead.
- */
-#ifdef CONFIG_PPC_PMAC
- if (machine_is(powermac) && machine_is_compatible("MacRISC4"))
- if (bus == 0)
- bus = 0xf0;
-#endif /* CONFIG_PPC_PMAC */
-
hose = pci_bus_to_hose(bus);
if (!hose)
return -ENODEV;
@@ -1803,7 +295,7 @@ long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn)
case IOBASE_BRIDGE_NUMBER:
return (long)hose->first_busno;
case IOBASE_MEMORY:
- return (long)hose->pci_mem_offset;
+ return (long)hose->mem_offset[0];
case IOBASE_IO:
return (long)hose->io_base_phys;
case IOBASE_ISA_IO:
@@ -1815,145 +307,4 @@ long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn)
return result;
}
-void pci_resource_to_user(const struct pci_dev *dev, int bar,
- const struct resource *rsrc,
- resource_size_t *start, resource_size_t *end)
-{
- struct pci_controller *hose = pci_bus_to_hose(dev->bus->number);
- unsigned long offset = 0;
-
- if (hose == NULL)
- return;
-
- if (rsrc->flags & IORESOURCE_IO)
- offset = ___IO_BASE - hose->io_base_virt + hose->io_base_phys;
-
- *start = rsrc->start + offset;
- *end = rsrc->end + offset;
-}
-
-void __init
-pci_init_resource(struct resource *res, unsigned long start, unsigned long end,
- int flags, char *name)
-{
- res->start = start;
- res->end = end;
- res->flags = flags;
- res->name = name;
- res->parent = NULL;
- res->sibling = NULL;
- res->child = NULL;
-}
-
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
-{
- unsigned long start = pci_resource_start(dev, bar);
- unsigned long len = pci_resource_len(dev, bar);
- unsigned long flags = pci_resource_flags(dev, bar);
-
- if (!len)
- return NULL;
- if (max && len > max)
- len = max;
- if (flags & IORESOURCE_IO)
- return ioport_map(start, len);
- if (flags & IORESOURCE_MEM)
- /* Not checking IORESOURCE_CACHEABLE because PPC does
- * not currently distinguish between ioremap and
- * ioremap_nocache.
- */
- return ioremap(start, len);
- /* What? */
- return NULL;
-}
-
-void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
-{
- /* Nothing to do */
-}
-EXPORT_SYMBOL(pci_iomap);
-EXPORT_SYMBOL(pci_iounmap);
-
-unsigned long pci_address_to_pio(phys_addr_t address)
-{
- struct pci_controller* hose = hose_head;
-
- for (; hose; hose = hose->next) {
- unsigned int size = hose->io_resource.end -
- hose->io_resource.start + 1;
- if (address >= hose->io_base_phys &&
- address < (hose->io_base_phys + size)) {
- unsigned long base =
- (unsigned long)hose->io_base_virt - _IO_BASE;
- return base + (address - hose->io_base_phys);
- }
- }
- return (unsigned int)-1;
-}
-EXPORT_SYMBOL(pci_address_to_pio);
-
-/*
- * Null PCI config access functions, for the case when we can't
- * find a hose.
- */
-#define NULL_PCI_OP(rw, size, type) \
-static int \
-null_##rw##_config_##size(struct pci_dev *dev, int offset, type val) \
-{ \
- return PCIBIOS_DEVICE_NOT_FOUND; \
-}
-
-static int
-null_read_config(struct pci_bus *bus, unsigned int devfn, int offset,
- int len, u32 *val)
-{
- return PCIBIOS_DEVICE_NOT_FOUND;
-}
-
-static int
-null_write_config(struct pci_bus *bus, unsigned int devfn, int offset,
- int len, u32 val)
-{
- return PCIBIOS_DEVICE_NOT_FOUND;
-}
-
-static struct pci_ops null_pci_ops =
-{
- null_read_config,
- null_write_config
-};
-
-/*
- * These functions are used early on before PCI scanning is done
- * and all of the pci_dev and pci_bus structures have been created.
- */
-static struct pci_bus *
-fake_pci_bus(struct pci_controller *hose, int busnr)
-{
- static struct pci_bus bus;
-
- if (hose == 0) {
- hose = pci_bus_to_hose(busnr);
- if (hose == 0)
- printk(KERN_ERR "Can't find hose for PCI bus %d!\n", busnr);
- }
- bus.number = busnr;
- bus.sysdata = hose;
- bus.ops = hose? hose->ops: &null_pci_ops;
- return &bus;
-}
-
-#define EARLY_PCI_OP(rw, size, type) \
-int early_##rw##_config_##size(struct pci_controller *hose, int bus, \
- int devfn, int offset, type value) \
-{ \
- return pci_bus_##rw##_config_##size(fake_pci_bus(hose, bus), \
- devfn, offset, value); \
-}
-EARLY_PCI_OP(read, byte, u8 *)
-EARLY_PCI_OP(read, word, u16 *)
-EARLY_PCI_OP(read, dword, u32 *)
-EARLY_PCI_OP(write, byte, u8)
-EARLY_PCI_OP(write, word, u16)
-EARLY_PCI_OP(write, dword, u32)
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index c1b1e14775e..155013da27e 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -18,10 +18,12 @@
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
+#include <linux/export.h>
#include <linux/mm.h>
#include <linux/list.h>
#include <linux/syscalls.h>
#include <linux/irq.h>
+#include <linux/vmalloc.h>
#include <asm/processor.h>
#include <asm/io.h>
@@ -31,564 +33,39 @@
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
-#ifdef DEBUG
-#include <asm/udbg.h>
-#define DBG(fmt...) printk(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
-unsigned long pci_probe_only = 1;
-int pci_assign_all_buses = 0;
-
-#ifdef CONFIG_PPC_MULTIPLATFORM
-static void fixup_resource(struct resource *res, struct pci_dev *dev);
-static void do_bus_setup(struct pci_bus *bus);
-static void phbs_remap_io(void);
-#endif
-
/* pci_io_base -- the base address from which io bars are offsets.
* This is the lowest I/O base address (so bar values are always positive),
* and it *must* be the start of ISA space if an ISA bus exists because
- * ISA drivers use hard coded offsets. If no ISA bus exists a dummy
- * page is mapped and isa_io_limit prevents access to it.
+ * ISA drivers use hard coded offsets. If no ISA bus exists nothing
+ * is mapped on the first 64K of IO space
*/
-unsigned long isa_io_base; /* NULL if no ISA bus */
-EXPORT_SYMBOL(isa_io_base);
-unsigned long pci_io_base;
+unsigned long pci_io_base = ISA_IO_BASE;
EXPORT_SYMBOL(pci_io_base);
-void iSeries_pcibios_init(void);
-
-LIST_HEAD(hose_list);
-
-struct dma_mapping_ops pci_dma_ops;
-EXPORT_SYMBOL(pci_dma_ops);
-
-int global_phb_number; /* Global phb counter */
-
-/* Cached ISA bridge dev. */
-struct pci_dev *ppc64_isabridge_dev = NULL;
-EXPORT_SYMBOL_GPL(ppc64_isabridge_dev);
-
-static void fixup_broken_pcnet32(struct pci_dev* dev)
-{
- if ((dev->class>>8 == PCI_CLASS_NETWORK_ETHERNET)) {
- dev->vendor = PCI_VENDOR_ID_AMD;
- pci_write_config_word(dev, PCI_VENDOR_ID, PCI_VENDOR_ID_AMD);
- }
-}
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32);
-
-void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
- struct resource *res)
-{
- unsigned long offset = 0;
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
-
- if (!hose)
- return;
-
- if (res->flags & IORESOURCE_IO)
- offset = (unsigned long)hose->io_base_virt - pci_io_base;
-
- if (res->flags & IORESOURCE_MEM)
- offset = hose->pci_mem_offset;
-
- region->start = res->start - offset;
- region->end = res->end - offset;
-}
-
-void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
- struct pci_bus_region *region)
-{
- unsigned long offset = 0;
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
-
- if (!hose)
- return;
-
- if (res->flags & IORESOURCE_IO)
- offset = (unsigned long)hose->io_base_virt - pci_io_base;
-
- if (res->flags & IORESOURCE_MEM)
- offset = hose->pci_mem_offset;
-
- res->start = region->start + offset;
- res->end = region->end + offset;
-}
-
-#ifdef CONFIG_HOTPLUG
-EXPORT_SYMBOL(pcibios_resource_to_bus);
-EXPORT_SYMBOL(pcibios_bus_to_resource);
-#endif
-
-/*
- * We need to avoid collisions with `mirrored' VGA ports
- * and other strange ISA hardware, so we always want the
- * addresses to be allocated in the 0x000-0x0ff region
- * modulo 0x400.
- *
- * Why? Because some silly external IO cards only decode
- * the low 10 bits of the IO address. The 0x00-0xff region
- * is reserved for motherboard devices that decode all 16
- * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
- * but we want to try to avoid allocating at 0x2900-0x2bff
- * which might have be mirrored at 0x0100-0x03ff..
- */
-void pcibios_align_resource(void *data, struct resource *res,
- resource_size_t size, resource_size_t align)
-{
- struct pci_dev *dev = data;
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
- resource_size_t start = res->start;
- unsigned long alignto;
-
- if (res->flags & IORESOURCE_IO) {
- unsigned long offset = (unsigned long)hose->io_base_virt -
- pci_io_base;
- /* Make sure we start at our min on all hoses */
- if (start - offset < PCIBIOS_MIN_IO)
- start = PCIBIOS_MIN_IO + offset;
-
- /*
- * Put everything into 0x00-0xff region modulo 0x400
- */
- if (start & 0x300)
- start = (start + 0x3ff) & ~0x3ff;
-
- } else if (res->flags & IORESOURCE_MEM) {
- /* Make sure we start at our min on all hoses */
- if (start - hose->pci_mem_offset < PCIBIOS_MIN_MEM)
- start = PCIBIOS_MIN_MEM + hose->pci_mem_offset;
-
- /* Align to multiple of size of minimum base. */
- alignto = max(0x1000UL, align);
- start = ALIGN(start, alignto);
- }
-
- res->start = start;
-}
-
-static DEFINE_SPINLOCK(hose_spinlock);
-
-/*
- * pci_controller(phb) initialized common variables.
- */
-static void __devinit pci_setup_pci_controller(struct pci_controller *hose)
-{
- memset(hose, 0, sizeof(struct pci_controller));
-
- spin_lock(&hose_spinlock);
- hose->global_number = global_phb_number++;
- list_add_tail(&hose->list_node, &hose_list);
- spin_unlock(&hose_spinlock);
-}
-
-struct pci_controller * pcibios_alloc_controller(struct device_node *dev)
-{
- struct pci_controller *phb;
-
- if (mem_init_done)
- phb = kmalloc(sizeof(struct pci_controller), GFP_KERNEL);
- else
- phb = alloc_bootmem(sizeof (struct pci_controller));
- if (phb == NULL)
- return NULL;
- pci_setup_pci_controller(phb);
- phb->arch_data = dev;
- phb->is_dynamic = mem_init_done;
- if (dev)
- PHB_SET_NODE(phb, of_node_to_nid(dev));
- return phb;
-}
-
-void pcibios_free_controller(struct pci_controller *phb)
-{
- if (phb->is_dynamic)
- kfree(phb);
-}
-
-#ifndef CONFIG_PPC_ISERIES
-void __devinit pcibios_claim_one_bus(struct pci_bus *b)
-{
- struct pci_dev *dev;
- struct pci_bus *child_bus;
-
- list_for_each_entry(dev, &b->devices, bus_list) {
- int i;
-
- for (i = 0; i < PCI_NUM_RESOURCES; i++) {
- struct resource *r = &dev->resource[i];
-
- if (r->parent || !r->start || !r->flags)
- continue;
- pci_claim_resource(dev, i);
- }
- }
-
- list_for_each_entry(child_bus, &b->children, node)
- pcibios_claim_one_bus(child_bus);
-}
-#ifdef CONFIG_HOTPLUG
-EXPORT_SYMBOL_GPL(pcibios_claim_one_bus);
-#endif
-
-static void __init pcibios_claim_of_setup(void)
-{
- struct pci_bus *b;
-
- list_for_each_entry(b, &pci_root_buses, node)
- pcibios_claim_one_bus(b);
-}
-#endif
-
-#ifdef CONFIG_PPC_MULTIPLATFORM
-static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
-{
- const u32 *prop;
- int len;
-
- prop = get_property(np, name, &len);
- if (prop && len >= 4)
- return *prop;
- return def;
-}
-
-static unsigned int pci_parse_of_flags(u32 addr0)
-{
- unsigned int flags = 0;
-
- if (addr0 & 0x02000000) {
- flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
- flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
- flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
- if (addr0 & 0x40000000)
- flags |= IORESOURCE_PREFETCH
- | PCI_BASE_ADDRESS_MEM_PREFETCH;
- } else if (addr0 & 0x01000000)
- flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
- return flags;
-}
-
-#define GET_64BIT(prop, i) ((((u64) (prop)[(i)]) << 32) | (prop)[(i)+1])
-
-static void pci_parse_of_addrs(struct device_node *node, struct pci_dev *dev)
-{
- u64 base, size;
- unsigned int flags;
- struct resource *res;
- const u32 *addrs;
- u32 i;
- int proplen;
-
- addrs = get_property(node, "assigned-addresses", &proplen);
- if (!addrs)
- return;
- DBG(" parse addresses (%d bytes) @ %p\n", proplen, addrs);
- for (; proplen >= 20; proplen -= 20, addrs += 5) {
- flags = pci_parse_of_flags(addrs[0]);
- if (!flags)
- continue;
- base = GET_64BIT(addrs, 1);
- size = GET_64BIT(addrs, 3);
- if (!size)
- continue;
- i = addrs[0] & 0xff;
- DBG(" base: %llx, size: %llx, i: %x\n",
- (unsigned long long)base, (unsigned long long)size, i);
-
- if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) {
- res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
- } else if (i == dev->rom_base_reg) {
- res = &dev->resource[PCI_ROM_RESOURCE];
- flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
- } else {
- printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
- continue;
- }
- res->start = base;
- res->end = base + size - 1;
- res->flags = flags;
- res->name = pci_name(dev);
- fixup_resource(res, dev);
- }
-}
-
-struct pci_dev *of_create_pci_dev(struct device_node *node,
- struct pci_bus *bus, int devfn)
-{
- struct pci_dev *dev;
- const char *type;
-
- dev = kmalloc(sizeof(struct pci_dev), GFP_KERNEL);
- if (!dev)
- return NULL;
- type = get_property(node, "device_type", NULL);
- if (type == NULL)
- type = "";
-
- DBG(" create device, devfn: %x, type: %s\n", devfn, type);
-
- memset(dev, 0, sizeof(struct pci_dev));
- dev->bus = bus;
- dev->sysdata = node;
- dev->dev.parent = bus->bridge;
- dev->dev.bus = &pci_bus_type;
- dev->devfn = devfn;
- dev->multifunction = 0; /* maybe a lie? */
-
- dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
- dev->device = get_int_prop(node, "device-id", 0xffff);
- dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0);
- dev->subsystem_device = get_int_prop(node, "subsystem-id", 0);
-
- dev->cfg_size = pci_cfg_space_size(dev);
-
- sprintf(pci_name(dev), "%04x:%02x:%02x.%d", pci_domain_nr(bus),
- dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
- dev->class = get_int_prop(node, "class-code", 0);
-
- DBG(" class: 0x%x\n", dev->class);
-
- dev->current_state = 4; /* unknown power state */
-
- if (!strcmp(type, "pci") || !strcmp(type, "pciex")) {
- /* a PCI-PCI bridge */
- dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
- dev->rom_base_reg = PCI_ROM_ADDRESS1;
- } else if (!strcmp(type, "cardbus")) {
- dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
- } else {
- dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
- dev->rom_base_reg = PCI_ROM_ADDRESS;
- /* Maybe do a default OF mapping here */
- dev->irq = NO_IRQ;
- }
-
- pci_parse_of_addrs(node, dev);
-
- DBG(" adding to system ...\n");
-
- pci_device_add(dev, bus);
-
- /* XXX pci_scan_msi_device(dev); */
-
- return dev;
-}
-EXPORT_SYMBOL(of_create_pci_dev);
-
-void __devinit of_scan_bus(struct device_node *node,
- struct pci_bus *bus)
-{
- struct device_node *child = NULL;
- const u32 *reg;
- int reglen, devfn;
- struct pci_dev *dev;
-
- DBG("of_scan_bus(%s) bus no %d... \n", node->full_name, bus->number);
-
- while ((child = of_get_next_child(node, child)) != NULL) {
- DBG(" * %s\n", child->full_name);
- reg = get_property(child, "reg", &reglen);
- if (reg == NULL || reglen < 20)
- continue;
- devfn = (reg[0] >> 8) & 0xff;
-
- /* create a new pci_dev for this device */
- dev = of_create_pci_dev(child, bus, devfn);
- if (!dev)
- continue;
- DBG("dev header type: %x\n", dev->hdr_type);
-
- if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
- dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
- of_scan_pci_bridge(child, dev);
- }
-
- do_bus_setup(bus);
-}
-EXPORT_SYMBOL(of_scan_bus);
-
-void __devinit of_scan_pci_bridge(struct device_node *node,
- struct pci_dev *dev)
-{
- struct pci_bus *bus;
- const u32 *busrange, *ranges;
- int len, i, mode;
- struct resource *res;
- unsigned int flags;
- u64 size;
-
- DBG("of_scan_pci_bridge(%s)\n", node->full_name);
-
- /* parse bus-range property */
- busrange = get_property(node, "bus-range", &len);
- if (busrange == NULL || len != 8) {
- printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n",
- node->full_name);
- return;
- }
- ranges = get_property(node, "ranges", &len);
- if (ranges == NULL) {
- printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %s\n",
- node->full_name);
- return;
- }
-
- bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
- if (!bus) {
- printk(KERN_ERR "Failed to create pci bus for %s\n",
- node->full_name);
- return;
- }
-
- bus->primary = dev->bus->number;
- bus->subordinate = busrange[1];
- bus->bridge_ctl = 0;
- bus->sysdata = node;
-
- /* parse ranges property */
- /* PCI #address-cells == 3 and #size-cells == 2 always */
- res = &dev->resource[PCI_BRIDGE_RESOURCES];
- for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) {
- res->flags = 0;
- bus->resource[i] = res;
- ++res;
- }
- i = 1;
- for (; len >= 32; len -= 32, ranges += 8) {
- flags = pci_parse_of_flags(ranges[0]);
- size = GET_64BIT(ranges, 6);
- if (flags == 0 || size == 0)
- continue;
- if (flags & IORESOURCE_IO) {
- res = bus->resource[0];
- if (res->flags) {
- printk(KERN_ERR "PCI: ignoring extra I/O range"
- " for bridge %s\n", node->full_name);
- continue;
- }
- } else {
- if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
- printk(KERN_ERR "PCI: too many memory ranges"
- " for bridge %s\n", node->full_name);
- continue;
- }
- res = bus->resource[i];
- ++i;
- }
- res->start = GET_64BIT(ranges, 1);
- res->end = res->start + size - 1;
- res->flags = flags;
- fixup_resource(res, dev);
- }
- sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
- bus->number);
- DBG(" bus name: %s\n", bus->name);
-
- mode = PCI_PROBE_NORMAL;
- if (ppc_md.pci_probe_mode)
- mode = ppc_md.pci_probe_mode(bus);
- DBG(" probe mode: %d\n", mode);
-
- if (mode == PCI_PROBE_DEVTREE)
- of_scan_bus(node, bus);
- else if (mode == PCI_PROBE_NORMAL)
- pci_scan_child_bus(bus);
-}
-EXPORT_SYMBOL(of_scan_pci_bridge);
-#endif /* CONFIG_PPC_MULTIPLATFORM */
-
-void __devinit scan_phb(struct pci_controller *hose)
-{
- struct pci_bus *bus;
- struct device_node *node = hose->arch_data;
- int i, mode;
- struct resource *res;
-
- DBG("Scanning PHB %s\n", node ? node->full_name : "<NO NAME>");
-
- bus = pci_create_bus(NULL, hose->first_busno, hose->ops, node);
- if (bus == NULL) {
- printk(KERN_ERR "Failed to create bus for PCI domain %04x\n",
- hose->global_number);
- return;
- }
- bus->secondary = hose->first_busno;
- hose->bus = bus;
-
- bus->resource[0] = res = &hose->io_resource;
- if (res->flags && request_resource(&ioport_resource, res))
- printk(KERN_ERR "Failed to request PCI IO region "
- "on PCI domain %04x\n", hose->global_number);
-
- for (i = 0; i < 3; ++i) {
- res = &hose->mem_resources[i];
- bus->resource[i+1] = res;
- if (res->flags && request_resource(&iomem_resource, res))
- printk(KERN_ERR "Failed to request PCI memory region "
- "on PCI domain %04x\n", hose->global_number);
- }
-
- mode = PCI_PROBE_NORMAL;
-#ifdef CONFIG_PPC_MULTIPLATFORM
- if (node && ppc_md.pci_probe_mode)
- mode = ppc_md.pci_probe_mode(bus);
- DBG(" probe mode: %d\n", mode);
- if (mode == PCI_PROBE_DEVTREE) {
- bus->subordinate = hose->last_busno;
- of_scan_bus(node, bus);
- }
-#endif /* CONFIG_PPC_MULTIPLATFORM */
- if (mode == PCI_PROBE_NORMAL)
- hose->last_busno = bus->subordinate = pci_scan_child_bus(bus);
-}
-
static int __init pcibios_init(void)
{
struct pci_controller *hose, *tmp;
- /* For now, override phys_mem_access_prot. If we need it,
+ printk(KERN_INFO "PCI: Probing PCI hardware\n");
+
+ /* For now, override phys_mem_access_prot. If we need it,g
* later, we may move that initialization to each ppc_md
*/
ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot;
-#ifdef CONFIG_PPC_ISERIES
- iSeries_pcibios_init();
-#endif
-
- printk(KERN_DEBUG "PCI: Probing PCI hardware\n");
+ /* On ppc64, we always enable PCI domains and we keep domain 0
+ * backward compatible in /proc for video cards
+ */
+ pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
/* Scan all of the recorded PCI controllers. */
list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
- scan_phb(hose);
+ pcibios_scan_phb(hose);
pci_bus_add_devices(hose->bus);
}
-#ifndef CONFIG_PPC_ISERIES
- if (pci_probe_only)
- pcibios_claim_of_setup();
- else
- /* FIXME: `else' will be removed when
- pci_assign_unassigned_resources() is able to work
- correctly with [partially] allocated PCI tree. */
- pci_assign_unassigned_resources();
-#endif /* !CONFIG_PPC_ISERIES */
-
- /* Call machine dependent final fixup */
- if (ppc_md.pcibios_fixup)
- ppc_md.pcibios_fixup();
-
- /* Cache the location of the ISA bridge (if we have one) */
- ppc64_isabridge_dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
- if (ppc64_isabridge_dev != NULL)
- printk(KERN_DEBUG "ISA bridge at %s\n", pci_name(ppc64_isabridge_dev));
-
-#ifdef CONFIG_PPC_MULTIPLATFORM
- /* map in PCI I/O space */
- phbs_remap_io();
-#endif
+ /* Call common code to handle resource allocation */
+ pcibios_resource_survey();
printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");
@@ -597,763 +74,130 @@ static int __init pcibios_init(void)
subsys_initcall(pcibios_init);
-char __init *pcibios_setup(char *str)
+int pcibios_unmap_io_space(struct pci_bus *bus)
{
- return str;
-}
+ struct pci_controller *hose;
-int pcibios_enable_device(struct pci_dev *dev, int mask)
-{
- u16 cmd, oldcmd;
- int i;
-
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
- oldcmd = cmd;
-
- for (i = 0; i < PCI_NUM_RESOURCES; i++) {
- struct resource *res = &dev->resource[i];
-
- /* Only set up the requested stuff */
- if (!(mask & (1<<i)))
- continue;
-
- if (res->flags & IORESOURCE_IO)
- cmd |= PCI_COMMAND_IO;
- if (res->flags & IORESOURCE_MEM)
- cmd |= PCI_COMMAND_MEMORY;
- }
-
- if (cmd != oldcmd) {
- printk(KERN_DEBUG "PCI: Enabling device: (%s), cmd %x\n",
- pci_name(dev), cmd);
- /* Enable the appropriate bits in the PCI command register. */
- pci_write_config_word(dev, PCI_COMMAND, cmd);
- }
- return 0;
-}
-
-/*
- * Return the domain number for this bus.
- */
-int pci_domain_nr(struct pci_bus *bus)
-{
-#ifdef CONFIG_PPC_ISERIES
- return 0;
-#else
- struct pci_controller *hose = pci_bus_to_host(bus);
+ WARN_ON(bus == NULL);
- return hose->global_number;
+ /* If this is not a PHB, we only flush the hash table over
+ * the area mapped by this bridge. We don't play with the PTE
+ * mappings since we might have to deal with sub-page alignemnts
+ * so flushing the hash table is the only sane way to make sure
+ * that no hash entries are covering that removed bridge area
+ * while still allowing other busses overlapping those pages
+ *
+ * Note: If we ever support P2P hotplug on Book3E, we'll have
+ * to do an appropriate TLB flush here too
+ */
+ if (bus->self) {
+#ifdef CONFIG_PPC_STD_MMU_64
+ struct resource *res = bus->resource[0];
#endif
-}
-EXPORT_SYMBOL(pci_domain_nr);
+ pr_debug("IO unmapping for PCI-PCI bridge %s\n",
+ pci_name(bus->self));
-/* Decide whether to display the domain number in /proc */
-int pci_proc_domain(struct pci_bus *bus)
-{
-#ifdef CONFIG_PPC_ISERIES
- return 0;
-#else
- struct pci_controller *hose = pci_bus_to_host(bus);
- return hose->buid;
+#ifdef CONFIG_PPC_STD_MMU_64
+ __flush_hash_table_range(&init_mm, res->start + _IO_BASE,
+ res->end + _IO_BASE + 1);
#endif
-}
-
-/*
- * Platform support for /proc/bus/pci/X/Y mmap()s,
- * modelled on the sparc64 implementation by Dave Miller.
- * -- paulus.
- */
-
-/*
- * Adjust vm_pgoff of VMA such that it is the physical page offset
- * corresponding to the 32-bit pci bus offset for DEV requested by the user.
- *
- * Basically, the user finds the base address for his device which he wishes
- * to mmap. They read the 32-bit value from the config space base register,
- * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
- * offset parameter of mmap on /proc/bus/pci/XXX for that device.
- *
- * Returns negative error code on failure, zero on success.
- */
-static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
- unsigned long *offset,
- enum pci_mmap_state mmap_state)
-{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
- unsigned long io_offset = 0;
- int i, res_bit;
-
- if (hose == 0)
- return NULL; /* should never happen */
-
- /* If memory, add on the PCI bridge address offset */
- if (mmap_state == pci_mmap_mem) {
- *offset += hose->pci_mem_offset;
- res_bit = IORESOURCE_MEM;
- } else {
- io_offset = (unsigned long)hose->io_base_virt - pci_io_base;
- *offset += io_offset;
- res_bit = IORESOURCE_IO;
- }
-
- /*
- * Check that the offset requested corresponds to one of the
- * resources of the device.
- */
- for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
- struct resource *rp = &dev->resource[i];
- int flags = rp->flags;
-
- /* treat ROM as memory (should be already) */
- if (i == PCI_ROM_RESOURCE)
- flags |= IORESOURCE_MEM;
-
- /* Active and same type? */
- if ((flags & res_bit) == 0)
- continue;
-
- /* In the range of this resource? */
- if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
- continue;
-
- /* found it! construct the final physical address */
- if (mmap_state == pci_mmap_io)
- *offset += hose->io_base_phys - io_offset;
- return rp;
- }
-
- return NULL;
-}
-
-/*
- * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
- * device mapping.
- */
-static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
- pgprot_t protection,
- enum pci_mmap_state mmap_state,
- int write_combine)
-{
- unsigned long prot = pgprot_val(protection);
-
- /* Write combine is always 0 on non-memory space mappings. On
- * memory space, if the user didn't pass 1, we check for a
- * "prefetchable" resource. This is a bit hackish, but we use
- * this to workaround the inability of /sysfs to provide a write
- * combine bit
- */
- if (mmap_state != pci_mmap_mem)
- write_combine = 0;
- else if (write_combine == 0) {
- if (rp->flags & IORESOURCE_PREFETCH)
- write_combine = 1;
+ return 0;
}
- /* XXX would be nice to have a way to ask for write-through */
- prot |= _PAGE_NO_CACHE;
- if (write_combine)
- prot &= ~_PAGE_GUARDED;
- else
- prot |= _PAGE_GUARDED;
-
- printk(KERN_DEBUG "PCI map for %s:%lx, prot: %lx\n", pci_name(dev), rp->start,
- prot);
-
- return __pgprot(prot);
-}
-
-/*
- * This one is used by /dev/mem and fbdev who have no clue about the
- * PCI device, it tries to find the PCI device first and calls the
- * above routine
- */
-pgprot_t pci_phys_mem_access_prot(struct file *file,
- unsigned long pfn,
- unsigned long size,
- pgprot_t protection)
-{
- struct pci_dev *pdev = NULL;
- struct resource *found = NULL;
- unsigned long prot = pgprot_val(protection);
- unsigned long offset = pfn << PAGE_SHIFT;
- int i;
+ /* Get the host bridge */
+ hose = pci_bus_to_host(bus);
- if (page_is_ram(pfn))
- return __pgprot(prot);
+ /* Check if we have IOs allocated */
+ if (hose->io_base_alloc == NULL)
+ return 0;
- prot |= _PAGE_NO_CACHE | _PAGE_GUARDED;
+ pr_debug("IO unmapping for PHB %s\n", hose->dn->full_name);
+ pr_debug(" alloc=0x%p\n", hose->io_base_alloc);
- for_each_pci_dev(pdev) {
- for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
- struct resource *rp = &pdev->resource[i];
- int flags = rp->flags;
+ /* This is a PHB, we fully unmap the IO area */
+ vunmap(hose->io_base_alloc);
- /* Active and same type? */
- if ((flags & IORESOURCE_MEM) == 0)
- continue;
- /* In the range of this resource? */
- if (offset < (rp->start & PAGE_MASK) ||
- offset > rp->end)
- continue;
- found = rp;
- break;
- }
- if (found)
- break;
- }
- if (found) {
- if (found->flags & IORESOURCE_PREFETCH)
- prot &= ~_PAGE_GUARDED;
- pci_dev_put(pdev);
- }
-
- DBG("non-PCI map for %lx, prot: %lx\n", offset, prot);
-
- return __pgprot(prot);
+ return 0;
}
+EXPORT_SYMBOL_GPL(pcibios_unmap_io_space);
-
-/*
- * Perform the actual remap of the pages for a PCI device mapping, as
- * appropriate for this architecture. The region in the process to map
- * is described by vm_start and vm_end members of VMA, the base physical
- * address is found in vm_pgoff.
- * The pci device structure is provided so that architectures may make mapping
- * decisions on a per-device or per-bus basis.
- *
- * Returns a negative error code on failure, zero on success.
- */
-int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
- enum pci_mmap_state mmap_state, int write_combine)
+static int pcibios_map_phb_io_space(struct pci_controller *hose)
{
- unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
- struct resource *rp;
- int ret;
-
- rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
- if (rp == NULL)
- return -EINVAL;
-
- vma->vm_pgoff = offset >> PAGE_SHIFT;
- vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp,
- vma->vm_page_prot,
- mmap_state, write_combine);
-
- ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
- vma->vm_end - vma->vm_start, vma->vm_page_prot);
+ struct vm_struct *area;
+ unsigned long phys_page;
+ unsigned long size_page;
+ unsigned long io_virt_offset;
- return ret;
-}
+ phys_page = _ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE);
+ size_page = _ALIGN_UP(hose->pci_io_size, PAGE_SIZE);
-static ssize_t pci_show_devspec(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct pci_dev *pdev;
- struct device_node *np;
+ /* Make sure IO area address is clear */
+ hose->io_base_alloc = NULL;
- pdev = to_pci_dev (dev);
- np = pci_device_to_OF_node(pdev);
- if (np == NULL || np->full_name == NULL)
+ /* If there's no IO to map on that bus, get away too */
+ if (hose->pci_io_size == 0 || hose->io_base_phys == 0)
return 0;
- return sprintf(buf, "%s", np->full_name);
-}
-static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL);
-
-void pcibios_add_platform_entries(struct pci_dev *pdev)
-{
- device_create_file(&pdev->dev, &dev_attr_devspec);
-}
-
-#ifdef CONFIG_PPC_MULTIPLATFORM
-
-#define ISA_SPACE_MASK 0x1
-#define ISA_SPACE_IO 0x1
-static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node,
- unsigned long phb_io_base_phys,
- void __iomem * phb_io_base_virt)
-{
- /* Remove these asap */
-
- struct pci_address {
- u32 a_hi;
- u32 a_mid;
- u32 a_lo;
- };
-
- struct isa_address {
- u32 a_hi;
- u32 a_lo;
- };
-
- struct isa_range {
- struct isa_address isa_addr;
- struct pci_address pci_addr;
- unsigned int size;
- };
-
- const struct isa_range *range;
- unsigned long pci_addr;
- unsigned int isa_addr;
- unsigned int size;
- int rlen = 0;
-
- range = get_property(isa_node, "ranges", &rlen);
- if (range == NULL || (rlen < sizeof(struct isa_range))) {
- printk(KERN_ERR "no ISA ranges or unexpected isa range size,"
- "mapping 64k\n");
- __ioremap_explicit(phb_io_base_phys,
- (unsigned long)phb_io_base_virt,
- 0x10000, _PAGE_NO_CACHE | _PAGE_GUARDED);
- return;
- }
-
- /* From "ISA Binding to 1275"
- * The ranges property is laid out as an array of elements,
- * each of which comprises:
- * cells 0 - 1: an ISA address
- * cells 2 - 4: a PCI address
- * (size depending on dev->n_addr_cells)
- * cell 5: the size of the range
+ /* Let's allocate some IO space for that guy. We don't pass
+ * VM_IOREMAP because we don't care about alignment tricks that
+ * the core does in that case. Maybe we should due to stupid card
+ * with incomplete address decoding but I'd rather not deal with
+ * those outside of the reserved 64K legacy region.
*/
- if ((range->isa_addr.a_hi && ISA_SPACE_MASK) == ISA_SPACE_IO) {
- isa_addr = range->isa_addr.a_lo;
- pci_addr = (unsigned long) range->pci_addr.a_mid << 32 |
- range->pci_addr.a_lo;
-
- /* Assume these are both zero */
- if ((pci_addr != 0) || (isa_addr != 0)) {
- printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
- __FUNCTION__);
- return;
- }
-
- size = PAGE_ALIGN(range->size);
+ area = __get_vm_area(size_page, 0, PHB_IO_BASE, PHB_IO_END);
+ if (area == NULL)
+ return -ENOMEM;
+ hose->io_base_alloc = area->addr;
+ hose->io_base_virt = (void __iomem *)(area->addr +
+ hose->io_base_phys - phys_page);
+
+ pr_debug("IO mapping for PHB %s\n", hose->dn->full_name);
+ pr_debug(" phys=0x%016llx, virt=0x%p (alloc=0x%p)\n",
+ hose->io_base_phys, hose->io_base_virt, hose->io_base_alloc);
+ pr_debug(" size=0x%016llx (alloc=0x%016lx)\n",
+ hose->pci_io_size, size_page);
+
+ /* Establish the mapping */
+ if (__ioremap_at(phys_page, area->addr, size_page,
+ _PAGE_NO_CACHE | _PAGE_GUARDED) == NULL)
+ return -ENOMEM;
+
+ /* Fixup hose IO resource */
+ io_virt_offset = pcibios_io_space_offset(hose);
+ hose->io_resource.start += io_virt_offset;
+ hose->io_resource.end += io_virt_offset;
+
+ pr_debug(" hose->io_resource=%pR\n", &hose->io_resource);
- __ioremap_explicit(phb_io_base_phys,
- (unsigned long) phb_io_base_virt,
- size, _PAGE_NO_CACHE | _PAGE_GUARDED);
- }
+ return 0;
}
-void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
- struct device_node *dev, int prim)
+int pcibios_map_io_space(struct pci_bus *bus)
{
- const unsigned int *ranges;
- unsigned int pci_space;
- unsigned long size;
- int rlen = 0;
- int memno = 0;
- struct resource *res;
- int np, na = prom_n_addr_cells(dev);
- unsigned long pci_addr, cpu_phys_addr;
-
- np = na + 5;
+ WARN_ON(bus == NULL);
- /* From "PCI Binding to 1275"
- * The ranges property is laid out as an array of elements,
- * each of which comprises:
- * cells 0 - 2: a PCI address
- * cells 3 or 3+4: a CPU physical address
- * (size depending on dev->n_addr_cells)
- * cells 4+5 or 5+6: the size of the range
+ /* If this not a PHB, nothing to do, page tables still exist and
+ * thus HPTEs will be faulted in when needed
*/
- ranges = get_property(dev, "ranges", &rlen);
- if (ranges == NULL)
- return;
- hose->io_base_phys = 0;
- while ((rlen -= np * sizeof(unsigned int)) >= 0) {
- res = NULL;
- pci_space = ranges[0];
- pci_addr = ((unsigned long)ranges[1] << 32) | ranges[2];
-
- cpu_phys_addr = ranges[3];
- if (na >= 2)
- cpu_phys_addr = (cpu_phys_addr << 32) | ranges[4];
-
- size = ((unsigned long)ranges[na+3] << 32) | ranges[na+4];
- ranges += np;
- if (size == 0)
- continue;
-
- /* Now consume following elements while they are contiguous */
- while (rlen >= np * sizeof(unsigned int)) {
- unsigned long addr, phys;
-
- if (ranges[0] != pci_space)
- break;
- addr = ((unsigned long)ranges[1] << 32) | ranges[2];
- phys = ranges[3];
- if (na >= 2)
- phys = (phys << 32) | ranges[4];
- if (addr != pci_addr + size ||
- phys != cpu_phys_addr + size)
- break;
-
- size += ((unsigned long)ranges[na+3] << 32)
- | ranges[na+4];
- ranges += np;
- rlen -= np * sizeof(unsigned int);
- }
-
- switch ((pci_space >> 24) & 0x3) {
- case 1: /* I/O space */
- hose->io_base_phys = cpu_phys_addr;
- hose->pci_io_size = size;
-
- res = &hose->io_resource;
- res->flags = IORESOURCE_IO;
- res->start = pci_addr;
- DBG("phb%d: IO 0x%lx -> 0x%lx\n", hose->global_number,
- res->start, res->start + size - 1);
- break;
- case 2: /* memory space */
- memno = 0;
- while (memno < 3 && hose->mem_resources[memno].flags)
- ++memno;
-
- if (memno == 0)
- hose->pci_mem_offset = cpu_phys_addr - pci_addr;
- if (memno < 3) {
- res = &hose->mem_resources[memno];
- res->flags = IORESOURCE_MEM;
- res->start = cpu_phys_addr;
- DBG("phb%d: MEM 0x%lx -> 0x%lx\n", hose->global_number,
- res->start, res->start + size - 1);
- }
- break;
- }
- if (res != NULL) {
- res->name = dev->full_name;
- res->end = res->start + size - 1;
- res->parent = NULL;
- res->sibling = NULL;
- res->child = NULL;
- }
- }
-}
-
-void __init pci_setup_phb_io(struct pci_controller *hose, int primary)
-{
- unsigned long size = hose->pci_io_size;
- unsigned long io_virt_offset;
- struct resource *res;
- struct device_node *isa_dn;
-
- hose->io_base_virt = reserve_phb_iospace(size);
- DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
- hose->global_number, hose->io_base_phys,
- (unsigned long) hose->io_base_virt);
-
- if (primary) {
- pci_io_base = (unsigned long)hose->io_base_virt;
- isa_dn = of_find_node_by_type(NULL, "isa");
- if (isa_dn) {
- isa_io_base = pci_io_base;
- pci_process_ISA_OF_ranges(isa_dn, hose->io_base_phys,
- hose->io_base_virt);
- of_node_put(isa_dn);
- }
- }
-
- io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
- res = &hose->io_resource;
- res->start += io_virt_offset;
- res->end += io_virt_offset;
-}
-
-void __devinit pci_setup_phb_io_dynamic(struct pci_controller *hose,
- int primary)
-{
- unsigned long size = hose->pci_io_size;
- unsigned long io_virt_offset;
- struct resource *res;
-
- hose->io_base_virt = __ioremap(hose->io_base_phys, size,
- _PAGE_NO_CACHE | _PAGE_GUARDED);
- DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
- hose->global_number, hose->io_base_phys,
- (unsigned long) hose->io_base_virt);
-
- if (primary)
- pci_io_base = (unsigned long)hose->io_base_virt;
-
- io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
- res = &hose->io_resource;
- res->start += io_virt_offset;
- res->end += io_virt_offset;
-}
-
-
-static int get_bus_io_range(struct pci_bus *bus, unsigned long *start_phys,
- unsigned long *start_virt, unsigned long *size)
-{
- struct pci_controller *hose = pci_bus_to_host(bus);
- struct pci_bus_region region;
- struct resource *res;
-
if (bus->self) {
- res = bus->resource[0];
- pcibios_resource_to_bus(bus->self, &region, res);
- *start_phys = hose->io_base_phys + region.start;
- *start_virt = (unsigned long) hose->io_base_virt +
- region.start;
- if (region.end > region.start)
- *size = region.end - region.start + 1;
- else {
- printk("%s(): unexpected region 0x%lx->0x%lx\n",
- __FUNCTION__, region.start, region.end);
- return 1;
- }
-
- } else {
- /* Root Bus */
- res = &hose->io_resource;
- *start_phys = hose->io_base_phys;
- *start_virt = (unsigned long) hose->io_base_virt;
- if (res->end > res->start)
- *size = res->end - res->start + 1;
- else {
- printk("%s(): unexpected region 0x%lx->0x%lx\n",
- __FUNCTION__, res->start, res->end);
- return 1;
- }
- }
-
- return 0;
-}
-
-int unmap_bus_range(struct pci_bus *bus)
-{
- unsigned long start_phys;
- unsigned long start_virt;
- unsigned long size;
-
- if (!bus) {
- printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
- return 1;
- }
-
- if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
- return 1;
- if (iounmap_explicit((void __iomem *) start_virt, size))
- return 1;
-
- return 0;
-}
-EXPORT_SYMBOL(unmap_bus_range);
-
-int remap_bus_range(struct pci_bus *bus)
-{
- unsigned long start_phys;
- unsigned long start_virt;
- unsigned long size;
-
- if (!bus) {
- printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
- return 1;
- }
-
-
- if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
- return 1;
- if (start_phys == 0)
- return 1;
- printk(KERN_DEBUG "mapping IO %lx -> %lx, size: %lx\n", start_phys, start_virt, size);
- if (__ioremap_explicit(start_phys, start_virt, size,
- _PAGE_NO_CACHE | _PAGE_GUARDED))
- return 1;
-
- return 0;
-}
-EXPORT_SYMBOL(remap_bus_range);
-
-static void phbs_remap_io(void)
-{
- struct pci_controller *hose, *tmp;
-
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
- remap_bus_range(hose->bus);
-}
-
-static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev)
-{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
- unsigned long offset;
-
- if (res->flags & IORESOURCE_IO) {
- offset = (unsigned long)hose->io_base_virt - pci_io_base;
-
- res->start += offset;
- res->end += offset;
- } else if (res->flags & IORESOURCE_MEM) {
- res->start += hose->pci_mem_offset;
- res->end += hose->pci_mem_offset;
- }
-}
-
-void __devinit pcibios_fixup_device_resources(struct pci_dev *dev,
- struct pci_bus *bus)
-{
- /* Update device resources. */
- int i;
-
- for (i = 0; i < PCI_NUM_RESOURCES; i++)
- if (dev->resource[i].flags)
- fixup_resource(&dev->resource[i], dev);
-}
-EXPORT_SYMBOL(pcibios_fixup_device_resources);
-
-
-static void __devinit do_bus_setup(struct pci_bus *bus)
-{
- struct pci_dev *dev;
-
- ppc_md.iommu_bus_setup(bus);
-
- list_for_each_entry(dev, &bus->devices, bus_list)
- ppc_md.iommu_dev_setup(dev);
-
- if (ppc_md.irq_bus_setup)
- ppc_md.irq_bus_setup(bus);
-}
-
-void __devinit pcibios_fixup_bus(struct pci_bus *bus)
-{
- struct pci_dev *dev = bus->self;
-
- if (dev && pci_probe_only &&
- (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
- /* This is a subordinate bridge */
-
- pci_read_bridge_bases(bus);
- pcibios_fixup_device_resources(dev, bus);
- }
-
- do_bus_setup(bus);
-
- if (!pci_probe_only)
- return;
-
- list_for_each_entry(dev, &bus->devices, bus_list)
- if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
- pcibios_fixup_device_resources(dev, bus);
-}
-EXPORT_SYMBOL(pcibios_fixup_bus);
-
-/*
- * Reads the interrupt pin to determine if interrupt is use by card.
- * If the interrupt is used, then gets the interrupt line from the
- * openfirmware and sets it in the pci_dev and pci_config line.
- */
-int pci_read_irq_line(struct pci_dev *pci_dev)
-{
- struct of_irq oirq;
- unsigned int virq;
-
- DBG("Try to map irq for %s...\n", pci_name(pci_dev));
-
-#ifdef DEBUG
- memset(&oirq, 0xff, sizeof(oirq));
-#endif
- /* Try to get a mapping from the device-tree */
- if (of_irq_map_pci(pci_dev, &oirq)) {
- u8 line, pin;
-
- /* If that fails, lets fallback to what is in the config
- * space and map that through the default controller. We
- * also set the type to level low since that's what PCI
- * interrupts are. If your platform does differently, then
- * either provide a proper interrupt tree or don't use this
- * function.
- */
- if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin))
- return -1;
- if (pin == 0)
- return -1;
- if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) ||
- line == 0xff) {
- return -1;
- }
- DBG(" -> no map ! Using irq line %d from PCI config\n", line);
-
- virq = irq_create_mapping(NULL, line);
- if (virq != NO_IRQ)
- set_irq_type(virq, IRQ_TYPE_LEVEL_LOW);
- } else {
- DBG(" -> got one, spec %d cells (0x%08x 0x%08x...) on %s\n",
- oirq.size, oirq.specifier[0], oirq.specifier[1],
- oirq.controller->full_name);
-
- virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
- oirq.size);
- }
- if(virq == NO_IRQ) {
- DBG(" -> failed to map !\n");
- return -1;
+ pr_debug("IO mapping for PCI-PCI bridge %s\n",
+ pci_name(bus->self));
+ pr_debug(" virt=0x%016llx...0x%016llx\n",
+ bus->resource[0]->start + _IO_BASE,
+ bus->resource[0]->end + _IO_BASE);
+ return 0;
}
- DBG(" -> mapped to linux irq %d\n", virq);
-
- pci_dev->irq = virq;
- pci_write_config_byte(pci_dev, PCI_INTERRUPT_LINE, virq);
-
- return 0;
-}
-EXPORT_SYMBOL(pci_read_irq_line);
-
-void pci_resource_to_user(const struct pci_dev *dev, int bar,
- const struct resource *rsrc,
- u64 *start, u64 *end)
-{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
- unsigned long offset = 0;
-
- if (hose == NULL)
- return;
-
- if (rsrc->flags & IORESOURCE_IO)
- offset = pci_io_base - (unsigned long)hose->io_base_virt +
- hose->io_base_phys;
-
- *start = rsrc->start + offset;
- *end = rsrc->end + offset;
+ return pcibios_map_phb_io_space(pci_bus_to_host(bus));
}
+EXPORT_SYMBOL_GPL(pcibios_map_io_space);
-struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node)
+void pcibios_setup_phb_io_space(struct pci_controller *hose)
{
- if (!have_of)
- return NULL;
- while(node) {
- struct pci_controller *hose, *tmp;
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
- if (hose->arch_data == node)
- return hose;
- node = node->parent;
- }
- return NULL;
+ pcibios_map_phb_io_space(hose);
}
-#endif /* CONFIG_PPC_MULTIPLATFORM */
-
-unsigned long pci_address_to_pio(phys_addr_t address)
-{
- struct pci_controller *hose, *tmp;
-
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
- if (address >= hose->io_base_phys &&
- address < (hose->io_base_phys + hose->pci_io_size)) {
- unsigned long base =
- (unsigned long)hose->io_base_virt - pci_io_base;
- return base + (address - hose->io_base_phys);
- }
- }
- return (unsigned int)-1;
-}
-EXPORT_SYMBOL_GPL(pci_address_to_pio);
-
-
#define IOBASE_BRIDGE_NUMBER 0
#define IOBASE_MEMORY 1
#define IOBASE_IO 2
@@ -1364,8 +208,7 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus,
unsigned long in_devfn)
{
struct pci_controller* hose;
- struct list_head *ln;
- struct pci_bus *bus = NULL;
+ struct pci_bus *tmp_bus, *bus = NULL;
struct device_node *hose_node;
/* Argh ! Please forgive me for that hack, but that's the
@@ -1373,31 +216,37 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus,
* G5 machines... So when something asks for bus 0 io base
* (bus 0 is HT root), we return the AGP one instead.
*/
- if (machine_is_compatible("MacRISC4"))
- if (in_bus == 0)
+ if (in_bus == 0 && of_machine_is_compatible("MacRISC4")) {
+ struct device_node *agp;
+
+ agp = of_find_compatible_node(NULL, NULL, "u3-agp");
+ if (agp)
in_bus = 0xf0;
+ of_node_put(agp);
+ }
/* That syscall isn't quite compatible with PCI domains, but it's
* used on pre-domains setup. We return the first match
*/
- for (ln = pci_root_buses.next; ln != &pci_root_buses; ln = ln->next) {
- bus = pci_bus_b(ln);
- if (in_bus >= bus->number && in_bus < (bus->number + bus->subordinate))
+ list_for_each_entry(tmp_bus, &pci_root_buses, node) {
+ if (in_bus >= tmp_bus->number &&
+ in_bus <= tmp_bus->busn_res.end) {
+ bus = tmp_bus;
break;
- bus = NULL;
+ }
}
- if (bus == NULL || bus->sysdata == NULL)
+ if (bus == NULL || bus->dev.of_node == NULL)
return -ENODEV;
- hose_node = (struct device_node *)bus->sysdata;
+ hose_node = bus->dev.of_node;
hose = PCI_DN(hose_node)->phb;
switch (which) {
case IOBASE_BRIDGE_NUMBER:
return (long)hose->first_busno;
case IOBASE_MEMORY:
- return (long)hose->pci_mem_offset;
+ return (long)hose->mem_offset[0];
case IOBASE_IO:
return (long)hose->io_base_phys;
case IOBASE_ISA_IO:
@@ -1417,3 +266,13 @@ int pcibus_to_node(struct pci_bus *bus)
}
EXPORT_SYMBOL(pcibus_to_node);
#endif
+
+static void quirk_radeon_32bit_msi(struct pci_dev *dev)
+{
+ struct pci_dn *pdn = pci_get_pdn(dev);
+
+ if (pdn)
+ pdn->force_32bit_msi = true;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x68f2, quirk_radeon_32bit_msi);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0xaa68, quirk_radeon_32bit_msi);
diff --git a/arch/powerpc/kernel/pci_direct_iommu.c b/arch/powerpc/kernel/pci_direct_iommu.c
deleted file mode 100644
index 72ce082ce73..00000000000
--- a/arch/powerpc/kernel/pci_direct_iommu.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Support for DMA from PCI devices to main memory on
- * machines without an iommu or with directly addressable
- * RAM (typically a pmac with 2Gb of RAM or less)
- *
- * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/mm.h>
-#include <linux/dma-mapping.h>
-
-#include <asm/sections.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-#include <asm/machdep.h>
-#include <asm/pmac_feature.h>
-#include <asm/abs_addr.h>
-#include <asm/ppc-pci.h>
-
-static void *pci_direct_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag)
-{
- void *ret;
-
- ret = (void *)__get_free_pages(flag, get_order(size));
- if (ret != NULL) {
- memset(ret, 0, size);
- *dma_handle = virt_to_abs(ret);
- }
- return ret;
-}
-
-static void pci_direct_free_coherent(struct device *hwdev, size_t size,
- void *vaddr, dma_addr_t dma_handle)
-{
- free_pages((unsigned long)vaddr, get_order(size));
-}
-
-static dma_addr_t pci_direct_map_single(struct device *hwdev, void *ptr,
- size_t size, enum dma_data_direction direction)
-{
- return virt_to_abs(ptr);
-}
-
-static void pci_direct_unmap_single(struct device *hwdev, dma_addr_t dma_addr,
- size_t size, enum dma_data_direction direction)
-{
-}
-
-static int pci_direct_map_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, enum dma_data_direction direction)
-{
- int i;
-
- for (i = 0; i < nents; i++, sg++) {
- sg->dma_address = page_to_phys(sg->page) + sg->offset;
- sg->dma_length = sg->length;
- }
-
- return nents;
-}
-
-static void pci_direct_unmap_sg(struct device *hwdev, struct scatterlist *sg,
- int nents, enum dma_data_direction direction)
-{
-}
-
-static int pci_direct_dma_supported(struct device *dev, u64 mask)
-{
- return mask < 0x100000000ull;
-}
-
-static struct dma_mapping_ops pci_direct_ops = {
- .alloc_coherent = pci_direct_alloc_coherent,
- .free_coherent = pci_direct_free_coherent,
- .map_single = pci_direct_map_single,
- .unmap_single = pci_direct_unmap_single,
- .map_sg = pci_direct_map_sg,
- .unmap_sg = pci_direct_unmap_sg,
- .dma_supported = pci_direct_dma_supported,
-};
-
-void __init pci_direct_iommu_init(void)
-{
- pci_dma_ops = pci_direct_ops;
-}
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 68df018dae0..1f61fab59d9 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -22,51 +22,54 @@
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/string.h>
+#include <linux/export.h>
#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/bootmem.h>
+#include <linux/gfp.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/pci-bridge.h>
-#include <asm/pSeries_reconfig.h>
#include <asm/ppc-pci.h>
#include <asm/firmware.h>
+struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
+{
+ struct device_node *dn = pci_device_to_OF_node(pdev);
+ if (!dn)
+ return NULL;
+ return PCI_DN(dn);
+}
+
/*
* Traverse_func that inits the PCI fields of the device node.
* NOTE: this *must* be done before read/write config to the device.
*/
-static void * __devinit update_dn_pci_info(struct device_node *dn, void *data)
+void *update_dn_pci_info(struct device_node *dn, void *data)
{
struct pci_controller *phb = data;
- const int *type = get_property(dn, "ibm,pci-config-space-type", NULL);
- const u32 *regs;
+ const __be32 *type = of_get_property(dn, "ibm,pci-config-space-type", NULL);
+ const __be32 *regs;
struct pci_dn *pdn;
- if (mem_init_done)
- pdn = kmalloc(sizeof(*pdn), GFP_KERNEL);
- else
- pdn = alloc_bootmem(sizeof(*pdn));
+ pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL);
if (pdn == NULL)
return NULL;
- memset(pdn, 0, sizeof(*pdn));
dn->data = pdn;
pdn->node = dn;
pdn->phb = phb;
- regs = get_property(dn, "reg", NULL);
+#ifdef CONFIG_PPC_POWERNV
+ pdn->pe_number = IODA_INVALID_PE;
+#endif
+ regs = of_get_property(dn, "reg", NULL);
if (regs) {
+ u32 addr = of_read_number(regs, 1);
+
/* First register entry is addr (00BBSS00) */
- pdn->busno = (regs[0] >> 16) & 0xff;
- pdn->devfn = (regs[0] >> 8) & 0xff;
- }
- if (firmware_has_feature(FW_FEATURE_ISERIES)) {
- const u32 *busp = get_property(dn, "linux,subbus", NULL);
- if (busp)
- pdn->bussubno = *busp;
+ pdn->busno = (addr >> 16) & 0xff;
+ pdn->devfn = (addr >> 8) & 0xff;
}
- pdn->pci_ext_config_space = (type && *type == 1);
+ pdn->pci_ext_config_space = (type && of_read_number(type, 1) == 1);
return NULL;
}
@@ -96,12 +99,13 @@ void *traverse_pci_devices(struct device_node *start, traverse_func pre,
/* We started with a phb, iterate all childs */
for (dn = start->child; dn; dn = nextdn) {
- const u32 *classp;
- u32 class;
+ const __be32 *classp;
+ u32 class = 0;
nextdn = NULL;
- classp = get_property(dn, "class-code", NULL);
- class = classp ? *classp : 0;
+ classp = of_get_property(dn, "class-code", NULL);
+ if (classp)
+ class = of_read_number(classp, 1);
if (pre && ((ret = pre(dn, data)) != NULL))
return ret;
@@ -135,9 +139,9 @@ void *traverse_pci_devices(struct device_node *start, traverse_func pre,
* subsystem is set up, before kmalloc is valid) and during the
* dynamic lpar operation of adding a PHB to a running system.
*/
-void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb)
+void pci_devs_phb_init_dynamic(struct pci_controller *phb)
{
- struct device_node * dn = (struct device_node *) phb->arch_data;
+ struct device_node *dn = phb->dn;
struct pci_dn *pdn;
/* PHB nodes themselves must not match */
@@ -152,70 +156,6 @@ void __devinit pci_devs_phb_init_dynamic(struct pci_controller *phb)
traverse_pci_devices(dn, update_dn_pci_info, phb);
}
-/*
- * Traversal func that looks for a <busno,devfcn> value.
- * If found, the pci_dn is returned (thus terminating the traversal).
- */
-static void *is_devfn_node(struct device_node *dn, void *data)
-{
- int busno = ((unsigned long)data >> 8) & 0xff;
- int devfn = ((unsigned long)data) & 0xff;
- struct pci_dn *pci = dn->data;
-
- if (pci && (devfn == pci->devfn) && (busno == pci->busno))
- return dn;
- return NULL;
-}
-
-/*
- * This is the "slow" path for looking up a device_node from a
- * pci_dev. It will hunt for the device under its parent's
- * phb and then update sysdata for a future fastpath.
- *
- * It may also do fixups on the actual device since this happens
- * on the first read/write.
- *
- * Note that it also must deal with devices that don't exist.
- * In this case it may probe for real hardware ("just in case")
- * and add a device_node to the device tree if necessary.
- *
- */
-struct device_node *fetch_dev_dn(struct pci_dev *dev)
-{
- struct device_node *orig_dn = dev->sysdata;
- struct device_node *dn;
- unsigned long searchval = (dev->bus->number << 8) | dev->devfn;
-
- dn = traverse_pci_devices(orig_dn, is_devfn_node, (void *)searchval);
- if (dn)
- dev->sysdata = dn;
- return dn;
-}
-EXPORT_SYMBOL(fetch_dev_dn);
-
-static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *node)
-{
- struct device_node *np = node;
- struct pci_dn *pci = NULL;
- int err = NOTIFY_OK;
-
- switch (action) {
- case PSERIES_RECONFIG_ADD:
- pci = np->parent->data;
- if (pci)
- update_dn_pci_info(np, pci->phb);
- break;
- default:
- err = NOTIFY_DONE;
- break;
- }
- return err;
-}
-
-static struct notifier_block pci_dn_reconfig_nb = {
- .notifier_call = pci_dn_reconfig_notifier,
-};
-
/**
* pci_devs_phb_init - Initialize phbs and pci devs under them.
*
@@ -232,6 +172,4 @@ void __init pci_devs_phb_init(void)
/* This must be done first so the device nodes have valid pci info! */
list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
pci_devs_phb_init_dynamic(phb);
-
- pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
}
diff --git a/arch/powerpc/kernel/pci_iommu.c b/arch/powerpc/kernel/pci_iommu.c
deleted file mode 100644
index 0688b2534ac..00000000000
--- a/arch/powerpc/kernel/pci_iommu.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
- *
- * Rewrite, cleanup, new allocation schemes:
- * Copyright (C) 2004 Olof Johansson, IBM Corporation
- *
- * Dynamic DMA mapping support, platform-independent parts.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/iommu.h>
-#include <asm/pci-bridge.h>
-#include <asm/machdep.h>
-#include <asm/ppc-pci.h>
-
-/*
- * We can use ->sysdata directly and avoid the extra work in
- * pci_device_to_OF_node since ->sysdata will have been initialised
- * in the iommu init code for all devices.
- */
-#define PCI_GET_DN(dev) ((struct device_node *)((dev)->sysdata))
-
-static inline struct iommu_table *device_to_table(struct device *hwdev)
-{
- struct pci_dev *pdev;
-
- if (!hwdev) {
- pdev = ppc64_isabridge_dev;
- if (!pdev)
- return NULL;
- } else
- pdev = to_pci_dev(hwdev);
-
- return PCI_DN(PCI_GET_DN(pdev))->iommu_table;
-}
-
-
-static inline unsigned long device_to_mask(struct device *hwdev)
-{
- struct pci_dev *pdev;
-
- if (!hwdev) {
- pdev = ppc64_isabridge_dev;
- if (!pdev) /* This is the best guess we can do */
- return 0xfffffffful;
- } else
- pdev = to_pci_dev(hwdev);
-
- if (pdev->dma_mask)
- return pdev->dma_mask;
-
- /* Assume devices without mask can take 32 bit addresses */
- return 0xfffffffful;
-}
-
-
-/* Allocates a contiguous real buffer and creates mappings over it.
- * Returns the virtual address of the buffer and sets dma_handle
- * to the dma address (mapping) of the first page.
- */
-static void *pci_iommu_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag)
-{
- return iommu_alloc_coherent(device_to_table(hwdev), size, dma_handle,
- device_to_mask(hwdev), flag,
- pcibus_to_node(to_pci_dev(hwdev)->bus));
-}
-
-static void pci_iommu_free_coherent(struct device *hwdev, size_t size,
- void *vaddr, dma_addr_t dma_handle)
-{
- iommu_free_coherent(device_to_table(hwdev), size, vaddr, dma_handle);
-}
-
-/* Creates TCEs for a user provided buffer. The user buffer must be
- * contiguous real kernel storage (not vmalloc). The address of the buffer
- * passed here is the kernel (virtual) address of the buffer. The buffer
- * need not be page aligned, the dma_addr_t returned will point to the same
- * byte within the page as vaddr.
- */
-static dma_addr_t pci_iommu_map_single(struct device *hwdev, void *vaddr,
- size_t size, enum dma_data_direction direction)
-{
- return iommu_map_single(device_to_table(hwdev), vaddr, size,
- device_to_mask(hwdev), direction);
-}
-
-
-static void pci_iommu_unmap_single(struct device *hwdev, dma_addr_t dma_handle,
- size_t size, enum dma_data_direction direction)
-{
- iommu_unmap_single(device_to_table(hwdev), dma_handle, size, direction);
-}
-
-
-static int pci_iommu_map_sg(struct device *pdev, struct scatterlist *sglist,
- int nelems, enum dma_data_direction direction)
-{
- return iommu_map_sg(pdev, device_to_table(pdev), sglist,
- nelems, device_to_mask(pdev), direction);
-}
-
-static void pci_iommu_unmap_sg(struct device *pdev, struct scatterlist *sglist,
- int nelems, enum dma_data_direction direction)
-{
- iommu_unmap_sg(device_to_table(pdev), sglist, nelems, direction);
-}
-
-/* We support DMA to/from any memory page via the iommu */
-static int pci_iommu_dma_supported(struct device *dev, u64 mask)
-{
- struct iommu_table *tbl = device_to_table(dev);
-
- if (!tbl || tbl->it_offset > mask) {
- printk(KERN_INFO "Warning: IOMMU table offset too big for device mask\n");
- if (tbl)
- printk(KERN_INFO "mask: 0x%08lx, table offset: 0x%08lx\n",
- mask, tbl->it_offset);
- else
- printk(KERN_INFO "mask: 0x%08lx, table unavailable\n",
- mask);
- return 0;
- } else
- return 1;
-}
-
-struct dma_mapping_ops pci_iommu_ops = {
- .alloc_coherent = pci_iommu_alloc_coherent,
- .free_coherent = pci_iommu_free_coherent,
- .map_single = pci_iommu_map_single,
- .unmap_single = pci_iommu_unmap_single,
- .map_sg = pci_iommu_map_sg,
- .unmap_sg = pci_iommu_unmap_sg,
- .dma_supported = pci_iommu_dma_supported,
-};
-
-void pci_iommu_init(void)
-{
- pci_dma_ops = pci_iommu_ops;
-}
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
new file mode 100644
index 00000000000..44562aa97f1
--- /dev/null
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -0,0 +1,404 @@
+/*
+ * Helper routines to scan the device tree for PCI devices and busses
+ *
+ * Migrated out of PowerPC architecture pci_64.c file by Grant Likely
+ * <grant.likely@secretlab.ca> so that these routines are available for
+ * 32 bit also.
+ *
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Rework, based on alpha PCI code.
+ * Copyright (c) 2009 Secret Lab Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/pci.h>
+#include <linux/export.h>
+#include <asm/pci-bridge.h>
+#include <asm/prom.h>
+
+/**
+ * get_int_prop - Decode a u32 from a device tree property
+ */
+static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
+{
+ const __be32 *prop;
+ int len;
+
+ prop = of_get_property(np, name, &len);
+ if (prop && len >= 4)
+ return of_read_number(prop, 1);
+ return def;
+}
+
+/**
+ * pci_parse_of_flags - Parse the flags cell of a device tree PCI address
+ * @addr0: value of 1st cell of a device tree PCI address.
+ * @bridge: Set this flag if the address is from a bridge 'ranges' property
+ */
+unsigned int pci_parse_of_flags(u32 addr0, int bridge)
+{
+ unsigned int flags = 0;
+
+ if (addr0 & 0x02000000) {
+ flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
+ flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
+ flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
+ if (addr0 & 0x40000000)
+ flags |= IORESOURCE_PREFETCH
+ | PCI_BASE_ADDRESS_MEM_PREFETCH;
+ /* Note: We don't know whether the ROM has been left enabled
+ * by the firmware or not. We mark it as disabled (ie, we do
+ * not set the IORESOURCE_ROM_ENABLE flag) for now rather than
+ * do a config space read, it will be force-enabled if needed
+ */
+ if (!bridge && (addr0 & 0xff) == 0x30)
+ flags |= IORESOURCE_READONLY;
+ } else if (addr0 & 0x01000000)
+ flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
+ if (flags)
+ flags |= IORESOURCE_SIZEALIGN;
+ return flags;
+}
+
+/**
+ * of_pci_parse_addrs - Parse PCI addresses assigned in the device tree node
+ * @node: device tree node for the PCI device
+ * @dev: pci_dev structure for the device
+ *
+ * This function parses the 'assigned-addresses' property of a PCI devices'
+ * device tree node and writes them into the associated pci_dev structure.
+ */
+static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
+{
+ u64 base, size;
+ unsigned int flags;
+ struct pci_bus_region region;
+ struct resource *res;
+ const __be32 *addrs;
+ u32 i;
+ int proplen;
+
+ addrs = of_get_property(node, "assigned-addresses", &proplen);
+ if (!addrs)
+ return;
+ pr_debug(" parse addresses (%d bytes) @ %p\n", proplen, addrs);
+ for (; proplen >= 20; proplen -= 20, addrs += 5) {
+ flags = pci_parse_of_flags(of_read_number(addrs, 1), 0);
+ if (!flags)
+ continue;
+ base = of_read_number(&addrs[1], 2);
+ size = of_read_number(&addrs[3], 2);
+ if (!size)
+ continue;
+ i = of_read_number(addrs, 1) & 0xff;
+ pr_debug(" base: %llx, size: %llx, i: %x\n",
+ (unsigned long long)base,
+ (unsigned long long)size, i);
+
+ if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) {
+ res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
+ } else if (i == dev->rom_base_reg) {
+ res = &dev->resource[PCI_ROM_RESOURCE];
+ flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+ } else {
+ printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
+ continue;
+ }
+ res->flags = flags;
+ res->name = pci_name(dev);
+ region.start = base;
+ region.end = base + size - 1;
+ pcibios_bus_to_resource(dev->bus, res, &region);
+ }
+}
+
+/**
+ * of_create_pci_dev - Given a device tree node on a pci bus, create a pci_dev
+ * @node: device tree node pointer
+ * @bus: bus the device is sitting on
+ * @devfn: PCI function number, extracted from device tree by caller.
+ */
+struct pci_dev *of_create_pci_dev(struct device_node *node,
+ struct pci_bus *bus, int devfn)
+{
+ struct pci_dev *dev;
+ const char *type;
+ struct pci_slot *slot;
+
+ dev = pci_alloc_dev(bus);
+ if (!dev)
+ return NULL;
+ type = of_get_property(node, "device_type", NULL);
+ if (type == NULL)
+ type = "";
+
+ pr_debug(" create device, devfn: %x, type: %s\n", devfn, type);
+
+ dev->dev.of_node = of_node_get(node);
+ dev->dev.parent = bus->bridge;
+ dev->dev.bus = &pci_bus_type;
+ dev->devfn = devfn;
+ dev->multifunction = 0; /* maybe a lie? */
+ dev->needs_freset = 0; /* pcie fundamental reset required */
+ set_pcie_port_type(dev);
+
+ list_for_each_entry(slot, &dev->bus->slots, list)
+ if (PCI_SLOT(dev->devfn) == slot->number)
+ dev->slot = slot;
+
+ dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
+ dev->device = get_int_prop(node, "device-id", 0xffff);
+ dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0);
+ dev->subsystem_device = get_int_prop(node, "subsystem-id", 0);
+
+ dev->cfg_size = pci_cfg_space_size(dev);
+
+ dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(bus),
+ dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ dev->class = get_int_prop(node, "class-code", 0);
+ dev->revision = get_int_prop(node, "revision-id", 0);
+
+ pr_debug(" class: 0x%x\n", dev->class);
+ pr_debug(" revision: 0x%x\n", dev->revision);
+
+ dev->current_state = PCI_UNKNOWN; /* unknown power state */
+ dev->error_state = pci_channel_io_normal;
+ dev->dma_mask = 0xffffffff;
+
+ /* Early fixups, before probing the BARs */
+ pci_fixup_device(pci_fixup_early, dev);
+
+ if (!strcmp(type, "pci") || !strcmp(type, "pciex")) {
+ /* a PCI-PCI bridge */
+ dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
+ dev->rom_base_reg = PCI_ROM_ADDRESS1;
+ set_pcie_hotplug_bridge(dev);
+ } else if (!strcmp(type, "cardbus")) {
+ dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
+ } else {
+ dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
+ dev->rom_base_reg = PCI_ROM_ADDRESS;
+ /* Maybe do a default OF mapping here */
+ dev->irq = NO_IRQ;
+ }
+
+ of_pci_parse_addrs(node, dev);
+
+ pr_debug(" adding to system ...\n");
+
+ pci_device_add(dev, bus);
+
+ return dev;
+}
+EXPORT_SYMBOL(of_create_pci_dev);
+
+/**
+ * of_scan_pci_bridge - Set up a PCI bridge and scan for child nodes
+ * @dev: pci_dev structure for the bridge
+ *
+ * of_scan_bus() calls this routine for each PCI bridge that it finds, and
+ * this routine in turn call of_scan_bus() recusively to scan for more child
+ * devices.
+ */
+void of_scan_pci_bridge(struct pci_dev *dev)
+{
+ struct device_node *node = dev->dev.of_node;
+ struct pci_bus *bus;
+ const __be32 *busrange, *ranges;
+ int len, i, mode;
+ struct pci_bus_region region;
+ struct resource *res;
+ unsigned int flags;
+ u64 size;
+
+ pr_debug("of_scan_pci_bridge(%s)\n", node->full_name);
+
+ /* parse bus-range property */
+ busrange = of_get_property(node, "bus-range", &len);
+ if (busrange == NULL || len != 8) {
+ printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n",
+ node->full_name);
+ return;
+ }
+ ranges = of_get_property(node, "ranges", &len);
+ if (ranges == NULL) {
+ printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %s\n",
+ node->full_name);
+ return;
+ }
+
+ bus = pci_find_bus(pci_domain_nr(dev->bus),
+ of_read_number(busrange, 1));
+ if (!bus) {
+ bus = pci_add_new_bus(dev->bus, dev,
+ of_read_number(busrange, 1));
+ if (!bus) {
+ printk(KERN_ERR "Failed to create pci bus for %s\n",
+ node->full_name);
+ return;
+ }
+ }
+
+ bus->primary = dev->bus->number;
+ pci_bus_insert_busn_res(bus, of_read_number(busrange, 1),
+ of_read_number(busrange+1, 1));
+ bus->bridge_ctl = 0;
+
+ /* parse ranges property */
+ /* PCI #address-cells == 3 and #size-cells == 2 always */
+ res = &dev->resource[PCI_BRIDGE_RESOURCES];
+ for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) {
+ res->flags = 0;
+ bus->resource[i] = res;
+ ++res;
+ }
+ i = 1;
+ for (; len >= 32; len -= 32, ranges += 8) {
+ flags = pci_parse_of_flags(of_read_number(ranges, 1), 1);
+ size = of_read_number(&ranges[6], 2);
+ if (flags == 0 || size == 0)
+ continue;
+ if (flags & IORESOURCE_IO) {
+ res = bus->resource[0];
+ if (res->flags) {
+ printk(KERN_ERR "PCI: ignoring extra I/O range"
+ " for bridge %s\n", node->full_name);
+ continue;
+ }
+ } else {
+ if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
+ printk(KERN_ERR "PCI: too many memory ranges"
+ " for bridge %s\n", node->full_name);
+ continue;
+ }
+ res = bus->resource[i];
+ ++i;
+ }
+ res->flags = flags;
+ region.start = of_read_number(&ranges[1], 2);
+ region.end = region.start + size - 1;
+ pcibios_bus_to_resource(dev->bus, res, &region);
+ }
+ sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
+ bus->number);
+ pr_debug(" bus name: %s\n", bus->name);
+
+ mode = PCI_PROBE_NORMAL;
+ if (ppc_md.pci_probe_mode)
+ mode = ppc_md.pci_probe_mode(bus);
+ pr_debug(" probe mode: %d\n", mode);
+
+ if (mode == PCI_PROBE_DEVTREE)
+ of_scan_bus(node, bus);
+ else if (mode == PCI_PROBE_NORMAL)
+ pci_scan_child_bus(bus);
+}
+EXPORT_SYMBOL(of_scan_pci_bridge);
+
+static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
+ struct device_node *dn)
+{
+ struct pci_dev *dev = NULL;
+ const __be32 *reg;
+ int reglen, devfn;
+#ifdef CONFIG_EEH
+ struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+#endif
+
+ pr_debug(" * %s\n", dn->full_name);
+ if (!of_device_is_available(dn))
+ return NULL;
+
+ reg = of_get_property(dn, "reg", &reglen);
+ if (reg == NULL || reglen < 20)
+ return NULL;
+ devfn = (of_read_number(reg, 1) >> 8) & 0xff;
+
+ /* Check if the PCI device is already there */
+ dev = pci_get_slot(bus, devfn);
+ if (dev) {
+ pci_dev_put(dev);
+ return dev;
+ }
+
+ /* Device removed permanently ? */
+#ifdef CONFIG_EEH
+ if (edev && (edev->mode & EEH_DEV_REMOVED))
+ return NULL;
+#endif
+
+ /* create a new pci_dev for this device */
+ dev = of_create_pci_dev(dn, bus, devfn);
+ if (!dev)
+ return NULL;
+
+ pr_debug(" dev header type: %x\n", dev->hdr_type);
+ return dev;
+}
+
+/**
+ * __of_scan_bus - given a PCI bus node, setup bus and scan for child devices
+ * @node: device tree node for the PCI bus
+ * @bus: pci_bus structure for the PCI bus
+ * @rescan_existing: Flag indicating bus has already been set up
+ */
+static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
+ int rescan_existing)
+{
+ struct device_node *child;
+ struct pci_dev *dev;
+
+ pr_debug("of_scan_bus(%s) bus no %d...\n",
+ node->full_name, bus->number);
+
+ /* Scan direct children */
+ for_each_child_of_node(node, child) {
+ dev = of_scan_pci_dev(bus, child);
+ if (!dev)
+ continue;
+ pr_debug(" dev header type: %x\n", dev->hdr_type);
+ }
+
+ /* Apply all fixups necessary. We don't fixup the bus "self"
+ * for an existing bridge that is being rescanned
+ */
+ if (!rescan_existing)
+ pcibios_setup_bus_self(bus);
+ pcibios_setup_bus_devices(bus);
+
+ /* Now scan child busses */
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ if (pci_is_bridge(dev)) {
+ of_scan_pci_bridge(dev);
+ }
+ }
+}
+
+/**
+ * of_scan_bus - given a PCI bus node, setup bus and scan for child devices
+ * @node: device tree node for the PCI bus
+ * @bus: pci_bus structure for the PCI bus
+ */
+void of_scan_bus(struct device_node *node, struct pci_bus *bus)
+{
+ __of_scan_bus(node, bus, 0);
+}
+EXPORT_SYMBOL_GPL(of_scan_bus);
+
+/**
+ * of_rescan_bus - given a PCI bus node, scan for child devices
+ * @node: device tree node for the PCI bus
+ * @bus: pci_bus structure for the PCI bus
+ *
+ * Same as of_scan_bus, but for a pci_bus structure that has already been
+ * setup.
+ */
+void of_rescan_bus(struct device_node *node, struct pci_bus *bus)
+{
+ __of_scan_bus(node, bus, 1);
+}
+EXPORT_SYMBOL_GPL(of_rescan_bus);
+
diff --git a/arch/powerpc/kernel/perfmon_fsl_booke.c b/arch/powerpc/kernel/perfmon_fsl_booke.c
deleted file mode 100644
index bdc3977a7b0..00000000000
--- a/arch/powerpc/kernel/perfmon_fsl_booke.c
+++ /dev/null
@@ -1,221 +0,0 @@
-/* kernel/perfmon_fsl_booke.c
- * Freescale Book-E Performance Monitor code
- *
- * Author: Andy Fleming
- * Copyright (c) 2004 Freescale Semiconductor, Inc
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
-#include <linux/ptrace.h>
-#include <linux/slab.h>
-#include <linux/user.h>
-#include <linux/a.out.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/prctl.h>
-
-#include <asm/pgtable.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/reg.h>
-#include <asm/xmon.h>
-#include <asm/pmc.h>
-
-static inline u32 get_pmlca(int ctr);
-static inline void set_pmlca(int ctr, u32 pmlca);
-
-static inline u32 get_pmlca(int ctr)
-{
- u32 pmlca;
-
- switch (ctr) {
- case 0:
- pmlca = mfpmr(PMRN_PMLCA0);
- break;
- case 1:
- pmlca = mfpmr(PMRN_PMLCA1);
- break;
- case 2:
- pmlca = mfpmr(PMRN_PMLCA2);
- break;
- case 3:
- pmlca = mfpmr(PMRN_PMLCA3);
- break;
- default:
- panic("Bad ctr number\n");
- }
-
- return pmlca;
-}
-
-static inline void set_pmlca(int ctr, u32 pmlca)
-{
- switch (ctr) {
- case 0:
- mtpmr(PMRN_PMLCA0, pmlca);
- break;
- case 1:
- mtpmr(PMRN_PMLCA1, pmlca);
- break;
- case 2:
- mtpmr(PMRN_PMLCA2, pmlca);
- break;
- case 3:
- mtpmr(PMRN_PMLCA3, pmlca);
- break;
- default:
- panic("Bad ctr number\n");
- }
-}
-
-void init_pmc_stop(int ctr)
-{
- u32 pmlca = (PMLCA_FC | PMLCA_FCS | PMLCA_FCU |
- PMLCA_FCM1 | PMLCA_FCM0);
- u32 pmlcb = 0;
-
- switch (ctr) {
- case 0:
- mtpmr(PMRN_PMLCA0, pmlca);
- mtpmr(PMRN_PMLCB0, pmlcb);
- break;
- case 1:
- mtpmr(PMRN_PMLCA1, pmlca);
- mtpmr(PMRN_PMLCB1, pmlcb);
- break;
- case 2:
- mtpmr(PMRN_PMLCA2, pmlca);
- mtpmr(PMRN_PMLCB2, pmlcb);
- break;
- case 3:
- mtpmr(PMRN_PMLCA3, pmlca);
- mtpmr(PMRN_PMLCB3, pmlcb);
- break;
- default:
- panic("Bad ctr number!\n");
- }
-}
-
-void set_pmc_event(int ctr, int event)
-{
- u32 pmlca;
-
- pmlca = get_pmlca(ctr);
-
- pmlca = (pmlca & ~PMLCA_EVENT_MASK) |
- ((event << PMLCA_EVENT_SHIFT) &
- PMLCA_EVENT_MASK);
-
- set_pmlca(ctr, pmlca);
-}
-
-void set_pmc_user_kernel(int ctr, int user, int kernel)
-{
- u32 pmlca;
-
- pmlca = get_pmlca(ctr);
-
- if(user)
- pmlca &= ~PMLCA_FCU;
- else
- pmlca |= PMLCA_FCU;
-
- if(kernel)
- pmlca &= ~PMLCA_FCS;
- else
- pmlca |= PMLCA_FCS;
-
- set_pmlca(ctr, pmlca);
-}
-
-void set_pmc_marked(int ctr, int mark0, int mark1)
-{
- u32 pmlca = get_pmlca(ctr);
-
- if(mark0)
- pmlca &= ~PMLCA_FCM0;
- else
- pmlca |= PMLCA_FCM0;
-
- if(mark1)
- pmlca &= ~PMLCA_FCM1;
- else
- pmlca |= PMLCA_FCM1;
-
- set_pmlca(ctr, pmlca);
-}
-
-void pmc_start_ctr(int ctr, int enable)
-{
- u32 pmlca = get_pmlca(ctr);
-
- pmlca &= ~PMLCA_FC;
-
- if (enable)
- pmlca |= PMLCA_CE;
- else
- pmlca &= ~PMLCA_CE;
-
- set_pmlca(ctr, pmlca);
-}
-
-void pmc_start_ctrs(int enable)
-{
- u32 pmgc0 = mfpmr(PMRN_PMGC0);
-
- pmgc0 &= ~PMGC0_FAC;
- pmgc0 |= PMGC0_FCECE;
-
- if (enable)
- pmgc0 |= PMGC0_PMIE;
- else
- pmgc0 &= ~PMGC0_PMIE;
-
- mtpmr(PMRN_PMGC0, pmgc0);
-}
-
-void pmc_stop_ctrs(void)
-{
- u32 pmgc0 = mfpmr(PMRN_PMGC0);
-
- pmgc0 |= PMGC0_FAC;
-
- pmgc0 &= ~(PMGC0_PMIE | PMGC0_FCECE);
-
- mtpmr(PMRN_PMGC0, pmgc0);
-}
-
-void dump_pmcs(void)
-{
- printk("pmgc0: %x\n", mfpmr(PMRN_PMGC0));
- printk("pmc\t\tpmlca\t\tpmlcb\n");
- printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC0),
- mfpmr(PMRN_PMLCA0), mfpmr(PMRN_PMLCB0));
- printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC1),
- mfpmr(PMRN_PMLCA1), mfpmr(PMRN_PMLCB1));
- printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC2),
- mfpmr(PMRN_PMLCA2), mfpmr(PMRN_PMLCB2));
- printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC3),
- mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3));
-}
-
-EXPORT_SYMBOL(init_pmc_stop);
-EXPORT_SYMBOL(set_pmc_event);
-EXPORT_SYMBOL(set_pmc_user_kernel);
-EXPORT_SYMBOL(set_pmc_marked);
-EXPORT_SYMBOL(pmc_start_ctr);
-EXPORT_SYMBOL(pmc_start_ctrs);
-EXPORT_SYMBOL(pmc_stop_ctrs);
-EXPORT_SYMBOL(dump_pmcs);
diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c
index a0a2efadeab..58eaa3ddf7b 100644
--- a/arch/powerpc/kernel/pmc.c
+++ b/arch/powerpc/kernel/pmc.c
@@ -13,46 +13,32 @@
*/
#include <linux/errno.h>
+#include <linux/bug.h>
#include <linux/spinlock.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <asm/processor.h>
+#include <asm/cputable.h>
#include <asm/pmc.h>
-#if defined(CONFIG_FSL_BOOKE) && !defined(CONFIG_E200)
-static void dummy_perf(struct pt_regs *regs)
-{
- unsigned int pmgc0 = mfpmr(PMRN_PMGC0);
-
- pmgc0 &= ~PMGC0_PMIE;
- mtpmr(PMRN_PMGC0, pmgc0);
-}
-#elif defined(CONFIG_PPC64) || defined(CONFIG_6xx)
-
#ifndef MMCR0_PMAO
#define MMCR0_PMAO 0
#endif
-/* Ensure exceptions are disabled */
static void dummy_perf(struct pt_regs *regs)
{
- unsigned int mmcr0 = mfspr(SPRN_MMCR0);
-
- mmcr0 &= ~(MMCR0_PMXE|MMCR0_PMAO);
- mtspr(SPRN_MMCR0, mmcr0);
-}
+#if defined(CONFIG_FSL_EMB_PERFMON)
+ mtpmr(PMRN_PMGC0, mfpmr(PMRN_PMGC0) & ~PMGC0_PMIE);
+#elif defined(CONFIG_PPC64) || defined(CONFIG_6xx)
+ if (cur_cpu_spec->pmc_type == PPC_PMC_IBM)
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~(MMCR0_PMXE|MMCR0_PMAO));
#else
-/* Ensure exceptions are disabled */
-static void dummy_perf(struct pt_regs *regs)
-{
- unsigned int mmcr0 = mfspr(SPRN_MMCR0);
-
- mmcr0 &= ~(MMCR0_PMXE);
- mtspr(SPRN_MMCR0, mmcr0);
-}
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMXE);
#endif
+}
+
-static DEFINE_SPINLOCK(pmc_owner_lock);
+static DEFINE_RAW_SPINLOCK(pmc_owner_lock);
static void *pmc_owner_caller; /* mostly for debugging */
perf_irq_t perf_irq = dummy_perf;
@@ -60,7 +46,7 @@ int reserve_pmc_hardware(perf_irq_t new_perf_irq)
{
int err = 0;
- spin_lock(&pmc_owner_lock);
+ raw_spin_lock(&pmc_owner_lock);
if (pmc_owner_caller) {
printk(KERN_WARNING "reserve_pmc_hardware: "
@@ -71,24 +57,24 @@ int reserve_pmc_hardware(perf_irq_t new_perf_irq)
}
pmc_owner_caller = __builtin_return_address(0);
- perf_irq = new_perf_irq ? : dummy_perf;
+ perf_irq = new_perf_irq ? new_perf_irq : dummy_perf;
out:
- spin_unlock(&pmc_owner_lock);
+ raw_spin_unlock(&pmc_owner_lock);
return err;
}
EXPORT_SYMBOL_GPL(reserve_pmc_hardware);
void release_pmc_hardware(void)
{
- spin_lock(&pmc_owner_lock);
+ raw_spin_lock(&pmc_owner_lock);
WARN_ON(! pmc_owner_caller);
pmc_owner_caller = NULL;
perf_irq = dummy_perf;
- spin_unlock(&pmc_owner_lock);
+ raw_spin_unlock(&pmc_owner_lock);
}
EXPORT_SYMBOL_GPL(release_pmc_hardware);
diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/kernel/ppc32.h
index 90e56277179..a27c914d580 100644
--- a/arch/powerpc/kernel/ppc32.h
+++ b/arch/powerpc/kernel/ppc32.h
@@ -16,81 +16,6 @@
/* These are here to support 32-bit syscalls on a 64-bit kernel. */
-typedef struct compat_siginfo {
- int si_signo;
- int si_errno;
- int si_code;
-
- union {
- int _pad[SI_PAD_SIZE32];
-
- /* kill() */
- struct {
- compat_pid_t _pid; /* sender's pid */
- compat_uid_t _uid; /* sender's uid */
- } _kill;
-
- /* POSIX.1b timers */
- struct {
- compat_timer_t _tid; /* timer id */
- int _overrun; /* overrun count */
- compat_sigval_t _sigval; /* same as below */
- int _sys_private; /* not to be passed to user */
- } _timer;
-
- /* POSIX.1b signals */
- struct {
- compat_pid_t _pid; /* sender's pid */
- compat_uid_t _uid; /* sender's uid */
- compat_sigval_t _sigval;
- } _rt;
-
- /* SIGCHLD */
- struct {
- compat_pid_t _pid; /* which child */
- compat_uid_t _uid; /* sender's uid */
- int _status; /* exit code */
- compat_clock_t _utime;
- compat_clock_t _stime;
- } _sigchld;
-
- /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */
- struct {
- unsigned int _addr; /* faulting insn/memory ref. */
- } _sigfault;
-
- /* SIGPOLL */
- struct {
- int _band; /* POLL_IN, POLL_OUT, POLL_MSG */
- int _fd;
- } _sigpoll;
- } _sifields;
-} compat_siginfo_t;
-
-#define __old_sigaction32 old_sigaction32
-
-struct __old_sigaction32 {
- compat_uptr_t sa_handler;
- compat_old_sigset_t sa_mask;
- unsigned int sa_flags;
- compat_uptr_t sa_restorer; /* not used by Linux/SPARC yet */
-};
-
-
-
-struct sigaction32 {
- compat_uptr_t sa_handler; /* Really a pointer, but need to deal with 32 bits */
- unsigned int sa_flags;
- compat_uptr_t sa_restorer; /* Another 32 bit pointer */
- compat_sigset_t sa_mask; /* A 32 bit mask */
-};
-
-typedef struct sigaltstack_32 {
- unsigned int ss_sp;
- int ss_flags;
- compat_size_t ss_size;
-} stack_32_t;
-
struct pt_regs32 {
unsigned int gpr[32];
unsigned int nip;
@@ -120,12 +45,13 @@ struct mcontext32 {
elf_fpregset_t mc_fregs;
unsigned int mc_pad[2];
elf_vrregset_t32 mc_vregs __attribute__((__aligned__(16)));
+ elf_vsrreghalf_t32 mc_vsregs __attribute__((__aligned__(16)));
};
struct ucontext32 {
unsigned int uc_flags;
unsigned int uc_link;
- stack_32_t uc_stack;
+ compat_stack_t uc_stack;
int uc_pad[7];
compat_uptr_t uc_regs; /* points to uc_mcontext field */
compat_sigset_t uc_sigmask; /* mask last for extensibility */
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 807193a3c78..48d17d6fca5 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -1,4 +1,4 @@
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/threads.h>
#include <linux/smp.h>
#include <linux/sched.h>
@@ -8,20 +8,17 @@
#include <linux/screen_info.h>
#include <linux/vt_kern.h>
#include <linux/nvram.h>
-#include <linux/console.h>
#include <linux/irq.h>
#include <linux/pci.h>
#include <linux/delay.h>
-#include <linux/ide.h>
#include <linux/bitops.h>
#include <asm/page.h>
-#include <asm/semaphore.h>
#include <asm/processor.h>
+#include <asm/cacheflush.h>
#include <asm/uaccess.h>
#include <asm/io.h>
-#include <asm/ide.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <asm/checksum.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -29,7 +26,6 @@
#include <linux/cuda.h>
#include <linux/pmu.h>
#include <asm/prom.h>
-#include <asm/system.h>
#include <asm/pci-bridge.h>
#include <asm/irq.h>
#include <asm/pmac_feature.h>
@@ -44,10 +40,10 @@
#include <asm/btext.h>
#include <asm/div64.h>
#include <asm/signal.h>
-
-#ifdef CONFIG_8xx
-#include <asm/commproc.h>
-#endif
+#include <asm/dcr.h>
+#include <asm/ftrace.h>
+#include <asm/switch_to.h>
+#include <asm/epapr_hcalls.h>
#ifdef CONFIG_PPC32
extern void transfer_to_handler(void);
@@ -62,9 +58,7 @@ EXPORT_SYMBOL(clear_pages);
EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
EXPORT_SYMBOL(DMA_MODE_READ);
EXPORT_SYMBOL(DMA_MODE_WRITE);
-EXPORT_SYMBOL(__div64_32);
-EXPORT_SYMBOL(do_signal);
EXPORT_SYMBOL(transfer_to_handler);
EXPORT_SYMBOL(do_IRQ);
EXPORT_SYMBOL(machine_check_exception);
@@ -74,62 +68,59 @@ EXPORT_SYMBOL(single_step_exception);
EXPORT_SYMBOL(sys_sigreturn);
#endif
+#ifdef CONFIG_FUNCTION_TRACER
+EXPORT_SYMBOL(_mcount);
+#endif
+
EXPORT_SYMBOL(strcpy);
EXPORT_SYMBOL(strncpy);
EXPORT_SYMBOL(strcat);
EXPORT_SYMBOL(strlen);
EXPORT_SYMBOL(strcmp);
-EXPORT_SYMBOL(strcasecmp);
-EXPORT_SYMBOL(strncasecmp);
+EXPORT_SYMBOL(strncmp);
+#ifndef CONFIG_GENERIC_CSUM
EXPORT_SYMBOL(csum_partial);
EXPORT_SYMBOL(csum_partial_copy_generic);
EXPORT_SYMBOL(ip_fast_csum);
EXPORT_SYMBOL(csum_tcpudp_magic);
+#endif
EXPORT_SYMBOL(__copy_tofrom_user);
EXPORT_SYMBOL(__clear_user);
-EXPORT_SYMBOL(__strncpy_from_user);
-EXPORT_SYMBOL(__strnlen_user);
-#ifdef CONFIG_PPC64
-EXPORT_SYMBOL(copy_4K_page);
-#endif
-
-#if defined(CONFIG_PPC32) && (defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE))
-EXPORT_SYMBOL(ppc_ide_md);
-#endif
+EXPORT_SYMBOL(copy_page);
#if defined(CONFIG_PCI) && defined(CONFIG_PPC32)
EXPORT_SYMBOL(isa_io_base);
EXPORT_SYMBOL(isa_mem_base);
EXPORT_SYMBOL(pci_dram_offset);
-EXPORT_SYMBOL(pci_alloc_consistent);
-EXPORT_SYMBOL(pci_free_consistent);
-EXPORT_SYMBOL(pci_bus_io_base);
-EXPORT_SYMBOL(pci_bus_io_base_phys);
-EXPORT_SYMBOL(pci_bus_mem_base_phys);
-EXPORT_SYMBOL(pci_bus_to_hose);
#endif /* CONFIG_PCI */
EXPORT_SYMBOL(start_thread);
-EXPORT_SYMBOL(kernel_thread);
+#ifdef CONFIG_PPC_FPU
EXPORT_SYMBOL(giveup_fpu);
+EXPORT_SYMBOL(load_fp_state);
+EXPORT_SYMBOL(store_fp_state);
+#endif
#ifdef CONFIG_ALTIVEC
EXPORT_SYMBOL(giveup_altivec);
+EXPORT_SYMBOL(load_vr_state);
+EXPORT_SYMBOL(store_vr_state);
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+EXPORT_SYMBOL(giveup_vsx);
+EXPORT_SYMBOL_GPL(__giveup_vsx);
+#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
EXPORT_SYMBOL(giveup_spe);
#endif /* CONFIG_SPE */
#ifndef CONFIG_PPC64
EXPORT_SYMBOL(flush_instruction_cache);
-EXPORT_SYMBOL(flush_tlb_kernel_range);
-EXPORT_SYMBOL(flush_tlb_page);
-EXPORT_SYMBOL(_tlbie);
#endif
-EXPORT_SYMBOL(__flush_icache_range);
EXPORT_SYMBOL(flush_dcache_range);
+EXPORT_SYMBOL(flush_icache_range);
#ifdef CONFIG_SMP
#ifdef CONFIG_PPC32
@@ -148,9 +139,6 @@ EXPORT_SYMBOL(adb_try_handler_change);
EXPORT_SYMBOL(cuda_request);
EXPORT_SYMBOL(cuda_poll);
#endif /* CONFIG_ADB_CUDA */
-#ifdef CONFIG_VT
-EXPORT_SYMBOL(kd_mksound);
-#endif
EXPORT_SYMBOL(to_tm);
#ifdef CONFIG_PPC32
@@ -160,8 +148,13 @@ long long __lshrdi3(long long, int);
EXPORT_SYMBOL(__ashrdi3);
EXPORT_SYMBOL(__ashldi3);
EXPORT_SYMBOL(__lshrdi3);
+int __ucmpdi2(unsigned long long, unsigned long long);
+EXPORT_SYMBOL(__ucmpdi2);
+int __cmpdi2(long long, long long);
+EXPORT_SYMBOL(__cmpdi2);
#endif
-
+long long __bswapdi2(long long);
+EXPORT_SYMBOL(__bswapdi2);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memmove);
@@ -174,23 +167,13 @@ EXPORT_SYMBOL(screen_info);
#ifdef CONFIG_PPC32
EXPORT_SYMBOL(timer_interrupt);
-EXPORT_SYMBOL(irq_desc);
EXPORT_SYMBOL(tb_ticks_per_jiffy);
-EXPORT_SYMBOL(console_drivers);
EXPORT_SYMBOL(cacheable_memcpy);
-#endif
-
-#ifdef CONFIG_8xx
-EXPORT_SYMBOL(cpm_install_handler);
-EXPORT_SYMBOL(cpm_free_handler);
-#endif /* CONFIG_8xx */
-#if defined(CONFIG_8xx) || defined(CONFIG_40x)
-EXPORT_SYMBOL(__res);
+EXPORT_SYMBOL(cacheable_memzero);
#endif
#ifdef CONFIG_PPC32
-EXPORT_SYMBOL(next_mmu_context);
-EXPORT_SYMBOL(set_context);
+EXPORT_SYMBOL(switch_mmu_context);
#endif
#ifdef CONFIG_PPC_STD_MMU_32
@@ -204,7 +187,23 @@ EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */
extern long *intercept_table;
EXPORT_SYMBOL(intercept_table);
#endif /* CONFIG_PPC_STD_MMU_32 */
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+#ifdef CONFIG_PPC_DCR_NATIVE
EXPORT_SYMBOL(__mtdcr);
EXPORT_SYMBOL(__mfdcr);
#endif
+EXPORT_SYMBOL(empty_zero_page);
+
+#ifdef CONFIG_PPC64
+EXPORT_SYMBOL(__arch_hweight8);
+EXPORT_SYMBOL(__arch_hweight16);
+EXPORT_SYMBOL(__arch_hweight32);
+EXPORT_SYMBOL(__arch_hweight64);
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+EXPORT_SYMBOL_GPL(mmu_psize_defs);
+#endif
+
+#ifdef CONFIG_EPAPR_PARAVIRT
+EXPORT_SYMBOL(epapr_hypercall_start);
+#endif
diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S
new file mode 100644
index 00000000000..1b1787d5289
--- /dev/null
+++ b/arch/powerpc/kernel/ppc_save_regs.S
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * NOTE: assert(sizeof(buf) > 23 * sizeof(long))
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+
+/*
+ * Grab the register values as they are now.
+ * This won't do a particularly good job because we really
+ * want our caller's caller's registers, and our caller has
+ * already executed its prologue.
+ * ToDo: We could reach back into the caller's save area to do
+ * a better job of representing the caller's state (note that
+ * that will be different for 32-bit and 64-bit, because of the
+ * different ABIs, though).
+ */
+_GLOBAL(ppc_save_regs)
+ PPC_STL r0,0*SZL(r3)
+ PPC_STL r2,2*SZL(r3)
+ PPC_STL r3,3*SZL(r3)
+ PPC_STL r4,4*SZL(r3)
+ PPC_STL r5,5*SZL(r3)
+ PPC_STL r6,6*SZL(r3)
+ PPC_STL r7,7*SZL(r3)
+ PPC_STL r8,8*SZL(r3)
+ PPC_STL r9,9*SZL(r3)
+ PPC_STL r10,10*SZL(r3)
+ PPC_STL r11,11*SZL(r3)
+ PPC_STL r12,12*SZL(r3)
+ PPC_STL r13,13*SZL(r3)
+ PPC_STL r14,14*SZL(r3)
+ PPC_STL r15,15*SZL(r3)
+ PPC_STL r16,16*SZL(r3)
+ PPC_STL r17,17*SZL(r3)
+ PPC_STL r18,18*SZL(r3)
+ PPC_STL r19,19*SZL(r3)
+ PPC_STL r20,20*SZL(r3)
+ PPC_STL r21,21*SZL(r3)
+ PPC_STL r22,22*SZL(r3)
+ PPC_STL r23,23*SZL(r3)
+ PPC_STL r24,24*SZL(r3)
+ PPC_STL r25,25*SZL(r3)
+ PPC_STL r26,26*SZL(r3)
+ PPC_STL r27,27*SZL(r3)
+ PPC_STL r28,28*SZL(r3)
+ PPC_STL r29,29*SZL(r3)
+ PPC_STL r30,30*SZL(r3)
+ PPC_STL r31,31*SZL(r3)
+ /* go up one stack frame for SP */
+ PPC_LL r4,0(r1)
+ PPC_STL r4,1*SZL(r3)
+ /* get caller's LR */
+ PPC_LL r0,LRSAVE(r4)
+ PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_LINK-STACK_FRAME_OVERHEAD(r3)
+ mfmsr r0
+ PPC_STL r0,_MSR-STACK_FRAME_OVERHEAD(r3)
+ mfctr r0
+ PPC_STL r0,_CTR-STACK_FRAME_OVERHEAD(r3)
+ mfxer r0
+ PPC_STL r0,_XER-STACK_FRAME_OVERHEAD(r3)
+ mfcr r0
+ PPC_STL r0,_CCR-STACK_FRAME_OVERHEAD(r3)
+ li r0,0
+ PPC_STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3)
+ blr
diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_powerpc.c
index f598cb51953..c30612aad68 100644
--- a/arch/powerpc/kernel/proc_ppc64.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -19,7 +19,6 @@
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
-#include <linux/slab.h>
#include <linux/kernel.h>
#include <asm/machdep.h>
@@ -28,17 +27,54 @@
#include <asm/uaccess.h>
#include <asm/prom.h>
-static loff_t page_map_seek( struct file *file, loff_t off, int whence);
+#ifdef CONFIG_PPC64
+
+static loff_t page_map_seek(struct file *file, loff_t off, int whence)
+{
+ return fixed_size_llseek(file, off, whence, PAGE_SIZE);
+}
+
static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
- loff_t *ppos);
-static int page_map_mmap( struct file *file, struct vm_area_struct *vma );
+ loff_t *ppos)
+{
+ return simple_read_from_buffer(buf, nbytes, ppos,
+ PDE_DATA(file_inode(file)), PAGE_SIZE);
+}
-static struct file_operations page_map_fops = {
+static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
+{
+ if ((vma->vm_end - vma->vm_start) > PAGE_SIZE)
+ return -EINVAL;
+
+ remap_pfn_range(vma, vma->vm_start,
+ __pa(PDE_DATA(file_inode(file))) >> PAGE_SHIFT,
+ PAGE_SIZE, vma->vm_page_prot);
+ return 0;
+}
+
+static const struct file_operations page_map_fops = {
.llseek = page_map_seek,
.read = page_map_read,
.mmap = page_map_mmap
};
+
+static int __init proc_ppc64_init(void)
+{
+ struct proc_dir_entry *pde;
+
+ pde = proc_create_data("powerpc/systemcfg", S_IFREG|S_IRUGO, NULL,
+ &page_map_fops, vdso_data);
+ if (!pde)
+ return 1;
+ proc_set_size(pde, PAGE_SIZE);
+
+ return 0;
+}
+__initcall(proc_ppc64_init);
+
+#endif /* CONFIG_PPC64 */
+
/*
* Create the ppc64 and ppc64/rtas directories early. This allows us to
* assume that they have been previously created in drivers.
@@ -47,78 +83,24 @@ static int __init proc_ppc64_create(void)
{
struct proc_dir_entry *root;
- root = proc_mkdir("ppc64", NULL);
+ root = proc_mkdir("powerpc", NULL);
if (!root)
return 1;
+#ifdef CONFIG_PPC64
+ if (!proc_symlink("ppc64", NULL, "powerpc"))
+ pr_err("Failed to create link /proc/ppc64 -> /proc/powerpc\n");
+#endif
+
if (!of_find_node_by_path("/rtas"))
return 0;
if (!proc_mkdir("rtas", root))
return 1;
- if (!proc_symlink("rtas", NULL, "ppc64/rtas"))
+ if (!proc_symlink("rtas", NULL, "powerpc/rtas"))
return 1;
return 0;
}
core_initcall(proc_ppc64_create);
-
-static int __init proc_ppc64_init(void)
-{
- struct proc_dir_entry *pde;
-
- pde = create_proc_entry("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL);
- if (!pde)
- return 1;
- pde->nlink = 1;
- pde->data = vdso_data;
- pde->size = PAGE_SIZE;
- pde->proc_fops = &page_map_fops;
-
- return 0;
-}
-__initcall(proc_ppc64_init);
-
-static loff_t page_map_seek( struct file *file, loff_t off, int whence)
-{
- loff_t new;
- struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
-
- switch(whence) {
- case 0:
- new = off;
- break;
- case 1:
- new = file->f_pos + off;
- break;
- case 2:
- new = dp->size + off;
- break;
- default:
- return -EINVAL;
- }
- if ( new < 0 || new > dp->size )
- return -EINVAL;
- return (file->f_pos = new);
-}
-
-static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes,
- loff_t *ppos)
-{
- struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
- return simple_read_from_buffer(buf, nbytes, ppos, dp->data, dp->size);
-}
-
-static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
-{
- struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
-
- if ((vma->vm_end - vma->vm_start) > dp->size)
- return -EINVAL;
-
- remap_pfn_range(vma, vma->vm_start, __pa(dp->data) >> PAGE_SHIFT,
- dp->size, vma->vm_page_prot);
- return 0;
-}
-
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index a127a1e3c09..be99774d3f4 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -19,44 +19,104 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/smp.h>
-#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/elf.h>
-#include <linux/init.h>
#include <linux/prctl.h>
#include <linux/init_task.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/kallsyms.h>
#include <linux/mqueue.h>
#include <linux/hardirq.h>
#include <linux/utsname.h>
+#include <linux/ftrace.h>
+#include <linux/kernel_stat.h>
+#include <linux/personality.h>
+#include <linux/random.h>
+#include <linux/hw_breakpoint.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
-#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/time.h>
+#include <asm/runlatch.h>
#include <asm/syscalls.h>
+#include <asm/switch_to.h>
+#include <asm/tm.h>
+#include <asm/debug.h>
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
#endif
+#include <asm/code-patching.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+
+/* Transactional Memory debug */
+#ifdef TM_DEBUG_SW
+#define TM_DEBUG(x...) printk(KERN_INFO x)
+#else
+#define TM_DEBUG(x...) do { } while(0)
+#endif
extern unsigned long _get_SP(void);
#ifndef CONFIG_SMP
struct task_struct *last_task_used_math = NULL;
struct task_struct *last_task_used_altivec = NULL;
+struct task_struct *last_task_used_vsx = NULL;
struct task_struct *last_task_used_spe = NULL;
#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void giveup_fpu_maybe_transactional(struct task_struct *tsk)
+{
+ /*
+ * If we are saving the current thread's registers, and the
+ * thread is in a transactional state, set the TIF_RESTORE_TM
+ * bit so that we know to restore the registers before
+ * returning to userspace.
+ */
+ if (tsk == current && tsk->thread.regs &&
+ MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
+ !test_thread_flag(TIF_RESTORE_TM)) {
+ tsk->thread.tm_orig_msr = tsk->thread.regs->msr;
+ set_thread_flag(TIF_RESTORE_TM);
+ }
+
+ giveup_fpu(tsk);
+}
+
+void giveup_altivec_maybe_transactional(struct task_struct *tsk)
+{
+ /*
+ * If we are saving the current thread's registers, and the
+ * thread is in a transactional state, set the TIF_RESTORE_TM
+ * bit so that we know to restore the registers before
+ * returning to userspace.
+ */
+ if (tsk == current && tsk->thread.regs &&
+ MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
+ !test_thread_flag(TIF_RESTORE_TM)) {
+ tsk->thread.tm_orig_msr = tsk->thread.regs->msr;
+ set_thread_flag(TIF_RESTORE_TM);
+ }
+
+ giveup_altivec(tsk);
+}
+
+#else
+#define giveup_fpu_maybe_transactional(tsk) giveup_fpu(tsk)
+#define giveup_altivec_maybe_transactional(tsk) giveup_altivec(tsk)
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+#ifdef CONFIG_PPC_FPU
/*
* Make sure the floating-point register state in the
* the thread_struct is up to date for task tsk.
@@ -84,11 +144,13 @@ void flush_fp_to_thread(struct task_struct *tsk)
*/
BUG_ON(tsk != current);
#endif
- giveup_fpu(current);
+ giveup_fpu_maybe_transactional(tsk);
}
preempt_enable();
}
}
+EXPORT_SYMBOL_GPL(flush_fp_to_thread);
+#endif /* CONFIG_PPC_FPU */
void enable_kernel_fp(void)
{
@@ -96,26 +158,15 @@ void enable_kernel_fp(void)
#ifdef CONFIG_SMP
if (current->thread.regs && (current->thread.regs->msr & MSR_FP))
- giveup_fpu(current);
+ giveup_fpu_maybe_transactional(current);
else
giveup_fpu(NULL); /* just enables FP for kernel */
#else
- giveup_fpu(last_task_used_math);
+ giveup_fpu_maybe_transactional(last_task_used_math);
#endif /* CONFIG_SMP */
}
EXPORT_SYMBOL(enable_kernel_fp);
-int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs)
-{
- if (!tsk->thread.regs)
- return 0;
- flush_fp_to_thread(current);
-
- memcpy(fpregs, &tsk->thread.fpr[0], sizeof(*fpregs));
-
- return 1;
-}
-
#ifdef CONFIG_ALTIVEC
void enable_kernel_altivec(void)
{
@@ -123,11 +174,11 @@ void enable_kernel_altivec(void)
#ifdef CONFIG_SMP
if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
- giveup_altivec(current);
+ giveup_altivec_maybe_transactional(current);
else
- giveup_altivec(NULL); /* just enable AltiVec for kernel - force */
+ giveup_altivec_notask();
#else
- giveup_altivec(last_task_used_altivec);
+ giveup_altivec_maybe_transactional(last_task_used_altivec);
#endif /* CONFIG_SMP */
}
EXPORT_SYMBOL(enable_kernel_altivec);
@@ -144,19 +195,55 @@ void flush_altivec_to_thread(struct task_struct *tsk)
#ifdef CONFIG_SMP
BUG_ON(tsk != current);
#endif
- giveup_altivec(current);
+ giveup_altivec_maybe_transactional(tsk);
}
preempt_enable();
}
}
+EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
+#endif /* CONFIG_ALTIVEC */
-int dump_task_altivec(struct pt_regs *regs, elf_vrregset_t *vrregs)
+#ifdef CONFIG_VSX
+#if 0
+/* not currently used, but some crazy RAID module might want to later */
+void enable_kernel_vsx(void)
{
- flush_altivec_to_thread(current);
- memcpy(vrregs, &current->thread.vr[0], sizeof(*vrregs));
- return 1;
+ WARN_ON(preemptible());
+
+#ifdef CONFIG_SMP
+ if (current->thread.regs && (current->thread.regs->msr & MSR_VSX))
+ giveup_vsx(current);
+ else
+ giveup_vsx(NULL); /* just enable vsx for kernel - force */
+#else
+ giveup_vsx(last_task_used_vsx);
+#endif /* CONFIG_SMP */
}
-#endif /* CONFIG_ALTIVEC */
+EXPORT_SYMBOL(enable_kernel_vsx);
+#endif
+
+void giveup_vsx(struct task_struct *tsk)
+{
+ giveup_fpu_maybe_transactional(tsk);
+ giveup_altivec_maybe_transactional(tsk);
+ __giveup_vsx(tsk);
+}
+
+void flush_vsx_to_thread(struct task_struct *tsk)
+{
+ if (tsk->thread.regs) {
+ preempt_disable();
+ if (tsk->thread.regs->msr & MSR_VSX) {
+#ifdef CONFIG_SMP
+ BUG_ON(tsk != current);
+#endif
+ giveup_vsx(tsk);
+ }
+ preempt_enable();
+ }
+}
+EXPORT_SYMBOL_GPL(flush_vsx_to_thread);
+#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
@@ -183,19 +270,12 @@ void flush_spe_to_thread(struct task_struct *tsk)
#ifdef CONFIG_SMP
BUG_ON(tsk != current);
#endif
- giveup_spe(current);
+ tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
+ giveup_spe(tsk);
}
preempt_enable();
}
}
-
-int dump_spe(struct pt_regs *regs, elf_vrregset_t *evrregs)
-{
- flush_spe_to_thread(current);
- /* We copy u32 evr[32] + u64 acc + u32 spefscr -> 35 */
- memcpy(evrregs, &current->thread.evr[0], sizeof(u32) * 35);
- return 1;
-}
#endif /* CONFIG_SPE */
#ifndef CONFIG_SMP
@@ -212,6 +292,10 @@ void discard_lazy_cpu_state(void)
if (last_task_used_altivec == current)
last_task_used_altivec = NULL;
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+ if (last_task_used_vsx == current)
+ last_task_used_vsx = NULL;
+#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
if (last_task_used_spe == current)
last_task_used_spe = NULL;
@@ -220,28 +304,468 @@ void discard_lazy_cpu_state(void)
}
#endif /* CONFIG_SMP */
-#ifdef CONFIG_PPC_MERGE /* XXX for now */
-int set_dabr(unsigned long dabr)
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+void do_send_trap(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code, int signal_code, int breakpt)
{
- if (ppc_md.set_dabr)
- return ppc_md.set_dabr(dabr);
+ siginfo_t info;
+
+ current->thread.trap_nr = signal_code;
+ if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
+ 11, SIGSEGV) == NOTIFY_STOP)
+ return;
+
+ /* Deliver the signal to userspace */
+ info.si_signo = SIGTRAP;
+ info.si_errno = breakpt; /* breakpoint or watchpoint id */
+ info.si_code = signal_code;
+ info.si_addr = (void __user *)address;
+ force_sig_info(SIGTRAP, &info, current);
+}
+#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
+void do_break (struct pt_regs *regs, unsigned long address,
+ unsigned long error_code)
+{
+ siginfo_t info;
+ current->thread.trap_nr = TRAP_HWBKPT;
+ if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
+ 11, SIGSEGV) == NOTIFY_STOP)
+ return;
+
+ if (debugger_break_match(regs))
+ return;
+
+ /* Clear the breakpoint */
+ hw_breakpoint_disable();
+
+ /* Deliver the signal to userspace */
+ info.si_signo = SIGTRAP;
+ info.si_errno = 0;
+ info.si_code = TRAP_HWBKPT;
+ info.si_addr = (void __user *)address;
+ force_sig_info(SIGTRAP, &info, current);
+}
+#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
+
+static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk);
+
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+/*
+ * Set the debug registers back to their default "safe" values.
+ */
+static void set_debug_reg_defaults(struct thread_struct *thread)
+{
+ thread->debug.iac1 = thread->debug.iac2 = 0;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ thread->debug.iac3 = thread->debug.iac4 = 0;
+#endif
+ thread->debug.dac1 = thread->debug.dac2 = 0;
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ thread->debug.dvc1 = thread->debug.dvc2 = 0;
+#endif
+ thread->debug.dbcr0 = 0;
+#ifdef CONFIG_BOOKE
+ /*
+ * Force User/Supervisor bits to b11 (user-only MSR[PR]=1)
+ */
+ thread->debug.dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
+ DBCR1_IAC3US | DBCR1_IAC4US;
+ /*
+ * Force Data Address Compare User/Supervisor bits to be User-only
+ * (0b11 MSR[PR]=1) and set all other bits in DBCR2 register to be 0.
+ */
+ thread->debug.dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
+#else
+ thread->debug.dbcr1 = 0;
+#endif
+}
+
+static void prime_debug_regs(struct debug_reg *debug)
+{
+ /*
+ * We could have inherited MSR_DE from userspace, since
+ * it doesn't get cleared on exception entry. Make sure
+ * MSR_DE is clear before we enable any debug events.
+ */
+ mtmsr(mfmsr() & ~MSR_DE);
+
+ mtspr(SPRN_IAC1, debug->iac1);
+ mtspr(SPRN_IAC2, debug->iac2);
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ mtspr(SPRN_IAC3, debug->iac3);
+ mtspr(SPRN_IAC4, debug->iac4);
+#endif
+ mtspr(SPRN_DAC1, debug->dac1);
+ mtspr(SPRN_DAC2, debug->dac2);
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ mtspr(SPRN_DVC1, debug->dvc1);
+ mtspr(SPRN_DVC2, debug->dvc2);
+#endif
+ mtspr(SPRN_DBCR0, debug->dbcr0);
+ mtspr(SPRN_DBCR1, debug->dbcr1);
+#ifdef CONFIG_BOOKE
+ mtspr(SPRN_DBCR2, debug->dbcr2);
+#endif
+}
+/*
+ * Unless neither the old or new thread are making use of the
+ * debug registers, set the debug registers from the values
+ * stored in the new thread.
+ */
+void switch_booke_debug_regs(struct debug_reg *new_debug)
+{
+ if ((current->thread.debug.dbcr0 & DBCR0_IDM)
+ || (new_debug->dbcr0 & DBCR0_IDM))
+ prime_debug_regs(new_debug);
+}
+EXPORT_SYMBOL_GPL(switch_booke_debug_regs);
+#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
+#ifndef CONFIG_HAVE_HW_BREAKPOINT
+static void set_debug_reg_defaults(struct thread_struct *thread)
+{
+ thread->hw_brk.address = 0;
+ thread->hw_brk.type = 0;
+ set_breakpoint(&thread->hw_brk);
+}
+#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
+#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
+
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
+{
+ mtspr(SPRN_DAC1, dabr);
+#ifdef CONFIG_PPC_47x
+ isync();
+#endif
+ return 0;
+}
+#elif defined(CONFIG_PPC_BOOK3S)
+static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
+{
mtspr(SPRN_DABR, dabr);
+ if (cpu_has_feature(CPU_FTR_DABRX))
+ mtspr(SPRN_DABRX, dabrx);
return 0;
}
+#else
+static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
+{
+ return -EINVAL;
+}
#endif
+static inline int set_dabr(struct arch_hw_breakpoint *brk)
+{
+ unsigned long dabr, dabrx;
+
+ dabr = brk->address | (brk->type & HW_BRK_TYPE_DABR);
+ dabrx = ((brk->type >> 3) & 0x7);
+
+ if (ppc_md.set_dabr)
+ return ppc_md.set_dabr(dabr, dabrx);
+
+ return __set_dabr(dabr, dabrx);
+}
+
+static inline int set_dawr(struct arch_hw_breakpoint *brk)
+{
+ unsigned long dawr, dawrx, mrd;
+
+ dawr = brk->address;
+
+ dawrx = (brk->type & (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE)) \
+ << (63 - 58); //* read/write bits */
+ dawrx |= ((brk->type & (HW_BRK_TYPE_TRANSLATE)) >> 2) \
+ << (63 - 59); //* translate */
+ dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL)) \
+ >> 3; //* PRIM bits */
+ /* dawr length is stored in field MDR bits 48:53. Matches range in
+ doublewords (64 bits) baised by -1 eg. 0b000000=1DW and
+ 0b111111=64DW.
+ brk->len is in bytes.
+ This aligns up to double word size, shifts and does the bias.
+ */
+ mrd = ((brk->len + 7) >> 3) - 1;
+ dawrx |= (mrd & 0x3f) << (63 - 53);
+
+ if (ppc_md.set_dawr)
+ return ppc_md.set_dawr(dawr, dawrx);
+ mtspr(SPRN_DAWR, dawr);
+ mtspr(SPRN_DAWRX, dawrx);
+ return 0;
+}
+
+void __set_breakpoint(struct arch_hw_breakpoint *brk)
+{
+ __get_cpu_var(current_brk) = *brk;
+
+ if (cpu_has_feature(CPU_FTR_DAWR))
+ set_dawr(brk);
+ else
+ set_dabr(brk);
+}
+
+void set_breakpoint(struct arch_hw_breakpoint *brk)
+{
+ preempt_disable();
+ __set_breakpoint(brk);
+ preempt_enable();
+}
+
#ifdef CONFIG_PPC64
DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
-static DEFINE_PER_CPU(unsigned long, current_dabr);
#endif
+static inline bool hw_brk_match(struct arch_hw_breakpoint *a,
+ struct arch_hw_breakpoint *b)
+{
+ if (a->address != b->address)
+ return false;
+ if (a->type != b->type)
+ return false;
+ if (a->len != b->len)
+ return false;
+ return true;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static void tm_reclaim_thread(struct thread_struct *thr,
+ struct thread_info *ti, uint8_t cause)
+{
+ unsigned long msr_diff = 0;
+
+ /*
+ * If FP/VSX registers have been already saved to the
+ * thread_struct, move them to the transact_fp array.
+ * We clear the TIF_RESTORE_TM bit since after the reclaim
+ * the thread will no longer be transactional.
+ */
+ if (test_ti_thread_flag(ti, TIF_RESTORE_TM)) {
+ msr_diff = thr->tm_orig_msr & ~thr->regs->msr;
+ if (msr_diff & MSR_FP)
+ memcpy(&thr->transact_fp, &thr->fp_state,
+ sizeof(struct thread_fp_state));
+ if (msr_diff & MSR_VEC)
+ memcpy(&thr->transact_vr, &thr->vr_state,
+ sizeof(struct thread_vr_state));
+ clear_ti_thread_flag(ti, TIF_RESTORE_TM);
+ msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1;
+ }
+
+ tm_reclaim(thr, thr->regs->msr, cause);
+
+ /* Having done the reclaim, we now have the checkpointed
+ * FP/VSX values in the registers. These might be valid
+ * even if we have previously called enable_kernel_fp() or
+ * flush_fp_to_thread(), so update thr->regs->msr to
+ * indicate their current validity.
+ */
+ thr->regs->msr |= msr_diff;
+}
+
+void tm_reclaim_current(uint8_t cause)
+{
+ tm_enable();
+ tm_reclaim_thread(&current->thread, current_thread_info(), cause);
+}
+
+static inline void tm_reclaim_task(struct task_struct *tsk)
+{
+ /* We have to work out if we're switching from/to a task that's in the
+ * middle of a transaction.
+ *
+ * In switching we need to maintain a 2nd register state as
+ * oldtask->thread.ckpt_regs. We tm_reclaim(oldproc); this saves the
+ * checkpointed (tbegin) state in ckpt_regs and saves the transactional
+ * (current) FPRs into oldtask->thread.transact_fpr[].
+ *
+ * We also context switch (save) TFHAR/TEXASR/TFIAR in here.
+ */
+ struct thread_struct *thr = &tsk->thread;
+
+ if (!thr->regs)
+ return;
+
+ if (!MSR_TM_ACTIVE(thr->regs->msr))
+ goto out_and_saveregs;
+
+ /* Stash the original thread MSR, as giveup_fpu et al will
+ * modify it. We hold onto it to see whether the task used
+ * FP & vector regs. If the TIF_RESTORE_TM flag is set,
+ * tm_orig_msr is already set.
+ */
+ if (!test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_TM))
+ thr->tm_orig_msr = thr->regs->msr;
+
+ TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, "
+ "ccr=%lx, msr=%lx, trap=%lx)\n",
+ tsk->pid, thr->regs->nip,
+ thr->regs->ccr, thr->regs->msr,
+ thr->regs->trap);
+
+ tm_reclaim_thread(thr, task_thread_info(tsk), TM_CAUSE_RESCHED);
+
+ TM_DEBUG("--- tm_reclaim on pid %d complete\n",
+ tsk->pid);
+
+out_and_saveregs:
+ /* Always save the regs here, even if a transaction's not active.
+ * This context-switches a thread's TM info SPRs. We do it here to
+ * be consistent with the restore path (in recheckpoint) which
+ * cannot happen later in _switch().
+ */
+ tm_save_sprs(thr);
+}
+
+extern void __tm_recheckpoint(struct thread_struct *thread,
+ unsigned long orig_msr);
+
+void tm_recheckpoint(struct thread_struct *thread,
+ unsigned long orig_msr)
+{
+ unsigned long flags;
+
+ /* We really can't be interrupted here as the TEXASR registers can't
+ * change and later in the trecheckpoint code, we have a userspace R1.
+ * So let's hard disable over this region.
+ */
+ local_irq_save(flags);
+ hard_irq_disable();
+
+ /* The TM SPRs are restored here, so that TEXASR.FS can be set
+ * before the trecheckpoint and no explosion occurs.
+ */
+ tm_restore_sprs(thread);
+
+ __tm_recheckpoint(thread, orig_msr);
+
+ local_irq_restore(flags);
+}
+
+static inline void tm_recheckpoint_new_task(struct task_struct *new)
+{
+ unsigned long msr;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return;
+
+ /* Recheckpoint the registers of the thread we're about to switch to.
+ *
+ * If the task was using FP, we non-lazily reload both the original and
+ * the speculative FP register states. This is because the kernel
+ * doesn't see if/when a TM rollback occurs, so if we take an FP
+ * unavoidable later, we are unable to determine which set of FP regs
+ * need to be restored.
+ */
+ if (!new->thread.regs)
+ return;
+
+ if (!MSR_TM_ACTIVE(new->thread.regs->msr)){
+ tm_restore_sprs(&new->thread);
+ return;
+ }
+ msr = new->thread.tm_orig_msr;
+ /* Recheckpoint to restore original checkpointed register state. */
+ TM_DEBUG("*** tm_recheckpoint of pid %d "
+ "(new->msr 0x%lx, new->origmsr 0x%lx)\n",
+ new->pid, new->thread.regs->msr, msr);
+
+ /* This loads the checkpointed FP/VEC state, if used */
+ tm_recheckpoint(&new->thread, msr);
+
+ /* This loads the speculative FP/VEC state, if used */
+ if (msr & MSR_FP) {
+ do_load_up_transact_fpu(&new->thread);
+ new->thread.regs->msr |=
+ (MSR_FP | new->thread.fpexc_mode);
+ }
+#ifdef CONFIG_ALTIVEC
+ if (msr & MSR_VEC) {
+ do_load_up_transact_altivec(&new->thread);
+ new->thread.regs->msr |= MSR_VEC;
+ }
+#endif
+ /* We may as well turn on VSX too since all the state is restored now */
+ if (msr & MSR_VSX)
+ new->thread.regs->msr |= MSR_VSX;
+
+ TM_DEBUG("*** tm_recheckpoint of pid %d complete "
+ "(kernel msr 0x%lx)\n",
+ new->pid, mfmsr());
+}
+
+static inline void __switch_to_tm(struct task_struct *prev)
+{
+ if (cpu_has_feature(CPU_FTR_TM)) {
+ tm_enable();
+ tm_reclaim_task(prev);
+ }
+}
+
+/*
+ * This is called if we are on the way out to userspace and the
+ * TIF_RESTORE_TM flag is set. It checks if we need to reload
+ * FP and/or vector state and does so if necessary.
+ * If userspace is inside a transaction (whether active or
+ * suspended) and FP/VMX/VSX instructions have ever been enabled
+ * inside that transaction, then we have to keep them enabled
+ * and keep the FP/VMX/VSX state loaded while ever the transaction
+ * continues. The reason is that if we didn't, and subsequently
+ * got a FP/VMX/VSX unavailable interrupt inside a transaction,
+ * we don't know whether it's the same transaction, and thus we
+ * don't know which of the checkpointed state and the transactional
+ * state to use.
+ */
+void restore_tm_state(struct pt_regs *regs)
+{
+ unsigned long msr_diff;
+
+ clear_thread_flag(TIF_RESTORE_TM);
+ if (!MSR_TM_ACTIVE(regs->msr))
+ return;
+
+ msr_diff = current->thread.tm_orig_msr & ~regs->msr;
+ msr_diff &= MSR_FP | MSR_VEC | MSR_VSX;
+ if (msr_diff & MSR_FP) {
+ fp_enable();
+ load_fp_state(&current->thread.fp_state);
+ regs->msr |= current->thread.fpexc_mode;
+ }
+ if (msr_diff & MSR_VEC) {
+ vec_enable();
+ load_vr_state(&current->thread.vr_state);
+ }
+ regs->msr |= msr_diff;
+}
+
+#else
+#define tm_recheckpoint_new_task(new)
+#define __switch_to_tm(prev)
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *new)
{
struct thread_struct *new_thread, *old_thread;
- unsigned long flags;
struct task_struct *last;
+#ifdef CONFIG_PPC_BOOK3S_64
+ struct ppc64_tlb_batch *batch;
+#endif
+
+ WARN_ON(!irqs_disabled());
+
+ /* Back up the TAR and DSCR across context switches.
+ * Note that the TAR is not available for use in the kernel. (To
+ * provide this, the TAR should be backed up/restored on exception
+ * entry/exit instead, and be in pt_regs. FIXME, this should be in
+ * pt_regs anyway (for debug).)
+ * Save the TAR and DSCR here before we do treclaim/trecheckpoint as
+ * these will change them.
+ */
+ save_early_sprs(&prev->thread);
+
+ __switch_to_tm(prev);
#ifdef CONFIG_SMP
/* avoid complexity of lazy save/restore of fpu
@@ -270,6 +794,11 @@ struct task_struct *__switch_to(struct task_struct *prev,
if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC))
giveup_altivec(prev);
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+ if (prev->thread.regs && (prev->thread.regs->msr & MSR_VSX))
+ /* VMX and FPU registers are already save here */
+ __giveup_vsx(prev);
+#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
/*
* If the previous thread used spe in the last quantum
@@ -290,6 +819,10 @@ struct task_struct *__switch_to(struct task_struct *prev,
if (new->thread.regs && last_task_used_altivec == new)
new->thread.regs->msr |= MSR_VEC;
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+ if (new->thread.regs && last_task_used_vsx == new)
+ new->thread.regs->msr |= MSR_VSX;
+#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
/* Avoid the trap. On smp this this never happens since
* we don't set last_task_used_spe
@@ -300,15 +833,20 @@ struct task_struct *__switch_to(struct task_struct *prev,
#endif /* CONFIG_SMP */
-#ifdef CONFIG_PPC64 /* for now */
- if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) {
- set_dabr(new->thread.dabr);
- __get_cpu_var(current_dabr) = new->thread.dabr;
- }
-
- flush_tlb_pending();
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ switch_booke_debug_regs(&new->thread.debug);
+#else
+/*
+ * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would
+ * schedule DABR
+ */
+#ifndef CONFIG_HAVE_HW_BREAKPOINT
+ if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk)))
+ __set_breakpoint(&new->thread.hw_brk);
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#endif
+
new_thread = &new->thread;
old_thread = &current->thread;
@@ -324,30 +862,42 @@ struct task_struct *__switch_to(struct task_struct *prev,
old_thread->accum_tb += (current_tb - start_tb);
new_thread->start_tb = current_tb;
}
-#endif
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ batch = &__get_cpu_var(ppc64_tlb_batch);
+ if (batch->active) {
+ current_thread_info()->local_flags |= _TLF_LAZY_MMU;
+ if (batch->index)
+ __flush_tlb_pending(batch);
+ batch->active = 0;
+ }
+#endif /* CONFIG_PPC_BOOK3S_64 */
- local_irq_save(flags);
+ /*
+ * We can't take a PMU exception inside _switch() since there is a
+ * window where the kernel stack SLB and the kernel stack are out
+ * of sync. Hard disable here.
+ */
+ hard_irq_disable();
- account_system_vtime(current);
- account_process_vtime(current);
- calculate_steal_time();
+ tm_recheckpoint_new_task(new);
last = _switch(old_thread, new_thread);
- local_irq_restore(flags);
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
+ current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
+ batch = &__get_cpu_var(ppc64_tlb_batch);
+ batch->active = 1;
+ }
+#endif /* CONFIG_PPC_BOOK3S_64 */
return last;
}
static int instructions_to_print = 16;
-#ifdef CONFIG_PPC64
-#define BAD_PC(pc) ((REGION_ID(pc) != KERNEL_REGION_ID) && \
- (REGION_ID(pc) != VMALLOC_REGION_ID))
-#else
-#define BAD_PC(pc) ((pc) < KERNELBASE)
-#endif
-
static void show_instructions(struct pt_regs *regs)
{
int i;
@@ -362,17 +912,26 @@ static void show_instructions(struct pt_regs *regs)
if (!(i % 8))
printk("\n");
+#if !defined(CONFIG_BOOKE)
+ /* If executing with the IMMU off, adjust pc rather
+ * than print XXXXXXXX.
+ */
+ if (!(regs->msr & MSR_IR))
+ pc = (unsigned long)phys_to_virt(pc);
+#endif
+
/* We use __get_user here *only* to avoid an OOPS on a
* bad address because the pc *should* only be a
* kernel address.
*/
- if (BAD_PC(pc) || __get_user(instr, (unsigned int __user *)pc)) {
- printk("XXXXXXXX ");
+ if (!__kernel_text_address(pc) ||
+ __get_user(instr, (unsigned int __user *)pc)) {
+ printk(KERN_CONT "XXXXXXXX ");
} else {
if (regs->nip == pc)
- printk("<%08x> ", instr);
+ printk(KERN_CONT "<%08x> ", instr);
else
- printk("%08x ", instr);
+ printk(KERN_CONT "%08x ", instr);
}
pc += sizeof(int);
@@ -385,12 +944,32 @@ static struct regbit {
unsigned long bit;
const char *name;
} msr_bits[] = {
+#if defined(CONFIG_PPC64) && !defined(CONFIG_BOOKE)
+ {MSR_SF, "SF"},
+ {MSR_HV, "HV"},
+#endif
+ {MSR_VEC, "VEC"},
+ {MSR_VSX, "VSX"},
+#ifdef CONFIG_BOOKE
+ {MSR_CE, "CE"},
+#endif
{MSR_EE, "EE"},
{MSR_PR, "PR"},
{MSR_FP, "FP"},
{MSR_ME, "ME"},
+#ifdef CONFIG_BOOKE
+ {MSR_DE, "DE"},
+#else
+ {MSR_SE, "SE"},
+ {MSR_BE, "BE"},
+#endif
{MSR_IR, "IR"},
{MSR_DR, "DR"},
+ {MSR_PMM, "PMM"},
+#ifndef CONFIG_BOOKE
+ {MSR_RI, "RI"},
+ {MSR_LE, "LE"},
+#endif
{0, NULL}
};
@@ -408,11 +987,11 @@ static void printbits(unsigned long val, struct regbit *bits)
}
#ifdef CONFIG_PPC64
-#define REG "%016lX"
+#define REG "%016lx"
#define REGS_PER_LINE 4
#define LAST_VOLATILE 13
#else
-#define REG "%08lX"
+#define REG "%08lx"
#define REGS_PER_LINE 8
#define LAST_VOLATILE 12
#endif
@@ -421,26 +1000,35 @@ void show_regs(struct pt_regs * regs)
{
int i, trap;
+ show_regs_print_info(KERN_DEFAULT);
+
printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
regs->nip, regs->link, regs->ctr);
printk("REGS: %p TRAP: %04lx %s (%s)\n",
- regs, regs->trap, print_tainted(), system_utsname.release);
+ regs, regs->trap, print_tainted(), init_utsname()->release);
printk("MSR: "REG" ", regs->msr);
printbits(regs->msr, msr_bits);
- printk(" CR: %08lX XER: %08lX\n", regs->ccr, regs->xer);
+ printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
trap = TRAP(regs);
- if (trap == 0x300 || trap == 0x600)
- printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr);
- printk("TASK = %p[%d] '%s' THREAD: %p",
- current, current->pid, current->comm, task_thread_info(current));
-
-#ifdef CONFIG_SMP
- printk(" CPU: %d", smp_processor_id());
-#endif /* CONFIG_SMP */
+ if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
+ printk("CFAR: "REG" ", regs->orig_gpr3);
+ if (trap == 0x200 || trap == 0x300 || trap == 0x600)
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+ printk("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
+#else
+ printk("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
+#endif
+#ifdef CONFIG_PPC64
+ printk("SOFTE: %ld ", regs->softe);
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (MSR_TM_ACTIVE(regs->msr))
+ printk("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);
+#endif
for (i = 0; i < 32; i++) {
if ((i % REGS_PER_LINE) == 0)
- printk("\n" KERN_INFO "GPR%02d: ", i);
+ printk("\nGPR%02d: ", i);
printk(REG " ", regs->gpr[i]);
if (i == LAST_VOLATILE && !FULL_REGS(regs))
break;
@@ -451,10 +1039,8 @@ void show_regs(struct pt_regs * regs)
* Lookup NIP late so we have the best change of getting the
* above info out without failing
*/
- printk("NIP ["REG"] ", regs->nip);
- print_symbol("%s\n", regs->nip);
- printk("LR ["REG"] ", regs->link);
- print_symbol("%s\n", regs->link);
+ printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
+ printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
#endif
show_stack(current, (unsigned long *) regs->gpr[1]);
if (!user_mode(regs))
@@ -468,21 +1054,13 @@ void exit_thread(void)
void flush_thread(void)
{
-#ifdef CONFIG_PPC64
- struct thread_info *t = current_thread_info();
-
- if (t->flags & _TIF_ABI_PENDING)
- t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT);
-#endif
-
discard_lazy_cpu_state();
-#ifdef CONFIG_PPC64 /* for now */
- if (current->thread.dabr) {
- current->thread.dabr = 0;
- set_dabr(0);
- }
-#endif
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ flush_ptrace_hw_breakpoint(current);
+#else /* CONFIG_HAVE_HW_BREAKPOINT */
+ set_debug_reg_defaults(&current->thread);
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
}
void
@@ -491,54 +1069,83 @@ release_thread(struct task_struct *t)
}
/*
- * This gets called before we allocate a new thread and copy
- * the current task into it.
+ * this gets called so that we can store coprocessor state into memory and
+ * copy the current task into the new thread.
*/
-void prepare_to_copy(struct task_struct *tsk)
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
- flush_fp_to_thread(current);
- flush_altivec_to_thread(current);
- flush_spe_to_thread(current);
+ flush_fp_to_thread(src);
+ flush_altivec_to_thread(src);
+ flush_vsx_to_thread(src);
+ flush_spe_to_thread(src);
+ /*
+ * Flush TM state out so we can copy it. __switch_to_tm() does this
+ * flush but it removes the checkpointed state from the current CPU and
+ * transitions the CPU out of TM mode. Hence we need to call
+ * tm_recheckpoint_new_task() (on the same task) to restore the
+ * checkpointed state back and the TM mode.
+ */
+ __switch_to_tm(src);
+ tm_recheckpoint_new_task(src);
+
+ *dst = *src;
+
+ clear_task_ebb(dst);
+
+ return 0;
}
/*
* Copy a thread..
*/
-int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
- unsigned long unused, struct task_struct *p,
- struct pt_regs *regs)
+extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */
+
+int copy_thread(unsigned long clone_flags, unsigned long usp,
+ unsigned long arg, struct task_struct *p)
{
struct pt_regs *childregs, *kregs;
extern void ret_from_fork(void);
+ extern void ret_from_kernel_thread(void);
+ void (*f)(void);
unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
- CHECK_FULL_REGS(regs);
/* Copy registers */
sp -= sizeof(struct pt_regs);
childregs = (struct pt_regs *) sp;
- *childregs = *regs;
- if ((childregs->msr & MSR_PR) == 0) {
- /* for kernel thread, set `current' and stackptr in new task */
+ if (unlikely(p->flags & PF_KTHREAD)) {
+ struct thread_info *ti = (void *)task_stack_page(p);
+ memset(childregs, 0, sizeof(struct pt_regs));
childregs->gpr[1] = sp + sizeof(struct pt_regs);
-#ifdef CONFIG_PPC32
- childregs->gpr[2] = (unsigned long) p;
-#else
+ /* function */
+ if (usp)
+ childregs->gpr[14] = ppc_function_entry((void *)usp);
+#ifdef CONFIG_PPC64
clear_tsk_thread_flag(p, TIF_32BIT);
+ childregs->softe = 1;
#endif
+ childregs->gpr[15] = arg;
p->thread.regs = NULL; /* no user register state */
+ ti->flags |= _TIF_RESTOREALL;
+ f = ret_from_kernel_thread;
} else {
- childregs->gpr[1] = usp;
+ struct pt_regs *regs = current_pt_regs();
+ CHECK_FULL_REGS(regs);
+ *childregs = *regs;
+ if (usp)
+ childregs->gpr[1] = usp;
p->thread.regs = childregs;
+ childregs->gpr[3] = 0; /* Result from fork() */
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_PPC64
- if (!test_thread_flag(TIF_32BIT))
+ if (!is_32bit_task())
childregs->gpr[13] = childregs->gpr[6];
else
#endif
childregs->gpr[2] = childregs->gpr[6];
}
+
+ f = ret_from_fork;
}
- childregs->gpr[3] = 0; /* Result from fork() */
sp -= STACK_FRAME_OVERHEAD;
/*
@@ -549,33 +1156,48 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
* do some house keeping and then return from the fork or clone
* system call, using the stack frame created above.
*/
+ ((unsigned long *)sp)[0] = 0;
sp -= sizeof(struct pt_regs);
kregs = (struct pt_regs *) sp;
sp -= STACK_FRAME_OVERHEAD;
p->thread.ksp = sp;
+#ifdef CONFIG_PPC32
+ p->thread.ksp_limit = (unsigned long)task_stack_page(p) +
+ _ALIGN_UP(sizeof(struct thread_info), 16);
+#endif
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ p->thread.ptrace_bps[0] = NULL;
+#endif
-#ifdef CONFIG_PPC64
- if (cpu_has_feature(CPU_FTR_SLB)) {
- unsigned long sp_vsid = get_kernel_vsid(sp);
+ p->thread.fp_save_area = NULL;
+#ifdef CONFIG_ALTIVEC
+ p->thread.vr_save_area = NULL;
+#endif
+
+#ifdef CONFIG_PPC_STD_MMU_64
+ if (mmu_has_feature(MMU_FTR_SLB)) {
+ unsigned long sp_vsid;
unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
- sp_vsid <<= SLB_VSID_SHIFT;
+ if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+ sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T)
+ << SLB_VSID_SHIFT_1T;
+ else
+ sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_256M)
+ << SLB_VSID_SHIFT;
sp_vsid |= SLB_VSID_KERNEL | llp;
p->thread.ksp_vsid = sp_vsid;
}
-
- /*
- * The PPC64 ABI makes use of a TOC to contain function
- * pointers. The function (ret_from_except) is actually a pointer
- * to the TOC entry. The first entry is a pointer to the actual
- * function.
- */
- kregs->nip = *((unsigned long *)ret_from_fork);
-#else
- kregs->nip = (unsigned long)ret_from_fork;
- p->thread.last_syscall = -1;
+#endif /* CONFIG_PPC_STD_MMU_64 */
+#ifdef CONFIG_PPC64
+ if (cpu_has_feature(CPU_FTR_DSCR)) {
+ p->thread.dscr_inherit = current->thread.dscr_inherit;
+ p->thread.dscr = current->thread.dscr;
+ }
+ if (cpu_has_feature(CPU_FTR_HAS_PPR))
+ p->thread.ppr = INIT_PPR;
#endif
-
+ kregs->nip = ppc_function_entry(f);
return 0;
}
@@ -588,8 +1210,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */
#endif
- set_fs(USER_DS);
-
/*
* If we exec out of a kernel thread then thread.regs will not be
* set. Do it now.
@@ -606,31 +1226,58 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
regs->ccr = 0;
regs->gpr[1] = sp;
+ /*
+ * We have just cleared all the nonvolatile GPRs, so make
+ * FULL_REGS(regs) return true. This is necessary to allow
+ * ptrace to examine the thread immediately after exec.
+ */
+ regs->trap &= ~1UL;
+
#ifdef CONFIG_PPC32
regs->mq = 0;
regs->nip = start;
regs->msr = MSR_USER;
#else
- if (!test_thread_flag(TIF_32BIT)) {
- unsigned long entry, toc;
+ if (!is_32bit_task()) {
+ unsigned long entry;
- /* start is a relocated pointer to the function descriptor for
- * the elf _start routine. The first entry in the function
- * descriptor is the entry address of _start and the second
- * entry is the TOC value we need to use.
- */
- __get_user(entry, (unsigned long __user *)start);
- __get_user(toc, (unsigned long __user *)start+1);
+ if (is_elf2_task()) {
+ /* Look ma, no function descriptors! */
+ entry = start;
- /* Check whether the e_entry function descriptor entries
- * need to be relocated before we can use them.
- */
- if (load_addr != 0) {
- entry += load_addr;
- toc += load_addr;
+ /*
+ * Ulrich says:
+ * The latest iteration of the ABI requires that when
+ * calling a function (at its global entry point),
+ * the caller must ensure r12 holds the entry point
+ * address (so that the function can quickly
+ * establish addressability).
+ */
+ regs->gpr[12] = start;
+ /* Make sure that's restored on entry to userspace. */
+ set_thread_flag(TIF_RESTOREALL);
+ } else {
+ unsigned long toc;
+
+ /* start is a relocated pointer to the function
+ * descriptor for the elf _start routine. The first
+ * entry in the function descriptor is the entry
+ * address of _start and the second entry is the TOC
+ * value we need to use.
+ */
+ __get_user(entry, (unsigned long __user *)start);
+ __get_user(toc, (unsigned long __user *)start+1);
+
+ /* Check whether the e_entry function descriptor entries
+ * need to be relocated before we can use them.
+ */
+ if (load_addr != 0) {
+ entry += load_addr;
+ toc += load_addr;
+ }
+ regs->gpr[2] = toc;
}
regs->nip = entry;
- regs->gpr[2] = toc;
regs->msr = MSR_USER64;
} else {
regs->nip = start;
@@ -638,14 +1285,16 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
regs->msr = MSR_USER32;
}
#endif
-
discard_lazy_cpu_state();
- memset(current->thread.fpr, 0, sizeof(current->thread.fpr));
- current->thread.fpscr.val = 0;
+#ifdef CONFIG_VSX
+ current->thread.used_vsr = 0;
+#endif
+ memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
+ current->thread.fp_save_area = NULL;
#ifdef CONFIG_ALTIVEC
- memset(current->thread.vr, 0, sizeof(current->thread.vr));
- memset(&current->thread.vscr, 0, sizeof(current->thread.vscr));
- current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */
+ memset(&current->thread.vr_state, 0, sizeof(current->thread.vr_state));
+ current->thread.vr_state.vscr.u[3] = 0x00010000; /* Java mode disabled */
+ current->thread.vr_save_area = NULL;
current->thread.vrsave = 0;
current->thread.used_vr = 0;
#endif /* CONFIG_ALTIVEC */
@@ -655,6 +1304,13 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
current->thread.spefscr = 0;
current->thread.used_spe = 0;
#endif /* CONFIG_SPE */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (cpu_has_feature(CPU_FTR_TM))
+ regs->msr |= MSR_TM;
+ current->thread.tm_tfhar = 0;
+ current->thread.tm_texasr = 0;
+ current->thread.tm_tfiar = 0;
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
}
#define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \
@@ -670,9 +1326,26 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
* mode (asyn, precise, disabled) for 'Classic' FP. */
if (val & PR_FP_EXC_SW_ENABLE) {
#ifdef CONFIG_SPE
- tsk->thread.fpexc_mode = val &
- (PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT);
- return 0;
+ if (cpu_has_feature(CPU_FTR_SPE)) {
+ /*
+ * When the sticky exception bits are set
+ * directly by userspace, it must call prctl
+ * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE
+ * in the existing prctl settings) or
+ * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in
+ * the bits being set). <fenv.h> functions
+ * saving and restoring the whole
+ * floating-point environment need to do so
+ * anyway to restore the prctl settings from
+ * the saved environment.
+ */
+ tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
+ tsk->thread.fpexc_mode = val &
+ (PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT);
+ return 0;
+ } else {
+ return -EINVAL;
+ }
#else
return -EINVAL;
#endif
@@ -698,7 +1371,23 @@ int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
#ifdef CONFIG_SPE
- val = tsk->thread.fpexc_mode;
+ if (cpu_has_feature(CPU_FTR_SPE)) {
+ /*
+ * When the sticky exception bits are set
+ * directly by userspace, it must call prctl
+ * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE
+ * in the existing prctl settings) or
+ * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in
+ * the bits being set). <fenv.h> functions
+ * saving and restoring the whole
+ * floating-point environment need to do so
+ * anyway to restore the prctl settings from
+ * the saved environment.
+ */
+ tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
+ val = tsk->thread.fpexc_mode;
+ } else
+ return -EINVAL;
#else
return -EINVAL;
#endif
@@ -762,66 +1451,28 @@ int get_unalign_ctl(struct task_struct *tsk, unsigned long adr)
return put_user(tsk->thread.align_ctl, (unsigned int __user *)adr);
}
-#define TRUNC_PTR(x) ((typeof(x))(((unsigned long)(x)) & 0xffffffff))
-
-int sys_clone(unsigned long clone_flags, unsigned long usp,
- int __user *parent_tidp, void __user *child_threadptr,
- int __user *child_tidp, int p6,
- struct pt_regs *regs)
+static inline int valid_irq_stack(unsigned long sp, struct task_struct *p,
+ unsigned long nbytes)
{
- CHECK_FULL_REGS(regs);
- if (usp == 0)
- usp = regs->gpr[1]; /* stack pointer for child */
-#ifdef CONFIG_PPC64
- if (test_thread_flag(TIF_32BIT)) {
- parent_tidp = TRUNC_PTR(parent_tidp);
- child_tidp = TRUNC_PTR(child_tidp);
- }
-#endif
- return do_fork(clone_flags, usp, regs, 0, parent_tidp, child_tidp);
-}
-
-int sys_fork(unsigned long p1, unsigned long p2, unsigned long p3,
- unsigned long p4, unsigned long p5, unsigned long p6,
- struct pt_regs *regs)
-{
- CHECK_FULL_REGS(regs);
- return do_fork(SIGCHLD, regs->gpr[1], regs, 0, NULL, NULL);
-}
+ unsigned long stack_page;
+ unsigned long cpu = task_cpu(p);
-int sys_vfork(unsigned long p1, unsigned long p2, unsigned long p3,
- unsigned long p4, unsigned long p5, unsigned long p6,
- struct pt_regs *regs)
-{
- CHECK_FULL_REGS(regs);
- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->gpr[1],
- regs, 0, NULL, NULL);
-}
-
-int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2,
- unsigned long a3, unsigned long a4, unsigned long a5,
- struct pt_regs *regs)
-{
- int error;
- char *filename;
-
- filename = getname((char __user *) a0);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- goto out;
- flush_fp_to_thread(current);
- flush_altivec_to_thread(current);
- flush_spe_to_thread(current);
- error = do_execve(filename, (char __user * __user *) a1,
- (char __user * __user *) a2, regs);
- if (error == 0) {
- task_lock(current);
- current->ptrace &= ~PT_DTRACE;
- task_unlock(current);
+ /*
+ * Avoid crashing if the stack has overflowed and corrupted
+ * task_cpu(p), which is in the thread_info struct.
+ */
+ if (cpu < NR_CPUS && cpu_possible(cpu)) {
+ stack_page = (unsigned long) hardirq_ctx[cpu];
+ if (sp >= stack_page + sizeof(struct thread_struct)
+ && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
+ stack_page = (unsigned long) softirq_ctx[cpu];
+ if (sp >= stack_page + sizeof(struct thread_struct)
+ && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
}
- putname(filename);
-out:
- return error;
+ return 0;
}
int validate_sp(unsigned long sp, struct task_struct *p,
@@ -833,35 +1484,9 @@ int validate_sp(unsigned long sp, struct task_struct *p,
&& sp <= stack_page + THREAD_SIZE - nbytes)
return 1;
-#ifdef CONFIG_IRQSTACKS
- stack_page = (unsigned long) hardirq_ctx[task_cpu(p)];
- if (sp >= stack_page + sizeof(struct thread_struct)
- && sp <= stack_page + THREAD_SIZE - nbytes)
- return 1;
-
- stack_page = (unsigned long) softirq_ctx[task_cpu(p)];
- if (sp >= stack_page + sizeof(struct thread_struct)
- && sp <= stack_page + THREAD_SIZE - nbytes)
- return 1;
-#endif
-
- return 0;
+ return valid_irq_stack(sp, p, nbytes);
}
-#ifdef CONFIG_PPC64
-#define MIN_STACK_FRAME 112 /* same as STACK_FRAME_OVERHEAD, in fact */
-#define FRAME_LR_SAVE 2
-#define INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD + 288)
-#define REGS_MARKER 0x7265677368657265ul
-#define FRAME_MARKER 12
-#else
-#define MIN_STACK_FRAME 16
-#define FRAME_LR_SAVE 1
-#define INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
-#define REGS_MARKER 0x72656773ul
-#define FRAME_MARKER 2
-#endif
-
EXPORT_SYMBOL(validate_sp);
unsigned long get_wchan(struct task_struct *p)
@@ -873,15 +1498,15 @@ unsigned long get_wchan(struct task_struct *p)
return 0;
sp = p->thread.ksp;
- if (!validate_sp(sp, p, MIN_STACK_FRAME))
+ if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
return 0;
do {
sp = *(unsigned long *)sp;
- if (!validate_sp(sp, p, MIN_STACK_FRAME))
+ if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
return 0;
if (count > 0) {
- ip = ((unsigned long *)sp)[FRAME_LR_SAVE];
+ ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE];
if (!in_sched_functions(ip))
return ip;
}
@@ -889,13 +1514,25 @@ unsigned long get_wchan(struct task_struct *p)
return 0;
}
-static int kstack_depth_to_print = 64;
+static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH;
void show_stack(struct task_struct *tsk, unsigned long *stack)
{
unsigned long sp, ip, lr, newsp;
int count = 0;
int firstframe = 1;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ int curr_frame = current->curr_ret_stack;
+ extern void return_to_handler(void);
+ unsigned long rth = (unsigned long)return_to_handler;
+ unsigned long mrth = -1;
+#ifdef CONFIG_PPC64
+ extern void mod_return_to_handler(void);
+ rth = *(unsigned long *)rth;
+ mrth = (unsigned long)mod_return_to_handler;
+ mrth = *(unsigned long *)mrth;
+#endif
+#endif
sp = (unsigned long) stack;
if (tsk == NULL)
@@ -910,15 +1547,21 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
lr = 0;
printk("Call Trace:\n");
do {
- if (!validate_sp(sp, tsk, MIN_STACK_FRAME))
+ if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
return;
stack = (unsigned long *) sp;
newsp = stack[0];
- ip = stack[FRAME_LR_SAVE];
+ ip = stack[STACK_FRAME_LR_SAVE];
if (!firstframe || ip != lr) {
- printk("["REG"] ["REG"] ", sp, ip);
- print_symbol("%s", ip);
+ printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ if ((ip == rth || ip == mrth) && curr_frame >= 0) {
+ printk(" (%pS)",
+ (void *)current->ret_stack[curr_frame].ret);
+ curr_frame--;
+ }
+#endif
if (firstframe)
printk(" (unreliable)");
printk("\n");
@@ -929,14 +1572,13 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
* See if this is an exception frame.
* We look for the "regshere" marker in the current frame.
*/
- if (validate_sp(sp, tsk, INT_FRAME_SIZE)
- && stack[FRAME_MARKER] == REGS_MARKER) {
+ if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE)
+ && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
struct pt_regs *regs = (struct pt_regs *)
(sp + STACK_FRAME_OVERHEAD);
- printk("--- Exception: %lx", regs->trap);
- print_symbol(" at %s\n", regs->nip);
lr = regs->link;
- print_symbol(" LR = %s\n", lr);
+ printk("--- Exception: %lx at %pS\n LR = %pS\n",
+ regs->trap, (void *)regs->nip, (void *)lr);
firstframe = 1;
}
@@ -944,40 +1586,85 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
} while (count++ < kstack_depth_to_print);
}
-void dump_stack(void)
+#ifdef CONFIG_PPC64
+/* Called with hard IRQs off */
+void notrace __ppc64_runlatch_on(void)
{
- show_stack(current, NULL);
+ struct thread_info *ti = current_thread_info();
+ unsigned long ctrl;
+
+ ctrl = mfspr(SPRN_CTRLF);
+ ctrl |= CTRL_RUNLATCH;
+ mtspr(SPRN_CTRLT, ctrl);
+
+ ti->local_flags |= _TLF_RUNLATCH;
}
-EXPORT_SYMBOL(dump_stack);
-#ifdef CONFIG_PPC64
-void ppc64_runlatch_on(void)
+/* Called with hard IRQs off */
+void notrace __ppc64_runlatch_off(void)
{
+ struct thread_info *ti = current_thread_info();
unsigned long ctrl;
- if (cpu_has_feature(CPU_FTR_CTRL) && !test_thread_flag(TIF_RUNLATCH)) {
- HMT_medium();
+ ti->local_flags &= ~_TLF_RUNLATCH;
- ctrl = mfspr(SPRN_CTRLF);
- ctrl |= CTRL_RUNLATCH;
- mtspr(SPRN_CTRLT, ctrl);
-
- set_thread_flag(TIF_RUNLATCH);
- }
+ ctrl = mfspr(SPRN_CTRLF);
+ ctrl &= ~CTRL_RUNLATCH;
+ mtspr(SPRN_CTRLT, ctrl);
}
+#endif /* CONFIG_PPC64 */
-void ppc64_runlatch_off(void)
+unsigned long arch_align_stack(unsigned long sp)
{
- unsigned long ctrl;
+ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+ sp -= get_random_int() & ~PAGE_MASK;
+ return sp & ~0xf;
+}
- if (cpu_has_feature(CPU_FTR_CTRL) && test_thread_flag(TIF_RUNLATCH)) {
- HMT_medium();
+static inline unsigned long brk_rnd(void)
+{
+ unsigned long rnd = 0;
- clear_thread_flag(TIF_RUNLATCH);
+ /* 8MB for 32bit, 1GB for 64bit */
+ if (is_32bit_task())
+ rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT)));
+ else
+ rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT)));
- ctrl = mfspr(SPRN_CTRLF);
- ctrl &= ~CTRL_RUNLATCH;
- mtspr(SPRN_CTRLT, ctrl);
- }
+ return rnd << PAGE_SHIFT;
}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+ unsigned long base = mm->brk;
+ unsigned long ret;
+
+#ifdef CONFIG_PPC_STD_MMU_64
+ /*
+ * If we are using 1TB segments and we are allowed to randomise
+ * the heap, we can put it above 1TB so it is backed by a 1TB
+ * segment. Otherwise the heap will be in the bottom 1TB
+ * which always uses 256MB segments and this may result in a
+ * performance penalty.
+ */
+ if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
+ base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T);
#endif
+
+ ret = PAGE_ALIGN(base + brk_rnd());
+
+ if (ret < mm->brk)
+ return mm->brk;
+
+ return ret;
+}
+
+unsigned long randomize_et_dyn(unsigned long base)
+{
+ unsigned long ret = PAGE_ALIGN(base + brk_rnd());
+
+ if (ret < base)
+ return base;
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c