aboutsummaryrefslogtreecommitdiff
path: root/arch/ia64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/ia64/kernel')
-rw-r--r--arch/ia64/kernel/.gitignore2
-rw-r--r--arch/ia64/kernel/Makefile105
-rw-r--r--arch/ia64/kernel/Makefile.gate27
-rw-r--r--arch/ia64/kernel/acpi-ext.c1
-rw-r--r--arch/ia64/kernel/acpi-processor.c67
-rw-r--r--arch/ia64/kernel/acpi.c322
-rw-r--r--arch/ia64/kernel/asm-offsets.c35
-rw-r--r--arch/ia64/kernel/audit.c21
-rw-r--r--arch/ia64/kernel/cpufreq/Kconfig29
-rw-r--r--arch/ia64/kernel/cpufreq/Makefile2
-rw-r--r--arch/ia64/kernel/cpufreq/acpi-cpufreq.c440
-rw-r--r--arch/ia64/kernel/crash.c133
-rw-r--r--arch/ia64/kernel/crash_dump.c1
-rw-r--r--arch/ia64/kernel/cyclone.c12
-rw-r--r--arch/ia64/kernel/dma-mapping.c24
-rw-r--r--arch/ia64/kernel/efi.c668
-rw-r--r--arch/ia64/kernel/elfcore.c76
-rw-r--r--arch/ia64/kernel/entry.S423
-rw-r--r--arch/ia64/kernel/err_inject.c75
-rw-r--r--arch/ia64/kernel/esi.c4
-rw-r--r--arch/ia64/kernel/fsys.S338
-rw-r--r--arch/ia64/kernel/fsyscall_gtod_data.h6
-rw-r--r--arch/ia64/kernel/ftrace.c204
-rw-r--r--arch/ia64/kernel/gate-data.S2
-rw-r--r--arch/ia64/kernel/gate.S182
-rw-r--r--arch/ia64/kernel/gate.lds.S142
-rw-r--r--arch/ia64/kernel/head.S207
-rw-r--r--arch/ia64/kernel/ia64_ksyms.c40
-rw-r--r--arch/ia64/kernel/init_task.c11
-rw-r--r--arch/ia64/kernel/iosapic.c379
-rw-r--r--arch/ia64/kernel/irq.c101
-rw-r--r--arch/ia64/kernel/irq_ia64.c241
-rw-r--r--arch/ia64/kernel/irq_lsapic.c25
-rw-r--r--arch/ia64/kernel/ivt.S656
-rw-r--r--arch/ia64/kernel/jprobes.S2
-rw-r--r--arch/ia64/kernel/kprobes.c200
-rw-r--r--arch/ia64/kernel/machine_kexec.c59
-rw-r--r--arch/ia64/kernel/machvec.c9
-rw-r--r--arch/ia64/kernel/mca.c644
-rw-r--r--arch/ia64/kernel/mca_asm.S100
-rw-r--r--arch/ia64/kernel/mca_drv.c9
-rw-r--r--arch/ia64/kernel/mca_drv.h4
-rw-r--r--arch/ia64/kernel/mca_drv_asm.S2
-rw-r--r--arch/ia64/kernel/minstate.h67
-rw-r--r--arch/ia64/kernel/module.c108
-rw-r--r--arch/ia64/kernel/msi_ia64.c175
-rw-r--r--arch/ia64/kernel/nr-irqs.c21
-rw-r--r--arch/ia64/kernel/numa.c6
-rw-r--r--arch/ia64/kernel/palinfo.c714
-rw-r--r--arch/ia64/kernel/paravirt.c902
-rw-r--r--arch/ia64/kernel/paravirt_inst.h28
-rw-r--r--arch/ia64/kernel/paravirt_patch.c514
-rw-r--r--arch/ia64/kernel/paravirt_patchlist.c81
-rw-r--r--arch/ia64/kernel/paravirt_patchlist.h24
-rw-r--r--arch/ia64/kernel/paravirtentry.S121
-rw-r--r--arch/ia64/kernel/patch.c75
-rw-r--r--arch/ia64/kernel/pci-dma.c110
-rw-r--r--arch/ia64/kernel/pci-swiotlb.c67
-rw-r--r--arch/ia64/kernel/perfmon.c683
-rw-r--r--arch/ia64/kernel/perfmon_default_smpl.c14
-rw-r--r--arch/ia64/kernel/process.c413
-rw-r--r--arch/ia64/kernel/ptrace.c1764
-rw-r--r--arch/ia64/kernel/relocate_kernel.S4
-rw-r--r--arch/ia64/kernel/sal.c39
-rw-r--r--arch/ia64/kernel/salinfo.c94
-rw-r--r--arch/ia64/kernel/semaphore.c165
-rw-r--r--arch/ia64/kernel/setup.c449
-rw-r--r--arch/ia64/kernel/signal.c189
-rw-r--r--arch/ia64/kernel/smp.c245
-rw-r--r--arch/ia64/kernel/smpboot.c243
-rw-r--r--arch/ia64/kernel/stacktrace.c39
-rw-r--r--arch/ia64/kernel/sys_ia64.c143
-rw-r--r--arch/ia64/kernel/time.c228
-rw-r--r--arch/ia64/kernel/topology.c114
-rw-r--r--arch/ia64/kernel/traps.c55
-rw-r--r--arch/ia64/kernel/unaligned.c70
-rw-r--r--arch/ia64/kernel/uncached.c45
-rw-r--r--arch/ia64/kernel/unwind.c128
-rw-r--r--arch/ia64/kernel/vmlinux.lds.S439
79 files changed, 8402 insertions, 6204 deletions
diff --git a/arch/ia64/kernel/.gitignore b/arch/ia64/kernel/.gitignore
new file mode 100644
index 00000000000..21cb0da5ded
--- /dev/null
+++ b/arch/ia64/kernel/.gitignore
@@ -0,0 +1,2 @@
+gate.lds
+vmlinux.lds
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 33e5a598672..20678a9ed11 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -2,21 +2,19 @@
# Makefile for the linux kernel.
#
+ifdef CONFIG_DYNAMIC_FTRACE
+CFLAGS_REMOVE_ftrace.o = -pg
+endif
+
extra-y := head.o init_task.o vmlinux.lds
-obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
- irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \
- salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
- unwind.o mca.o mca_asm.o topology.o
+obj-y := entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
+ irq_lsapic.o ivt.o machvec.o pal.o paravirt_patchlist.o patch.o process.o perfmon.o ptrace.o sal.o \
+ salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
+ unwind.o mca.o mca_asm.o topology.o dma-mapping.o
+obj-$(CONFIG_ACPI) += acpi.o acpi-ext.o
obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o
-obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o
-obj-$(CONFIG_IA64_HP_ZX1) += acpi-ext.o
-obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o
-
-ifneq ($(CONFIG_ACPI_PROCESSOR),)
-obj-y += acpi-processor.o
-endif
obj-$(CONFIG_IA64_PALINFO) += palinfo.o
obj-$(CONFIG_IOSAPIC) += iosapic.o
@@ -25,9 +23,9 @@ obj-$(CONFIG_SMP) += smp.o smpboot.o
obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o
obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
-obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
@@ -35,38 +33,81 @@ obj-$(CONFIG_AUDIT) += audit.o
obj-$(CONFIG_PCI_MSI) += msi_ia64.o
mca_recovery-y += mca_drv.o mca_drv_asm.o
obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o
+obj-$(CONFIG_STACKTRACE) += stacktrace.o
+
+obj-$(CONFIG_PARAVIRT) += paravirt.o paravirtentry.o \
+ paravirt_patch.o
obj-$(CONFIG_IA64_ESI) += esi.o
ifneq ($(CONFIG_IA64_ESI),)
obj-y += esi_stub.o # must be in kernel proper
endif
+obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o
+obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
-# The gate DSO image is built using a special linker script.
-targets += gate.so gate-syms.o
-
-extra-y += gate.so gate-syms.o gate.lds gate.o
+obj-$(CONFIG_BINFMT_ELF) += elfcore.o
# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31
-CPPFLAGS_gate.lds := -P -C -U$(ARCH)
+# The gate DSO image is built using a special linker script.
+include $(srctree)/arch/ia64/kernel/Makefile.gate
+# tell compiled for native
+CPPFLAGS_gate.lds += -D__IA64_GATE_PARAVIRTUALIZED_NATIVE
+
+# Calculate NR_IRQ = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, ...) based on config
+define sed-y
+ "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
+endef
+quiet_cmd_nr_irqs = GEN $@
+define cmd_nr_irqs
+ (set -e; \
+ echo "#ifndef __ASM_NR_IRQS_H__"; \
+ echo "#define __ASM_NR_IRQS_H__"; \
+ echo "/*"; \
+ echo " * DO NOT MODIFY."; \
+ echo " *"; \
+ echo " * This file was generated by Kbuild"; \
+ echo " *"; \
+ echo " */"; \
+ echo ""; \
+ sed -ne $(sed-y) $<; \
+ echo ""; \
+ echo "#endif" ) > $@
+endef
-quiet_cmd_gate = GATE $@
- cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
+# We use internal kbuild rules to avoid the "is up to date" message from make
+arch/$(SRCARCH)/kernel/nr-irqs.s: arch/$(SRCARCH)/kernel/nr-irqs.c
+ $(Q)mkdir -p $(dir $@)
+ $(call if_changed_dep,cc_s_c)
-GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
- $(call ld-option, -Wl$(comma)--hash-style=sysv)
-$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
- $(call if_changed,gate)
+include/generated/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s
+ $(Q)mkdir -p $(dir $@)
+ $(call cmd,nr_irqs)
-$(obj)/built-in.o: $(obj)/gate-syms.o
-$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
+#
+# native ivt.S, entry.S and fsys.S
+#
+ASM_PARAVIRT_OBJS = ivt.o entry.o fsys.o
+define paravirtualized_native
+AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_NATIVE
+AFLAGS_pvchk-sed-$(1) += -D__IA64_ASM_PARAVIRTUALIZED_PVCHECK
+extra-y += pvchk-$(1)
+endef
+$(foreach obj,$(ASM_PARAVIRT_OBJS),$(eval $(call paravirtualized_native,$(obj))))
-GATECFLAGS_gate-syms.o = -r
-$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
- $(call if_changed,gate)
+#
+# Checker for paravirtualizations of privileged operations.
+#
+quiet_cmd_pv_check_sed = PVCHK $@
+define cmd_pv_check_sed
+ sed -f $(srctree)/arch/$(SRCARCH)/scripts/pvcheck.sed $< > $@
+endef
-# gate-data.o contains the gate DSO image as data in section .data.gate.
-# We must build gate.so before we can assemble it.
-# Note: kbuild does not track this dependency due to usage of .incbin
-$(obj)/gate-data.o: $(obj)/gate.so
+$(obj)/pvchk-sed-%.s: $(src)/%.S $(srctree)/arch/$(SRCARCH)/scripts/pvcheck.sed FORCE
+ $(call if_changed_dep,as_s_S)
+$(obj)/pvchk-%.s: $(obj)/pvchk-sed-%.s FORCE
+ $(call if_changed,pv_check_sed)
+$(obj)/pvchk-%.o: $(obj)/pvchk-%.s FORCE
+ $(call if_changed,as_o_S)
+.PRECIOUS: $(obj)/pvchk-sed-%.s $(obj)/pvchk-%.s $(obj)/pvchk-%.o
diff --git a/arch/ia64/kernel/Makefile.gate b/arch/ia64/kernel/Makefile.gate
new file mode 100644
index 00000000000..ceeffc50976
--- /dev/null
+++ b/arch/ia64/kernel/Makefile.gate
@@ -0,0 +1,27 @@
+# The gate DSO image is built using a special linker script.
+
+targets += gate.so gate-syms.o
+
+extra-y += gate.so gate-syms.o gate.lds gate.o
+
+CPPFLAGS_gate.lds := -P -C -U$(ARCH)
+
+quiet_cmd_gate = GATE $@
+ cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
+
+GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
+ $(call if_changed,gate)
+
+$(obj)/built-in.o: $(obj)/gate-syms.o
+$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
+
+GATECFLAGS_gate-syms.o = -r
+$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
+ $(call if_changed,gate)
+
+# gate-data.o contains the gate DSO image as data in section .data..gate.
+# We must build gate.so before we can assemble it.
+# Note: kbuild does not track this dependency due to usage of .incbin
+$(obj)/gate-data.o: $(obj)/gate.so
diff --git a/arch/ia64/kernel/acpi-ext.c b/arch/ia64/kernel/acpi-ext.c
index b7515bc808a..8b9318d311a 100644
--- a/arch/ia64/kernel/acpi-ext.c
+++ b/arch/ia64/kernel/acpi-ext.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/types.h>
+#include <linux/slab.h>
#include <linux/acpi.h>
#include <asm/acpi-ext.h>
diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c
deleted file mode 100644
index 5a216c01992..00000000000
--- a/arch/ia64/kernel/acpi-processor.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * arch/ia64/kernel/acpi-processor.c
- *
- * Copyright (C) 2005 Intel Corporation
- * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
- * - Added _PDC for platforms with Intel CPUs
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/acpi.h>
-
-#include <acpi/processor.h>
-#include <asm/acpi.h>
-
-static void init_intel_pdc(struct acpi_processor *pr)
-{
- struct acpi_object_list *obj_list;
- union acpi_object *obj;
- u32 *buf;
-
- /* allocate and initialize pdc. It will be used later. */
- obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL);
- if (!obj_list) {
- printk(KERN_ERR "Memory allocation error\n");
- return;
- }
-
- obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL);
- if (!obj) {
- printk(KERN_ERR "Memory allocation error\n");
- kfree(obj_list);
- return;
- }
-
- buf = kmalloc(12, GFP_KERNEL);
- if (!buf) {
- printk(KERN_ERR "Memory allocation error\n");
- kfree(obj);
- kfree(obj_list);
- return;
- }
-
- buf[0] = ACPI_PDC_REVISION_ID;
- buf[1] = 1;
- buf[2] = ACPI_PDC_EST_CAPABILITY_SMP;
-
- obj->type = ACPI_TYPE_BUFFER;
- obj->buffer.length = 12;
- obj->buffer.pointer = (u8 *) buf;
- obj_list->count = 1;
- obj_list->pointer = obj;
- pr->pdc = obj_list;
-
- return;
-}
-
-/* Initialize _PDC data based on the CPU vendor */
-void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
-{
- pr->pdc = NULL;
- init_intel_pdc(pr);
- return;
-}
-
-EXPORT_SYMBOL(arch_acpi_processor_init_pdc);
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 3d45d24a9d6..615ef81def4 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -44,39 +44,47 @@
#include <linux/efi.h>
#include <linux/mmzone.h>
#include <linux/nodemask.h>
+#include <linux/slab.h>
+#include <acpi/processor.h>
#include <asm/io.h>
#include <asm/iosapic.h>
#include <asm/machvec.h>
#include <asm/page.h>
-#include <asm/system.h>
#include <asm/numa.h>
#include <asm/sal.h>
#include <asm/cyclone.h>
-#define BAD_MADT_ENTRY(entry, end) ( \
- (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
- ((struct acpi_subtable_header *)entry)->length < sizeof(*entry))
-
#define PREFIX "ACPI: "
-void (*pm_idle) (void);
-EXPORT_SYMBOL(pm_idle);
-void (*pm_power_off) (void);
-EXPORT_SYMBOL(pm_power_off);
-
+int acpi_lapic;
unsigned int acpi_cpei_override;
unsigned int acpi_cpei_phys_cpuid;
unsigned long acpi_wakeup_address = 0;
+#ifdef CONFIG_IA64_GENERIC
+static unsigned long __init acpi_find_rsdp(void)
+{
+ unsigned long rsdp_phys = 0;
+
+ if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
+ rsdp_phys = efi.acpi20;
+ else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
+ printk(KERN_WARNING PREFIX
+ "v1.0/r0.71 tables no longer supported\n");
+ return rsdp_phys;
+}
+
const char __init *
acpi_get_sysname(void)
{
-#ifdef CONFIG_IA64_GENERIC
unsigned long rsdp_phys;
struct acpi_table_rsdp *rsdp;
struct acpi_table_xsdt *xsdt;
struct acpi_table_header *hdr;
+#ifdef CONFIG_INTEL_IOMMU
+ u64 i, nentries;
+#endif
rsdp_phys = acpi_find_rsdp();
if (!rsdp_phys) {
@@ -103,28 +111,27 @@ acpi_get_sysname(void)
if (!strcmp(hdr->oem_id, "HP")) {
return "hpzx1";
} else if (!strcmp(hdr->oem_id, "SGI")) {
- return "sn2";
+ if (!strcmp(hdr->oem_table_id + 4, "UV"))
+ return "uv";
+ else
+ return "sn2";
}
- return "dig";
-#else
-# if defined (CONFIG_IA64_HP_SIM)
- return "hpsim";
-# elif defined (CONFIG_IA64_HP_ZX1)
- return "hpzx1";
-# elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB)
- return "hpzx1_swiotlb";
-# elif defined (CONFIG_IA64_SGI_SN2)
- return "sn2";
-# elif defined (CONFIG_IA64_DIG)
- return "dig";
-# else
-# error Unknown platform. Fix acpi.c.
-# endif
+#ifdef CONFIG_INTEL_IOMMU
+ /* Look for Intel IOMMU */
+ nentries = (hdr->length - sizeof(*hdr)) /
+ sizeof(xsdt->table_offset_entry[0]);
+ for (i = 0; i < nentries; i++) {
+ hdr = __va(xsdt->table_offset_entry[i]);
+ if (strncmp(hdr->signature, ACPI_SIG_DMAR,
+ sizeof(ACPI_SIG_DMAR) - 1) == 0)
+ return "dig_vtd";
+ }
#endif
-}
-#ifdef CONFIG_ACPI
+ return "dig";
+}
+#endif /* CONFIG_IA64_GENERIC */
#define ACPI_MAX_PLATFORM_INTERRUPTS 256
@@ -152,16 +159,19 @@ int acpi_request_vector(u32 int_type)
return vector;
}
-char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
+char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
{
return __va(phys_addr);
}
+void __init __acpi_unmap_table(char *map, unsigned long size)
+{
+}
+
/* --------------------------------------------------------------------------
Boot-time Table Parsing
-------------------------------------------------------------------------- */
-static int total_cpus __initdata;
static int available_cpus __initdata;
struct acpi_table_madt *acpi_madt __initdata;
static u8 has_8259;
@@ -331,11 +341,11 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
iosapic_override_isa_irq(p->source_irq, p->global_irq,
((p->inti_flags & ACPI_MADT_POLARITY_MASK) ==
- ACPI_MADT_POLARITY_ACTIVE_HIGH) ?
- IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
+ ACPI_MADT_POLARITY_ACTIVE_LOW) ?
+ IOSAPIC_POL_LOW : IOSAPIC_POL_HIGH,
((p->inti_flags & ACPI_MADT_TRIGGER_MASK) ==
- ACPI_MADT_TRIGGER_EDGE) ?
- IOSAPIC_EDGE : IOSAPIC_LEVEL);
+ ACPI_MADT_TRIGGER_LEVEL) ?
+ IOSAPIC_LEVEL : IOSAPIC_EDGE);
return 0;
}
@@ -405,27 +415,30 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32)
static int __initdata srat_num_cpus; /* number of cpus */
-static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
+static u32 pxm_flag[PXM_FLAG_LEN];
#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag))
#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag))
static struct acpi_table_slit __initdata *slit_table;
+cpumask_t early_cpu_possible_map = CPU_MASK_NONE;
-static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
+static int __init
+get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
{
int pxm;
pxm = pa->proximity_domain_lo;
- if (ia64_platform_is("sn2"))
+ if (ia64_platform_is("sn2") || acpi_srat_revision >= 2)
pxm += pa->proximity_domain_hi[0] << 8;
return pxm;
}
-static int get_memory_proximity_domain(struct acpi_srat_mem_affinity *ma)
+static int __init
+get_memory_proximity_domain(struct acpi_srat_mem_affinity *ma)
{
int pxm;
pxm = ma->proximity_domain;
- if (!ia64_platform_is("sn2"))
+ if (!ia64_platform_is("sn2") && acpi_srat_revision <= 1)
pxm &= 0xff;
return pxm;
@@ -445,7 +458,6 @@ void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
printk(KERN_ERR
"ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n",
len, slit->header.length);
- memset(numa_slit, 10, sizeof(numa_slit));
return;
}
slit_table = slit;
@@ -459,6 +471,12 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
if (!(pa->flags & ACPI_SRAT_CPU_ENABLED))
return;
+ if (srat_num_cpus >= ARRAY_SIZE(node_cpuid)) {
+ printk_once(KERN_WARNING
+ "node_cpuid[%ld] is too small, may not be able to use all cpus\n",
+ ARRAY_SIZE(node_cpuid));
+ return;
+ }
pxm = get_processor_proximity_domain(pa);
/* record this node in proximity bitmap */
@@ -468,10 +486,11 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
(pa->apic_id << 8) | (pa->local_sapic_eid);
/* nid should be overridden as logical node id later */
node_cpuid[srat_num_cpus].nid = pxm;
+ cpu_set(srat_num_cpus, early_cpu_possible_map);
srat_num_cpus++;
}
-void __init
+int __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
unsigned long paddr, size;
@@ -486,7 +505,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
/* Ignore disabled entries */
if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
- return;
+ return -1;
/* record this node in proximity bitmap */
pxm_bit_set(pxm);
@@ -505,6 +524,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
p->size = size;
p->nid = pxm;
num_node_memblks++;
+ return 0;
}
void __init acpi_numa_arch_fixup(void)
@@ -545,7 +565,7 @@ void __init acpi_numa_arch_fixup(void)
}
/* set logical node id in cpu structure */
- for (i = 0; i < srat_num_cpus; i++)
+ for_each_possible_early_cpu(i)
node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid);
printk(KERN_INFO "Number of logical nodes in system = %d\n",
@@ -553,8 +573,14 @@ void __init acpi_numa_arch_fixup(void)
printk(KERN_INFO "Number of memory chunks in system = %d\n",
num_node_memblks);
- if (!slit_table)
+ if (!slit_table) {
+ for (i = 0; i < MAX_NUMNODES; i++)
+ for (j = 0; j < MAX_NUMNODES; j++)
+ node_distance(i, j) = i == j ? LOCAL_DISTANCE :
+ REMOTE_DISTANCE;
return;
+ }
+
memset(numa_slit, -1, sizeof(numa_slit));
for (i = 0; i < slit_table->locality_count; i++) {
if (!pxm_bit_test(i))
@@ -584,7 +610,7 @@ void __init acpi_numa_arch_fixup(void)
* success: return IRQ number (>=0)
* failure: return < 0
*/
-int acpi_register_gsi(u32 gsi, int triggering, int polarity)
+int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
{
if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM)
return gsi;
@@ -600,18 +626,19 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity)
ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE :
IOSAPIC_LEVEL);
}
-
-EXPORT_SYMBOL(acpi_register_gsi);
+EXPORT_SYMBOL_GPL(acpi_register_gsi);
void acpi_unregister_gsi(u32 gsi)
{
if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM)
return;
+ if (has_8259 && gsi < 16)
+ return;
+
iosapic_unregister_intr(gsi);
}
-
-EXPORT_SYMBOL(acpi_unregister_gsi);
+EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
static int __init acpi_parse_fadt(struct acpi_table_header *table)
{
@@ -627,20 +654,47 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
fadt = (struct acpi_table_fadt *)fadt_header;
- acpi_register_gsi(fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW);
+ acpi_register_gsi(NULL, fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE,
+ ACPI_ACTIVE_LOW);
return 0;
}
-unsigned long __init acpi_find_rsdp(void)
+int __init early_acpi_boot_init(void)
{
- unsigned long rsdp_phys = 0;
+ int ret;
- if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
- rsdp_phys = efi.acpi20;
- else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
- printk(KERN_WARNING PREFIX
- "v1.0/r0.71 tables no longer supported\n");
- return rsdp_phys;
+ /*
+ * do a partial walk of MADT to determine how many CPUs
+ * we have including offline CPUs
+ */
+ if (acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) {
+ printk(KERN_ERR PREFIX "Can't find MADT\n");
+ return 0;
+ }
+
+ ret = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
+ acpi_parse_lsapic, NR_CPUS);
+ if (ret < 1)
+ printk(KERN_ERR PREFIX
+ "Error parsing MADT - no LAPIC entries\n");
+ else
+ acpi_lapic = 1;
+
+#ifdef CONFIG_SMP
+ if (available_cpus == 0) {
+ printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
+ printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
+ smp_boot_data.cpu_phys_id[available_cpus] =
+ hard_smp_processor_id();
+ available_cpus = 1; /* We've got at least one of these, no? */
+ }
+ smp_boot_data.cpu_count = available_cpus;
+#endif
+ /* Make boot-up look pretty */
+ printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus,
+ total_cpus);
+
+ return 0;
}
int __init acpi_boot_init(void)
@@ -666,11 +720,6 @@ int __init acpi_boot_init(void)
printk(KERN_ERR PREFIX
"Error parsing LAPIC address override entry\n");
- if (acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, acpi_parse_lsapic, NR_CPUS)
- < 1)
- printk(KERN_ERR PREFIX
- "Error parsing MADT - no LAPIC entries\n");
-
if (acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0)
< 0)
printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
@@ -678,9 +727,11 @@ int __init acpi_boot_init(void)
/* I/O APIC */
if (acpi_table_parse_madt
- (ACPI_MADT_TYPE_IO_SAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1)
- printk(KERN_ERR PREFIX
- "Error parsing MADT - no IOSAPIC entries\n");
+ (ACPI_MADT_TYPE_IO_SAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1) {
+ if (!ia64_platform_is("sn2"))
+ printk(KERN_ERR PREFIX
+ "Error parsing MADT - no IOSAPIC entries\n");
+ }
/* System-Level Interrupt Routing */
@@ -708,18 +759,8 @@ int __init acpi_boot_init(void)
if (acpi_table_parse(ACPI_SIG_FADT, acpi_parse_fadt))
printk(KERN_ERR PREFIX "Can't find FADT\n");
+#ifdef CONFIG_ACPI_NUMA
#ifdef CONFIG_SMP
- if (available_cpus == 0) {
- printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
- printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
- smp_boot_data.cpu_phys_id[available_cpus] =
- hard_smp_processor_id();
- available_cpus = 1; /* We've got at least one of these, no? */
- }
- smp_boot_data.cpu_count = available_cpus;
-
- smp_build_cpu_map();
-# ifdef CONFIG_ACPI_NUMA
if (srat_num_cpus == 0) {
int cpu, i = 1;
for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++)
@@ -728,14 +769,9 @@ int __init acpi_boot_init(void)
node_cpuid[i++].phys_id =
smp_boot_data.cpu_phys_id[cpu];
}
-# endif
#endif
-#ifdef CONFIG_ACPI_NUMA
build_cpu_to_node_map();
#endif
- /* Make boot-up look pretty */
- printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus,
- total_cpus);
return 0;
}
@@ -754,18 +790,21 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
return 0;
}
+int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
+{
+ if (isa_irq >= 16)
+ return -1;
+ *gsi = isa_irq;
+ return 0;
+}
+
/*
* ACPI based hotplug CPU support
*/
#ifdef CONFIG_ACPI_HOTPLUG_CPU
-static
-int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
+static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
{
#ifdef CONFIG_ACPI_NUMA
- int pxm_id;
- int nid;
-
- pxm_id = acpi_get_pxm(handle);
/*
* We don't have cpu-only-node hotadd. But if the system equips
* SRAT table, pxm is already found and node is ready.
@@ -773,11 +812,10 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
* This code here is for the system which doesn't have full SRAT
* table for possible cpus.
*/
- nid = acpi_map_pxm_to_node(pxm_id);
node_cpuid[cpu].phys_id = physid;
- node_cpuid[cpu].nid = nid;
+ node_cpuid[cpu].nid = acpi_get_node(handle);
#endif
- return (0);
+ return 0;
}
int additional_cpus __initdata = -1;
@@ -793,11 +831,11 @@ static __init int setup_additional_cpus(char *s)
early_param("additional_cpus", setup_additional_cpus);
/*
- * cpu_possible_map should be static, it cannot change as CPUs
+ * cpu_possible_mask should be static, it cannot change as CPUs
* are onlined, or offlined. The reason is per-cpu data-structures
* are allocated by some modules at init time, and dont expect to
* do this dynamically on cpu arrival/departure.
- * cpu_present_map on the other hand can change dynamically.
+ * cpu_present_mask on the other hand can change dynamically.
* In case when cpu_hotplug is not compiled, then we resort to current
* behaviour, which is cpu_possible == cpu_present.
* - Ashok Raj
@@ -823,72 +861,48 @@ __init void prefill_possible_map(void)
possible = available_cpus + additional_cpus;
- if (possible > NR_CPUS)
- possible = NR_CPUS;
+ if (possible > nr_cpu_ids)
+ possible = nr_cpu_ids;
printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
possible, max((possible - available_cpus), 0));
for (i = 0; i < possible; i++)
- cpu_set(i, cpu_possible_map);
+ set_cpu_possible(i, true);
}
-int acpi_map_lsapic(acpi_handle handle, int *pcpu)
+static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
{
- struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
- union acpi_object *obj;
- struct acpi_madt_local_sapic *lsapic;
cpumask_t tmp_map;
- long physid;
int cpu;
- if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
- return -EINVAL;
-
- if (!buffer.length || !buffer.pointer)
- return -EINVAL;
-
- obj = buffer.pointer;
- if (obj->type != ACPI_TYPE_BUFFER)
- {
- kfree(buffer.pointer);
- return -EINVAL;
- }
-
- lsapic = (struct acpi_madt_local_sapic *)obj->buffer.pointer;
-
- if ((lsapic->header.type != ACPI_MADT_TYPE_LOCAL_SAPIC) ||
- (!lsapic->lapic_flags & ACPI_MADT_ENABLED)) {
- kfree(buffer.pointer);
- return -EINVAL;
- }
-
- physid = ((lsapic->id << 8) | (lsapic->eid));
-
- kfree(buffer.pointer);
- buffer.length = ACPI_ALLOCATE_BUFFER;
- buffer.pointer = NULL;
-
- cpus_complement(tmp_map, cpu_present_map);
- cpu = first_cpu(tmp_map);
- if (cpu >= NR_CPUS)
+ cpumask_complement(&tmp_map, cpu_present_mask);
+ cpu = cpumask_first(&tmp_map);
+ if (cpu >= nr_cpu_ids)
return -EINVAL;
acpi_map_cpu2node(handle, cpu, physid);
- cpu_set(cpu, cpu_present_map);
+ set_cpu_present(cpu, true);
ia64_cpu_to_sapicid[cpu] = physid;
+ acpi_processor_set_pdc(handle);
+
*pcpu = cpu;
return (0);
}
+/* wrapper to silence section mismatch warning */
+int __ref acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
+{
+ return _acpi_map_lsapic(handle, physid, pcpu);
+}
EXPORT_SYMBOL(acpi_map_lsapic);
int acpi_unmap_lsapic(int cpu)
{
ia64_cpu_to_sapicid[cpu] = -1;
- cpu_clear(cpu, cpu_present_map);
+ set_cpu_present(cpu, false);
#ifdef CONFIG_ACPI_NUMA
/* NUMA specific cleanup's */
@@ -901,14 +915,14 @@ EXPORT_SYMBOL(acpi_unmap_lsapic);
#endif /* CONFIG_ACPI_HOTPLUG_CPU */
#ifdef CONFIG_ACPI_NUMA
-static acpi_status __devinit
-acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
+static acpi_status acpi_map_iosapic(acpi_handle handle, u32 depth,
+ void *context, void **ret)
{
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
union acpi_object *obj;
struct acpi_madt_io_sapic *iosapic;
unsigned int gsi_base;
- int pxm, node;
+ int node;
/* Only care about objects w/ a method that returns the MADT */
if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
@@ -935,18 +949,10 @@ acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
kfree(buffer.pointer);
- /*
- * OK, it's an IOSAPIC MADT entry, look for a _PXM value to tell
- * us which node to associate this with.
- */
- pxm = acpi_get_pxm(handle);
- if (pxm < 0)
- return AE_OK;
-
- node = pxm_to_node(pxm);
-
- if (node >= MAX_NUMNODES || !node_online(node) ||
- cpus_empty(node_to_cpumask(node)))
+ /* OK, it's an IOSAPIC MADT entry; associate it with a node */
+ node = acpi_get_node(handle);
+ if (node == NUMA_NO_NODE || !node_online(node) ||
+ cpumask_empty(cpumask_of_node(node)))
return AE_OK;
/* We know a gsi to node mapping! */
@@ -964,7 +970,7 @@ acpi_map_iosapics (void)
fs_initcall(acpi_map_iosapics);
#endif /* CONFIG_ACPI_NUMA */
-int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
+int __ref acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
{
int err;
@@ -988,20 +994,8 @@ int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
EXPORT_SYMBOL(acpi_unregister_ioapic);
/*
- * acpi_save_state_mem() - save kernel state
+ * acpi_suspend_lowlevel() - save kernel state and suspend.
*
* TBD when when IA64 starts to support suspend...
*/
-int acpi_save_state_mem(void) { return 0; }
-
-/*
- * acpi_restore_state()
- */
-void acpi_restore_state_mem(void) {}
-
-/*
- * do_suspend_lowlevel()
- */
-void do_suspend_lowlevel(void) {}
-
-#endif /* CONFIG_ACPI */
+int acpi_suspend_lowlevel(void) { return 0; }
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index 0aebc6f79e9..60ef83e6db7 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -7,22 +7,18 @@
#define ASM_OFFSETS_C 1
#include <linux/sched.h>
+#include <linux/pid.h>
#include <linux/clocksource.h>
-
-#include <asm-ia64/processor.h>
-#include <asm-ia64/ptrace.h>
-#include <asm-ia64/siginfo.h>
-#include <asm-ia64/sigcontext.h>
-#include <asm-ia64/mca.h>
+#include <linux/kbuild.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/siginfo.h>
+#include <asm/sigcontext.h>
+#include <asm/mca.h>
#include "../kernel/sigframe.h"
#include "../kernel/fsyscall_gtod_data.h"
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
void foo(void)
{
DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct));
@@ -34,17 +30,29 @@ void foo(void)
DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe));
DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info));
+ BUILD_BUG_ON(sizeof(struct upid) != 32);
+ DEFINE(IA64_UPID_SHIFT, 5);
+
BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
+ DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
+ DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
+ DEFINE(TI_AC_UTIME, offsetof(struct thread_info, ac_utime));
+#endif
BLANK();
DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
+ DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid));
+ DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level));
+ DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0]));
DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
@@ -258,8 +266,8 @@ void foo(void)
BLANK();
/* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
- DEFINE(IA64_GTOD_LOCK_OFFSET,
- offsetof (struct fsyscall_gtod_data_t, lock));
+ DEFINE(IA64_GTOD_SEQ_OFFSET,
+ offsetof (struct fsyscall_gtod_data_t, seq));
DEFINE(IA64_GTOD_WALL_TIME_OFFSET,
offsetof (struct fsyscall_gtod_data_t, wall_time));
DEFINE(IA64_GTOD_MONO_TIME_OFFSET,
@@ -278,4 +286,5 @@ void foo(void)
offsetof (struct itc_jitter_data_t, itc_jitter));
DEFINE(IA64_ITC_LASTCYCLE_OFFSET,
offsetof (struct itc_jitter_data_t, itc_lastcycle));
+
}
diff --git a/arch/ia64/kernel/audit.c b/arch/ia64/kernel/audit.c
index f3802ae89b1..96a9d18ff4c 100644
--- a/arch/ia64/kernel/audit.c
+++ b/arch/ia64/kernel/audit.c
@@ -30,20 +30,11 @@ static unsigned signal_class[] = {
int audit_classify_arch(int arch)
{
-#ifdef CONFIG_IA32_SUPPORT
- if (arch == AUDIT_ARCH_I386)
- return 1;
-#endif
return 0;
}
int audit_classify_syscall(int abi, unsigned syscall)
{
-#ifdef CONFIG_IA32_SUPPORT
- extern int ia32_classify_syscall(unsigned);
- if (abi == AUDIT_ARCH_I386)
- return ia32_classify_syscall(syscall);
-#endif
switch(syscall) {
case __NR_open:
return 2;
@@ -58,18 +49,6 @@ int audit_classify_syscall(int abi, unsigned syscall)
static int __init audit_classes_init(void)
{
-#ifdef CONFIG_IA32_SUPPORT
- extern __u32 ia32_dir_class[];
- extern __u32 ia32_write_class[];
- extern __u32 ia32_read_class[];
- extern __u32 ia32_chattr_class[];
- extern __u32 ia32_signal_class[];
- audit_register_class(AUDIT_CLASS_WRITE_32, ia32_write_class);
- audit_register_class(AUDIT_CLASS_READ_32, ia32_read_class);
- audit_register_class(AUDIT_CLASS_DIR_WRITE_32, ia32_dir_class);
- audit_register_class(AUDIT_CLASS_CHATTR_32, ia32_chattr_class);
- audit_register_class(AUDIT_CLASS_SIGNAL_32, ia32_signal_class);
-#endif
audit_register_class(AUDIT_CLASS_WRITE, write_class);
audit_register_class(AUDIT_CLASS_READ, read_class);
audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
diff --git a/arch/ia64/kernel/cpufreq/Kconfig b/arch/ia64/kernel/cpufreq/Kconfig
deleted file mode 100644
index 2d9d5279b98..00000000000
--- a/arch/ia64/kernel/cpufreq/Kconfig
+++ /dev/null
@@ -1,29 +0,0 @@
-
-#
-# CPU Frequency scaling
-#
-
-menu "CPU Frequency scaling"
-
-source "drivers/cpufreq/Kconfig"
-
-if CPU_FREQ
-
-comment "CPUFreq processor drivers"
-
-config IA64_ACPI_CPUFREQ
- tristate "ACPI Processor P-States driver"
- select CPU_FREQ_TABLE
- depends on ACPI_PROCESSOR
- help
- This driver adds a CPUFreq driver which utilizes the ACPI
- Processor Performance States.
-
- For details, take a look at <file:Documentation/cpu-freq/>.
-
- If in doubt, say N.
-
-endif # CPU_FREQ
-
-endmenu
-
diff --git a/arch/ia64/kernel/cpufreq/Makefile b/arch/ia64/kernel/cpufreq/Makefile
deleted file mode 100644
index 4838f2a57c7..00000000000
--- a/arch/ia64/kernel/cpufreq/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-obj-$(CONFIG_IA64_ACPI_CPUFREQ) += acpi-cpufreq.o
-
diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
deleted file mode 100644
index 8c6ec707084..00000000000
--- a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
+++ /dev/null
@@ -1,440 +0,0 @@
-/*
- * arch/ia64/kernel/cpufreq/acpi-cpufreq.c
- * This file provides the ACPI based P-state support. This
- * module works with generic cpufreq infrastructure. Most of
- * the code is based on i386 version
- * (arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c)
- *
- * Copyright (C) 2005 Intel Corp
- * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <asm/io.h>
-#include <asm/uaccess.h>
-#include <asm/pal.h>
-
-#include <linux/acpi.h>
-#include <acpi/processor.h>
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
-
-MODULE_AUTHOR("Venkatesh Pallipadi");
-MODULE_DESCRIPTION("ACPI Processor P-States Driver");
-MODULE_LICENSE("GPL");
-
-
-struct cpufreq_acpi_io {
- struct acpi_processor_performance acpi_data;
- struct cpufreq_frequency_table *freq_table;
- unsigned int resume;
-};
-
-static struct cpufreq_acpi_io *acpi_io_data[NR_CPUS];
-
-static struct cpufreq_driver acpi_cpufreq_driver;
-
-
-static int
-processor_set_pstate (
- u32 value)
-{
- s64 retval;
-
- dprintk("processor_set_pstate\n");
-
- retval = ia64_pal_set_pstate((u64)value);
-
- if (retval) {
- dprintk("Failed to set freq to 0x%x, with error 0x%x\n",
- value, retval);
- return -ENODEV;
- }
- return (int)retval;
-}
-
-
-static int
-processor_get_pstate (
- u32 *value)
-{
- u64 pstate_index = 0;
- s64 retval;
-
- dprintk("processor_get_pstate\n");
-
- retval = ia64_pal_get_pstate(&pstate_index,
- PAL_GET_PSTATE_TYPE_INSTANT);
- *value = (u32) pstate_index;
-
- if (retval)
- dprintk("Failed to get current freq with "
- "error 0x%x, idx 0x%x\n", retval, *value);
-
- return (int)retval;
-}
-
-
-/* To be used only after data->acpi_data is initialized */
-static unsigned
-extract_clock (
- struct cpufreq_acpi_io *data,
- unsigned value,
- unsigned int cpu)
-{
- unsigned long i;
-
- dprintk("extract_clock\n");
-
- for (i = 0; i < data->acpi_data.state_count; i++) {
- if (value == data->acpi_data.states[i].status)
- return data->acpi_data.states[i].core_frequency;
- }
- return data->acpi_data.states[i-1].core_frequency;
-}
-
-
-static unsigned int
-processor_get_freq (
- struct cpufreq_acpi_io *data,
- unsigned int cpu)
-{
- int ret = 0;
- u32 value = 0;
- cpumask_t saved_mask;
- unsigned long clock_freq;
-
- dprintk("processor_get_freq\n");
-
- saved_mask = current->cpus_allowed;
- set_cpus_allowed(current, cpumask_of_cpu(cpu));
- if (smp_processor_id() != cpu)
- goto migrate_end;
-
- /* processor_get_pstate gets the instantaneous frequency */
- ret = processor_get_pstate(&value);
-
- if (ret) {
- set_cpus_allowed(current, saved_mask);
- printk(KERN_WARNING "get performance failed with error %d\n",
- ret);
- ret = 0;
- goto migrate_end;
- }
- clock_freq = extract_clock(data, value, cpu);
- ret = (clock_freq*1000);
-
-migrate_end:
- set_cpus_allowed(current, saved_mask);
- return ret;
-}
-
-
-static int
-processor_set_freq (
- struct cpufreq_acpi_io *data,
- unsigned int cpu,
- int state)
-{
- int ret = 0;
- u32 value = 0;
- struct cpufreq_freqs cpufreq_freqs;
- cpumask_t saved_mask;
- int retval;
-
- dprintk("processor_set_freq\n");
-
- saved_mask = current->cpus_allowed;
- set_cpus_allowed(current, cpumask_of_cpu(cpu));
- if (smp_processor_id() != cpu) {
- retval = -EAGAIN;
- goto migrate_end;
- }
-
- if (state == data->acpi_data.state) {
- if (unlikely(data->resume)) {
- dprintk("Called after resume, resetting to P%d\n", state);
- data->resume = 0;
- } else {
- dprintk("Already at target state (P%d)\n", state);
- retval = 0;
- goto migrate_end;
- }
- }
-
- dprintk("Transitioning from P%d to P%d\n",
- data->acpi_data.state, state);
-
- /* cpufreq frequency struct */
- cpufreq_freqs.cpu = cpu;
- cpufreq_freqs.old = data->freq_table[data->acpi_data.state].frequency;
- cpufreq_freqs.new = data->freq_table[state].frequency;
-
- /* notify cpufreq */
- cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE);
-
- /*
- * First we write the target state's 'control' value to the
- * control_register.
- */
-
- value = (u32) data->acpi_data.states[state].control;
-
- dprintk("Transitioning to state: 0x%08x\n", value);
-
- ret = processor_set_pstate(value);
- if (ret) {
- unsigned int tmp = cpufreq_freqs.new;
- cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
- cpufreq_freqs.new = cpufreq_freqs.old;
- cpufreq_freqs.old = tmp;
- cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE);
- cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
- printk(KERN_WARNING "Transition failed with error %d\n", ret);
- retval = -ENODEV;
- goto migrate_end;
- }
-
- cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
-
- data->acpi_data.state = state;
-
- retval = 0;
-
-migrate_end:
- set_cpus_allowed(current, saved_mask);
- return (retval);
-}
-
-
-static unsigned int
-acpi_cpufreq_get (
- unsigned int cpu)
-{
- struct cpufreq_acpi_io *data = acpi_io_data[cpu];
-
- dprintk("acpi_cpufreq_get\n");
-
- return processor_get_freq(data, cpu);
-}
-
-
-static int
-acpi_cpufreq_target (
- struct cpufreq_policy *policy,
- unsigned int target_freq,
- unsigned int relation)
-{
- struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
- unsigned int next_state = 0;
- unsigned int result = 0;
-
- dprintk("acpi_cpufreq_setpolicy\n");
-
- result = cpufreq_frequency_table_target(policy,
- data->freq_table, target_freq, relation, &next_state);
- if (result)
- return (result);
-
- result = processor_set_freq(data, policy->cpu, next_state);
-
- return (result);
-}
-
-
-static int
-acpi_cpufreq_verify (
- struct cpufreq_policy *policy)
-{
- unsigned int result = 0;
- struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
-
- dprintk("acpi_cpufreq_verify\n");
-
- result = cpufreq_frequency_table_verify(policy,
- data->freq_table);
-
- return (result);
-}
-
-
-static int
-acpi_cpufreq_cpu_init (
- struct cpufreq_policy *policy)
-{
- unsigned int i;
- unsigned int cpu = policy->cpu;
- struct cpufreq_acpi_io *data;
- unsigned int result = 0;
-
- dprintk("acpi_cpufreq_cpu_init\n");
-
- data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL);
- if (!data)
- return (-ENOMEM);
-
- acpi_io_data[cpu] = data;
-
- result = acpi_processor_register_performance(&data->acpi_data, cpu);
-
- if (result)
- goto err_free;
-
- /* capability check */
- if (data->acpi_data.state_count <= 1) {
- dprintk("No P-States\n");
- result = -ENODEV;
- goto err_unreg;
- }
-
- if ((data->acpi_data.control_register.space_id !=
- ACPI_ADR_SPACE_FIXED_HARDWARE) ||
- (data->acpi_data.status_register.space_id !=
- ACPI_ADR_SPACE_FIXED_HARDWARE)) {
- dprintk("Unsupported address space [%d, %d]\n",
- (u32) (data->acpi_data.control_register.space_id),
- (u32) (data->acpi_data.status_register.space_id));
- result = -ENODEV;
- goto err_unreg;
- }
-
- /* alloc freq_table */
- data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) *
- (data->acpi_data.state_count + 1),
- GFP_KERNEL);
- if (!data->freq_table) {
- result = -ENOMEM;
- goto err_unreg;
- }
-
- /* detect transition latency */
- policy->cpuinfo.transition_latency = 0;
- for (i=0; i<data->acpi_data.state_count; i++) {
- if ((data->acpi_data.states[i].transition_latency * 1000) >
- policy->cpuinfo.transition_latency) {
- policy->cpuinfo.transition_latency =
- data->acpi_data.states[i].transition_latency * 1000;
- }
- }
- policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
-
- policy->cur = processor_get_freq(data, policy->cpu);
-
- /* table init */
- for (i = 0; i <= data->acpi_data.state_count; i++)
- {
- data->freq_table[i].index = i;
- if (i < data->acpi_data.state_count) {
- data->freq_table[i].frequency =
- data->acpi_data.states[i].core_frequency * 1000;
- } else {
- data->freq_table[i].frequency = CPUFREQ_TABLE_END;
- }
- }
-
- result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
- if (result) {
- goto err_freqfree;
- }
-
- /* notify BIOS that we exist */
- acpi_processor_notify_smm(THIS_MODULE);
-
- printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management "
- "activated.\n", cpu);
-
- for (i = 0; i < data->acpi_data.state_count; i++)
- dprintk(" %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n",
- (i == data->acpi_data.state?'*':' '), i,
- (u32) data->acpi_data.states[i].core_frequency,
- (u32) data->acpi_data.states[i].power,
- (u32) data->acpi_data.states[i].transition_latency,
- (u32) data->acpi_data.states[i].bus_master_latency,
- (u32) data->acpi_data.states[i].status,
- (u32) data->acpi_data.states[i].control);
-
- cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
-
- /* the first call to ->target() should result in us actually
- * writing something to the appropriate registers. */
- data->resume = 1;
-
- return (result);
-
- err_freqfree:
- kfree(data->freq_table);
- err_unreg:
- acpi_processor_unregister_performance(&data->acpi_data, cpu);
- err_free:
- kfree(data);
- acpi_io_data[cpu] = NULL;
-
- return (result);
-}
-
-
-static int
-acpi_cpufreq_cpu_exit (
- struct cpufreq_policy *policy)
-{
- struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
-
- dprintk("acpi_cpufreq_cpu_exit\n");
-
- if (data) {
- cpufreq_frequency_table_put_attr(policy->cpu);
- acpi_io_data[policy->cpu] = NULL;
- acpi_processor_unregister_performance(&data->acpi_data,
- policy->cpu);
- kfree(data);
- }
-
- return (0);
-}
-
-
-static struct freq_attr* acpi_cpufreq_attr[] = {
- &cpufreq_freq_attr_scaling_available_freqs,
- NULL,
-};
-
-
-static struct cpufreq_driver acpi_cpufreq_driver = {
- .verify = acpi_cpufreq_verify,
- .target = acpi_cpufreq_target,
- .get = acpi_cpufreq_get,
- .init = acpi_cpufreq_cpu_init,
- .exit = acpi_cpufreq_cpu_exit,
- .name = "acpi-cpufreq",
- .owner = THIS_MODULE,
- .attr = acpi_cpufreq_attr,
-};
-
-
-static int __init
-acpi_cpufreq_init (void)
-{
- dprintk("acpi_cpufreq_init\n");
-
- return cpufreq_register_driver(&acpi_cpufreq_driver);
-}
-
-
-static void __exit
-acpi_cpufreq_exit (void)
-{
- dprintk("acpi_cpufreq_exit\n");
-
- cpufreq_unregister_driver(&acpi_cpufreq_driver);
- return;
-}
-
-
-late_initcall(acpi_cpufreq_init);
-module_exit(acpi_cpufreq_exit);
-
diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
index 1d64ef478dd..2955f359e2a 100644
--- a/arch/ia64/kernel/crash.c
+++ b/arch/ia64/kernel/crash.c
@@ -23,7 +23,9 @@
int kdump_status[NR_CPUS];
static atomic_t kdump_cpu_frozen;
atomic_t kdump_in_progress;
+static int kdump_freeze_monarch;
static int kdump_on_init = 1;
+static int kdump_on_fatal_mca = 1;
static inline Elf64_Word
*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
@@ -107,10 +109,38 @@ machine_crash_shutdown(struct pt_regs *pt)
*/
kexec_disable_iosapic();
#ifdef CONFIG_SMP
+ /*
+ * If kdump_on_init is set and an INIT is asserted here, kdump will
+ * be started again via INIT monarch.
+ */
+ local_irq_disable();
+ ia64_set_psr_mc(); /* mask MCA/INIT */
+ if (atomic_inc_return(&kdump_in_progress) != 1)
+ unw_init_running(kdump_cpu_freeze, NULL);
+
+ /*
+ * Now this cpu is ready for kdump.
+ * Stop all others by IPI or INIT. They could receive INIT from
+ * outside and might be INIT monarch, but only thing they have to
+ * do is falling into kdump_cpu_freeze().
+ *
+ * If an INIT is asserted here:
+ * - All receivers might be slaves, since some of cpus could already
+ * be frozen and INIT might be masked on monarch. In this case,
+ * all slaves will be frozen soon since kdump_in_progress will let
+ * them into DIE_INIT_SLAVE_LEAVE.
+ * - One might be a monarch, but INIT rendezvous will fail since
+ * at least this cpu already have INIT masked so it never join
+ * to the rendezvous. In this case, all slaves and monarch will
+ * be frozen soon with no wait since the INIT rendezvous is skipped
+ * by kdump_in_progress.
+ */
kdump_smp_send_stop();
/* not all cpu response to IPI, send INIT to freeze them */
- if (kdump_wait_cpu_freeze() && kdump_on_init) {
+ if (kdump_wait_cpu_freeze()) {
kdump_smp_send_init();
+ /* wait again, don't go ahead if possible */
+ kdump_wait_cpu_freeze();
}
#endif
}
@@ -118,11 +148,7 @@ machine_crash_shutdown(struct pt_regs *pt)
static void
machine_kdump_on_init(void)
{
- if (!ia64_kimage) {
- printk(KERN_NOTICE "machine_kdump_on_init(): "
- "kdump not configured\n");
- return;
- }
+ crash_save_vmcoreinfo();
local_irq_disable();
kexec_disable_iosapic();
machine_kexec(ia64_kimage);
@@ -132,17 +158,17 @@ void
kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
{
int cpuid;
+
local_irq_disable();
cpuid = smp_processor_id();
crash_save_this_cpu();
current->thread.ksp = (__u64)info->sw - 16;
+
+ ia64_set_psr_mc(); /* mask MCA/INIT and stop reentrance */
+
atomic_inc(&kdump_cpu_frozen);
kdump_status[cpuid] = 1;
mb();
-#ifdef CONFIG_HOTPLUG_CPU
- if (cpuid != 0)
- ia64_jump_to_sal(&sal_boot_rendez_state[cpuid]);
-#endif
for (;;)
cpu_relax();
}
@@ -153,68 +179,89 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
struct ia64_mca_notify_die *nd;
struct die_args *args = data;
- if (!kdump_on_init)
+ if (atomic_read(&kdump_in_progress)) {
+ switch (val) {
+ case DIE_INIT_MONARCH_LEAVE:
+ if (!kdump_freeze_monarch)
+ break;
+ /* fall through */
+ case DIE_INIT_SLAVE_LEAVE:
+ case DIE_INIT_MONARCH_ENTER:
+ case DIE_MCA_RENDZVOUS_LEAVE:
+ unw_init_running(kdump_cpu_freeze, NULL);
+ break;
+ }
+ }
+
+ if (!kdump_on_init && !kdump_on_fatal_mca)
return NOTIFY_DONE;
+ if (!ia64_kimage) {
+ if (val == DIE_INIT_MONARCH_LEAVE)
+ ia64_mca_printk(KERN_NOTICE
+ "%s: kdump not configured\n",
+ __func__);
+ return NOTIFY_DONE;
+ }
+
if (val != DIE_INIT_MONARCH_LEAVE &&
- val != DIE_INIT_SLAVE_LEAVE &&
val != DIE_INIT_MONARCH_PROCESS &&
- val != DIE_MCA_RENDZVOUS_LEAVE &&
val != DIE_MCA_MONARCH_LEAVE)
return NOTIFY_DONE;
nd = (struct ia64_mca_notify_die *)args->err;
- /* Reason code 1 means machine check rendezvous*/
- if ((val == DIE_INIT_MONARCH_LEAVE || val == DIE_INIT_SLAVE_LEAVE
- || val == DIE_INIT_MONARCH_PROCESS) && nd->sos->rv_rc == 1)
- return NOTIFY_DONE;
switch (val) {
- case DIE_INIT_MONARCH_PROCESS:
- atomic_set(&kdump_in_progress, 1);
- *(nd->monarch_cpu) = -1;
- break;
- case DIE_INIT_MONARCH_LEAVE:
+ case DIE_INIT_MONARCH_PROCESS:
+ /* Reason code 1 means machine check rendezvous*/
+ if (kdump_on_init && (nd->sos->rv_rc != 1)) {
+ if (atomic_inc_return(&kdump_in_progress) != 1)
+ kdump_freeze_monarch = 1;
+ }
+ break;
+ case DIE_INIT_MONARCH_LEAVE:
+ /* Reason code 1 means machine check rendezvous*/
+ if (kdump_on_init && (nd->sos->rv_rc != 1))
machine_kdump_on_init();
- break;
- case DIE_INIT_SLAVE_LEAVE:
- if (atomic_read(&kdump_in_progress))
- unw_init_running(kdump_cpu_freeze, NULL);
- break;
- case DIE_MCA_RENDZVOUS_LEAVE:
- if (atomic_read(&kdump_in_progress))
- unw_init_running(kdump_cpu_freeze, NULL);
- break;
- case DIE_MCA_MONARCH_LEAVE:
- /* die_register->signr indicate if MCA is recoverable */
- if (!args->signr)
+ break;
+ case DIE_MCA_MONARCH_LEAVE:
+ /* *(nd->data) indicate if MCA is recoverable */
+ if (kdump_on_fatal_mca && !(*(nd->data))) {
+ if (atomic_inc_return(&kdump_in_progress) == 1)
machine_kdump_on_init();
- break;
+ /* We got fatal MCA while kdump!? No way!! */
+ }
+ break;
}
return NOTIFY_DONE;
}
#ifdef CONFIG_SYSCTL
-static ctl_table kdump_on_init_table[] = {
+static struct ctl_table kdump_ctl_table[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "kdump_on_init",
.data = &kdump_on_init,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "kdump_on_fatal_mca",
+ .data = &kdump_on_fatal_mca,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
- { .ctl_name = 0 }
+ { }
};
-static ctl_table sys_table[] = {
+static struct ctl_table sys_table[] = {
{
- .ctl_name = CTL_KERN,
.procname = "kernel",
.mode = 0555,
- .child = kdump_on_init_table,
+ .child = kdump_ctl_table,
},
- { .ctl_name = 0 }
+ { }
};
#endif
diff --git a/arch/ia64/kernel/crash_dump.c b/arch/ia64/kernel/crash_dump.c
index da60e90eeeb..c8c9298666f 100644
--- a/arch/ia64/kernel/crash_dump.c
+++ b/arch/ia64/kernel/crash_dump.c
@@ -8,6 +8,7 @@
#include <linux/errno.h>
#include <linux/types.h>
+#include <linux/crash_dump.h>
#include <asm/page.h>
#include <asm/uaccess.h>
diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c
index 790ef0d87e1..4826ff957a3 100644
--- a/arch/ia64/kernel/cyclone.c
+++ b/arch/ia64/kernel/cyclone.c
@@ -21,7 +21,7 @@ void __init cyclone_setup(void)
static void __iomem *cyclone_mc;
-static cycle_t read_cyclone(void)
+static cycle_t read_cyclone(struct clocksource *cs)
{
return (cycle_t)readq((void __iomem *)cyclone_mc);
}
@@ -31,8 +31,6 @@ static struct clocksource clocksource_cyclone = {
.rating = 300,
.read = read_cyclone,
.mask = (1LL << 40) - 1,
- .mult = 0, /*to be caluclated*/
- .shift = 16,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
@@ -59,13 +57,13 @@ int __init init_cyclone_clock(void)
return -ENODEV;
}
base = readq(reg);
+ iounmap(reg);
if(!base){
printk(KERN_ERR "Summit chipset: Could not find valid CBAR"
" value.\n");
use_cyclone = 0;
return -ENODEV;
}
- iounmap(reg);
/* setup PMCC */
offset = (base + CYCLONE_PMCC_OFFSET);
@@ -117,10 +115,8 @@ int __init init_cyclone_clock(void)
}
/* initialize last tick */
cyclone_mc = cyclone_timer;
- clocksource_cyclone.fsys_mmio = cyclone_timer;
- clocksource_cyclone.mult = clocksource_hz2mult(CYCLONE_TIMER_FREQ,
- clocksource_cyclone.shift);
- clocksource_register(&clocksource_cyclone);
+ clocksource_cyclone.archdata.fsys_mmio = cyclone_timer;
+ clocksource_register_hz(&clocksource_cyclone, CYCLONE_TIMER_FREQ);
return 0;
}
diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c
new file mode 100644
index 00000000000..7f791623820
--- /dev/null
+++ b/arch/ia64/kernel/dma-mapping.c
@@ -0,0 +1,24 @@
+#include <linux/dma-mapping.h>
+#include <linux/export.h>
+
+/* Set this to 1 if there is a HW IOMMU in the system */
+int iommu_detected __read_mostly;
+
+struct dma_map_ops *dma_ops;
+EXPORT_SYMBOL(dma_ops);
+
+#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
+
+static int __init dma_init(void)
+{
+ dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+
+ return 0;
+}
+fs_initcall(dma_init);
+
+struct dma_map_ops *dma_get_ops(struct device *dev)
+{
+ return dma_ops;
+}
+EXPORT_SYMBOL(dma_get_ops);
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 73ca86d0381..741b99c1a0b 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -1,7 +1,8 @@
/*
* Extensible Firmware Interface
*
- * Based on Extensible Firmware Interface Specification version 0.9 April 30, 1999
+ * Based on Extensible Firmware Interface Specification version 0.9
+ * April 30, 1999
*
* Copyright (C) 1999 VA Linux Systems
* Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
@@ -22,9 +23,11 @@
*/
#include <linux/module.h>
#include <linux/bootmem.h>
+#include <linux/crash_dump.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/types.h>
+#include <linux/slab.h>
#include <linux/time.h>
#include <linux/efi.h>
#include <linux/kexec.h>
@@ -36,157 +39,176 @@
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/mca.h>
+#include <asm/setup.h>
+#include <asm/tlbflush.h>
#define EFI_DEBUG 0
+static __initdata unsigned long palo_phys;
+
+static __initdata efi_config_table_type_t arch_tables[] = {
+ {PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID, "PALO", &palo_phys},
+ {NULL_GUID, NULL, 0},
+};
+
extern efi_status_t efi_call_phys (void *, ...);
-struct efi efi;
-EXPORT_SYMBOL(efi);
static efi_runtime_services_t *runtime;
-static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
+static u64 mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
#define efi_call_virt(f, args...) (*(f))(args)
-#define STUB_GET_TIME(prefix, adjust_arg) \
-static efi_status_t \
-prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc) \
-{ \
- struct ia64_fpreg fr[6]; \
- efi_time_cap_t *atc = NULL; \
- efi_status_t ret; \
- \
- if (tc) \
- atc = adjust_arg(tc); \
- ia64_save_scratch_fpregs(fr); \
- ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), adjust_arg(tm), atc); \
- ia64_load_scratch_fpregs(fr); \
- return ret; \
+#define STUB_GET_TIME(prefix, adjust_arg) \
+static efi_status_t \
+prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ efi_time_cap_t *atc = NULL; \
+ efi_status_t ret; \
+ \
+ if (tc) \
+ atc = adjust_arg(tc); \
+ ia64_save_scratch_fpregs(fr); \
+ ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), \
+ adjust_arg(tm), atc); \
+ ia64_load_scratch_fpregs(fr); \
+ return ret; \
}
-#define STUB_SET_TIME(prefix, adjust_arg) \
-static efi_status_t \
-prefix##_set_time (efi_time_t *tm) \
-{ \
- struct ia64_fpreg fr[6]; \
- efi_status_t ret; \
- \
- ia64_save_scratch_fpregs(fr); \
- ret = efi_call_##prefix((efi_set_time_t *) __va(runtime->set_time), adjust_arg(tm)); \
- ia64_load_scratch_fpregs(fr); \
- return ret; \
+#define STUB_SET_TIME(prefix, adjust_arg) \
+static efi_status_t \
+prefix##_set_time (efi_time_t *tm) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ efi_status_t ret; \
+ \
+ ia64_save_scratch_fpregs(fr); \
+ ret = efi_call_##prefix((efi_set_time_t *) __va(runtime->set_time), \
+ adjust_arg(tm)); \
+ ia64_load_scratch_fpregs(fr); \
+ return ret; \
}
-#define STUB_GET_WAKEUP_TIME(prefix, adjust_arg) \
-static efi_status_t \
-prefix##_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, efi_time_t *tm) \
-{ \
- struct ia64_fpreg fr[6]; \
- efi_status_t ret; \
- \
- ia64_save_scratch_fpregs(fr); \
- ret = efi_call_##prefix((efi_get_wakeup_time_t *) __va(runtime->get_wakeup_time), \
- adjust_arg(enabled), adjust_arg(pending), adjust_arg(tm)); \
- ia64_load_scratch_fpregs(fr); \
- return ret; \
+#define STUB_GET_WAKEUP_TIME(prefix, adjust_arg) \
+static efi_status_t \
+prefix##_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, \
+ efi_time_t *tm) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ efi_status_t ret; \
+ \
+ ia64_save_scratch_fpregs(fr); \
+ ret = efi_call_##prefix( \
+ (efi_get_wakeup_time_t *) __va(runtime->get_wakeup_time), \
+ adjust_arg(enabled), adjust_arg(pending), adjust_arg(tm)); \
+ ia64_load_scratch_fpregs(fr); \
+ return ret; \
}
-#define STUB_SET_WAKEUP_TIME(prefix, adjust_arg) \
-static efi_status_t \
-prefix##_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm) \
-{ \
- struct ia64_fpreg fr[6]; \
- efi_time_t *atm = NULL; \
- efi_status_t ret; \
- \
- if (tm) \
- atm = adjust_arg(tm); \
- ia64_save_scratch_fpregs(fr); \
- ret = efi_call_##prefix((efi_set_wakeup_time_t *) __va(runtime->set_wakeup_time), \
- enabled, atm); \
- ia64_load_scratch_fpregs(fr); \
- return ret; \
+#define STUB_SET_WAKEUP_TIME(prefix, adjust_arg) \
+static efi_status_t \
+prefix##_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ efi_time_t *atm = NULL; \
+ efi_status_t ret; \
+ \
+ if (tm) \
+ atm = adjust_arg(tm); \
+ ia64_save_scratch_fpregs(fr); \
+ ret = efi_call_##prefix( \
+ (efi_set_wakeup_time_t *) __va(runtime->set_wakeup_time), \
+ enabled, atm); \
+ ia64_load_scratch_fpregs(fr); \
+ return ret; \
}
-#define STUB_GET_VARIABLE(prefix, adjust_arg) \
-static efi_status_t \
-prefix##_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr, \
- unsigned long *data_size, void *data) \
-{ \
- struct ia64_fpreg fr[6]; \
- u32 *aattr = NULL; \
- efi_status_t ret; \
- \
- if (attr) \
- aattr = adjust_arg(attr); \
- ia64_save_scratch_fpregs(fr); \
- ret = efi_call_##prefix((efi_get_variable_t *) __va(runtime->get_variable), \
- adjust_arg(name), adjust_arg(vendor), aattr, \
- adjust_arg(data_size), adjust_arg(data)); \
- ia64_load_scratch_fpregs(fr); \
- return ret; \
+#define STUB_GET_VARIABLE(prefix, adjust_arg) \
+static efi_status_t \
+prefix##_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr, \
+ unsigned long *data_size, void *data) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ u32 *aattr = NULL; \
+ efi_status_t ret; \
+ \
+ if (attr) \
+ aattr = adjust_arg(attr); \
+ ia64_save_scratch_fpregs(fr); \
+ ret = efi_call_##prefix( \
+ (efi_get_variable_t *) __va(runtime->get_variable), \
+ adjust_arg(name), adjust_arg(vendor), aattr, \
+ adjust_arg(data_size), adjust_arg(data)); \
+ ia64_load_scratch_fpregs(fr); \
+ return ret; \
}
-#define STUB_GET_NEXT_VARIABLE(prefix, adjust_arg) \
-static efi_status_t \
-prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) \
-{ \
- struct ia64_fpreg fr[6]; \
- efi_status_t ret; \
- \
- ia64_save_scratch_fpregs(fr); \
- ret = efi_call_##prefix((efi_get_next_variable_t *) __va(runtime->get_next_variable), \
- adjust_arg(name_size), adjust_arg(name), adjust_arg(vendor)); \
- ia64_load_scratch_fpregs(fr); \
- return ret; \
+#define STUB_GET_NEXT_VARIABLE(prefix, adjust_arg) \
+static efi_status_t \
+prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, \
+ efi_guid_t *vendor) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ efi_status_t ret; \
+ \
+ ia64_save_scratch_fpregs(fr); \
+ ret = efi_call_##prefix( \
+ (efi_get_next_variable_t *) __va(runtime->get_next_variable), \
+ adjust_arg(name_size), adjust_arg(name), adjust_arg(vendor)); \
+ ia64_load_scratch_fpregs(fr); \
+ return ret; \
}
-#define STUB_SET_VARIABLE(prefix, adjust_arg) \
-static efi_status_t \
-prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor, unsigned long attr, \
- unsigned long data_size, void *data) \
-{ \
- struct ia64_fpreg fr[6]; \
- efi_status_t ret; \
- \
- ia64_save_scratch_fpregs(fr); \
- ret = efi_call_##prefix((efi_set_variable_t *) __va(runtime->set_variable), \
- adjust_arg(name), adjust_arg(vendor), attr, data_size, \
- adjust_arg(data)); \
- ia64_load_scratch_fpregs(fr); \
- return ret; \
+#define STUB_SET_VARIABLE(prefix, adjust_arg) \
+static efi_status_t \
+prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor, \
+ u32 attr, unsigned long data_size, \
+ void *data) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ efi_status_t ret; \
+ \
+ ia64_save_scratch_fpregs(fr); \
+ ret = efi_call_##prefix( \
+ (efi_set_variable_t *) __va(runtime->set_variable), \
+ adjust_arg(name), adjust_arg(vendor), attr, data_size, \
+ adjust_arg(data)); \
+ ia64_load_scratch_fpregs(fr); \
+ return ret; \
}
-#define STUB_GET_NEXT_HIGH_MONO_COUNT(prefix, adjust_arg) \
-static efi_status_t \
-prefix##_get_next_high_mono_count (u32 *count) \
-{ \
- struct ia64_fpreg fr[6]; \
- efi_status_t ret; \
- \
- ia64_save_scratch_fpregs(fr); \
- ret = efi_call_##prefix((efi_get_next_high_mono_count_t *) \
- __va(runtime->get_next_high_mono_count), adjust_arg(count)); \
- ia64_load_scratch_fpregs(fr); \
- return ret; \
+#define STUB_GET_NEXT_HIGH_MONO_COUNT(prefix, adjust_arg) \
+static efi_status_t \
+prefix##_get_next_high_mono_count (u32 *count) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ efi_status_t ret; \
+ \
+ ia64_save_scratch_fpregs(fr); \
+ ret = efi_call_##prefix((efi_get_next_high_mono_count_t *) \
+ __va(runtime->get_next_high_mono_count), \
+ adjust_arg(count)); \
+ ia64_load_scratch_fpregs(fr); \
+ return ret; \
}
-#define STUB_RESET_SYSTEM(prefix, adjust_arg) \
-static void \
-prefix##_reset_system (int reset_type, efi_status_t status, \
- unsigned long data_size, efi_char16_t *data) \
-{ \
- struct ia64_fpreg fr[6]; \
- efi_char16_t *adata = NULL; \
- \
- if (data) \
- adata = adjust_arg(data); \
- \
- ia64_save_scratch_fpregs(fr); \
- efi_call_##prefix((efi_reset_system_t *) __va(runtime->reset_system), \
- reset_type, status, data_size, adata); \
- /* should not return, but just in case... */ \
- ia64_load_scratch_fpregs(fr); \
+#define STUB_RESET_SYSTEM(prefix, adjust_arg) \
+static void \
+prefix##_reset_system (int reset_type, efi_status_t status, \
+ unsigned long data_size, efi_char16_t *data) \
+{ \
+ struct ia64_fpreg fr[6]; \
+ efi_char16_t *adata = NULL; \
+ \
+ if (data) \
+ adata = adjust_arg(data); \
+ \
+ ia64_save_scratch_fpregs(fr); \
+ efi_call_##prefix( \
+ (efi_reset_system_t *) __va(runtime->reset_system), \
+ reset_type, status, data_size, adata); \
+ /* should not return, but just in case... */ \
+ ia64_load_scratch_fpregs(fr); \
}
#define phys_ptr(arg) ((__typeof__(arg)) ia64_tpa(arg))
@@ -218,11 +240,13 @@ efi_gettimeofday (struct timespec *ts)
{
efi_time_t tm;
- memset(ts, 0, sizeof(ts));
- if ((*efi.get_time)(&tm, NULL) != EFI_SUCCESS)
+ if ((*efi.get_time)(&tm, NULL) != EFI_SUCCESS) {
+ memset(ts, 0, sizeof(*ts));
return;
+ }
- ts->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second);
+ ts->tv_sec = mktime(tm.year, tm.month, tm.day,
+ tm.hour, tm.minute, tm.second);
ts->tv_nsec = tm.nanosecond;
}
@@ -296,8 +320,8 @@ walk (efi_freemem_callback_t callback, void *arg, u64 attr)
}
/*
- * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that
- * has memory that is available for OS use.
+ * Walk the EFI memory map and call CALLBACK once for each EFI memory
+ * descriptor that has memory that is available for OS use.
*/
void
efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
@@ -306,8 +330,8 @@ efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
}
/*
- * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that
- * has memory that is available for uncached allocator.
+ * Walk the EFI memory map and call CALLBACK once for each EFI memory
+ * descriptor that has memory that is available for uncached allocator.
*/
void
efi_memmap_walk_uc (efi_freemem_callback_t callback, void *arg)
@@ -316,11 +340,10 @@ efi_memmap_walk_uc (efi_freemem_callback_t callback, void *arg)
}
/*
- * Look for the PAL_CODE region reported by EFI and maps it using an
+ * Look for the PAL_CODE region reported by EFI and map it using an
* ITR to enable safe PAL calls in virtual mode. See IA-64 Processor
* Abstraction Layer chapter 11 in ADAG
*/
-
void *
efi_get_pal_addr (void)
{
@@ -340,53 +363,90 @@ efi_get_pal_addr (void)
continue;
if (++pal_code_count > 1) {
- printk(KERN_ERR "Too many EFI Pal Code memory ranges, dropped @ %lx\n",
- md->phys_addr);
+ printk(KERN_ERR "Too many EFI Pal Code memory ranges, "
+ "dropped @ %llx\n", md->phys_addr);
continue;
}
/*
- * The only ITLB entry in region 7 that is used is the one installed by
- * __start(). That entry covers a 64MB range.
+ * The only ITLB entry in region 7 that is used is the one
+ * installed by __start(). That entry covers a 64MB range.
*/
mask = ~((1 << KERNEL_TR_PAGE_SHIFT) - 1);
vaddr = PAGE_OFFSET + md->phys_addr;
/*
- * We must check that the PAL mapping won't overlap with the kernel
- * mapping.
+ * We must check that the PAL mapping won't overlap with the
+ * kernel mapping.
*
- * PAL code is guaranteed to be aligned on a power of 2 between 4k and
- * 256KB and that only one ITR is needed to map it. This implies that the
- * PAL code is always aligned on its size, i.e., the closest matching page
- * size supported by the TLB. Therefore PAL code is guaranteed never to
- * cross a 64MB unless it is bigger than 64MB (very unlikely!). So for
- * now the following test is enough to determine whether or not we need a
- * dedicated ITR for the PAL code.
+ * PAL code is guaranteed to be aligned on a power of 2 between
+ * 4k and 256KB and that only one ITR is needed to map it. This
+ * implies that the PAL code is always aligned on its size,
+ * i.e., the closest matching page size supported by the TLB.
+ * Therefore PAL code is guaranteed never to cross a 64MB boundary unless
+ * it is bigger than 64MB (very unlikely!). So for now the
+ * following test is enough to determine whether or not we need
+ * a dedicated ITR for the PAL code.
*/
if ((vaddr & mask) == (KERNEL_START & mask)) {
printk(KERN_INFO "%s: no need to install ITR for PAL code\n",
- __FUNCTION__);
+ __func__);
continue;
}
- if (md->num_pages << EFI_PAGE_SHIFT > IA64_GRANULE_SIZE)
- panic("Woah! PAL code size bigger than a granule!");
+ if (efi_md_size(md) > IA64_GRANULE_SIZE)
+ panic("Whoa! PAL code size bigger than a granule!");
#if EFI_DEBUG
mask = ~((1 << IA64_GRANULE_SHIFT) - 1);
- printk(KERN_INFO "CPU %d: mapping PAL code [0x%lx-0x%lx) into [0x%lx-0x%lx)\n",
- smp_processor_id(), md->phys_addr,
- md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
- vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE);
+ printk(KERN_INFO "CPU %d: mapping PAL code "
+ "[0x%lx-0x%lx) into [0x%lx-0x%lx)\n",
+ smp_processor_id(), md->phys_addr,
+ md->phys_addr + efi_md_size(md),
+ vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE);
#endif
return __va(md->phys_addr);
}
printk(KERN_WARNING "%s: no PAL-code memory-descriptor found\n",
- __FUNCTION__);
+ __func__);
return NULL;
}
+
+/*
+ * Add up the LENGTH bytes starting at BUFFER and return the sum
+ * truncated to 8 bits.
+ */
+static u8 __init palo_checksum(u8 *buffer, u32 length)
+{
+	u8 total = 0;
+	u32 idx;
+
+	for (idx = 0; idx < length; idx++)
+		total = (u8) (total + buffer[idx]);
+
+	return total;
+}
+
+/*
+ * Parse and handle PALO table which is published at:
+ * http://www.dig64.org/home/DIG64_PALO_R1_0.pdf
+ */
+static void __init handle_palo(unsigned long phys_addr)
+{
+	struct palo_table *palo = __va(phys_addr);
+
+	/* Reject a table that does not start with the PALO signature. */
+	if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1) != 0) {
+		printk(KERN_INFO "PALO signature incorrect.\n");
+		return;
+	}
+
+	/* The palo->length bytes of the table must sum to zero. */
+	if (palo_checksum((u8 *)palo, palo->length) != 0) {
+		printk(KERN_INFO "PALO checksum incorrect.\n");
+		return;
+	}
+
+	/* Table accepted: hand its max_tlb_purges value to setup_ptcg_sem(). */
+	setup_ptcg_sem(palo->max_tlb_purges, NPTCG_FROM_PALO);
+}
+
void
efi_map_pal_code (void)
{
@@ -400,24 +460,30 @@ efi_map_pal_code (void)
* Cannot write to CRx with PSR.ic=1
*/
psr = ia64_clear_ic();
- ia64_itr(0x1, IA64_TR_PALCODE, GRANULEROUNDDOWN((unsigned long) pal_vaddr),
+ ia64_itr(0x1, IA64_TR_PALCODE,
+ GRANULEROUNDDOWN((unsigned long) pal_vaddr),
pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)),
IA64_GRANULE_SHIFT);
+ paravirt_dv_serialize_data();
ia64_set_psr(psr); /* restore psr */
- ia64_srlz_i();
}
void __init
efi_init (void)
{
void *efi_map_start, *efi_map_end;
- efi_config_table_t *config_tables;
efi_char16_t *c16;
u64 efi_desc_size;
char *cp, vendor[100] = "unknown";
int i;
- /* it's too early to be able to use the standard kernel command line support... */
+ set_bit(EFI_BOOT, &efi.flags);
+ set_bit(EFI_64BIT, &efi.flags);
+
+ /*
+ * It's too early to be able to use the standard kernel command line
+ * support...
+ */
for (cp = boot_command_line; *cp; ) {
if (memcmp(cp, "mem=", 4) == 0) {
mem_limit = memparse(cp + 4, &cp);
@@ -433,9 +499,11 @@ efi_init (void)
}
}
if (min_addr != 0UL)
- printk(KERN_INFO "Ignoring memory below %luMB\n", min_addr >> 20);
+ printk(KERN_INFO "Ignoring memory below %lluMB\n",
+ min_addr >> 20);
if (max_addr != ~0UL)
- printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20);
+ printk(KERN_INFO "Ignoring memory above %lluMB\n",
+ max_addr >> 20);
efi.systab = __va(ia64_boot_param->efi_systab);
@@ -443,17 +511,15 @@ efi_init (void)
* Verify the EFI Table
*/
if (efi.systab == NULL)
- panic("Woah! Can't find EFI system table.\n");
+ panic("Whoa! Can't find EFI system table.\n");
if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
- panic("Woah! EFI system table signature incorrect\n");
+ panic("Whoa! EFI system table signature incorrect\n");
if ((efi.systab->hdr.revision >> 16) == 0)
printk(KERN_WARNING "Warning: EFI system table version "
"%d.%02d, expected 1.00 or greater\n",
efi.systab->hdr.revision >> 16,
efi.systab->hdr.revision & 0xffff);
- config_tables = __va(efi.systab->tables);
-
/* Show what we know for posterity */
c16 = __va(efi.systab->fw_vendor);
if (c16) {
@@ -463,39 +529,18 @@ efi_init (void)
}
printk(KERN_INFO "EFI v%u.%.02u by %s:",
- efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor);
-
- efi.mps = EFI_INVALID_TABLE_ADDR;
- efi.acpi = EFI_INVALID_TABLE_ADDR;
- efi.acpi20 = EFI_INVALID_TABLE_ADDR;
- efi.smbios = EFI_INVALID_TABLE_ADDR;
- efi.sal_systab = EFI_INVALID_TABLE_ADDR;
- efi.boot_info = EFI_INVALID_TABLE_ADDR;
- efi.hcdp = EFI_INVALID_TABLE_ADDR;
- efi.uga = EFI_INVALID_TABLE_ADDR;
-
- for (i = 0; i < (int) efi.systab->nr_tables; i++) {
- if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
- efi.mps = config_tables[i].table;
- printk(" MPS=0x%lx", config_tables[i].table);
- } else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
- efi.acpi20 = config_tables[i].table;
- printk(" ACPI 2.0=0x%lx", config_tables[i].table);
- } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
- efi.acpi = config_tables[i].table;
- printk(" ACPI=0x%lx", config_tables[i].table);
- } else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
- efi.smbios = config_tables[i].table;
- printk(" SMBIOS=0x%lx", config_tables[i].table);
- } else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) {
- efi.sal_systab = config_tables[i].table;
- printk(" SALsystab=0x%lx", config_tables[i].table);
- } else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
- efi.hcdp = config_tables[i].table;
- printk(" HCDP=0x%lx", config_tables[i].table);
- }
- }
- printk("\n");
+ efi.systab->hdr.revision >> 16,
+ efi.systab->hdr.revision & 0xffff, vendor);
+
+ set_bit(EFI_SYSTEM_TABLES, &efi.flags);
+
+ palo_phys = EFI_INVALID_TABLE_ADDR;
+
+ if (efi_config_init(arch_tables) != 0)
+ return;
+
+ if (palo_phys != EFI_INVALID_TABLE_ADDR)
+ handle_palo(palo_phys);
runtime = __va(efi.systab->runtime);
efi.get_time = phys_get_time;
@@ -518,12 +563,33 @@ efi_init (void)
efi_memory_desc_t *md;
void *p;
- for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) {
+ for (i = 0, p = efi_map_start; p < efi_map_end;
+ ++i, p += efi_desc_size)
+ {
+ const char *unit;
+ unsigned long size;
+
md = p;
- printk("mem%02u: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n",
+ size = md->num_pages << EFI_PAGE_SHIFT;
+
+ if ((size >> 40) > 0) {
+ size >>= 40;
+ unit = "TB";
+ } else if ((size >> 30) > 0) {
+ size >>= 30;
+ unit = "GB";
+ } else if ((size >> 20) > 0) {
+ size >>= 20;
+ unit = "MB";
+ } else {
+ size >>= 10;
+ unit = "KB";
+ }
+
+ printk("mem%02d: type=%2u, attr=0x%016lx, "
+ "range=[0x%016lx-0x%016lx) (%4lu%s)\n",
i, md->type, md->attribute, md->phys_addr,
- md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
- md->num_pages >> (20 - EFI_PAGE_SHIFT));
+ md->phys_addr + efi_md_size(md), size, unit);
}
}
#endif
@@ -548,8 +614,8 @@ efi_enter_virtual_mode (void)
md = p;
if (md->attribute & EFI_MEMORY_RUNTIME) {
/*
- * Some descriptors have multiple bits set, so the order of
- * the tests is relevant.
+ * Some descriptors have multiple bits set, so the
+ * order of the tests is relevant.
*/
if (md->attribute & EFI_MEMORY_WB) {
md->virt_addr = (u64) __va(md->phys_addr);
@@ -557,21 +623,26 @@ efi_enter_virtual_mode (void)
md->virt_addr = (u64) ioremap(md->phys_addr, 0);
} else if (md->attribute & EFI_MEMORY_WC) {
#if 0
- md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P
- | _PAGE_D
- | _PAGE_MA_WC
- | _PAGE_PL_0
- | _PAGE_AR_RW));
+ md->virt_addr = ia64_remap(md->phys_addr,
+ (_PAGE_A |
+ _PAGE_P |
+ _PAGE_D |
+ _PAGE_MA_WC |
+ _PAGE_PL_0 |
+ _PAGE_AR_RW));
#else
printk(KERN_INFO "EFI_MEMORY_WC mapping\n");
md->virt_addr = (u64) ioremap(md->phys_addr, 0);
#endif
} else if (md->attribute & EFI_MEMORY_WT) {
#if 0
- md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P
- | _PAGE_D | _PAGE_MA_WT
- | _PAGE_PL_0
- | _PAGE_AR_RW));
+ md->virt_addr = ia64_remap(md->phys_addr,
+ (_PAGE_A |
+ _PAGE_P |
+ _PAGE_D |
+ _PAGE_MA_WT |
+ _PAGE_PL_0 |
+ _PAGE_AR_RW));
#else
printk(KERN_INFO "EFI_MEMORY_WT mapping\n");
md->virt_addr = (u64) ioremap(md->phys_addr, 0);
@@ -582,16 +653,20 @@ efi_enter_virtual_mode (void)
status = efi_call_phys(__va(runtime->set_virtual_address_map),
ia64_boot_param->efi_memmap_size,
- efi_desc_size, ia64_boot_param->efi_memdesc_version,
+ efi_desc_size,
+ ia64_boot_param->efi_memdesc_version,
ia64_boot_param->efi_memmap);
if (status != EFI_SUCCESS) {
- printk(KERN_WARNING "warning: unable to switch EFI into virtual mode "
- "(status=%lu)\n", status);
+ printk(KERN_WARNING "warning: unable to switch EFI into "
+ "virtual mode (status=%lu)\n", status);
return;
}
+ set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+
/*
- * Now that EFI is in virtual mode, we call the EFI functions more efficiently:
+ * Now that EFI is in virtual mode, we call the EFI functions more
+ * efficiently:
*/
efi.get_time = virt_get_time;
efi.set_time = virt_set_time;
@@ -605,8 +680,8 @@ efi_enter_virtual_mode (void)
}
/*
- * Walk the EFI memory map looking for the I/O port range. There can only be one entry of
- * this type, other I/O port ranges should be described via ACPI.
+ * Walk the EFI memory map looking for the I/O port range. There can only be
+ * one entry of this type, other I/O port ranges should be described via ACPI.
*/
u64
efi_get_iobase (void)
@@ -655,7 +730,7 @@ efi_memory_descriptor (unsigned long phys_addr)
for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
md = p;
- if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
+ if (phys_addr - md->phys_addr < efi_md_size(md))
return md;
}
return NULL;
@@ -677,7 +752,6 @@ efi_memmap_intersects (unsigned long phys_addr, unsigned long size)
for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
md = p;
-
if (md->phys_addr < end && efi_md_end(md) > phys_addr)
return 1;
}
@@ -730,7 +804,7 @@ efi_mem_attribute (unsigned long phys_addr, unsigned long size)
if (!md || (md->attribute & ~EFI_MEMORY_RUNTIME) != attr)
return 0;
} while (md);
- return 0;
+ return 0; /* never reached */
}
u64
@@ -766,12 +840,12 @@ kern_mem_attribute (unsigned long phys_addr, unsigned long size)
if (!md || md->attribute != attr)
return 0;
} while (md);
- return 0;
+ return 0; /* never reached */
}
EXPORT_SYMBOL(kern_mem_attribute);
int
-valid_phys_addr_range (unsigned long phys_addr, unsigned long size)
+valid_phys_addr_range (phys_addr_t phys_addr, unsigned long size)
{
u64 attr;
@@ -882,7 +956,7 @@ efi_uart_console_only(void)
return 1;
uart = 0;
}
- hdr = (struct efi_generic_dev_path *) ((u8 *) hdr + hdr->length);
+ hdr = (struct efi_generic_dev_path *)((u8 *) hdr + hdr->length);
}
printk(KERN_ERR "Malformed %s value\n", name);
return 0;
@@ -920,10 +994,12 @@ find_memmap_space (void)
if (!efi_wb(md)) {
continue;
}
- if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) {
+ if (pmd == NULL || !efi_wb(pmd) ||
+ efi_md_end(pmd) != md->phys_addr) {
contig_low = GRANULEROUNDUP(md->phys_addr);
contig_high = efi_md_end(md);
- for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) {
+ for (q = p + efi_desc_size; q < efi_map_end;
+ q += efi_desc_size) {
check_md = q;
if (!efi_wb(check_md))
break;
@@ -967,8 +1043,8 @@ find_memmap_space (void)
* to use. We can allocate partial granules only if the unavailable
* parts exist, and are WB.
*/
-void
-efi_memmap_init(unsigned long *s, unsigned long *e)
+unsigned long
+efi_memmap_init(u64 *s, u64 *e)
{
struct kern_memdesc *k, *prev = NULL;
u64 contig_low=0, contig_high=0;
@@ -987,8 +1063,9 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) {
md = p;
if (!efi_wb(md)) {
- if (efi_uc(md) && (md->type == EFI_CONVENTIONAL_MEMORY ||
- md->type == EFI_BOOT_SERVICES_DATA)) {
+ if (efi_uc(md) &&
+ (md->type == EFI_CONVENTIONAL_MEMORY ||
+ md->type == EFI_BOOT_SERVICES_DATA)) {
k->attribute = EFI_MEMORY_UC;
k->start = md->phys_addr;
k->num_pages = md->num_pages;
@@ -996,10 +1073,12 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
}
continue;
}
- if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) {
+ if (pmd == NULL || !efi_wb(pmd) ||
+ efi_md_end(pmd) != md->phys_addr) {
contig_low = GRANULEROUNDUP(md->phys_addr);
contig_high = efi_md_end(md);
- for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) {
+ for (q = p + efi_desc_size; q < efi_map_end;
+ q += efi_desc_size) {
check_md = q;
if (!efi_wb(check_md))
break;
@@ -1012,11 +1091,6 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
if (!is_memory_available(md))
continue;
-#ifdef CONFIG_CRASH_DUMP
- /* saved_max_pfn should ignore max_addr= command line arg */
- if (saved_max_pfn < (efi_md_end(md) >> PAGE_SHIFT))
- saved_max_pfn = (efi_md_end(md) >> PAGE_SHIFT);
-#endif
/*
* Round ends inward to granule boundaries
* Give trimmings to uncached allocator
@@ -1024,13 +1098,17 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
if (md->phys_addr < contig_low) {
lim = min(efi_md_end(md), contig_low);
if (efi_uc(md)) {
- if (k > kern_memmap && (k-1)->attribute == EFI_MEMORY_UC &&
+ if (k > kern_memmap &&
+ (k-1)->attribute == EFI_MEMORY_UC &&
kmd_end(k-1) == md->phys_addr) {
- (k-1)->num_pages += (lim - md->phys_addr) >> EFI_PAGE_SHIFT;
+ (k-1)->num_pages +=
+ (lim - md->phys_addr)
+ >> EFI_PAGE_SHIFT;
} else {
k->attribute = EFI_MEMORY_UC;
k->start = md->phys_addr;
- k->num_pages = (lim - md->phys_addr) >> EFI_PAGE_SHIFT;
+ k->num_pages = (lim - md->phys_addr)
+ >> EFI_PAGE_SHIFT;
k++;
}
}
@@ -1048,7 +1126,8 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
} else {
k->attribute = EFI_MEMORY_UC;
k->start = lim;
- k->num_pages = (efi_md_end(md) - lim) >> EFI_PAGE_SHIFT;
+ k->num_pages = (efi_md_end(md) - lim)
+ >> EFI_PAGE_SHIFT;
k++;
}
}
@@ -1084,11 +1163,14 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
/* reserve the memory we are using for kern_memmap */
*s = (u64)kern_memmap;
*e = (u64)++k;
+
+ return total_mem;
}
void
efi_initialize_iomem_resources(struct resource *code_resource,
- struct resource *data_resource)
+ struct resource *data_resource,
+ struct resource *bss_resource)
{
struct resource *res;
void *efi_map_start, *efi_map_end, *p;
@@ -1109,7 +1191,7 @@ efi_initialize_iomem_resources(struct resource *code_resource,
if (md->num_pages == 0) /* should not happen */
continue;
- flags = IORESOURCE_MEM;
+ flags = IORESOURCE_MEM | IORESOURCE_BUSY;
switch (md->type) {
case EFI_MEMORY_MAPPED_IO:
@@ -1124,19 +1206,19 @@ efi_initialize_iomem_resources(struct resource *code_resource,
if (md->attribute & EFI_MEMORY_WP) {
name = "System ROM";
flags |= IORESOURCE_READONLY;
- } else {
+ } else if (md->attribute == EFI_MEMORY_UC)
+ name = "Uncached RAM";
+ else
name = "System RAM";
- }
break;
case EFI_ACPI_MEMORY_NVS:
name = "ACPI Non-volatile Storage";
- flags |= IORESOURCE_BUSY;
break;
case EFI_UNUSABLE_MEMORY:
name = "reserved";
- flags |= IORESOURCE_BUSY | IORESOURCE_DISABLED;
+ flags |= IORESOURCE_DISABLED;
break;
case EFI_RESERVED_TYPE:
@@ -1145,18 +1227,19 @@ efi_initialize_iomem_resources(struct resource *code_resource,
case EFI_ACPI_RECLAIM_MEMORY:
default:
name = "reserved";
- flags |= IORESOURCE_BUSY;
break;
}
- if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) {
- printk(KERN_ERR "failed to alocate resource for iomem\n");
+ if ((res = kzalloc(sizeof(struct resource),
+ GFP_KERNEL)) == NULL) {
+ printk(KERN_ERR
+ "failed to allocate resource for iomem\n");
return;
}
res->name = name;
res->start = md->phys_addr;
- res->end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1;
+ res->end = md->phys_addr + efi_md_size(md) - 1;
res->flags = flags;
if (insert_resource(&iomem_resource, res) < 0)
@@ -1169,6 +1252,7 @@ efi_initialize_iomem_resources(struct resource *code_resource,
*/
insert_resource(res, code_resource);
insert_resource(res, data_resource);
+ insert_resource(res, bss_resource);
#ifdef CONFIG_KEXEC
insert_resource(res, &efi_memmap_res);
insert_resource(res, &boot_param_res);
@@ -1184,50 +1268,50 @@ efi_initialize_iomem_resources(struct resource *code_resource,
rsvd_regions are sorted
*/
unsigned long __init
-kdump_find_rsvd_region (unsigned long size,
- struct rsvd_region *r, int n)
+kdump_find_rsvd_region (unsigned long size, struct rsvd_region *r, int n)
{
- int i;
- u64 start, end;
- u64 alignment = 1UL << _PAGE_SIZE_64M;
- void *efi_map_start, *efi_map_end, *p;
- efi_memory_desc_t *md;
- u64 efi_desc_size;
-
- efi_map_start = __va(ia64_boot_param->efi_memmap);
- efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
- efi_desc_size = ia64_boot_param->efi_memdesc_size;
-
- for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
- md = p;
- if (!efi_wb(md))
- continue;
- start = ALIGN(md->phys_addr, alignment);
- end = efi_md_end(md);
- for (i = 0; i < n; i++) {
- if (__pa(r[i].start) >= start && __pa(r[i].end) < end) {
- if (__pa(r[i].start) > start + size)
- return start;
- start = ALIGN(__pa(r[i].end), alignment);
- if (i < n-1 && __pa(r[i+1].start) < start + size)
- continue;
- else
- break;
+ int i;
+ u64 start, end;
+ u64 alignment = 1UL << _PAGE_SIZE_64M;
+ void *efi_map_start, *efi_map_end, *p;
+ efi_memory_desc_t *md;
+ u64 efi_desc_size;
+
+ efi_map_start = __va(ia64_boot_param->efi_memmap);
+ efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
+ efi_desc_size = ia64_boot_param->efi_memdesc_size;
+
+ /* Examine every EFI memory descriptor; skip those failing efi_wb(). */
+ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ md = p;
+ if (!efi_wb(md))
+ continue;
+ /* Candidate range: descriptor start rounded up to ALIGNMENT. */
+ start = ALIGN(md->phys_addr, alignment);
+ end = efi_md_end(md);
+ /*
+ * For each reserved region inside [start, end): return START
+ * if the gap in front of the region exceeds SIZE, otherwise
+ * move START to the aligned end of that region.
+ */
+ for (i = 0; i < n; i++) {
+ if (__pa(r[i].start) >= start && __pa(r[i].end) < end) {
+ if (__pa(r[i].start) > start + size)
+ return start;
+ start = ALIGN(__pa(r[i].end), alignment);
+ if (i < n-1 &&
+ __pa(r[i+1].start) < start + size)
+ continue;
+ else
+ break;
+ }
}
- }
- if (end > start + size)
- return start;
- }
-
- printk(KERN_WARNING "Cannot reserve 0x%lx byte of memory for crashdump\n",
- size);
- return ~0UL;
+ /* Accept what is left of the descriptor if it exceeds SIZE. */
+ if (end > start + size)
+ return start;
+ }
+
+ /* No descriptor had enough room: warn and return ~0UL. */
+ printk(KERN_WARNING
+ "Cannot reserve 0x%lx byte of memory for crashdump\n", size);
+ return ~0UL;
}
#endif
-#ifdef CONFIG_PROC_VMCORE
+#ifdef CONFIG_CRASH_DUMP
/* find the descriptor at a certain address and return its size */
-unsigned long
+unsigned long __init
vmcore_find_descriptor_size (unsigned long address)
{
void *efi_map_start, *efi_map_end, *p;
diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c
new file mode 100644
index 00000000000..04bc8fd5f89
--- /dev/null
+++ b/arch/ia64/kernel/elfcore.c
@@ -0,0 +1,76 @@
+#include <linux/elf.h>
+#include <linux/coredump.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#include <asm/elf.h>
+
+
+/* Return the number of program headers recorded in the gate ELF header. */
+Elf64_Half elf_core_extra_phdrs(void)
+{
+	Elf64_Half nr_phdrs = GATE_EHDR->e_phnum;
+
+	return nr_phdrs;
+}
+
+/*
+ * Emit a copy of every gate program header through dump_emit().
+ *
+ * PT_LOAD entries get their memory size rounded up to a page and their
+ * file size set to match; the first one is placed at OFFSET and any later
+ * one reuses that same file offset.  Other entries have their offset
+ * shifted by the offset chosen for the PT_LOAD data.  p_paddr is zeroed
+ * in every copy.
+ *
+ * Returns 1 on success, 0 as soon as dump_emit() fails.
+ */
+int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
+{
+	const struct elf_phdr *const src =
+		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	Elf64_Off load_ofs = 0;
+	int n;
+
+	for (n = 0; n < GATE_EHDR->e_phnum; n++) {
+		struct elf_phdr copy = src[n];
+
+		if (copy.p_type != PT_LOAD) {
+			copy.p_offset += load_ofs;
+		} else {
+			copy.p_memsz = PAGE_ALIGN(copy.p_memsz);
+			copy.p_filesz = copy.p_memsz;
+			if (load_ofs != 0) {
+				copy.p_offset = load_ofs;
+			} else {
+				copy.p_offset = offset;
+				load_ofs = copy.p_offset;
+				offset += copy.p_filesz;
+			}
+		}
+		copy.p_paddr = 0; /* match other core phdrs */
+		if (!dump_emit(cprm, &copy, sizeof(copy)))
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Emit the contents of the first PT_LOAD gate segment, rounded up to a
+ * whole number of pages, through dump_emit().
+ *
+ * Returns 0 if dump_emit() fails, 1 otherwise (including when there is
+ * no PT_LOAD entry at all).
+ */
+int elf_core_write_extra_data(struct coredump_params *cprm)
+{
+	const struct elf_phdr *const src =
+		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	int n;
+
+	for (n = 0; n < GATE_EHDR->e_phnum; n++) {
+		void *seg;
+		size_t len;
+
+		if (src[n].p_type != PT_LOAD)
+			continue;
+
+		seg = (void *)src[n].p_vaddr;
+		len = PAGE_ALIGN(src[n].p_memsz);
+
+		/* Only the first PT_LOAD entry is written. */
+		return dump_emit(cprm, seg, len) ? 1 : 0;
+	}
+	return 1;
+}
+
+/*
+ * Return the page-aligned memory size of the first PT_LOAD gate segment,
+ * or 0 when the gate program headers contain no PT_LOAD entry.
+ */
+size_t elf_core_extra_data_size(void)
+{
+	const struct elf_phdr *const src =
+		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	int n;
+
+	for (n = 0; n < GATE_EHDR->e_phnum; n++) {
+		if (src[n].p_type == PT_LOAD)
+			return PAGE_ALIGN(src[n].p_memsz);
+	}
+	return 0;
+}
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index c36f43c9460..ba3d03503e8 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -23,6 +23,11 @@
* 11/07/2000
*/
/*
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ * pv_ops.
+ */
+/*
* Global (preserved) predicate usage on syscall entry/exit path:
*
* pKStk: See entry.h.
@@ -42,9 +47,11 @@
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/unistd.h>
+#include <asm/ftrace.h>
#include "minstate.h"
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE
/*
* execve() is special because in case of success, we need to
* setup a null register window frame.
@@ -54,31 +61,20 @@ ENTRY(ia64_execve)
* Allocate 8 input registers since ptrace() may clobber them
*/
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
- alloc loc1=ar.pfs,8,2,4,0
+ alloc loc1=ar.pfs,8,2,3,0
mov loc0=rp
.body
mov out0=in0 // filename
;; // stop bit between alloc and call
mov out1=in1 // argv
mov out2=in2 // envp
- add out3=16,sp // regs
br.call.sptk.many rp=sys_execve
.ret0:
-#ifdef CONFIG_IA32_SUPPORT
- /*
- * Check if we're returning to ia32 mode. If so, we need to restore ia32 registers
- * from pt_regs.
- */
- adds r16=PT(CR_IPSR)+16,sp
- ;;
- ld8 r16=[r16]
-#endif
cmp4.ge p6,p7=r8,r0
mov ar.pfs=loc1 // restore ar.pfs
sxt4 r8=r8 // return 64-bit result
;;
stf.spill [sp]=f0
-(p6) cmp.ne pKStk,pUStk=r0,r0 // a successful execve() lands us in user-mode...
mov rp=loc0
(p6) mov ar.pfs=r0 // clear ar.pfs on success
(p7) br.ret.sptk.many rp
@@ -101,12 +97,6 @@ ENTRY(ia64_execve)
ldf.fill f23=[sp]; ldf.fill f24=[sp]; mov f25=f0
ldf.fill f26=[sp]; ldf.fill f27=[sp]; mov f28=f0
ldf.fill f29=[sp]; ldf.fill f30=[sp]; mov f31=f0
-#ifdef CONFIG_IA32_SUPPORT
- tbit.nz p6,p0=r16, IA64_PSR_IS_BIT
- movl loc0=ia64_ret_from_ia32_execve
- ;;
-(p6) mov rp=loc0
-#endif
br.ret.sptk.many rp
END(ia64_execve)
@@ -126,13 +116,12 @@ GLOBAL_ENTRY(sys_clone2)
mov loc1=r16 // save ar.pfs across do_fork
.body
mov out1=in1
- mov out3=in2
+ mov out2=in2
tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
- mov out4=in3 // parent_tidptr: valid only w/CLONE_PARENT_SETTID
+ mov out3=in3 // parent_tidptr: valid only w/CLONE_PARENT_SETTID
;;
(p6) st8 [r2]=in5 // store TLS in r16 for copy_thread()
- mov out5=in4 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
- adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = &regs
+ mov out4=in4 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
mov out0=in0 // out0 = clone_flags
br.call.sptk.many rp=do_fork
.ret1: .restore sp
@@ -158,13 +147,12 @@ GLOBAL_ENTRY(sys_clone)
mov loc1=r16 // save ar.pfs across do_fork
.body
mov out1=in1
- mov out3=16 // stacksize (compensates for 16-byte scratch area)
+ mov out2=16 // stacksize (compensates for 16-byte scratch area)
tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
- mov out4=in2 // parent_tidptr: valid only w/CLONE_PARENT_SETTID
+ mov out3=in2 // parent_tidptr: valid only w/CLONE_PARENT_SETTID
;;
(p6) st8 [r2]=in4 // store TLS in r13 (tp)
- mov out5=in3 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
- adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = &regs
+ mov out4=in3 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
mov out0=in0 // out0 = clone_flags
br.call.sptk.many rp=do_fork
.ret2: .restore sp
@@ -173,6 +161,7 @@ GLOBAL_ENTRY(sys_clone)
mov rp=loc0
br.ret.sptk.many rp
END(sys_clone)
+#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
/*
* prev_task <- ia64_switch_to(struct task_struct *next)
@@ -180,7 +169,7 @@ END(sys_clone)
* called. The code starting at .map relies on this. The rest of the code
* doesn't care about the interrupt masking status.
*/
-GLOBAL_ENTRY(ia64_switch_to)
+GLOBAL_ENTRY(__paravirt_switch_to)
.prologue
alloc r16=ar.pfs,1,0,0,0
DO_SAVE_SWITCH_STACK
@@ -204,7 +193,7 @@ GLOBAL_ENTRY(ia64_switch_to)
;;
.done:
ld8 sp=[r21] // load kernel stack pointer of new task
- mov IA64_KR(CURRENT)=in0 // update "current" application register
+ MOV_TO_KR(CURRENT, in0, r8, r9) // update "current" application register
mov r8=r13 // return pointer to previously running task
mov r13=in0 // set "current" pointer
;;
@@ -216,26 +205,25 @@ GLOBAL_ENTRY(ia64_switch_to)
br.ret.sptk.many rp // boogie on out in new context
.map:
- rsm psr.ic // interrupts (psr.i) are already disabled here
+ RSM_PSR_IC(r25) // interrupts (psr.i) are already disabled here
movl r25=PAGE_KERNEL
;;
srlz.d
or r23=r25,r20 // construct PA | page properties
mov r25=IA64_GRANULE_SHIFT<<2
;;
- mov cr.itir=r25
- mov cr.ifa=in0 // VA of next task...
+ MOV_TO_ITIR(p0, r25, r8)
+ MOV_TO_IFA(in0, r8) // VA of next task...
;;
mov r25=IA64_TR_CURRENT_STACK
- mov IA64_KR(CURRENT_STACK)=r26 // remember last page we mapped...
+ MOV_TO_KR(CURRENT_STACK, r26, r8, r9) // remember last page we mapped...
;;
itr.d dtr[r25]=r23 // wire in new mapping...
- ssm psr.ic // reenable the psr.ic bit
- ;;
- srlz.d
+ SSM_PSR_IC_AND_SRLZ_D(r8, r9) // reenable the psr.ic bit
br.cond.sptk .done
-END(ia64_switch_to)
+END(__paravirt_switch_to)
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE
/*
* Note that interrupts are enabled during save_switch_stack and load_switch_stack. This
* means that we may get an interrupt with "sp" pointing to the new kernel stack while
@@ -375,7 +363,7 @@ END(save_switch_stack)
* - b7 holds address to return to
* - must not touch r8-r11
*/
-ENTRY(load_switch_stack)
+GLOBAL_ENTRY(load_switch_stack)
.prologue
.altrp b7
@@ -492,18 +480,6 @@ GLOBAL_ENTRY(prefetch_stack)
br.ret.sptk.many rp
END(prefetch_stack)
-GLOBAL_ENTRY(kernel_execve)
- mov r15=__NR_execve // put syscall number in place
- break __BREAK_SYSCALL
- br.ret.sptk.many rp
-END(kernel_execve)
-
-GLOBAL_ENTRY(clone)
- mov r15=__NR_clone // put syscall number in place
- break __BREAK_SYSCALL
- br.ret.sptk.many rp
-END(clone)
-
/*
* Invoke a system call, but do some tracing before and after the call.
* We MUST preserve the current register frame throughout this routine
@@ -528,6 +504,11 @@ GLOBAL_ENTRY(ia64_trace_syscall)
stf.spill [r16]=f10
stf.spill [r17]=f11
br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args
+ cmp.lt p6,p0=r8,r0 // check tracehook
+ adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8
+ adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10
+ mov r10=0
+(p6) br.cond.sptk strace_error // syscall failed ->
adds r16=PT(F6)+16,sp
adds r17=PT(F7)+16,sp
;;
@@ -570,7 +551,8 @@ GLOBAL_ENTRY(ia64_trace_syscall)
br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
.ret3:
(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
- br.cond.sptk .work_pending_syscall_end
+(pUStk) rsm psr.i // disable interrupts
+ br.cond.sptk ia64_work_pending_syscall_end
strace_error:
ld8 r3=[r2] // load pt_regs.r8
@@ -601,6 +583,27 @@ GLOBAL_ENTRY(ia64_strace_leave_kernel)
.ret4: br.cond.sptk ia64_leave_kernel
END(ia64_strace_leave_kernel)
+/*
+ * call_payload: call the function described by the two words r4 points at
+ * (entry address, then gp) with r5 as its only argument.  rp, ar.pfs and
+ * gp are kept in loc0, loc1 and loc2 across the call and restored once it
+ * returns; pKStk/pUStk are then set for a return to user level.
+ */
+ENTRY(call_payload)
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(0)
+ /* call the kernel_thread payload; fn is in r4, arg - in r5 */
+ alloc loc1=ar.pfs,0,3,1,0
+ mov loc0=rp // keep return pointer across the call
+ mov loc2=gp // keep our gp across the call
+ mov out0=r5 // arg
+ ld8 r14 = [r4], 8 // fn.address
+ ;;
+ mov b6 = r14
+ ld8 gp = [r4] // fn.gp
+ ;;
+ br.call.sptk.many rp=b6 // fn(arg)
+.ret12: mov gp=loc2
+ mov rp=loc0
+ mov ar.pfs=loc1
+ /* ... and if it has returned, we are going to userland */
+ cmp.ne pKStk,pUStk=r0,r0 // r0 != r0 is false: pKStk=0, pUStk=1
+ br.ret.sptk.many rp
+END(call_payload)
+
GLOBAL_ENTRY(ia64_ret_from_clone)
PT_REGS_UNWIND_INFO(0)
{ /*
@@ -617,6 +620,7 @@ GLOBAL_ENTRY(ia64_ret_from_clone)
br.call.sptk.many rp=ia64_invoke_schedule_tail
}
.ret8:
+(pKStk) br.call.sptk.many rp=call_payload
adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
;;
ld4 r2=[r2]
@@ -635,8 +639,17 @@ GLOBAL_ENTRY(ia64_ret_from_syscall)
adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8
mov r10=r0 // clear error indication in r10
(p7) br.cond.spnt handle_syscall_error // handle potential syscall failure
+#ifdef CONFIG_PARAVIRT
+ ;;
+ br.cond.sptk.few ia64_leave_syscall
+ ;;
+#endif /* CONFIG_PARAVIRT */
END(ia64_ret_from_syscall)
+#ifndef CONFIG_PARAVIRT
// fall through
+#endif
+#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
+
/*
* ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't
* need to switch to bank 0 and doesn't restore the scratch registers.
@@ -681,7 +694,7 @@ END(ia64_ret_from_syscall)
* ar.csd: cleared
* ar.ssd: cleared
*/
-ENTRY(ia64_leave_syscall)
+GLOBAL_ENTRY(__paravirt_leave_syscall)
PT_REGS_UNWIND_INFO(0)
/*
* work.need_resched etc. mustn't get changed by this CPU before it returns to
@@ -691,11 +704,11 @@ ENTRY(ia64_leave_syscall)
* extra work. We always check for extra work when returning to user-level.
* With CONFIG_PREEMPT, we also check for extra work when the preempt_count
* is 0. After extra work processing has been completed, execution
- * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
+ * resumes at ia64_work_processed_syscall with p6 set to 1 if the extra-work-check
* needs to be redone.
*/
#ifdef CONFIG_PREEMPT
- rsm psr.i // disable interrupts
+ RSM_PSR_I(p0, r2, r18) // disable interrupts
cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall
(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
;;
@@ -705,11 +718,22 @@ ENTRY(ia64_leave_syscall)
;;
cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0)
#else /* !CONFIG_PREEMPT */
-(pUStk) rsm psr.i
+ RSM_PSR_I(pUStk, r2, r18)
cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall
(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
#endif
-.work_processed_syscall:
+.global __paravirt_work_processed_syscall;
+__paravirt_work_processed_syscall:
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ adds r2=PT(LOADRS)+16,r12
+ MOV_FROM_ITC(pUStk, p9, r22, r19) // fetch time at leave
+ adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
+ ;;
+(p6) ld4 r31=[r18] // load current_thread_info()->flags
+ ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
+ adds r3=PT(AR_BSPSTORE)+16,r12 // deferred
+ ;;
+#else
adds r2=PT(LOADRS)+16,r12
adds r3=PT(AR_BSPSTORE)+16,r12
adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
@@ -718,6 +742,7 @@ ENTRY(ia64_leave_syscall)
ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
nop.i 0
;;
+#endif
mov r16=ar.bsp // M2 get existing backing store pointer
ld8 r18=[r2],PT(R9)-PT(B6) // load b6
(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
@@ -732,19 +757,28 @@ ENTRY(ia64_leave_syscall)
(pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE!
;;
invala // M0|1 invalidate ALAT
- rsm psr.i | psr.ic // M2 turn off interrupts and interruption collection
+ RSM_PSR_I_IC(r28, r29, r30) // M2 turn off interrupts and interruption collection
cmp.eq p9,p0=r0,r0 // A set p9 to indicate that we should restore cr.ifs
ld8 r29=[r2],16 // M0|1 load cr.ipsr
ld8 r28=[r3],16 // M0|1 load cr.iip
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
+ ;;
+ ld8 r30=[r2],16 // M0|1 load cr.ifs
+ ld8 r25=[r3],16 // M0|1 load ar.unat
+(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+ ;;
+#else
mov r22=r0 // A clear r22
;;
ld8 r30=[r2],16 // M0|1 load cr.ifs
ld8 r25=[r3],16 // M0|1 load ar.unat
(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
;;
+#endif
ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
-(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
+ MOV_FROM_PSR(pKStk, r22, r21) // M2 read PSR now that interrupts are disabled
nop 0
;;
ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
@@ -759,7 +793,11 @@ ENTRY(ia64_leave_syscall)
ld8.fill r1=[r3],16 // M0|1 load r1
(pUStk) mov r17=1 // A
;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+(pUStk) st1 [r15]=r17 // M2|3
+#else
(pUStk) st1 [r14]=r17 // M2|3
+#endif
ld8.fill r13=[r3],16 // M0|1
mov f8=f0 // F clear f8
;;
@@ -773,14 +811,24 @@ ENTRY(ia64_leave_syscall)
srlz.d // M0 ensure interruption collection is off (for cover)
shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
- cover // B add current frame into dirty partition & set cr.ifs
+ COVER // B add current frame into dirty partition & set cr.ifs
;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ mov r19=ar.bsp // M2 get new backing store pointer
+ st8 [r14]=r22 // M save time at leave
+ mov f10=f0 // F clear f10
+
+ mov r22=r0 // A clear r22
+ movl r14=__kernel_syscall_via_epc // X
+ ;;
+#else
mov r19=ar.bsp // M2 get new backing store pointer
mov f10=f0 // F clear f10
nop.m 0
movl r14=__kernel_syscall_via_epc // X
;;
+#endif
mov.m ar.csd=r0 // M2 clear ar.csd
mov.m ar.ccv=r0 // M2 clear ar.ccv
mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc)
@@ -788,22 +836,9 @@ ENTRY(ia64_leave_syscall)
mov.m ar.ssd=r0 // M2 clear ar.ssd
mov f11=f0 // F clear f11
br.cond.sptk.many rbs_switch // B
-END(ia64_leave_syscall)
+END(__paravirt_leave_syscall)
-#ifdef CONFIG_IA32_SUPPORT
-GLOBAL_ENTRY(ia64_ret_from_ia32_execve)
- PT_REGS_UNWIND_INFO(0)
- adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8
- adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10
- ;;
- .mem.offset 0,0
- st8.spill [r2]=r8 // store return value in slot for r8 and set unat bit
- .mem.offset 8,0
- st8.spill [r3]=r0 // clear error indication in slot for r10 and set unat bit
-END(ia64_ret_from_ia32_execve)
- // fall through
-#endif /* CONFIG_IA32_SUPPORT */
-GLOBAL_ENTRY(ia64_leave_kernel)
+GLOBAL_ENTRY(__paravirt_leave_kernel)
PT_REGS_UNWIND_INFO(0)
/*
* work.need_resched etc. mustn't get changed by this CPU before it returns to
@@ -817,7 +852,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
* needs to be redone.
*/
#ifdef CONFIG_PREEMPT
- rsm psr.i // disable interrupts
+ RSM_PSR_I(p0, r17, r31) // disable interrupts
cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel
(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
;;
@@ -827,7 +862,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
;;
cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0)
#else
-(pUStk) rsm psr.i
+ RSM_PSR_I(pUStk, r17, r31)
cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel
(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
#endif
@@ -875,7 +910,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
mov ar.csd=r30
mov ar.ssd=r31
;;
- rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection
+ RSM_PSR_I_IC(r23, r22, r25) // initiate turning off of interrupt and interruption collection
invala // invalidate ALAT
;;
ld8.fill r22=[r2],24
@@ -907,16 +942,24 @@ GLOBAL_ENTRY(ia64_leave_kernel)
mov ar.ccv=r15
;;
ldf.fill f11=[r2]
- bsw.0 // switch back to bank 0 (no stop bit required beforehand...)
+ BSW_0(r2, r3, r15) // switch back to bank 0 (no stop bit required beforehand...)
;;
(pUStk) mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency)
adds r16=PT(CR_IPSR)+16,r12
adds r17=PT(CR_IIP)+16,r12
-(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ .pred.rel.mutex pUStk,pKStk
+ MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled
+ MOV_FROM_ITC(pUStk, p9, r22, r29) // M fetch time at leave
+ nop.i 0
+ ;;
+#else
+ MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled
nop.i 0
nop.i 0
;;
+#endif
ld8 r29=[r16],16 // load cr.ipsr
ld8 r28=[r17],16 // load cr.iip
;;
@@ -938,15 +981,37 @@ GLOBAL_ENTRY(ia64_leave_kernel)
;;
ld8.fill r12=[r16],16
ld8.fill r13=[r17],16
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+(pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
+#else
(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
+#endif
;;
ld8 r20=[r16],16 // ar.fpsr
ld8.fill r15=[r17],16
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred
+#endif
;;
ld8.fill r14=[r16],16
ld8.fill r2=[r17]
(pUStk) mov r17=1
;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ // mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;;
+ // mib : mov add br -> mib : ld8 add br
+ // bbb_ : br nop cover;; mbb_ : mov br cover;;
+ //
+ // nothing requires bsp in r16 if the (pKStk) branch is taken.
+(pUStk) st8 [r3]=r22 // save time at leave
+(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack
+ shr.u r18=r19,16 // get byte size of existing "dirty" partition
+ ;;
+ ld8.fill r3=[r16] // deferred
+ LOAD_PHYS_STACK_REG_SIZE(r17)
+(pKStk) br.cond.dpnt skip_rbs_switch
+ mov r16=ar.bsp // get existing backing store pointer
+#else
ld8.fill r3=[r16]
(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack
shr.u r18=r19,16 // get byte size of existing "dirty" partition
@@ -954,6 +1019,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
mov r16=ar.bsp // get existing backing store pointer
LOAD_PHYS_STACK_REG_SIZE(r17)
(pKStk) br.cond.dpnt skip_rbs_switch
+#endif
/*
* Restore user backing store.
@@ -961,7 +1027,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
* NOTE: alloc, loadrs, and cover can't be predicated.
*/
(pNonSys) br.cond.dpnt dont_preserve_current_frame
- cover // add current frame into dirty partition and set cr.ifs
+ COVER // add current frame into dirty partition and set cr.ifs
;;
mov r19=ar.bsp // get new backing store pointer
rbs_switch:
@@ -1064,16 +1130,16 @@ skip_rbs_switch:
(pKStk) dep r29=r22,r29,21,1 // I0 update ipsr.pp with psr.pp
(pLvSys)mov r16=r0 // A clear r16 for leave_syscall, no-op otherwise
;;
- mov cr.ipsr=r29 // M2
+ MOV_TO_IPSR(p0, r29, r25) // M2
mov ar.pfs=r26 // I0
(pLvSys)mov r17=r0 // A clear r17 for leave_syscall, no-op otherwise
-(p9) mov cr.ifs=r30 // M2
+ MOV_TO_IFS(p9, r30, r25)// M2
mov b0=r21 // I0
(pLvSys)mov r18=r0 // A clear r18 for leave_syscall, no-op otherwise
mov ar.fpsr=r20 // M2
- mov cr.iip=r28 // M2
+ MOV_TO_IIP(r28, r25) // M2
nop 0
;;
(pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode
@@ -1082,7 +1148,7 @@ skip_rbs_switch:
mov ar.rsc=r27 // M2
mov pr=r31,-1 // I0
- rfi // B
+ RFI // B
/*
* On entry:
@@ -1090,6 +1156,9 @@ skip_rbs_switch:
* r31 = current->thread_info->flags
* On exit:
* p6 = TRUE if work-pending-check needs to be redone
+ *
+ * Interrupts are disabled on entry, may be re-enabled depending on the
+ * pending work, and are disabled again on exit.
*/
.work_pending_syscall:
add r2=-8,r2
@@ -1098,42 +1167,30 @@ skip_rbs_switch:
st8 [r2]=r8
st8 [r3]=r10
.work_pending:
- tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0?
+ tbit.z p6,p0=r31,TIF_NEED_RESCHED // is resched not needed?
(p6) br.cond.sptk.few .notify
-#ifdef CONFIG_PREEMPT
-(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1
- ;;
-(pKStk) st4 [r20]=r21
- ssm psr.i // enable interrupts
-#endif
- br.call.spnt.many rp=schedule
-.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1
- rsm psr.i // disable interrupts
- ;;
-#ifdef CONFIG_PREEMPT
-(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
- ;;
-(pKStk) st4 [r20]=r0 // preempt_count() <- 0
-#endif
-(pLvSys)br.cond.sptk.few .work_pending_syscall_end
- br.cond.sptk.many .work_processed_kernel // re-check
+ br.call.spnt.many rp=preempt_schedule_irq
+.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1 (re-check)
+(pLvSys)br.cond.sptk.few __paravirt_pending_syscall_end
+ br.cond.sptk.many .work_processed_kernel
.notify:
(pUStk) br.call.spnt.many rp=notify_resume_user
-.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0
-(pLvSys)br.cond.sptk.few .work_pending_syscall_end
- br.cond.sptk.many .work_processed_kernel // don't re-check
+.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0 (don't re-check)
+(pLvSys)br.cond.sptk.few __paravirt_pending_syscall_end
+ br.cond.sptk.many .work_processed_kernel
-.work_pending_syscall_end:
+.global __paravirt_pending_syscall_end;
+__paravirt_pending_syscall_end:
adds r2=PT(R8)+16,r12
adds r3=PT(R10)+16,r12
;;
ld8 r8=[r2]
ld8 r10=[r3]
- br.cond.sptk.many .work_processed_syscall // re-check
-
-END(ia64_leave_kernel)
+ br.cond.sptk.many __paravirt_work_processed_syscall_target
+END(__paravirt_leave_kernel)
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE
ENTRY(handle_syscall_error)
/*
* Some system calls (e.g., ptrace, mmap) can return arbitrary values which could
@@ -1168,11 +1225,14 @@ GLOBAL_ENTRY(ia64_invoke_schedule_tail)
END(ia64_invoke_schedule_tail)
/*
- * Setup stack and call do_notify_resume_user(). Note that pSys and pNonSys need to
- * be set up by the caller. We declare 8 input registers so the system call
- * args get preserved, in case we need to restart a system call.
+ * Setup stack and call do_notify_resume_user(), keeping interrupts
+ * disabled.
+ *
+ * Note that pSys and pNonSys need to be set up by the caller.
+ * We declare 8 input registers so the system call args get preserved,
+ * in case we need to restart a system call.
*/
-ENTRY(notify_resume_user)
+GLOBAL_ENTRY(notify_resume_user)
.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
mov r9=ar.unat
@@ -1234,7 +1294,7 @@ ENTRY(sys_rt_sigreturn)
adds sp=16,sp
;;
ld8 r9=[sp] // load new ar.unat
- mov.sptk b7=r8,ia64_leave_kernel
+ mov.sptk b7=r8,ia64_native_leave_kernel
;;
mov ar.unat=r9
br.many b7
@@ -1298,6 +1358,105 @@ GLOBAL_ENTRY(unw_init_running)
br.ret.sptk.many rp
END(unw_init_running)
+#ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+GLOBAL_ENTRY(_mcount) // CONFIG_DYNAMIC_FTRACE variant of _mcount
+ br ftrace_stub // no work is done here: branch straight to ftrace_stub
+END(_mcount)
+
+.here: // default address loaded into r3 at ftrace_call below
+ br.ret.sptk.many b0 // does nothing: return immediately through b0
+
+GLOBAL_ENTRY(ftrace_caller) // fills the output registers, reloads gp, then falls into ftrace_call
+ alloc out0 = ar.pfs, 8, 0, 4, 0 // 8 inputs, 0 locals, 4 outputs; previous ar.pfs is written to out0
+ mov out3 = r0 // out3 = 0
+ ;;
+ mov out2 = b0 // out2 = b0 as it was on entry
+ add r3 = 0x20, r3 // r3 += 0x20; copied into b0 below (NOTE(review): presumably r3 is set up by the patched call site - confirm)
+ mov out1 = r1; // out1 = r1 as seen on entry, before gp is overwritten below
+ br.call.sptk.many b0 = ftrace_patch_gp // call the very next label (NOTE(review): out0-out3 presumably become in0-in3 of the frame allocated below - confirm)
+ // this may be called from a module, so gp must be reloaded with __gp
+ftrace_patch_gp:
+ movl gp=__gp // gp = __gp
+ mov b0 = r3 // b0 = r3 (the value advanced by 0x20 above)
+ ;;
+.global ftrace_call;
+ftrace_call: // exported label on the bundle that picks the function to call
+{
+ .mlx
+ nop.m 0x0
+ movl r3 = .here;; // r3 = address to call; .here just returns (NOTE(review): presumably rewritten at run time by dynamic ftrace - confirm)
+}
+ alloc loc0 = ar.pfs, 4, 4, 2, 0 // 4 inputs, 4 locals, 2 outputs; previous ar.pfs is written to loc0
+ ;;
+ mov loc1 = b0 // save b0
+ mov out0 = b0 // first argument starts out as b0
+ mov loc2 = r8 // save r8
+ mov loc3 = r15 // save r15
+ ;;
+ adds out0 = -MCOUNT_INSN_SIZE, out0 // first argument = b0 - MCOUNT_INSN_SIZE
+ mov out1 = in2 // second argument = in2 (NOTE(review): presumably the b0 value stored in out2 before the br.call above - confirm)
+ mov b6 = r3 // indirect branch target
+
+ br.call.sptk.many b0 = b6 // call the function whose address is in r3
+ ;;
+ mov ar.pfs = loc0 // restore the values saved before the call
+ mov b0 = loc1
+ mov r8 = loc2
+ mov r15 = loc3
+ br ftrace_stub // leave through ftrace_stub
+ ;;
+END(ftrace_caller)
+
+#else
+GLOBAL_ENTRY(_mcount) // !CONFIG_DYNAMIC_FTRACE variant: compares the trace function with ftrace_stub on every call
+ movl r2 = ftrace_stub // r2 = address of ftrace_stub, used in the comparison below
+ movl r3 = ftrace_trace_function;; // r3 = address of the ftrace_trace_function variable
+ ld8 r3 = [r3];; // r3 = value stored in ftrace_trace_function
+ ld8 r3 = [r3];; // r3 = first word that value points to (NOTE(review): presumably the entry address of a function descriptor - confirm)
+ cmp.eq p7,p0 = r2, r3 // p7 = (loaded address == ftrace_stub)
+(p7) br.sptk.many ftrace_stub // equal: skip the call and leave via ftrace_stub
+ ;;
+
+ alloc loc0 = ar.pfs, 4, 4, 2, 0 // 4 inputs, 4 locals, 2 outputs; previous ar.pfs is written to loc0
+ ;;
+ mov loc1 = b0 // save b0
+ mov out0 = b0 // first argument starts out as b0
+ mov loc2 = r8 // save r8
+ mov loc3 = r15 // save r15
+ ;;
+ adds out0 = -MCOUNT_INSN_SIZE, out0 // first argument = b0 - MCOUNT_INSN_SIZE
+ mov out1 = in2 // second argument = in2 of this frame
+ mov b6 = r3 // indirect branch target
+
+ br.call.sptk.many b0 = b6 // call the function whose address is in r3
+ ;;
+ mov ar.pfs = loc0 // restore the values saved before the call
+ mov b0 = loc1
+ mov r8 = loc2
+ mov r15 = loc3
+ br ftrace_stub // leave through ftrace_stub
+ ;;
+END(_mcount)
+#endif
+
+GLOBAL_ENTRY(ftrace_stub) // common exit path: branched to by _mcount and ftrace_caller
+ mov r3 = b0 // r3 = b0 on entry
+ movl r2 = _mcount_ret_helper // r2 = address of the helper below
+ ;;
+ mov b6 = r2 // b6 = helper address
+ mov b7 = r3 // b7 = entry b0, used as the final branch target
+ br.ret.sptk.many b6 // br.ret to the helper rather than to b0
+
+_mcount_ret_helper: // reached through the br.ret above
+ mov b0 = r42 // reload b0, r1 and ar.pfs from r42/r41/r40 (NOTE(review): presumably the out2/out1/out0 slots of an 8-input frame like the one ftrace_caller allocates - confirm)
+ mov r1 = r41
+ mov ar.pfs = r40
+ br b7 // continue at the address that was in b0 when ftrace_stub was entered
+END(ftrace_stub)
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
.rodata
.align 8
.globl sys_call_table
@@ -1336,7 +1495,7 @@ sys_call_table:
data8 sys_mkdir // 1055
data8 sys_rmdir
data8 sys_dup
- data8 sys_pipe
+ data8 sys_ia64_pipe
data8 sys_times
data8 ia64_brk // 1060
data8 sys_setgid
@@ -1447,7 +1606,7 @@ sys_call_table:
data8 sys_sched_get_priority_min
data8 sys_sched_rr_get_interval
data8 sys_nanosleep
- data8 sys_nfsservctl
+ data8 sys_ni_syscall // old nfsservctl
data8 sys_prctl // 1170
data8 sys_getpagesize
data8 sys_mmap2
@@ -1573,7 +1732,7 @@ sys_call_table:
data8 sys_fchmodat
data8 sys_faccessat
data8 sys_pselect6
- data8 sys_ppoll
+ data8 sys_ppoll // 1295
data8 sys_unshare
data8 sys_splice
data8 sys_set_robust_list
@@ -1586,7 +1745,37 @@ sys_call_table:
data8 sys_epoll_pwait // 1305
data8 sys_utimensat
data8 sys_signalfd
- data8 sys_timerfd
+ data8 sys_ni_syscall
data8 sys_eventfd
+ data8 sys_timerfd_create // 1310
+ data8 sys_timerfd_settime
+ data8 sys_timerfd_gettime
+ data8 sys_signalfd4
+ data8 sys_eventfd2
+ data8 sys_epoll_create1 // 1315
+ data8 sys_dup3
+ data8 sys_pipe2
+ data8 sys_inotify_init1
+ data8 sys_preadv
+ data8 sys_pwritev // 1320
+ data8 sys_rt_tgsigqueueinfo
+ data8 sys_recvmmsg
+ data8 sys_fanotify_init
+ data8 sys_fanotify_mark
+ data8 sys_prlimit64 // 1325
+ data8 sys_name_to_handle_at
+ data8 sys_open_by_handle_at
+ data8 sys_clock_adjtime
+ data8 sys_syncfs
+ data8 sys_setns // 1330
+ data8 sys_sendmmsg
+ data8 sys_process_vm_readv
+ data8 sys_process_vm_writev
+ data8 sys_accept4
+ data8 sys_finit_module // 1335
+ data8 sys_sched_setattr
+ data8 sys_sched_getattr
+ data8 sys_renameat2
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
+#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c
index b642648cc2a..0c161ed6d18 100644
--- a/arch/ia64/kernel/err_inject.c
+++ b/arch/ia64/kernel/err_inject.c
@@ -24,7 +24,7 @@
* Copyright (C) 2006, Intel Corp. All rights reserved.
*
*/
-#include <linux/sysdev.h>
+#include <linux/device.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/cpu.h>
@@ -35,10 +35,10 @@
#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte;
#define define_one_ro(name) \
-static SYSDEV_ATTR(name, 0444, show_##name, NULL)
+static DEVICE_ATTR(name, 0444, show_##name, NULL)
#define define_one_rw(name) \
-static SYSDEV_ATTR(name, 0644, show_##name, store_##name)
+static DEVICE_ATTR(name, 0644, show_##name, store_##name)
static u64 call_start[NR_CPUS];
static u64 phys_addr[NR_CPUS];
@@ -55,7 +55,8 @@ static u64 resources[NR_CPUS];
#define show(name) \
static ssize_t \
-show_##name(struct sys_device *dev, char *buf) \
+show_##name(struct device *dev, struct device_attribute *attr, \
+ char *buf) \
{ \
u32 cpu=dev->id; \
return sprintf(buf, "%lx\n", name[cpu]); \
@@ -63,7 +64,8 @@ show_##name(struct sys_device *dev, char *buf) \
#define store(name) \
static ssize_t \
-store_##name(struct sys_device *dev, const char *buf, size_t size) \
+store_##name(struct device *dev, struct device_attribute *attr, \
+ const char *buf, size_t size) \
{ \
unsigned int cpu=dev->id; \
name[cpu] = simple_strtoull(buf, NULL, 16); \
@@ -76,7 +78,8 @@ show(call_start)
* processor. The cpu number in driver is only used for storing data.
*/
static ssize_t
-store_call_start(struct sys_device *dev, const char *buf, size_t size)
+store_call_start(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t size)
{
unsigned int cpu=dev->id;
unsigned long call_start = simple_strtoull(buf, NULL, 16);
@@ -124,14 +127,16 @@ show(err_type_info)
store(err_type_info)
static ssize_t
-show_virtual_to_phys(struct sys_device *dev, char *buf)
+show_virtual_to_phys(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
unsigned int cpu=dev->id;
return sprintf(buf, "%lx\n", phys_addr[cpu]);
}
static ssize_t
-store_virtual_to_phys(struct sys_device *dev, const char *buf, size_t size)
+store_virtual_to_phys(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t size)
{
unsigned int cpu=dev->id;
u64 virt_addr=simple_strtoull(buf, NULL, 16);
@@ -154,7 +159,8 @@ show(err_struct_info)
store(err_struct_info)
static ssize_t
-show_err_data_buffer(struct sys_device *dev, char *buf)
+show_err_data_buffer(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
unsigned int cpu=dev->id;
@@ -165,7 +171,9 @@ show_err_data_buffer(struct sys_device *dev, char *buf)
}
static ssize_t
-store_err_data_buffer(struct sys_device *dev, const char *buf, size_t size)
+store_err_data_buffer(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
{
unsigned int cpu=dev->id;
int ret;
@@ -201,14 +209,14 @@ define_one_ro(capabilities);
define_one_ro(resources);
static struct attribute *default_attrs[] = {
- &attr_call_start.attr,
- &attr_virtual_to_phys.attr,
- &attr_err_type_info.attr,
- &attr_err_struct_info.attr,
- &attr_err_data_buffer.attr,
- &attr_status.attr,
- &attr_capabilities.attr,
- &attr_resources.attr,
+ &dev_attr_call_start.attr,
+ &dev_attr_virtual_to_phys.attr,
+ &dev_attr_err_type_info.attr,
+ &dev_attr_err_struct_info.attr,
+ &dev_attr_err_data_buffer.attr,
+ &dev_attr_status.attr,
+ &dev_attr_capabilities.attr,
+ &dev_attr_resources.attr,
NULL
};
@@ -217,23 +225,23 @@ static struct attribute_group err_inject_attr_group = {
.name = "err_inject"
};
/* Add/Remove err_inject interface for CPU device */
-static int __cpuinit err_inject_add_dev(struct sys_device * sys_dev)
+static int err_inject_add_dev(struct device *sys_dev)
{
return sysfs_create_group(&sys_dev->kobj, &err_inject_attr_group);
}
-static int __cpuinit err_inject_remove_dev(struct sys_device * sys_dev)
+static int err_inject_remove_dev(struct device *sys_dev)
{
sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group);
return 0;
}
-static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb,
+static int err_inject_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
- struct sys_device *sys_dev;
+ struct device *sys_dev;
- sys_dev = get_cpu_sysdev(cpu);
+ sys_dev = get_cpu_device(cpu);
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
@@ -248,7 +256,7 @@ static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
-static struct notifier_block __cpuinitdata err_inject_cpu_notifier =
+static struct notifier_block err_inject_cpu_notifier =
{
.notifier_call = err_inject_cpu_callback,
};
@@ -261,12 +269,17 @@ err_inject_init(void)
#ifdef ERR_INJ_DEBUG
printk(KERN_INFO "Enter error injection driver.\n");
#endif
+
+ cpu_notifier_register_begin();
+
for_each_online_cpu(i) {
err_inject_cpu_callback(&err_inject_cpu_notifier, CPU_ONLINE,
(void *)(long)i);
}
- register_hotcpu_notifier(&err_inject_cpu_notifier);
+ __register_hotcpu_notifier(&err_inject_cpu_notifier);
+
+ cpu_notifier_register_done();
return 0;
}
@@ -275,16 +288,22 @@ static void __exit
err_inject_exit(void)
{
int i;
- struct sys_device *sys_dev;
+ struct device *sys_dev;
#ifdef ERR_INJ_DEBUG
printk(KERN_INFO "Exit error injection driver.\n");
#endif
+
+ cpu_notifier_register_begin();
+
for_each_online_cpu(i) {
- sys_dev = get_cpu_sysdev(i);
+ sys_dev = get_cpu_device(i);
sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group);
}
- unregister_hotcpu_notifier(&err_inject_cpu_notifier);
+
+ __unregister_hotcpu_notifier(&err_inject_cpu_notifier);
+
+ cpu_notifier_register_done();
}
module_init(err_inject_init);
diff --git a/arch/ia64/kernel/esi.c b/arch/ia64/kernel/esi.c
index ebf4e988e78..b091111270c 100644
--- a/arch/ia64/kernel/esi.c
+++ b/arch/ia64/kernel/esi.c
@@ -65,7 +65,7 @@ static int __init esi_init (void)
}
if (!esi)
- return -ENODEV;;
+ return -ENODEV;
systab = __va(esi);
@@ -84,7 +84,7 @@ static int __init esi_init (void)
case ESI_DESC_ENTRY_POINT:
break;
default:
- printk(KERN_WARNING "Unkown table type %d found in "
+ printk(KERN_WARNING "Unknown table type %d found in "
"ESI table, ignoring rest of table\n", *p);
return -ENODEV;
}
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 44841971f07..abc6dee3799 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -21,10 +21,10 @@
#include <asm/thread_info.h>
#include <asm/sal.h>
#include <asm/signal.h>
-#include <asm/system.h>
#include <asm/unistd.h>
#include "entry.h"
+#include "paravirt_inst.h"
/*
* See Documentation/ia64/fsys.txt for details on fsyscalls.
@@ -61,80 +61,59 @@ ENTRY(fsys_getpid)
.prologue
.altrp b6
.body
- add r9=TI_FLAGS+IA64_TASK_SIZE,r16
- ;;
- ld4 r9=[r9]
- add r8=IA64_TASK_TGID_OFFSET,r16
- ;;
- and r9=TIF_ALLWORK_MASK,r9
- ld4 r8=[r8] // r8 = current->tgid
- ;;
- cmp.ne p8,p0=0,r9
-(p8) br.spnt.many fsys_fallback_syscall
- FSYS_RETURN
-END(fsys_getpid)
-
-ENTRY(fsys_getppid)
- .prologue
- .altrp b6
- .body
add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
;;
ld8 r17=[r17] // r17 = current->group_leader
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
;;
-
ld4 r9=[r9]
- add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
+ add r17=IA64_TASK_TGIDLINK_OFFSET,r17
;;
and r9=TIF_ALLWORK_MASK,r9
-
-1: ld8 r18=[r17] // r18 = current->group_leader->real_parent
+ ld8 r17=[r17] // r17 = current->group_leader->pids[PIDTYPE_PID].pid
;;
- cmp.ne p8,p0=0,r9
- add r8=IA64_TASK_TGID_OFFSET,r18 // r8 = &current->group_leader->real_parent->tgid
+ add r8=IA64_PID_LEVEL_OFFSET,r17
;;
-
- /*
- * The .acq is needed to ensure that the read of tgid has returned its data before
- * we re-check "real_parent".
- */
- ld4.acq r8=[r8] // r8 = current->group_leader->real_parent->tgid
-#ifdef CONFIG_SMP
- /*
- * Re-read current->group_leader->real_parent.
- */
- ld8 r19=[r17] // r19 = current->group_leader->real_parent
-(p8) br.spnt.many fsys_fallback_syscall
+ ld4 r8=[r8] // r8 = pid->level
+ add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
;;
- cmp.ne p6,p0=r18,r19 // did real_parent change?
- mov r19=0 // i must not leak kernel bits...
-(p6) br.cond.spnt.few 1b // yes -> redo the read of tgid and the check
+ shl r8=r8,IA64_UPID_SHIFT
;;
- mov r17=0 // i must not leak kernel bits...
- mov r18=0 // i must not leak kernel bits...
-#else
- mov r17=0 // i must not leak kernel bits...
- mov r18=0 // i must not leak kernel bits...
- mov r19=0 // i must not leak kernel bits...
-#endif
+ add r17=r17,r8 // r17 = &pid->numbers[pid->level]
+ ;;
+ ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
+ ;;
+ mov r17=0
+ ;;
+ cmp.ne p8,p0=0,r9
+(p8) br.spnt.many fsys_fallback_syscall
FSYS_RETURN
-END(fsys_getppid)
+END(fsys_getpid)
ENTRY(fsys_set_tid_address)
.prologue
.altrp b6
.body
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+ add r17=IA64_TASK_TGIDLINK_OFFSET,r16
;;
ld4 r9=[r9]
tnat.z p6,p7=r32 // check argument register for being NaT
+ ld8 r17=[r17] // r17 = current->pids[PIDTYPE_PID].pid
;;
and r9=TIF_ALLWORK_MASK,r9
- add r8=IA64_TASK_PID_OFFSET,r16
+ add r8=IA64_PID_LEVEL_OFFSET,r17
add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
;;
- ld4 r8=[r8]
+ ld4 r8=[r8] // r8 = pid->level
+ add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
+ ;;
+ shl r8=r8,IA64_UPID_SHIFT
+ ;;
+ add r17=r17,r8 // r17 = &pid->numbers[pid->level]
+ ;;
+ ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
+ ;;
cmp.ne p8,p0=0,r9
mov r17=-1
;;
@@ -147,7 +126,7 @@ ENTRY(fsys_set_tid_address)
FSYS_RETURN
END(fsys_set_tid_address)
-#if IA64_GTOD_LOCK_OFFSET !=0
+#if IA64_GTOD_SEQ_OFFSET !=0
#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t
#endif
#if IA64_ITC_JITTER_OFFSET !=0
@@ -210,27 +189,25 @@ ENTRY(fsys_gettimeofday)
// Note that instructions are optimized for McKinley. McKinley can
// process two bundles simultaneously and therefore we continuously
// try to feed the CPU two bundles and then a stop.
- //
- // Additional note that code has changed a lot. Optimization is TBD.
- // Comments begin with "?" are maybe outdated.
- tnat.nz p6,p0 = r31 // ? branch deferred to fit later bundle
- mov pr = r30,0xc000 // Set predicates according to function
+
add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
+ tnat.nz p6,p0 = r31 // guard against Nat argument
+(p6) br.cond.spnt.few .fail_einval
movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
;;
+ ld4 r2 = [r2] // process work pending flags
movl r29 = itc_jitter_data // itc_jitter
add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time
- ld4 r2 = [r2] // process work pending flags
- ;;
-(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
- add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
+ mov pr = r30,0xc000 // Set predicates according to function
+ ;;
and r2 = TIF_ALLWORK_MASK,r2
-(p6) br.cond.spnt.few .fail_einval // ? deferred branch
+ add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
+(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
;;
- add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
+ add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
-(p6) br.cond.spnt.many fsys_fallback_syscall
+(p6) br.cond.spnt.many fsys_fallback_syscall
;;
// Begin critical section
.time_redo:
@@ -255,10 +232,9 @@ ENTRY(fsys_gettimeofday)
(p9) cmp.eq p13,p0 = 0,r30 // if mmio_ptr, clear p13 jitter control
;;
.pred.rel.mutex p8,p9
-(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!!
+ MOV_FROM_ITC(p8, p6, r2, r10) // CPU_TIMER. 36 clocks latency!!!
(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues..
(p13) ld8 r25 = [r19] // get itc_lastcycle value
- ;; // ? could be removed by moving the last add upward
ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec
;;
ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec
@@ -285,13 +261,12 @@ ENTRY(fsys_gettimeofday)
EX(.fail_efault, probe.w.fault r31, 3)
xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
;;
- // ? simulate tbit.nz.or p7,p0 = r28,0
getf.sig r2 = f8
mf
;;
ld4 r10 = [r20] // gtod_lock.sequence
shr.u r2 = r2,r23 // shift by factor
- ;; // ? overloaded 3 bundles!
+ ;;
add r8 = r8,r2 // Add xtime.nsecs
cmp4.ne p7,p0 = r28,r10
(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo
@@ -319,9 +294,9 @@ EX(.fail_efault, probe.w.fault r31, 3)
EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it
;;
- mov r8 = r0
(p14) getf.sig r2 = f8
;;
+ mov r8 = r0
(p14) shr.u r21 = r2, 4
;;
EX(.fail_efault, st8 [r31] = r9)
@@ -350,163 +325,6 @@ ENTRY(fsys_clock_gettime)
END(fsys_clock_gettime)
/*
- * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
- */
-#if _NSIG_WORDS != 1
-# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
-#endif
-ENTRY(fsys_rt_sigprocmask)
- .prologue
- .altrp b6
- .body
-
- add r2=IA64_TASK_BLOCKED_OFFSET,r16
- add r9=TI_FLAGS+IA64_TASK_SIZE,r16
- cmp4.ltu p6,p0=SIG_SETMASK,r32
-
- cmp.ne p15,p0=r0,r34 // oset != NULL?
- tnat.nz p8,p0=r34
- add r31=IA64_TASK_SIGHAND_OFFSET,r16
- ;;
- ld8 r3=[r2] // read/prefetch current->blocked
- ld4 r9=[r9]
- tnat.nz.or p6,p0=r35
-
- cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
- tnat.nz.or p6,p0=r32
-(p6) br.spnt.few .fail_einval // fail with EINVAL
- ;;
-#ifdef CONFIG_SMP
- ld8 r31=[r31] // r31 <- current->sighand
-#endif
- and r9=TIF_ALLWORK_MASK,r9
- tnat.nz.or p8,p0=r33
- ;;
- cmp.ne p7,p0=0,r9
- cmp.eq p6,p0=r0,r33 // set == NULL?
- add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock
-(p8) br.spnt.few .fail_efault // fail with EFAULT
-(p7) br.spnt.many fsys_fallback_syscall // got pending kernel work...
-(p6) br.dpnt.many .store_mask // -> short-circuit to just reading the signal mask
-
- /* Argh, we actually have to do some work and _update_ the signal mask: */
-
-EX(.fail_efault, probe.r.fault r33, 3) // verify user has read-access to *set
-EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set
- mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
- ;;
-
- rsm psr.i // mask interrupt delivery
- mov ar.ccv=0
- andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP
-
-#ifdef CONFIG_SMP
- mov r17=1
- ;;
- cmpxchg4.acq r18=[r31],r17,ar.ccv // try to acquire the lock
- mov r8=EINVAL // default to EINVAL
- ;;
- ld8 r3=[r2] // re-read current->blocked now that we hold the lock
- cmp4.ne p6,p0=r18,r0
-(p6) br.cond.spnt.many .lock_contention
- ;;
-#else
- ld8 r3=[r2] // re-read current->blocked now that we hold the lock
- mov r8=EINVAL // default to EINVAL
-#endif
- add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
- add r19=IA64_TASK_SIGNAL_OFFSET,r16
- cmp4.eq p6,p0=SIG_BLOCK,r32
- ;;
- ld8 r19=[r19] // r19 <- current->signal
- cmp4.eq p7,p0=SIG_UNBLOCK,r32
- cmp4.eq p8,p0=SIG_SETMASK,r32
- ;;
- ld8 r18=[r18] // r18 <- current->pending.signal
- .pred.rel.mutex p6,p7,p8
-(p6) or r14=r3,r14 // SIG_BLOCK
-(p7) andcm r14=r3,r14 // SIG_UNBLOCK
-
-(p8) mov r14=r14 // SIG_SETMASK
-(p6) mov r8=0 // clear error code
- // recalc_sigpending()
- add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
-
- add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
- ;;
- ld4 r17=[r17] // r17 <- current->signal->group_stop_count
-(p7) mov r8=0 // clear error code
-
- ld8 r19=[r19] // r19 <- current->signal->shared_pending
- ;;
- cmp4.gt p6,p7=r17,r0 // p6/p7 <- (current->signal->group_stop_count > 0)?
-(p8) mov r8=0 // clear error code
-
- or r18=r18,r19 // r18 <- current->pending | current->signal->shared_pending
- ;;
- // r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
- andcm r18=r18,r14
- add r9=TI_FLAGS+IA64_TASK_SIZE,r16
- ;;
-
-(p7) cmp.ne.or.andcm p6,p7=r18,r0 // p6/p7 <- signal pending
- mov r19=0 // i must not leak kernel bits...
-(p6) br.cond.dpnt.many .sig_pending
- ;;
-
-1: ld4 r17=[r9] // r17 <- current->thread_info->flags
- ;;
- mov ar.ccv=r17
- and r18=~_TIF_SIGPENDING,r17 // r18 <- r17 & ~(1 << TIF_SIGPENDING)
- ;;
-
- st8 [r2]=r14 // update current->blocked with new mask
- cmpxchg4.acq r8=[r9],r18,ar.ccv // current->thread_info->flags <- r18
- ;;
- cmp.ne p6,p0=r17,r8 // update failed?
-(p6) br.cond.spnt.few 1b // yes -> retry
-
-#ifdef CONFIG_SMP
- st4.rel [r31]=r0 // release the lock
-#endif
- ssm psr.i
- ;;
-
- srlz.d // ensure psr.i is set again
- mov r18=0 // i must not leak kernel bits...
-
-.store_mask:
-EX(.fail_efault, (p15) probe.w.fault r34, 3) // verify user has write-access to *oset
-EX(.fail_efault, (p15) st8 [r34]=r3)
- mov r2=0 // i must not leak kernel bits...
- mov r3=0 // i must not leak kernel bits...
- mov r8=0 // return 0
- mov r9=0 // i must not leak kernel bits...
- mov r14=0 // i must not leak kernel bits...
- mov r17=0 // i must not leak kernel bits...
- mov r31=0 // i must not leak kernel bits...
- FSYS_RETURN
-
-.sig_pending:
-#ifdef CONFIG_SMP
- st4.rel [r31]=r0 // release the lock
-#endif
- ssm psr.i
- ;;
- srlz.d
- br.sptk.many fsys_fallback_syscall // with signal pending, do the heavy-weight syscall
-
-#ifdef CONFIG_SMP
-.lock_contention:
- /* Rather than spinning here, fall back on doing a heavy-weight syscall. */
- ssm psr.i
- ;;
- srlz.d
- br.sptk.many fsys_fallback_syscall
-#endif
-END(fsys_rt_sigprocmask)
-
-/*
* fsys_getcpu doesn't use the third parameter in this implementation. It reads
* current_thread_info()->cpu and corresponding node in cpu_to_node_map.
*/
@@ -525,11 +343,15 @@ ENTRY(fsys_getcpu)
;;
tnat.nz p7,p0 = r33 // I guard against NaT argument
(p7) br.cond.spnt.few .fail_einval // B
+ ;;
+ cmp.ne p6,p0=r32,r0
+ cmp.ne p7,p0=r33,r0
+ ;;
#ifdef CONFIG_NUMA
movl r17=cpu_to_node_map
;;
-EX(.fail_efault, probe.w.fault r32, 3) // M This takes 5 cycles
-EX(.fail_efault, probe.w.fault r33, 3) // M This takes 5 cycles
+EX(.fail_efault, (p6) probe.w.fault r32, 3) // M This takes 5 cycles
+EX(.fail_efault, (p7) probe.w.fault r33, 3) // M This takes 5 cycles
shladd r18=r3,1,r17
;;
ld2 r20=[r18] // r20 = cpu_to_node_map[cpu]
@@ -539,20 +361,20 @@ EX(.fail_efault, probe.w.fault r33, 3) // M This takes 5 cycles
(p8) br.spnt.many fsys_fallback_syscall
;;
;;
-EX(.fail_efault, st4 [r32] = r3)
-EX(.fail_efault, st2 [r33] = r20)
+EX(.fail_efault, (p6) st4 [r32] = r3)
+EX(.fail_efault, (p7) st2 [r33] = r20)
mov r8=0
;;
#else
-EX(.fail_efault, probe.w.fault r32, 3) // M This takes 5 cycles
-EX(.fail_efault, probe.w.fault r33, 3) // M This takes 5 cycles
+EX(.fail_efault, (p6) probe.w.fault r32, 3) // M This takes 5 cycles
+EX(.fail_efault, (p7) probe.w.fault r33, 3) // M This takes 5 cycles
and r2 = TIF_ALLWORK_MASK,r2
;;
cmp.ne p8,p0=0,r2
(p8) br.spnt.many fsys_fallback_syscall
;;
-EX(.fail_efault, st4 [r32] = r3)
-EX(.fail_efault, st2 [r33] = r0)
+EX(.fail_efault, (p6) st4 [r32] = r3)
+EX(.fail_efault, (p7) st2 [r33] = r0)
mov r8=0
;;
#endif
@@ -570,17 +392,17 @@ ENTRY(fsys_fallback_syscall)
adds r17=-1024,r15
movl r14=sys_call_table
;;
- rsm psr.i
+ RSM_PSR_I(p0, r26, r27)
shladd r18=r17,3,r14
;;
ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point
- mov r29=psr // read psr (12 cyc load latency)
+ MOV_FROM_PSR(p0, r29, r26) // read psr (12 cyc load latency)
mov r27=ar.rsc
mov r21=ar.fpsr
mov r26=ar.pfs
END(fsys_fallback_syscall)
/* FALL THROUGH */
-GLOBAL_ENTRY(fsys_bubble_down)
+GLOBAL_ENTRY(paravirt_fsys_bubble_down)
.prologue
.altrp b6
.body
@@ -618,7 +440,7 @@ GLOBAL_ENTRY(fsys_bubble_down)
*
* PSR.BE : already is turned off in __kernel_syscall_via_epc()
* PSR.AC : don't care (kernel normally turns PSR.AC on)
- * PSR.I : already turned off by the time fsys_bubble_down gets
+ * PSR.I : already turned off by the time paravirt_fsys_bubble_down gets
* invoked
* PSR.DFL: always 0 (kernel never turns it on)
* PSR.DFH: don't care --- kernel never touches f32-f127 on its own
@@ -628,7 +450,7 @@ GLOBAL_ENTRY(fsys_bubble_down)
* PSR.DB : don't care --- kernel never enables kernel-level
* breakpoints
* PSR.TB : must be 0 already; if it wasn't zero on entry to
- * __kernel_syscall_via_epc, the branch to fsys_bubble_down
+ * __kernel_syscall_via_epc, the branch to paravirt_fsys_bubble_down
* will trigger a taken branch; the taken-trap-handler then
* converts the syscall into a break-based system-call.
*/
@@ -660,7 +482,11 @@ GLOBAL_ENTRY(fsys_bubble_down)
nop.i 0
;;
mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting
+#else
nop.m 0
+#endif
nop.i 0
;;
mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore
@@ -682,25 +508,47 @@ GLOBAL_ENTRY(fsys_bubble_down)
cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1
br.call.sptk.many b7=ia64_syscall_setup // B
;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ // mov.m r30=ar.itc is called in advance
+ add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
+ add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
+ ;;
+ ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel
+ ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel
+ ;;
+ ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime
+ ld8 r21=[r17] // cumulated utime
+ sub r22=r19,r18 // stime before leave kernel
+ ;;
+ st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp
+ sub r18=r30,r19 // elapsed time in user mode
+ ;;
+ add r20=r20,r22 // sum stime
+ add r21=r21,r18 // sum utime
+ ;;
+ st8 [r16]=r20 // update stime
+ st8 [r17]=r21 // update utime
+ ;;
+#endif
mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
mov rp=r14 // I0 set the real return addr
and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A
;;
- ssm psr.i // M2 we're on kernel stacks now, reenable irqs
+ SSM_PSR_I(p0, p6, r22) // M2 we're on kernel stacks now, reenable irqs
cmp.eq p8,p0=r3,r0 // A
(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT
nop.m 0
(p8) br.call.sptk.many b6=b6 // B (ignore return address)
br.cond.spnt ia64_trace_syscall // B
-END(fsys_bubble_down)
+END(paravirt_fsys_bubble_down)
.rodata
.align 8
- .globl fsyscall_table
+ .globl paravirt_fsyscall_table
- data8 fsys_bubble_down
-fsyscall_table:
+ data8 paravirt_fsys_bubble_down
+paravirt_fsyscall_table:
data8 fsys_ni_syscall
data8 0 // exit // 1025
data8 0 // read
@@ -719,7 +567,7 @@ fsyscall_table:
data8 0 // chown
data8 0 // lseek // 1040
data8 fsys_getpid // getpid
- data8 fsys_getppid // getppid
+ data8 0 // getppid
data8 0 // mount
data8 0 // umount
data8 0 // setuid // 1045
@@ -856,7 +704,7 @@ fsyscall_table:
data8 0 // sigaltstack
data8 0 // rt_sigaction
data8 0 // rt_sigpending
- data8 fsys_rt_sigprocmask // rt_sigprocmask
+ data8 0 // rt_sigprocmask
data8 0 // rt_sigqueueinfo // 1180
data8 0 // rt_sigreturn
data8 0 // rt_sigsuspend
@@ -985,4 +833,4 @@ fsyscall_table:
// fill in zeros for the remaining entries
.zero:
- .space fsyscall_table + 8*NR_syscalls - .zero, 0
+ .space paravirt_fsyscall_table + 8*NR_syscalls - .zero, 0
diff --git a/arch/ia64/kernel/fsyscall_gtod_data.h b/arch/ia64/kernel/fsyscall_gtod_data.h
index 490dab55fba..146b15b5fec 100644
--- a/arch/ia64/kernel/fsyscall_gtod_data.h
+++ b/arch/ia64/kernel/fsyscall_gtod_data.h
@@ -6,7 +6,7 @@
*/
struct fsyscall_gtod_data_t {
- seqlock_t lock;
+ seqcount_t seq;
struct timespec wall_time;
struct timespec monotonic_time;
cycle_t clk_mask;
@@ -14,10 +14,10 @@ struct fsyscall_gtod_data_t {
u32 clk_shift;
void *clk_fsys_mmio;
cycle_t clk_cycle_last;
-} __attribute__ ((aligned (L1_CACHE_BYTES)));
+} ____cacheline_aligned;
struct itc_jitter_data_t {
int itc_jitter;
cycle_t itc_lastcycle;
-} __attribute__ ((aligned (L1_CACHE_BYTES)));
+} ____cacheline_aligned;
diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c
new file mode 100644
index 00000000000..3b0c2aa0785
--- /dev/null
+++ b/arch/ia64/kernel/ftrace.c
@@ -0,0 +1,204 @@
+/*
+ * Dynamic function tracing support.
+ *
+ * Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com>
+ *
+ * For licencing details, see COPYING.
+ *
+ * Defines low-level handling of mcount calls when the kernel
+ * is compiled with the -pg flag. When using dynamic ftrace, the
+ * mcount call-sites get patched lazily with NOP till they are
+ * enabled. All code mutation routines here take effect atomically.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+
+#include <asm/cacheflush.h>
+#include <asm/patch.h>
+
+/*
+ * On IA64, building with -pg places the two bundles below (the _mcount call
+ * sequence) in each function. This array is the expected image of such a
+ * call site before it has been converted.
+ *
+ * ftrace_make_nop_check() overwrites the branch-displacement fields of this
+ * template (through struct ftrace_orig_insn) before comparing, which is why
+ * the array is writable and 8-byte aligned.
+ */
+static unsigned char __attribute__((aligned(8)))
+ftrace_orig_code[MCOUNT_INSN_SIZE] = {
+ 0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */
+ 0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */
+ 0x05, 0x00, 0xc4, 0x00, /* mov r42=b0 */
+ 0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */
+ 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */
+ 0x08, 0x00, 0x00, 0x50 /* br.call.sptk.many b0 = _mcount;; */
+};
+
+/*
+ * Bit-field view laid over ftrace_orig_code[] (and over a copy of the live
+ * call site) by ftrace_make_nop_check(). Only imm20 and sign are accessed;
+ * they are copied from the live code into the template so that the
+ * per-site branch displacement does not affect the comparison. The dummy
+ * members are padding that positions those two fields.
+ */
+struct ftrace_orig_insn {
+ u64 dummy1, dummy2, dummy3;
+ u64 dummy4:64-41+13; /* 36 bits of padding */
+ u64 imm20:20;
+ u64 dummy5:3;
+ u64 sign:1;
+ u64 dummy6:4;
+};
+
+/*
+ * Image that replaces an mcount call site when the site is turned into a
+ * nop. Handed out by ftrace_nop_replace().
+ */
+static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
+ 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
+ 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */
+ 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */
+ 0x00, 0x00, 0x04, 0x00
+};
+
+/*
+ * Return the nop image for an mcount call site. The image is the same for
+ * every site, so the static ftrace_nop_code[] buffer is returned as is.
+ */
+static unsigned char *ftrace_nop_replace(void)
+{
+ return ftrace_nop_code;
+}
+
+/*
+ * Image that replaces an mcount call site when the site is turned into a
+ * call.
+ * Note: only the last instruction differs from ftrace_nop_code[] (brl.many
+ * instead of nop.x). Its branch-target fields are filled in per call site
+ * by ftrace_call_replace() through struct ftrace_call_insn, which is why
+ * the array is writable and 8-byte aligned.
+ */
+static unsigned char __attribute__((aligned(8)))
+ftrace_call_code[MCOUNT_INSN_SIZE] = {
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */
+ 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */
+ 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */
+ 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */
+ 0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/
+ 0xf8, 0xff, 0xff, 0xc8
+};
+
+/*
+ * Bit-field view laid over ftrace_call_code[] (and over a copy of the live
+ * call site). dummy1/dummy2 span the first 16 bytes (the first bundle);
+ * the remaining two words expose the immediate fields of the brl in the
+ * second bundle: imm39_l, imm39_h, imm20 and i. The dummy members are
+ * padding that positions those fields.
+ */
+struct ftrace_call_insn {
+ u64 dummy1, dummy2;
+ u64 dummy3:48;
+ u64 imm39_l:16;
+ u64 imm39_h:23;
+ u64 dummy4:13;
+ u64 imm20:20;
+ u64 dummy5:3;
+ u64 i:1;
+ u64 dummy6:4;
+};
+
+/*
+ * Build the "call" image for the mcount site at @ip so that its brl
+ * branches to @addr.
+ *
+ * The displacement is measured from ip + 0x10, i.e. from the second
+ * ([MLX]) bundle of the image, and is scattered over the template's
+ * immediate fields. Each bit-field store keeps only the low bits that fit
+ * the field, so the effective mapping is:
+ *   imm20   <- displacement bits  4..23
+ *   imm39_l <- displacement bits 24..39
+ *   imm39_h <- displacement bits 40..62
+ *   i       <- displacement bit  63
+ *
+ * Returns the static ftrace_call_code[] buffer, patched in place; the
+ * result is therefore only valid until the next call.
+ */
+static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+	unsigned long disp = addr - (ip + 0x10);
+	struct ftrace_call_insn *insn = (void *)ftrace_call_code;
+
+	insn->i = disp >> 63;
+	insn->imm20 = disp >> 4;
+	insn->imm39_l = disp >> 24;
+	insn->imm39_h = disp >> 40;
+
+	return ftrace_call_code;
+}
+
+/*
+ * Replace the MCOUNT_INSN_SIZE bytes of kernel text at @ip with @new_code.
+ *
+ * When @do_check is non-zero the current text is read first and must match
+ * @old_code; when it is zero @old_code is not looked at at all.
+ *
+ * Returns 0 on success, -EFAULT if the text cannot be read, -EINVAL if it
+ * does not match @old_code, and -EPERM if it cannot be written.
+ *
+ * Note: Due to modules and __init, code can disappear and change, so we
+ * need to protect against faulting as well as code changing. That is why
+ * the probe_kernel_* functions are used for every access.
+ *
+ * No real locking needed, this code is run through kstop_machine, or
+ * before SMP starts.
+ */
+static int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+		   unsigned char *new_code, int do_check)
+{
+	unsigned char current_text[MCOUNT_INSN_SIZE];
+
+	if (do_check) {
+		/* read the text we want to modify */
+		if (probe_kernel_read(current_text, (void *)ip,
+				      MCOUNT_INSN_SIZE))
+			return -EFAULT;
+
+		/* make sure it is what we expect it to be */
+		if (memcmp(current_text, old_code, MCOUNT_INSN_SIZE))
+			return -EINVAL;
+	}
+
+	/* replace the text with the new text */
+	if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
+		return -EPERM;
+
+	/* make the instruction cache see the new bytes */
+	flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);
+	return 0;
+}
+
+/*
+ * Verify that the text at rec->ip is a call site we know how to turn into
+ * a nop.
+ *
+ * A site flagged FTRACE_FL_CONVERTED is compared against ftrace_call_code[],
+ * any other site against ftrace_orig_code[]. Before comparing, the branch
+ * displacement fields found in the live code are copied into the template,
+ * so only the non-displacement bits have to match. This modifies the
+ * static templates.
+ *
+ * @addr is not used.
+ *
+ * Returns 0 on a match, -EFAULT if the text cannot be read, -EINVAL on a
+ * mismatch.
+ */
+static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr)
+{
+	/* aligned so that it can be viewed through the u64 bit-field structs */
+	unsigned char __attribute__((aligned(8))) live[MCOUNT_INSN_SIZE];
+	unsigned char *expected;
+
+	if (probe_kernel_read(live, (void *)rec->ip, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	if (rec->flags & FTRACE_FL_CONVERTED) {
+		struct ftrace_call_insn *tmpl = (void *)ftrace_call_code;
+		struct ftrace_call_insn *cur = (void *)live;
+
+		tmpl->imm39_l = cur->imm39_l;
+		tmpl->imm39_h = cur->imm39_h;
+		tmpl->imm20 = cur->imm20;
+		tmpl->i = cur->i;
+		expected = ftrace_call_code;
+	} else {
+		struct ftrace_orig_insn *tmpl = (void *)ftrace_orig_code;
+		struct ftrace_orig_insn *cur = (void *)live;
+
+		tmpl->sign = cur->sign;
+		tmpl->imm20 = cur->imm20;
+		expected = ftrace_orig_code;
+	}
+
+	return memcmp(live, expected, MCOUNT_INSN_SIZE) ? -EINVAL : 0;
+}
+
+/*
+ * Turn the mcount call site described by @rec into a nop.
+ *
+ * The site is first validated by ftrace_make_nop_check(); because that
+ * check already compared the live text, ftrace_modify_code() is then
+ * called with do_check == 0 and no old code.
+ *
+ * @mod is not used here; @addr is only forwarded to the check.
+ *
+ * Returns 0 on success or the negative error code of the failing step.
+ */
+int ftrace_make_nop(struct module *mod,
+			struct dyn_ftrace *rec, unsigned long addr)
+{
+	int ret;
+	/*
+	 * unsigned char, matching the return type of ftrace_nop_replace()
+	 * and the parameter type of ftrace_modify_code().
+	 */
+	unsigned char *new;
+
+	ret = ftrace_make_nop_check(rec, addr);
+	if (ret)
+		return ret;
+	new = ftrace_nop_replace();
+	return ftrace_modify_code(rec->ip, NULL, new, 0);
+}
+
+/*
+ * Turn the nop at the mcount site described by @rec into a call to @addr.
+ *
+ * The site must currently hold the nop image; ftrace_modify_code() checks
+ * this (do_check == 1) before writing the call image built for this site.
+ *
+ * Returns the result of ftrace_modify_code().
+ */
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long site = rec->ip;
+	unsigned char *expected = ftrace_nop_replace();
+	unsigned char *image = ftrace_call_replace(site, addr);
+
+	return ftrace_modify_code(site, expected, image, 1);
+}
+
+/*
+ * Redirect the ftrace_call patch site to @func.
+ *
+ * In IA64, _mcount can't directly call ftrace_stub; only a jump is ok.
+ * A request to install ftrace_stub is therefore ignored and nothing is
+ * patched.
+ *
+ * Both ftrace_call and @func are read as function descriptors
+ * (struct fnptr) to obtain their code addresses. The 16 bytes at the
+ * patch site are flushed from the instruction cache afterwards.
+ *
+ * Always returns 0.
+ */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long patch_site, tracer_entry;
+
+	if (func == ftrace_stub)
+		return 0;
+
+	patch_site = ((struct fnptr *)ftrace_call)->ip;
+	tracer_entry = ((struct fnptr *)func)->ip;
+
+	/*
+	 * NOTE(review): "+ 2" presumably selects the instruction slot that
+	 * holds the 64-bit immediate -- confirm against ia64_patch_imm64().
+	 */
+	ia64_patch_imm64(patch_site + 2, tracer_entry);
+	flush_icache_range(patch_site, patch_site + 16);
+
+	return 0;
+}
+
+/*
+ * Architecture hook for dynamic ftrace initialisation; run from
+ * kstop_machine. Nothing is set up here, so it always returns 0.
+ */
+int __init ftrace_dyn_arch_init(void)
+{
+ return 0;
+}
diff --git a/arch/ia64/kernel/gate-data.S b/arch/ia64/kernel/gate-data.S
index 258c0a3238f..b3ef1c72e13 100644
--- a/arch/ia64/kernel/gate-data.S
+++ b/arch/ia64/kernel/gate-data.S
@@ -1,3 +1,3 @@
- .section .data.gate, "aw"
+ .section .data..gate, "aw"
.incbin "arch/ia64/kernel/gate.so"
diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S
index 74b1ccce4e8..b5f8bdd8618 100644
--- a/arch/ia64/kernel/gate.S
+++ b/arch/ia64/kernel/gate.S
@@ -11,8 +11,10 @@
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/sigcontext.h>
-#include <asm/system.h>
#include <asm/unistd.h>
+#include <asm/kregs.h>
+#include <asm/page.h>
+#include "paravirt_inst.h"
/*
* We can't easily refer to symbols inside the kernel. To avoid full runtime relocation,
@@ -20,18 +22,18 @@
* to targets outside the shared object) and to avoid multi-phase kernel builds, we
* simply create minimalistic "patch lists" in special ELF sections.
*/
- .section ".data.patch.fsyscall_table", "a"
+ .section ".data..patch.fsyscall_table", "a"
.previous
#define LOAD_FSYSCALL_TABLE(reg) \
[1:] movl reg=0; \
- .xdata4 ".data.patch.fsyscall_table", 1b-.
+ .xdata4 ".data..patch.fsyscall_table", 1b-.
- .section ".data.patch.brl_fsys_bubble_down", "a"
+ .section ".data..patch.brl_fsys_bubble_down", "a"
.previous
#define BRL_COND_FSYS_BUBBLE_DOWN(pr) \
[1:](pr)brl.cond.sptk 0; \
;; \
- .xdata4 ".data.patch.brl_fsys_bubble_down", 1b-.
+ .xdata4 ".data..patch.brl_fsys_bubble_down", 1b-.
GLOBAL_ENTRY(__kernel_syscall_via_break)
.prologue
@@ -48,87 +50,6 @@ GLOBAL_ENTRY(__kernel_syscall_via_break)
}
END(__kernel_syscall_via_break)
-/*
- * On entry:
- * r11 = saved ar.pfs
- * r15 = system call #
- * b0 = saved return address
- * b6 = return address
- * On exit:
- * r11 = saved ar.pfs
- * r15 = system call #
- * b0 = saved return address
- * all other "scratch" registers: undefined
- * all "preserved" registers: same as on entry
- */
-
-GLOBAL_ENTRY(__kernel_syscall_via_epc)
- .prologue
- .altrp b6
- .body
-{
- /*
- * Note: the kernel cannot assume that the first two instructions in this
- * bundle get executed. The remaining code must be safe even if
- * they do not get executed.
- */
- adds r17=-1024,r15 // A
- mov r10=0 // A default to successful syscall execution
- epc // B causes split-issue
-}
- ;;
- rsm psr.be | psr.i // M2 (5 cyc to srlz.d)
- LOAD_FSYSCALL_TABLE(r14) // X
- ;;
- mov r16=IA64_KR(CURRENT) // M2 (12 cyc)
- shladd r18=r17,3,r14 // A
- mov r19=NR_syscalls-1 // A
- ;;
- lfetch [r18] // M0|1
- mov r29=psr // M2 (12 cyc)
- // If r17 is a NaT, p6 will be zero
- cmp.geu p6,p7=r19,r17 // A (sysnr > 0 && sysnr < 1024+NR_syscalls)?
- ;;
- mov r21=ar.fpsr // M2 (12 cyc)
- tnat.nz p10,p9=r15 // I0
- mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...)
- ;;
- srlz.d // M0 (forces split-issue) ensure PSR.BE==0
-(p6) ld8 r18=[r18] // M0|1
- nop.i 0
- ;;
- nop.m 0
-(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!)
- nop.i 0
- ;;
-(p8) ssm psr.i
-(p6) mov b7=r18 // I0
-(p8) br.dptk.many b7 // B
-
- mov r27=ar.rsc // M2 (12 cyc)
-/*
- * brl.cond doesn't work as intended because the linker would convert this branch
- * into a branch to a PLT. Perhaps there will be a way to avoid this with some
- * future version of the linker. In the meantime, we just use an indirect branch
- * instead.
- */
-#ifdef CONFIG_ITANIUM
-(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
- ;;
-(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down
- ;;
-(p6) mov b7=r14
-(p6) br.sptk.many b7
-#else
- BRL_COND_FSYS_BUBBLE_DOWN(p6)
-#endif
- ssm psr.i
- mov r10=-1
-(p10) mov r8=EINVAL
-(p9) mov r8=ENOSYS
- FSYS_RETURN
-END(__kernel_syscall_via_epc)
-
# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET)
# define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET)
# define ARG2_OFF (16 + IA64_SIGFRAME_ARG2_OFFSET)
@@ -374,3 +295,92 @@ restore_rbs:
// invala not necessary as that will happen when returning to user-mode
br.cond.sptk back_from_restore_rbs
END(__kernel_sigtramp)
+
+/*
+ * On entry:
+ * r11 = saved ar.pfs
+ * r15 = system call #
+ * b0 = saved return address
+ * b6 = return address
+ * On exit:
+ * r11 = saved ar.pfs
+ * r15 = system call #
+ * b0 = saved return address
+ * all other "scratch" registers: undefined
+ * all "preserved" registers: same as on entry
+ */
+
+GLOBAL_ENTRY(__kernel_syscall_via_epc)
+ .prologue
+ .altrp b6
+ .body
+{
+ /*
+ * Note: the kernel cannot assume that the first two instructions in this
+ * bundle get executed. The remaining code must be safe even if
+ * they do not get executed.
+ */
+ adds r17=-1024,r15 // A
+ mov r10=0 // A default to successful syscall execution
+ epc // B causes split-issue
+}
+ ;;
+ RSM_PSR_BE_I(r20, r22) // M2 (5 cyc to srlz.d)
+ LOAD_FSYSCALL_TABLE(r14) // X
+ ;;
+ mov r16=IA64_KR(CURRENT) // M2 (12 cyc)
+ shladd r18=r17,3,r14 // A
+ mov r19=NR_syscalls-1 // A
+ ;;
+ lfetch [r18] // M0|1
+ MOV_FROM_PSR(p0, r29, r8) // M2 (12 cyc)
+ // If r17 is a NaT, p6 will be zero
+ cmp.geu p6,p7=r19,r17 // A (sysnr > 0 && sysnr < 1024+NR_syscalls)?
+ ;;
+ mov r21=ar.fpsr // M2 (12 cyc)
+ tnat.nz p10,p9=r15 // I0
+ mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...)
+ ;;
+ srlz.d // M0 (forces split-issue) ensure PSR.BE==0
+(p6) ld8 r18=[r18] // M0|1
+ nop.i 0
+ ;;
+ nop.m 0
+(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!)
+ nop.i 0
+ ;;
+ SSM_PSR_I(p8, p14, r25)
+(p6) mov b7=r18 // I0
+(p8) br.dptk.many b7 // B
+
+ mov r27=ar.rsc // M2 (12 cyc)
+/*
+ * brl.cond doesn't work as intended because the linker would convert this branch
+ * into a branch to a PLT. Perhaps there will be a way to avoid this with some
+ * future version of the linker. In the meantime, we just use an indirect branch
+ * instead.
+ */
+#ifdef CONFIG_ITANIUM
+(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
+ ;;
+(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down
+ ;;
+(p6) mov b7=r14
+(p6) br.sptk.many b7
+#else
+ BRL_COND_FSYS_BUBBLE_DOWN(p6)
+#endif
+ SSM_PSR_I(p0, p14, r10)
+ mov r10=-1
+(p10) mov r8=EINVAL
+(p9) mov r8=ENOSYS
+ FSYS_RETURN
+
+#ifdef CONFIG_PARAVIRT
+ /*
+ * Pad to make the size of this symbol constant
+ * independent of paravirtualization.
+ */
+ .align PAGE_SIZE / 8
+#endif
+END(__kernel_syscall_via_epc)
diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S
index 6d198339bf8..e518f7902af 100644
--- a/arch/ia64/kernel/gate.lds.S
+++ b/arch/ia64/kernel/gate.lds.S
@@ -1,80 +1,92 @@
/*
- * Linker script for gate DSO. The gate pages are an ELF shared object prelinked to its
- * virtual address, with only one read-only segment and one execute-only segment (both fit
- * in one page). This script controls its layout.
+ * Linker script for gate DSO. The gate pages are an ELF shared object
+ * prelinked to its virtual address, with only one read-only segment and
+ * one execute-only segment (both fit in one page). This script controls
+ * its layout.
*/
-
-#include <asm/system.h>
+#include <asm/page.h>
+#include "paravirt_patchlist.h"
SECTIONS
{
- . = GATE_ADDR + SIZEOF_HEADERS;
-
- .hash : { *(.hash) } :readable
- .gnu.hash : { *(.gnu.hash) }
- .dynsym : { *(.dynsym) }
- .dynstr : { *(.dynstr) }
- .gnu.version : { *(.gnu.version) }
- .gnu.version_d : { *(.gnu.version_d) }
- .gnu.version_r : { *(.gnu.version_r) }
- .dynamic : { *(.dynamic) } :readable :dynamic
-
- /*
- * This linker script is used both with -r and with -shared. For the layouts to match,
- * we need to skip more than enough space for the dynamic symbol table et al. If this
- * amount is insufficient, ld -shared will barf. Just increase it here.
- */
- . = GATE_ADDR + 0x500;
-
- .data.patch : {
- __start_gate_mckinley_e9_patchlist = .;
- *(.data.patch.mckinley_e9)
- __end_gate_mckinley_e9_patchlist = .;
-
- __start_gate_vtop_patchlist = .;
- *(.data.patch.vtop)
- __end_gate_vtop_patchlist = .;
-
- __start_gate_fsyscall_patchlist = .;
- *(.data.patch.fsyscall_table)
- __end_gate_fsyscall_patchlist = .;
-
- __start_gate_brl_fsys_bubble_down_patchlist = .;
- *(.data.patch.brl_fsys_bubble_down)
- __end_gate_brl_fsys_bubble_down_patchlist = .;
- } :readable
- .IA_64.unwind_info : { *(.IA_64.unwind_info*) }
- .IA_64.unwind : { *(.IA_64.unwind*) } :readable :unwind
+ . = GATE_ADDR + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :readable
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note*) } :readable :note
+
+ .dynamic : { *(.dynamic) } :readable :dynamic
+
+ /*
+ * This linker script is used both with -r and with -shared. For
+ * the layouts to match, we need to skip more than enough space for
+ * the dynamic symbol table et al. If this amount is insufficient,
+ * ld -shared will barf. Just increase it here.
+ */
+ . = GATE_ADDR + 0x600;
+
+ .data..patch : {
+ __paravirt_start_gate_mckinley_e9_patchlist = .;
+ *(.data..patch.mckinley_e9)
+ __paravirt_end_gate_mckinley_e9_patchlist = .;
+
+ __paravirt_start_gate_vtop_patchlist = .;
+ *(.data..patch.vtop)
+ __paravirt_end_gate_vtop_patchlist = .;
+
+ __paravirt_start_gate_fsyscall_patchlist = .;
+ *(.data..patch.fsyscall_table)
+ __paravirt_end_gate_fsyscall_patchlist = .;
+
+ __paravirt_start_gate_brl_fsys_bubble_down_patchlist = .;
+ *(.data..patch.brl_fsys_bubble_down)
+ __paravirt_end_gate_brl_fsys_bubble_down_patchlist = .;
+ } :readable
+
+ .IA_64.unwind_info : { *(.IA_64.unwind_info*) }
+ .IA_64.unwind : { *(.IA_64.unwind*) } :readable :unwind
#ifdef HAVE_BUGGY_SEGREL
- .text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) } :readable
+ .text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) } :readable
#else
- . = ALIGN (PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1));
- .text : { *(.text) *(.text.*) } :epc
+ . = ALIGN(PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1));
+ .text : { *(.text) *(.text.*) } :epc
#endif
- /DISCARD/ : {
- *(.got.plt) *(.got)
- *(.data .data.* .gnu.linkonce.d.*)
- *(.dynbss)
- *(.bss .bss.* .gnu.linkonce.b.*)
- *(__ex_table)
- *(__mca_table)
- }
+ /DISCARD/ : {
+ *(.got.plt) *(.got)
+ *(.data .data.* .gnu.linkonce.d.*)
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ *(__ex_table)
+ *(__mca_table)
+ }
}
/*
+ * ld does not recognize this name token; use the constant.
+ */
+#define PT_IA_64_UNWIND 0x70000001
+
+/*
* We must supply the ELF program headers explicitly to get just one
* PT_LOAD segment, and set the flags explicitly to make segments read-only.
*/
PHDRS
{
- readable PT_LOAD FILEHDR PHDRS FLAGS(4); /* PF_R */
+ readable PT_LOAD FILEHDR PHDRS FLAGS(4); /* PF_R */
#ifndef HAVE_BUGGY_SEGREL
- epc PT_LOAD FILEHDR PHDRS FLAGS(1); /* PF_X */
+ epc PT_LOAD FILEHDR PHDRS FLAGS(1); /* PF_X */
#endif
- dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
- unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ unwind PT_IA_64_UNWIND;
}
/*
@@ -82,14 +94,14 @@ PHDRS
*/
VERSION
{
- LINUX_2.5 {
- global:
- __kernel_syscall_via_break;
- __kernel_syscall_via_epc;
- __kernel_sigtramp;
-
- local: *;
- };
+ LINUX_2.5 {
+ global:
+ __kernel_syscall_via_break;
+ __kernel_syscall_via_epc;
+ __kernel_sigtramp;
+
+ local: *;
+ };
}
/* The ELF entry point can be used to set the AT_SYSINFO value. */
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 4e5e27540e2..a4acddad0c7 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -26,11 +26,13 @@
#include <asm/mmu_context.h>
#include <asm/asm-offsets.h>
#include <asm/pal.h>
+#include <asm/paravirt.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
-#include <asm/system.h>
#include <asm/mca_asm.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
#ifdef CONFIG_HOTPLUG_CPU
#define SAL_PSR_BITS_TO_SET \
@@ -164,7 +166,7 @@ RestRR: \
mov _tmp2=((ia64_rid(IA64_REGION_ID_KERNEL, (num<<61)) << 8) | (pgsize << 2) | vhpt);; \
mov rr[_tmp1]=_tmp2
- .section __special_page_section,"ax"
+ __PAGE_ALIGNED_DATA
.global empty_zero_page
empty_zero_page:
@@ -178,7 +180,7 @@ swapper_pg_dir:
halt_msg:
stringz "Halting kernel\n"
- .section .text.head,"ax"
+ __REF
.global start_ap
@@ -356,7 +358,32 @@ start_ap:
mov ar.rsc=0 // place RSE in enforced lazy mode
;;
loadrs // clear the dirty partition
- mov IA64_KR(PER_CPU_DATA)=r0 // clear physical per-CPU base
+ movl r19=__phys_per_cpu_start
+ mov r18=PERCPU_PAGE_SIZE
+ ;;
+#ifndef CONFIG_SMP
+ add r19=r19,r18
+ ;;
+#else
+(isAP) br.few 2f
+ movl r20=__cpu0_per_cpu
+ ;;
+ shr.u r18=r18,3
+1:
+ ld8 r21=[r19],8;;
+ st8[r20]=r21,8
+ adds r18=-1,r18;;
+ cmp4.lt p7,p6=0,r18
+(p7) br.cond.dptk.few 1b
+ mov r19=r20
+ ;;
+2:
+#endif
+ tpa r19=r19
+ ;;
+ .pred.rel.mutex isBP,isAP
+(isBP) mov IA64_KR(PER_CPU_DATA)=r19 // per-CPU base for cpu0
+(isAP) mov IA64_KR(PER_CPU_DATA)=r0 // clear physical per-CPU base
;;
mov ar.bspstore=r2 // establish the new RSE stack
;;
@@ -367,6 +394,41 @@ start_ap:
;;
(isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader
+#ifdef CONFIG_PARAVIRT
+
+ movl r14=hypervisor_setup_hooks
+ movl r15=hypervisor_type
+ mov r16=num_hypervisor_hooks
+ ;;
+ ld8 r2=[r15]
+ ;;
+ cmp.ltu p7,p0=r2,r16 // array size check
+ shladd r8=r2,3,r14
+ ;;
+(p7) ld8 r9=[r8]
+ ;;
+(p7) mov b1=r9
+(p7) cmp.ne.unc p7,p0=r9,r0 // no actual branch to NULL
+ ;;
+(p7) br.call.sptk.many rp=b1
+
+ __INITDATA
+
+default_setup_hook = 0 // Currently nothing needs to be done.
+
+ .global hypervisor_type
+hypervisor_type:
+ data8 PARAVIRT_HYPERVISOR_TYPE_DEFAULT
+
+ // must have the same order with PARAVIRT_HYPERVISOR_TYPE_xxx
+
+hypervisor_setup_hooks:
+ data8 default_setup_hook
+num_hypervisor_hooks = (. - hypervisor_setup_hooks) / 8
+ .previous
+
+#endif
+
#ifdef CONFIG_SMP
(isAP) br.call.sptk.many rp=start_secondary
.ret0:
@@ -970,7 +1032,7 @@ END(ia64_delay_loop)
* Return a CPU-local timestamp in nano-seconds. This timestamp is
* NOT synchronized across CPUs its return value must never be
* compared against the values returned on another CPU. The usage in
- * kernel/sched.c ensures that.
+ * kernel/sched/core.c ensures that.
*
* The return-value of sched_clock() is NOT supposed to wrap-around.
* If it did, it would cause some scheduling hiccups (at the worst).
@@ -984,8 +1046,8 @@ END(ia64_delay_loop)
* except that the multiplication and the shift are done with 128-bit
* intermediate precision so that we can produce a full 64-bit result.
*/
-GLOBAL_ENTRY(sched_clock)
- addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
+GLOBAL_ENTRY(ia64_native_sched_clock)
+ addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
mov.m r9=ar.itc // fetch cycle-counter (35 cyc)
;;
ldf8 f8=[r8]
@@ -1000,20 +1062,33 @@ GLOBAL_ENTRY(sched_clock)
;;
shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
br.ret.sptk.many rp
-END(sched_clock)
+END(ia64_native_sched_clock)
+#ifndef CONFIG_PARAVIRT
+ //unsigned long long
+ //sched_clock(void) __attribute__((alias("ia64_native_sched_clock")));
+ .global sched_clock
+sched_clock = ia64_native_sched_clock
+#endif
-GLOBAL_ENTRY(start_kernel_thread)
- .prologue
- .save rp, r0 // this is the end of the call-chain
- .body
- alloc r2 = ar.pfs, 0, 0, 2, 0
- mov out0 = r9
- mov out1 = r11;;
- br.call.sptk.many rp = kernel_thread_helper;;
- mov out0 = r8
- br.call.sptk.many rp = sys_exit;;
-1: br.sptk.few 1b // not reached
-END(start_kernel_thread)
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+/*
+ * cycle_to_cputime: scale a cycle count by this CPU's nsec-per-cycle factor.
+ *
+ * In:  r32 = cycle count (the single input register allocated below)
+ * Out: r8  = (r32 * factor) >> IA64_NSEC_PER_CYC_SHIFT, where factor is
+ *            the value loaded from THIS_CPU(ia64_cpu_info) +
+ *            IA64_CPUINFO_NSEC_PER_CYC_OFFSET
+ *
+ * The product is formed as a 128-bit value (low half in r8, high half in
+ * r9) before the shift, so no bits are lost ahead of the final shrp.
+ * NOTE(review): the result is presumably in nanoseconds, judging by the
+ * constant names -- confirm against the callers.
+ */
+GLOBAL_ENTRY(cycle_to_cputime)
+ alloc r16=ar.pfs,1,0,0,0
+ addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
+ ;;
+ ldf8 f8=[r8]
+ ;;
+ setf.sig f9=r32
+ ;;
+ xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc)
+ xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product
+ ;;
+ getf.sig r8=f10 // (5 cyc)
+ getf.sig r9=f11
+ ;;
+ shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
+ br.ret.sptk.many rp
+END(cycle_to_cputime)
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifdef CONFIG_IA64_BRL_EMU
@@ -1038,95 +1113,6 @@ SET_REG(b5);
#endif /* CONFIG_IA64_BRL_EMU */
#ifdef CONFIG_SMP
- /*
- * This routine handles spinlock contention. It uses a non-standard calling
- * convention to avoid converting leaf routines into interior routines. Because
- * of this special convention, there are several restrictions:
- *
- * - do not use gp relative variables, this code is called from the kernel
- * and from modules, r1 is undefined.
- * - do not use stacked registers, the caller owns them.
- * - do not use the scratch stack space, the caller owns it.
- * - do not use any registers other than the ones listed below
- *
- * Inputs:
- * ar.pfs - saved CFM of caller
- * ar.ccv - 0 (and available for use)
- * r27 - flags from spin_lock_irqsave or 0. Must be preserved.
- * r28 - available for use.
- * r29 - available for use.
- * r30 - available for use.
- * r31 - address of lock, available for use.
- * b6 - return address
- * p14 - available for use.
- * p15 - used to track flag status.
- *
- * If you patch this code to use more registers, do not forget to update
- * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h.
- */
-
-#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-
-GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4)
- .prologue
- .save ar.pfs, r0 // this code effectively has a zero frame size
- .save rp, r28
- .body
- nop 0
- tbit.nz p15,p0=r27,IA64_PSR_I_BIT
- .restore sp // pop existing prologue after next insn
- mov b6 = r28
- .prologue
- .save ar.pfs, r0
- .altrp b6
- .body
- ;;
-(p15) ssm psr.i // reenable interrupts if they were on
- // DavidM says that srlz.d is slow and is not required in this case
-.wait:
- // exponential backoff, kdb, lockmeter etc. go in here
- hint @pause
- ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word
- nop 0
- ;;
- cmp4.ne p14,p0=r30,r0
-(p14) br.cond.sptk.few .wait
-(p15) rsm psr.i // disable interrupts if we reenabled them
- br.cond.sptk.few b6 // lock is now free, try to acquire
- .global ia64_spinlock_contention_pre3_4_end // for kernprof
-ia64_spinlock_contention_pre3_4_end:
-END(ia64_spinlock_contention_pre3_4)
-
-#else
-
-GLOBAL_ENTRY(ia64_spinlock_contention)
- .prologue
- .altrp b6
- .body
- tbit.nz p15,p0=r27,IA64_PSR_I_BIT
- ;;
-.wait:
-(p15) ssm psr.i // reenable interrupts if they were on
- // DavidM says that srlz.d is slow and is not required in this case
-.wait2:
- // exponential backoff, kdb, lockmeter etc. go in here
- hint @pause
- ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word
- ;;
- cmp4.ne p14,p0=r30,r0
- mov r30 = 1
-(p14) br.cond.sptk.few .wait2
-(p15) rsm psr.i // disable interrupts if we reenabled them
- ;;
- cmpxchg4.acq r30=[r31], r30, ar.ccv
- ;;
- cmp4.ne p14,p0=r0,r30
-(p14) br.cond.sptk.few .wait
-
- br.ret.sptk.many b6 // lock is now taken
-END(ia64_spinlock_contention)
-
-#endif
#ifdef CONFIG_HOTPLUG_CPU
GLOBAL_ENTRY(ia64_jump_to_sal)
@@ -1150,7 +1136,7 @@ GLOBAL_ENTRY(ia64_jump_to_sal)
movl r16=SAL_PSR_BITS_TO_SET;;
mov cr.ipsr=r16
mov cr.ifs=r0;;
- rfi;;
+ rfi;; // note: this unmasks MCA/INIT (psr.mc)
1:
/*
* Invalidate all TLB data/inst
@@ -1176,6 +1162,7 @@ tlb_purge_done:
RESTORE_REG(cr.dcr, r25, r17);;
RESTORE_REG(cr.iva, r25, r17);;
RESTORE_REG(cr.pta, r25, r17);;
+ srlz.d;; // required not to violate RAW dependency
RESTORE_REG(cr.itv, r25, r17);;
RESTORE_REG(cr.pmv, r25, r17);;
RESTORE_REG(cr.cmcv, r25, r17);;
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index bd17190bebb..5b7791dd396 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -12,18 +12,16 @@ EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(strlen);
+#include <asm/pgtable.h>
+EXPORT_SYMBOL_GPL(empty_zero_page);
+
#include <asm/checksum.h>
EXPORT_SYMBOL(ip_fast_csum); /* hand-coded assembly */
EXPORT_SYMBOL(csum_ipv6_magic);
-#include <asm/semaphore.h>
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__down_trylock);
-EXPORT_SYMBOL(__up);
-
#include <asm/page.h>
EXPORT_SYMBOL(clear_page);
+EXPORT_SYMBOL(copy_page);
#ifdef CONFIG_VIRTUAL_MEM_MAP
#include <linux/bootmem.h>
@@ -32,9 +30,9 @@ EXPORT_SYMBOL(max_low_pfn); /* defined by bootmem.c, but not exported by generic
#endif
#include <asm/processor.h>
-EXPORT_SYMBOL(per_cpu__cpu_info);
+EXPORT_SYMBOL(ia64_cpu_info);
#ifdef CONFIG_SMP
-EXPORT_SYMBOL(per_cpu__local_per_cpu_offset);
+EXPORT_SYMBOL(local_per_cpu_offset);
#endif
#include <asm/uaccess.h>
@@ -86,29 +84,15 @@ EXPORT_SYMBOL(ia64_save_scratch_fpregs);
#include <asm/unwind.h>
EXPORT_SYMBOL(unw_init_running);
-#ifdef ASM_SUPPORTED
-# ifdef CONFIG_SMP
-# if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
-/*
- * This is not a normal routine and we don't want a function descriptor for it, so we use
- * a fake declaration here.
- */
-extern char ia64_spinlock_contention_pre3_4;
-EXPORT_SYMBOL(ia64_spinlock_contention_pre3_4);
-# else
-/*
- * This is not a normal routine and we don't want a function descriptor for it, so we use
- * a fake declaration here.
- */
-extern char ia64_spinlock_contention;
-EXPORT_SYMBOL(ia64_spinlock_contention);
-# endif
-# endif
-#endif
-
#if defined(CONFIG_IA64_ESI) || defined(CONFIG_IA64_ESI_MODULE)
extern void esi_call_phys (void);
EXPORT_SYMBOL_GPL(esi_call_phys);
#endif
extern char ia64_ivt[];
EXPORT_SYMBOL(ia64_ivt);
+
+#include <asm/ftrace.h>
+#ifdef CONFIG_FUNCTION_TRACER
+/* mcount is defined in assembly */
+EXPORT_SYMBOL(_mcount);
+#endif
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
index bc8efcad28b..f9efe9739d3 100644
--- a/arch/ia64/kernel/init_task.c
+++ b/arch/ia64/kernel/init_task.c
@@ -17,19 +17,13 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
/*
* Initial task structure.
*
* We need to make sure that this is properly aligned due to the way process stacks are
- * handled. This is done by having a special ".data.init_task" section...
+ * handled. This is done by having a special ".data..init_task" section...
*/
#define init_thread_info init_task_mem.s.thread_info
@@ -39,7 +33,8 @@ union {
struct thread_info thread_info;
} s;
unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
-} init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))) = {{
+} init_task_mem asm ("init_task") __init_task_data =
+ {{
.task = INIT_TASK(init_task_mem.s.task),
.thread_info = INIT_THREAD_INFO(init_task_mem.s.task)
}};
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index cfe4654838f..cd44a57c73b 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -69,14 +69,14 @@
* systems, we use one-to-one mapping between IA-64 vector and IRQ. A
* platform can implement platform_irq_to_vector(irq) and
* platform_local_vector_to_irq(vector) APIs to differentiate the mapping.
- * Please see also include/asm-ia64/hw_irq.h for those APIs.
+ * Please see also arch/ia64/include/asm/hw_irq.h for those APIs.
*
* To sum up, there are three levels of mappings involved:
*
* PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
*
* Note: The term "IRQ" is loosely used everywhere in Linux kernel to
- * describeinterrupts. Now we use "IRQ" only for Linux IRQ's. ISA IRQ
+ * describe interrupts. Now we use "IRQ" only for Linux IRQ's. ISA IRQ
* (isa_irq) is the only exception in this source code.
*/
@@ -86,6 +86,7 @@
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/pci.h>
+#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/string.h>
#include <linux/bootmem.h>
@@ -97,7 +98,6 @@
#include <asm/machvec.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
-#include <asm/system.h>
#undef DEBUG_INTERRUPT_ROUTING
@@ -107,10 +107,6 @@
#define DBG(fmt...)
#endif
-#define NR_PREALLOCATE_RTE_ENTRIES \
- (PAGE_SIZE / sizeof(struct iosapic_rte_info))
-#define RTE_PREALLOCATED (1)
-
static DEFINE_SPINLOCK(iosapic_lock);
/*
@@ -135,7 +131,6 @@ struct iosapic_rte_info {
struct list_head rte_list; /* RTEs sharing the same vector */
char rte_index; /* IOSAPIC RTE index */
int refcnt; /* reference counter */
- unsigned int flags; /* flags */
struct iosapic *iosapic;
} ____cacheline_aligned;
@@ -152,10 +147,7 @@ static struct iosapic_intr_info {
unsigned char trigger : 1; /* trigger mode (see iosapic.h) */
} iosapic_intr_info[NR_IRQS];
-static unsigned char pcat_compat __devinitdata; /* 8259 compatibility flag */
-
-static int iosapic_kmalloc_ok;
-static LIST_HEAD(free_rte_list);
+static unsigned char pcat_compat; /* 8259 compatibility flag */
static inline void
iosapic_write(struct iosapic *iosapic, unsigned int reg, u32 val)
@@ -199,19 +191,6 @@ static inline int __gsi_to_irq(unsigned int gsi)
return -1;
}
-/*
- * Translate GSI number to the corresponding IA-64 interrupt vector. If no
- * entry exists, return -1.
- */
-inline int
-gsi_to_vector (unsigned int gsi)
-{
- int irq = __gsi_to_irq(gsi);
- if (check_irq_used(irq) < 0)
- return -1;
- return irq_to_vector(irq);
-}
-
int
gsi_to_irq (unsigned int gsi)
{
@@ -277,7 +256,7 @@ set_rte (unsigned int gsi, unsigned int irq, unsigned int dest, int mask)
}
static void
-nop (unsigned int irq)
+nop (struct irq_data *data)
{
/* do nothing... */
}
@@ -307,8 +286,9 @@ kexec_disable_iosapic(void)
#endif
static void
-mask_irq (unsigned int irq)
+mask_irq (struct irq_data *data)
{
+ unsigned int irq = data->irq;
u32 low32;
int rte_index;
struct iosapic_rte_info *rte;
@@ -325,8 +305,9 @@ mask_irq (unsigned int irq)
}
static void
-unmask_irq (unsigned int irq)
+unmask_irq (struct irq_data *data)
{
+ unsigned int irq = data->irq;
u32 low32;
int rte_index;
struct iosapic_rte_info *rte;
@@ -342,29 +323,31 @@ unmask_irq (unsigned int irq)
}
-static void
-iosapic_set_affinity (unsigned int irq, cpumask_t mask)
+static int
+iosapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
{
#ifdef CONFIG_SMP
+ unsigned int irq = data->irq;
u32 high32, low32;
- int dest, rte_index;
+ int cpu, dest, rte_index;
int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
struct iosapic_rte_info *rte;
struct iosapic *iosapic;
irq &= (~IA64_IRQ_REDIRECTED);
- cpus_and(mask, mask, cpu_online_map);
- if (cpus_empty(mask))
- return;
+ cpu = cpumask_first_and(cpu_online_mask, mask);
+ if (cpu >= nr_cpu_ids)
+ return -1;
- if (reassign_irq_vector(irq, first_cpu(mask)))
- return;
+ if (irq_prepare_move(irq, cpu))
+ return -1;
- dest = cpu_physical_id(first_cpu(mask));
+ dest = cpu_physical_id(cpu);
if (!iosapic_intr_info[irq].count)
- return; /* not an IOSAPIC interrupt */
+ return -1; /* not an IOSAPIC interrupt */
set_irq_affinity_info(irq, dest, redir);
@@ -389,7 +372,9 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask)
iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32);
iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
}
+
#endif
+ return 0;
}
/*
@@ -397,30 +382,33 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask)
*/
static unsigned int
-iosapic_startup_level_irq (unsigned int irq)
+iosapic_startup_level_irq (struct irq_data *data)
{
- unmask_irq(irq);
+ unmask_irq(data);
return 0;
}
static void
-iosapic_end_level_irq (unsigned int irq)
+iosapic_unmask_level_irq (struct irq_data *data)
{
+ unsigned int irq = data->irq;
ia64_vector vec = irq_to_vector(irq);
struct iosapic_rte_info *rte;
int do_unmask_irq = 0;
- if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
+ irq_complete_move(irq);
+ if (unlikely(irqd_is_setaffinity_pending(data))) {
do_unmask_irq = 1;
- mask_irq(irq);
- }
+ mask_irq(data);
+ } else
+ unmask_irq(data);
list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list)
iosapic_eoi(rte->iosapic->addr, vec);
if (unlikely(do_unmask_irq)) {
- move_masked_irq(irq);
- unmask_irq(irq);
+ irq_move_masked_irq(data);
+ unmask_irq(data);
}
}
@@ -429,17 +417,16 @@ iosapic_end_level_irq (unsigned int irq)
#define iosapic_disable_level_irq mask_irq
#define iosapic_ack_level_irq nop
-struct irq_chip irq_type_iosapic_level = {
- .name = "IO-SAPIC-level",
- .startup = iosapic_startup_level_irq,
- .shutdown = iosapic_shutdown_level_irq,
- .enable = iosapic_enable_level_irq,
- .disable = iosapic_disable_level_irq,
- .ack = iosapic_ack_level_irq,
- .end = iosapic_end_level_irq,
- .mask = mask_irq,
- .unmask = unmask_irq,
- .set_affinity = iosapic_set_affinity
+static struct irq_chip irq_type_iosapic_level = {
+ .name = "IO-SAPIC-level",
+ .irq_startup = iosapic_startup_level_irq,
+ .irq_shutdown = iosapic_shutdown_level_irq,
+ .irq_enable = iosapic_enable_level_irq,
+ .irq_disable = iosapic_disable_level_irq,
+ .irq_ack = iosapic_ack_level_irq,
+ .irq_mask = mask_irq,
+ .irq_unmask = iosapic_unmask_level_irq,
+ .irq_set_affinity = iosapic_set_affinity
};
/*
@@ -447,9 +434,9 @@ struct irq_chip irq_type_iosapic_level = {
*/
static unsigned int
-iosapic_startup_edge_irq (unsigned int irq)
+iosapic_startup_edge_irq (struct irq_data *data)
{
- unmask_irq(irq);
+ unmask_irq(data);
/*
* IOSAPIC simply drops interrupts pended while the
* corresponding pin was masked, so we can't know if an
@@ -459,39 +446,28 @@ iosapic_startup_edge_irq (unsigned int irq)
}
static void
-iosapic_ack_edge_irq (unsigned int irq)
+iosapic_ack_edge_irq (struct irq_data *data)
{
- irq_desc_t *idesc = irq_desc + irq;
-
- move_native_irq(irq);
- /*
- * Once we have recorded IRQ_PENDING already, we can mask the
- * interrupt for real. This prevents IRQ storms from unhandled
- * devices.
- */
- if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) ==
- (IRQ_PENDING|IRQ_DISABLED))
- mask_irq(irq);
+ irq_complete_move(data->irq);
+ irq_move_irq(data);
}
#define iosapic_enable_edge_irq unmask_irq
#define iosapic_disable_edge_irq nop
-#define iosapic_end_edge_irq nop
-
-struct irq_chip irq_type_iosapic_edge = {
- .name = "IO-SAPIC-edge",
- .startup = iosapic_startup_edge_irq,
- .shutdown = iosapic_disable_edge_irq,
- .enable = iosapic_enable_edge_irq,
- .disable = iosapic_disable_edge_irq,
- .ack = iosapic_ack_edge_irq,
- .end = iosapic_end_edge_irq,
- .mask = mask_irq,
- .unmask = unmask_irq,
- .set_affinity = iosapic_set_affinity
+
+static struct irq_chip irq_type_iosapic_edge = {
+ .name = "IO-SAPIC-edge",
+ .irq_startup = iosapic_startup_edge_irq,
+ .irq_shutdown = iosapic_disable_edge_irq,
+ .irq_enable = iosapic_enable_edge_irq,
+ .irq_disable = iosapic_disable_edge_irq,
+ .irq_ack = iosapic_ack_edge_irq,
+ .irq_mask = mask_irq,
+ .irq_unmask = unmask_irq,
+ .irq_set_affinity = iosapic_set_affinity
};
-unsigned int
+static unsigned int
iosapic_version (char __iomem *addr)
{
/*
@@ -518,7 +494,7 @@ static int iosapic_find_sharable_irq(unsigned long trigger, unsigned long pol)
if (trigger == IOSAPIC_EDGE)
return -EINVAL;
- for (i = 0; i <= NR_IRQS; i++) {
+ for (i = 0; i < NR_IRQS; i++) {
info = &iosapic_intr_info[i];
if (info->trigger == trigger && info->polarity == pol &&
(info->dmode == IOSAPIC_FIXED ||
@@ -545,7 +521,7 @@ iosapic_reassign_vector (int irq)
if (iosapic_intr_info[irq].count) {
new_irq = create_irq();
if (new_irq < 0)
- panic("%s: out of interrupt vectors!\n", __FUNCTION__);
+ panic("%s: out of interrupt vectors!\n", __func__);
printk(KERN_INFO "Reassigning vector %d to %d\n",
irq_to_vector(irq), irq_to_vector(new_irq));
memcpy(&iosapic_intr_info[new_irq], &iosapic_intr_info[irq],
@@ -560,66 +536,41 @@ iosapic_reassign_vector (int irq)
}
}
-static struct iosapic_rte_info * __init_refok iosapic_alloc_rte (void)
+static inline int irq_is_shared (int irq)
{
- int i;
- struct iosapic_rte_info *rte;
- int preallocated = 0;
-
- if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
- rte = alloc_bootmem(sizeof(struct iosapic_rte_info) *
- NR_PREALLOCATE_RTE_ENTRIES);
- if (!rte)
- return NULL;
- for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
- list_add(&rte->rte_list, &free_rte_list);
- }
-
- if (!list_empty(&free_rte_list)) {
- rte = list_entry(free_rte_list.next, struct iosapic_rte_info,
- rte_list);
- list_del(&rte->rte_list);
- preallocated++;
- } else {
- rte = kmalloc(sizeof(struct iosapic_rte_info), GFP_ATOMIC);
- if (!rte)
- return NULL;
- }
-
- memset(rte, 0, sizeof(struct iosapic_rte_info));
- if (preallocated)
- rte->flags |= RTE_PREALLOCATED;
-
- return rte;
+ return (iosapic_intr_info[irq].count > 1);
}
-static inline int irq_is_shared (int irq)
+struct irq_chip*
+ia64_native_iosapic_get_irq_chip(unsigned long trigger)
{
- return (iosapic_intr_info[irq].count > 1);
+ if (trigger == IOSAPIC_EDGE)
+ return &irq_type_iosapic_edge;
+ else
+ return &irq_type_iosapic_level;
}
static int
register_intr (unsigned int gsi, int irq, unsigned char delivery,
unsigned long polarity, unsigned long trigger)
{
- irq_desc_t *idesc;
- struct hw_interrupt_type *irq_type;
+ struct irq_chip *chip, *irq_type;
int index;
struct iosapic_rte_info *rte;
index = find_iosapic(gsi);
if (index < 0) {
printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
- __FUNCTION__, gsi);
+ __func__, gsi);
return -ENODEV;
}
rte = find_rte(irq, gsi);
if (!rte) {
- rte = iosapic_alloc_rte();
+ rte = kzalloc(sizeof (*rte), GFP_ATOMIC);
if (!rte) {
printk(KERN_WARNING "%s: cannot allocate memory\n",
- __FUNCTION__);
+ __func__);
return -ENOMEM;
}
@@ -636,7 +587,7 @@ register_intr (unsigned int gsi, int irq, unsigned char delivery,
(info->trigger != trigger || info->polarity != polarity)){
printk (KERN_WARNING
"%s: cannot override the interrupt\n",
- __FUNCTION__);
+ __func__);
return -EINVAL;
}
rte->refcnt++;
@@ -648,20 +599,20 @@ register_intr (unsigned int gsi, int irq, unsigned char delivery,
iosapic_intr_info[irq].dmode = delivery;
iosapic_intr_info[irq].trigger = trigger;
- if (trigger == IOSAPIC_EDGE)
- irq_type = &irq_type_iosapic_edge;
- else
- irq_type = &irq_type_iosapic_level;
+ irq_type = iosapic_get_irq_chip(trigger);
- idesc = irq_desc + irq;
- if (idesc->chip != irq_type) {
- if (idesc->chip != &no_irq_type)
+ chip = irq_get_chip(irq);
+ if (irq_type != NULL && chip != irq_type) {
+ if (chip != &no_irq_chip)
printk(KERN_WARNING
"%s: changing vector %d from %s to %s\n",
- __FUNCTION__, irq_to_vector(irq),
- idesc->chip->name, irq_type->name);
- idesc->chip = irq_type;
+ __func__, irq_to_vector(irq),
+ chip->name, irq_type->name);
+ chip = irq_type;
}
+ __irq_set_chip_handler_name_locked(irq, chip, trigger == IOSAPIC_EDGE ?
+ handle_edge_irq : handle_level_irq,
+ NULL);
return 0;
}
@@ -702,32 +653,31 @@ get_target_cpu (unsigned int gsi, int irq)
#ifdef CONFIG_NUMA
{
int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
- cpumask_t cpu_mask;
+ const struct cpumask *cpu_mask;
iosapic_index = find_iosapic(gsi);
if (iosapic_index < 0 ||
iosapic_lists[iosapic_index].node == MAX_NUMNODES)
goto skip_numa_setup;
- cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node);
- cpus_and(cpu_mask, cpu_mask, domain);
- for_each_cpu_mask(numa_cpu, cpu_mask) {
- if (!cpu_online(numa_cpu))
- cpu_clear(numa_cpu, cpu_mask);
+ cpu_mask = cpumask_of_node(iosapic_lists[iosapic_index].node);
+ num_cpus = 0;
+ for_each_cpu_and(numa_cpu, cpu_mask, &domain) {
+ if (cpu_online(numa_cpu))
+ num_cpus++;
}
- num_cpus = cpus_weight(cpu_mask);
-
if (!num_cpus)
goto skip_numa_setup;
/* Use irq assignment to distribute across cpus in node */
cpu_index = irq % num_cpus;
- for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
- numa_cpu = next_cpu(numa_cpu, cpu_mask);
+ for_each_cpu_and(numa_cpu, cpu_mask, &domain)
+ if (cpu_online(numa_cpu) && i++ >= cpu_index)
+ break;
- if (numa_cpu != NR_CPUS)
+ if (numa_cpu < nr_cpu_ids)
return cpu_physical_id(numa_cpu);
}
skip_numa_setup:
@@ -738,7 +688,7 @@ skip_numa_setup:
* case of NUMA.)
*/
do {
- if (++cpu >= NR_CPUS)
+ if (++cpu >= nr_cpu_ids)
cpu = 0;
} while (!cpu_online(cpu) || !cpu_isset(cpu, domain));
@@ -748,6 +698,15 @@ skip_numa_setup:
#endif
}
+static inline unsigned char choose_dmode(void) /* select the IOSAPIC delivery mode that callers hand to register_intr() */
+{
+#ifdef CONFIG_SMP
+ if (smp_int_redirect & SMP_IRQ_REDIRECTION) /* redirection bit set in smp_int_redirect */
+ return IOSAPIC_LOWEST_PRIORITY;
+#endif
+ return IOSAPIC_FIXED; /* non-SMP build, or redirection bit not set */
+}
+
/*
* ACPI can describe IOSAPIC interrupts via static tables and namespace
* methods. This provides an interface to register those interrupts and
@@ -762,6 +721,8 @@ iosapic_register_intr (unsigned int gsi,
unsigned long flags;
struct iosapic_rte_info *rte;
u32 low32;
+ unsigned char dmode;
+ struct irq_desc *desc;
/*
* If this GSI has already been registered (i.e., it's a
@@ -774,7 +735,7 @@ iosapic_register_intr (unsigned int gsi,
rte = find_rte(irq, gsi);
if(iosapic_intr_info[irq].count == 0) {
assign_irq_vector(irq);
- dynamic_irq_init(irq);
+ irq_init_desc(irq);
} else if (rte->refcnt != NO_REF_RTE) {
rte->refcnt++;
goto unlock_iosapic_lock;
@@ -789,12 +750,13 @@ iosapic_register_intr (unsigned int gsi,
goto unlock_iosapic_lock;
}
- spin_lock(&irq_desc[irq].lock);
+ desc = irq_to_desc(irq);
+ raw_spin_lock(&desc->lock);
dest = get_target_cpu(gsi, irq);
- err = register_intr(gsi, irq, IOSAPIC_LOWEST_PRIORITY,
- polarity, trigger);
+ dmode = choose_dmode();
+ err = register_intr(gsi, irq, dmode, polarity, trigger);
if (err < 0) {
- spin_unlock(&irq_desc[irq].lock);
+ raw_spin_unlock(&desc->lock);
irq = err;
goto unlock_iosapic_lock;
}
@@ -813,7 +775,7 @@ iosapic_register_intr (unsigned int gsi,
(polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
cpu_logical_id(dest), dest, irq_to_vector(irq));
- spin_unlock(&irq_desc[irq].lock);
+ raw_spin_unlock(&desc->lock);
unlock_iosapic_lock:
spin_unlock_irqrestore(&iosapic_lock, flags);
return irq;
@@ -824,7 +786,6 @@ iosapic_unregister_intr (unsigned int gsi)
{
unsigned long flags;
int irq, index;
- irq_desc_t *idesc;
u32 low32;
unsigned long trigger, polarity;
unsigned int dest;
@@ -854,7 +815,6 @@ iosapic_unregister_intr (unsigned int gsi)
if (--rte->refcnt > 0)
goto out;
- idesc = irq_desc + irq;
rte->refcnt = NO_REF_RTE;
/* Mask the interrupt */
@@ -878,7 +838,7 @@ iosapic_unregister_intr (unsigned int gsi)
if (iosapic_intr_info[irq].count == 0) {
#ifdef CONFIG_SMP
/* Clear affinity */
- cpus_setall(idesc->affinity);
+ cpumask_setall(irq_get_irq_data(irq)->affinity);
#endif
/* Clear the interrupt information */
iosapic_intr_info[irq].dest = 0;
@@ -921,18 +881,18 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
case ACPI_INTERRUPT_INIT:
irq = create_irq();
if (irq < 0)
- panic("%s: out of interrupt vectors!\n", __FUNCTION__);
+ panic("%s: out of interrupt vectors!\n", __func__);
vector = irq_to_vector(irq);
delivery = IOSAPIC_INIT;
break;
case ACPI_INTERRUPT_CPEI:
irq = vector = IA64_CPE_VECTOR;
BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL));
- delivery = IOSAPIC_LOWEST_PRIORITY;
+ delivery = IOSAPIC_FIXED;
mask = 1;
break;
default:
- printk(KERN_ERR "%s: invalid int type 0x%x\n", __FUNCTION__,
+ printk(KERN_ERR "%s: invalid int type 0x%x\n", __func__,
int_type);
return -1;
}
@@ -954,17 +914,17 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
/*
* ACPI calls this when it finds an entry for a legacy ISA IRQ override.
*/
-void __devinit
-iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
- unsigned long polarity,
- unsigned long trigger)
+void iosapic_override_isa_irq(unsigned int isa_irq, unsigned int gsi,
+ unsigned long polarity, unsigned long trigger)
{
int vector, irq;
unsigned int dest = cpu_physical_id(smp_processor_id());
+ unsigned char dmode;
irq = vector = isa_irq_to_vector(isa_irq);
BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL));
- register_intr(gsi, irq, IOSAPIC_LOWEST_PRIORITY, polarity, trigger);
+ dmode = choose_dmode();
+ register_intr(gsi, irq, dmode, polarity, trigger);
DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n",
isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level",
@@ -975,6 +935,22 @@ iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
}
void __init
+ia64_native_iosapic_pcat_compat_init(void) /* no-op unless pcat_compat is non-zero */
+{
+ if (pcat_compat) {
+ /*
+ * Disable the compatibility mode interrupts (8259 style),
+ * needs IN/OUT support enabled.
+ */
+ printk(KERN_INFO
+ "%s: Disabling PC-AT compatible 8259 interrupts\n",
+ __func__);
+ outb(0xff, 0xA1); /* NOTE(review): presumably the slave 8259 mask register -- confirm */
+ outb(0xff, 0x21); /* NOTE(review): presumably the master 8259 mask register -- confirm */
+ }
+}
+
+void __init
iosapic_system_init (int system_pcat_compat)
{
int irq;
@@ -988,17 +964,8 @@ iosapic_system_init (int system_pcat_compat)
}
pcat_compat = system_pcat_compat;
- if (pcat_compat) {
- /*
- * Disable the compatibility mode interrupts (8259 style),
- * needs IN/OUT support enabled.
- */
- printk(KERN_INFO
- "%s: Disabling PC-AT compatible 8259 interrupts\n",
- __FUNCTION__);
- outb(0xff, 0xA1);
- outb(0xff, 0x21);
- }
+ if (pcat_compat)
+ iosapic_pcat_compat_init();
}
static inline int
@@ -1010,7 +977,7 @@ iosapic_alloc (void)
if (!iosapic_lists[index].addr)
return index;
- printk(KERN_WARNING "%s: failed to allocate iosapic\n", __FUNCTION__);
+ printk(KERN_WARNING "%s: failed to allocate iosapic\n", __func__);
return -1;
}
@@ -1043,8 +1010,27 @@ iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver)
return 0;
}
-int __devinit
-iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
+static int
+iosapic_delete_rte(unsigned int irq, unsigned int gsi)
+{
+ struct iosapic_rte_info *rte, *temp;
+ /* Walk irq's RTE list for the entry whose GSI (gsi_base + rte_index) equals gsi. */
+ list_for_each_entry_safe(rte, temp, &iosapic_intr_info[irq].rtes,
+ rte_list) {
+ if (rte->iosapic->gsi_base + rte->rte_index == gsi) {
+ if (rte->refcnt) /* non-zero refcnt: refuse to delete */
+ return -EBUSY;
+ /* Unlink the entry from the per-irq list, then free it. */
+ list_del(&rte->rte_list);
+ kfree(rte);
+ return 0;
+ }
+ }
+ /* No RTE on this irq's list matches gsi. */
+ return -EINVAL;
+}
+
+int iosapic_init(unsigned long phys_addr, unsigned int gsi_base)
{
int num_rte, err, index;
unsigned int isa_irq, ver;
@@ -1059,6 +1045,10 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
}
addr = ioremap(phys_addr, 0);
+ if (addr == NULL) {
+ spin_unlock_irqrestore(&iosapic_lock, flags);
+ return -ENOMEM;
+ }
ver = iosapic_version(addr);
if ((err = iosapic_check_gsi_range(gsi_base, ver))) {
iounmap(addr);
@@ -1097,56 +1087,55 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
return 0;
}
-#ifdef CONFIG_HOTPLUG
-int
-iosapic_remove (unsigned int gsi_base)
+int iosapic_remove(unsigned int gsi_base)
{
- int index, err = 0;
+ int i, irq, index, err = 0;
unsigned long flags;
spin_lock_irqsave(&iosapic_lock, flags);
index = find_iosapic(gsi_base);
if (index < 0) {
printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n",
- __FUNCTION__, gsi_base);
+ __func__, gsi_base);
goto out;
}
if (iosapic_lists[index].rtes_inuse) {
err = -EBUSY;
printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n",
- __FUNCTION__, gsi_base);
+ __func__, gsi_base);
goto out;
}
+ for (i = gsi_base; i < gsi_base + iosapic_lists[index].num_rte; i++) {
+ irq = __gsi_to_irq(i);
+ if (irq < 0)
+ continue;
+
+ err = iosapic_delete_rte(irq, i);
+ if (err)
+ goto out;
+ }
+
iounmap(iosapic_lists[index].addr);
iosapic_free(index);
out:
spin_unlock_irqrestore(&iosapic_lock, flags);
return err;
}
-#endif /* CONFIG_HOTPLUG */
#ifdef CONFIG_NUMA
-void __devinit
-map_iosapic_to_node(unsigned int gsi_base, int node)
+void map_iosapic_to_node(unsigned int gsi_base, int node)
{
int index;
index = find_iosapic(gsi_base);
if (index < 0) {
printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
- __FUNCTION__, gsi_base);
+ __func__, gsi_base);
return;
}
iosapic_lists[index].node = node;
return;
}
#endif
-
-static int __init iosapic_enable_kmalloc (void)
-{
- iosapic_kmalloc_ok = 1;
- return 0;
-}
-core_initcall (iosapic_enable_kmalloc);
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index 44be1c952b7..f2c41828113 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -23,6 +23,8 @@
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
+#include <asm/mca.h>
+
/*
* 'what should we do if we get a hw irq event on an illegal vector'.
* each architecture has to answer this themselves.
@@ -53,45 +55,9 @@ atomic_t irq_err_count;
/*
* /proc/interrupts printing:
*/
-
-int show_interrupts(struct seq_file *p, void *v)
+int arch_show_interrupts(struct seq_file *p, int prec)
{
- int i = *(loff_t *) v, j;
- struct irqaction * action;
- unsigned long flags;
-
- if (i == 0) {
- seq_printf(p, " ");
- for_each_online_cpu(j) {
- seq_printf(p, "CPU%d ",j);
- }
- seq_putc(p, '\n');
- }
-
- if (i < NR_IRQS) {
- spin_lock_irqsave(&irq_desc[i].lock, flags);
- action = irq_desc[i].action;
- if (!action)
- goto skip;
- seq_printf(p, "%3d: ",i);
-#ifndef CONFIG_SMP
- seq_printf(p, "%10u ", kstat_irqs(i));
-#else
- for_each_online_cpu(j) {
- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
- }
-#endif
- seq_printf(p, " %14s", irq_desc[i].chip->name);
- seq_printf(p, " %s", action->name);
-
- for (action=action->next; action; action = action->next)
- seq_printf(p, ", %s", action->name);
-
- seq_putc(p, '\n');
-skip:
- spin_unlock_irqrestore(&irq_desc[i].lock, flags);
- } else if (i == NR_IRQS)
- seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+ seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
return 0;
}
@@ -100,21 +66,18 @@ static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
{
- cpumask_t mask = CPU_MASK_NONE;
-
- cpu_set(cpu_logical_id(hwid), mask);
-
if (irq < NR_IRQS) {
- irq_desc[irq].affinity = mask;
+ cpumask_copy(irq_get_irq_data(irq)->affinity,
+ cpumask_of(cpu_logical_id(hwid)));
irq_redir[irq] = (char) (redir & 0xff);
}
}
-bool is_affinity_mask_valid(cpumask_t cpumask)
+bool is_affinity_mask_valid(const struct cpumask *cpumask)
{
if (ia64_platform_is("sn2")) {
/* Only allow one CPU to be specified in the smp_affinity mask */
- if (cpus_weight(cpumask) != 1)
+ if (cpumask_weight(cpumask) != 1)
return false;
}
return true;
@@ -122,23 +85,29 @@ bool is_affinity_mask_valid(cpumask_t cpumask)
#endif /* CONFIG_SMP */
+int __init arch_early_irq_init(void) /* calls ia64_mca_irq_init() and always returns 0 */
+{
+ ia64_mca_irq_init();
+ return 0;
+}
+
#ifdef CONFIG_HOTPLUG_CPU
unsigned int vectors_in_migration[NR_IRQS];
/*
- * Since cpu_online_map is already updated, we just need to check for
+ * Since cpu_online_mask is already updated, we just need to check for
* affinity that has zeros
*/
static void migrate_irqs(void)
{
- cpumask_t mask;
- irq_desc_t *desc;
int irq, new_cpu;
for (irq=0; irq < NR_IRQS; irq++) {
- desc = irq_desc + irq;
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_data *data = irq_desc_get_irq_data(desc);
+ struct irq_chip *chip = irq_data_get_irq_chip(data);
- if (desc->status == IRQ_DISABLED)
+ if (irqd_irq_disabled(data))
continue;
/*
@@ -147,31 +116,31 @@ static void migrate_irqs(void)
* tell CPU not to respond to these local intr sources.
* such as ITV,CPEI,MCA etc.
*/
- if (desc->status == IRQ_PER_CPU)
+ if (irqd_is_per_cpu(data))
continue;
- cpus_and(mask, irq_desc[irq].affinity, cpu_online_map);
- if (any_online_cpu(mask) == NR_CPUS) {
+ if (cpumask_any_and(data->affinity, cpu_online_mask)
+ >= nr_cpu_ids) {
/*
* Save it for phase 2 processing
*/
vectors_in_migration[irq] = irq;
- new_cpu = any_online_cpu(cpu_online_map);
- mask = cpumask_of_cpu(new_cpu);
+ new_cpu = cpumask_any(cpu_online_mask);
/*
* Al three are essential, currently WARN_ON.. maybe panic?
*/
- if (desc->chip && desc->chip->disable &&
- desc->chip->enable && desc->chip->set_affinity) {
- desc->chip->disable(irq);
- desc->chip->set_affinity(irq, mask);
- desc->chip->enable(irq);
+ if (chip && chip->irq_disable &&
+ chip->irq_enable && chip->irq_set_affinity) {
+ chip->irq_disable(data);
+ chip->irq_set_affinity(data,
+ cpumask_of(new_cpu), false);
+ chip->irq_enable(data);
} else {
- WARN_ON((!(desc->chip) || !(desc->chip->disable) ||
- !(desc->chip->enable) ||
- !(desc->chip->set_affinity)));
+ WARN_ON((!chip || !chip->irq_disable ||
+ !chip->irq_enable ||
+ !chip->irq_set_affinity));
}
}
}
@@ -181,16 +150,16 @@ void fixup_irqs(void)
{
unsigned int irq;
extern void ia64_process_pending_intr(void);
- extern void ia64_disable_timer(void);
extern volatile int time_keeper_id;
- ia64_disable_timer();
+ /* Mask ITV to disable timer */
+ ia64_set_itv(1 << 16);
/*
* Find a new timesync master
*/
if (smp_processor_id() == time_keeper_id) {
- time_keeper_id = first_cpu(cpu_online_map);
+ time_keeper_id = cpumask_first(cpu_online_mask);
printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id);
}
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index 00a4599e5f4..03ea78ed64a 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -22,14 +22,15 @@
#include <linux/interrupt.h>
#include <linux/ioport.h>
#include <linux/kernel_stat.h>
-#include <linux/slab.h>
#include <linux/ptrace.h>
-#include <linux/random.h> /* for rand_initialize_irq() */
#include <linux/signal.h>
#include <linux/smp.h>
#include <linux/threads.h>
#include <linux/bitops.h>
#include <linux/irq.h>
+#include <linux/ratelimit.h>
+#include <linux/acpi.h>
+#include <linux/sched.h>
#include <asm/delay.h>
#include <asm/intrinsics.h>
@@ -37,7 +38,6 @@
#include <asm/hw_irq.h>
#include <asm/machvec.h>
#include <asm/pgtable.h>
-#include <asm/system.h>
#include <asm/tlbflush.h>
#ifdef CONFIG_PERFMON
@@ -93,14 +93,6 @@ static int irq_status[NR_IRQS] = {
[0 ... NR_IRQS -1] = IRQ_UNUSED
};
-int check_irq_used(int irq)
-{
- if (irq_status[irq] == IRQ_USED)
- return 1;
-
- return -1;
-}
-
static inline int find_unassigned_irq(void)
{
int irq;
@@ -116,7 +108,7 @@ static inline int find_unassigned_vector(cpumask_t domain)
cpumask_t mask;
int pos, vector;
- cpus_and(mask, domain, cpu_online_map);
+ cpumask_and(&mask, &domain, cpu_online_mask);
if (cpus_empty(mask))
return -EINVAL;
@@ -139,7 +131,7 @@ static int __bind_irq_vector(int irq, int vector, cpumask_t domain)
BUG_ON((unsigned)irq >= NR_IRQS);
BUG_ON((unsigned)vector >= IA64_NUM_VECTORS);
- cpus_and(mask, domain, cpu_online_map);
+ cpumask_and(&mask, &domain, cpu_online_mask);
if (cpus_empty(mask))
return -EINVAL;
if ((cfg->vector == vector) && cpus_equal(cfg->domain, domain))
@@ -177,7 +169,7 @@ static void __clear_irq_vector(int irq)
BUG_ON(cfg->vector == IRQ_VECTOR_UNASSIGNED);
vector = cfg->vector;
domain = cfg->domain;
- cpus_and(mask, cfg->domain, cpu_online_map);
+ cpumask_and(&mask, &cfg->domain, cpu_online_mask);
for_each_cpu_mask(cpu, mask)
per_cpu(vector_irq, cpu)[vector] = -1;
cfg->vector = IRQ_VECTOR_UNASSIGNED;
@@ -196,11 +188,11 @@ static void clear_irq_vector(int irq)
}
int
-assign_irq_vector (int irq)
+ia64_native_assign_irq_vector (int irq)
{
unsigned long flags;
int vector, cpu;
- cpumask_t domain;
+ cpumask_t domain = CPU_MASK_NONE;
vector = -ENOSPC;
@@ -222,7 +214,7 @@ assign_irq_vector (int irq)
}
void
-free_irq_vector (int vector)
+ia64_native_free_irq_vector (int vector)
{
if (vector < IA64_FIRST_DEVICE_VECTOR ||
vector > IA64_LAST_DEVICE_VECTOR)
@@ -260,6 +252,7 @@ void __setup_vector_irq(int cpu)
}
#if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG))
+
static enum vector_domain_type {
VECTOR_DOMAIN_NONE,
VECTOR_DOMAIN_PERCPU
@@ -272,6 +265,100 @@ static cpumask_t vector_allocation_domain(int cpu)
return CPU_MASK_ALL;
}
+static int __irq_prepare_move(int irq, int cpu)
+{
+ struct irq_cfg *cfg = &irq_cfg[irq];
+ int vector;
+ cpumask_t domain;
+
+ if (cfg->move_in_progress || cfg->move_cleanup_count)
+ return -EBUSY;
+ if (cfg->vector == IRQ_VECTOR_UNASSIGNED || !cpu_online(cpu))
+ return -EINVAL;
+ if (cpu_isset(cpu, cfg->domain))
+ return 0;
+ domain = vector_allocation_domain(cpu);
+ vector = find_unassigned_vector(domain);
+ if (vector < 0)
+ return -ENOSPC;
+ cfg->move_in_progress = 1;
+ cfg->old_domain = cfg->domain;
+ cfg->vector = IRQ_VECTOR_UNASSIGNED;
+ cfg->domain = CPU_MASK_NONE;
+ BUG_ON(__bind_irq_vector(irq, vector, domain));
+ return 0;
+}
+
+int irq_prepare_move(int irq, int cpu)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&vector_lock, flags);
+ ret = __irq_prepare_move(irq, cpu);
+ spin_unlock_irqrestore(&vector_lock, flags);
+ return ret;
+}
+
+void irq_complete_move(unsigned irq)
+{
+ struct irq_cfg *cfg = &irq_cfg[irq];
+ cpumask_t cleanup_mask;
+ int i;
+
+ if (likely(!cfg->move_in_progress))
+ return;
+
+ if (unlikely(cpu_isset(smp_processor_id(), cfg->old_domain)))
+ return;
+
+ cpumask_and(&cleanup_mask, &cfg->old_domain, cpu_online_mask);
+ cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+ for_each_cpu_mask(i, cleanup_mask)
+ platform_send_ipi(i, IA64_IRQ_MOVE_VECTOR, IA64_IPI_DM_INT, 0);
+ cfg->move_in_progress = 0;
+}
+
+static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id)
+{
+ int me = smp_processor_id();
+ ia64_vector vector;
+ unsigned long flags;
+
+ for (vector = IA64_FIRST_DEVICE_VECTOR;
+ vector < IA64_LAST_DEVICE_VECTOR; vector++) {
+ int irq;
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+ irq = __get_cpu_var(vector_irq)[vector];
+ if (irq < 0)
+ continue;
+
+ desc = irq_to_desc(irq);
+ cfg = irq_cfg + irq;
+ raw_spin_lock(&desc->lock);
+ if (!cfg->move_cleanup_count)
+ goto unlock;
+
+ if (!cpu_isset(me, cfg->old_domain))
+ goto unlock;
+
+ spin_lock_irqsave(&vector_lock, flags);
+ __get_cpu_var(vector_irq)[vector] = -1;
+ cpu_clear(me, vector_table[vector]);
+ spin_unlock_irqrestore(&vector_lock, flags);
+ cfg->move_cleanup_count--;
+ unlock:
+ raw_spin_unlock(&desc->lock);
+ }
+ return IRQ_HANDLED;
+}
+
+static struct irqaction irq_move_irqaction = {
+ .handler = smp_irq_move_cleanup_interrupt,
+ .name = "irq_move"
+};
+
static int __init parse_vector_domain(char *arg)
{
if (!arg)
@@ -295,44 +382,13 @@ void destroy_and_reserve_irq(unsigned int irq)
{
unsigned long flags;
- dynamic_irq_cleanup(irq);
-
+ irq_init_desc(irq);
spin_lock_irqsave(&vector_lock, flags);
__clear_irq_vector(irq);
irq_status[irq] = IRQ_RSVD;
spin_unlock_irqrestore(&vector_lock, flags);
}
-static int __reassign_irq_vector(int irq, int cpu)
-{
- struct irq_cfg *cfg = &irq_cfg[irq];
- int vector;
- cpumask_t domain;
-
- if (cfg->vector == IRQ_VECTOR_UNASSIGNED || !cpu_online(cpu))
- return -EINVAL;
- if (cpu_isset(cpu, cfg->domain))
- return 0;
- domain = vector_allocation_domain(cpu);
- vector = find_unassigned_vector(domain);
- if (vector < 0)
- return -ENOSPC;
- __clear_irq_vector(irq);
- BUG_ON(__bind_irq_vector(irq, vector, domain));
- return 0;
-}
-
-int reassign_irq_vector(int irq, int cpu)
-{
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&vector_lock, flags);
- ret = __reassign_irq_vector(irq, cpu);
- spin_unlock_irqrestore(&vector_lock, flags);
- return ret;
-}
-
/*
* Dynamic irq allocate and deallocation for MSI
*/
@@ -340,7 +396,7 @@ int create_irq(void)
{
unsigned long flags;
int irq, vector, cpu;
- cpumask_t domain;
+ cpumask_t domain = CPU_MASK_NONE;
irq = vector = -ENOSPC;
spin_lock_irqsave(&vector_lock, flags);
@@ -359,13 +415,13 @@ int create_irq(void)
out:
spin_unlock_irqrestore(&vector_lock, flags);
if (irq >= 0)
- dynamic_irq_init(irq);
+ irq_init_desc(irq);
return irq;
}
void destroy_irq(unsigned int irq)
{
- dynamic_irq_cleanup(irq);
+ irq_init_desc(irq);
clear_irq_vector(irq);
}
@@ -402,13 +458,9 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
sp = ia64_getreg(_IA64_REG_SP);
if ((sp - bsp) < 1024) {
- static unsigned char count;
- static long last_time;
+ static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5);
- if (jiffies - last_time > 5*HZ)
- count = 0;
- if (++count < 5) {
- last_time = jiffies;
+ if (__ratelimit(&ratelimit)) {
printk("ia64_handle_irq: DANGER: less than "
"1KB of free stack space!!\n"
"(bsp=0x%lx, sp=%lx)\n", bsp, sp);
@@ -426,21 +478,22 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
ia64_srlz_d();
while (vector != IA64_SPURIOUS_INT_VECTOR) {
+ int irq = local_vector_to_irq(vector);
+
if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) {
smp_local_flush_tlb();
- kstat_this_cpu.irqs[vector]++;
- } else if (unlikely(IS_RESCHEDULE(vector)))
- kstat_this_cpu.irqs[vector]++;
- else {
- int irq = local_vector_to_irq(vector);
-
+ kstat_incr_irq_this_cpu(irq);
+ } else if (unlikely(IS_RESCHEDULE(vector))) {
+ scheduler_ipi();
+ kstat_incr_irq_this_cpu(irq);
+ } else {
ia64_setreg(_IA64_REG_CR_TPR, vector);
ia64_srlz_d();
if (unlikely(irq < 0)) {
printk(KERN_ERR "%s: Unexpected interrupt "
"vector %d on CPU %d is not mapped "
- "to any IRQ!\n", __FUNCTION__, vector,
+ "to any IRQ!\n", __func__, vector,
smp_processor_id());
} else
generic_handle_irq(irq);
@@ -476,22 +529,23 @@ void ia64_process_pending_intr(void)
vector = ia64_get_ivr();
- irq_enter();
- saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
- ia64_srlz_d();
+ irq_enter();
+ saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
+ ia64_srlz_d();
/*
* Perform normal interrupt style processing
*/
while (vector != IA64_SPURIOUS_INT_VECTOR) {
+ int irq = local_vector_to_irq(vector);
+
if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) {
smp_local_flush_tlb();
- kstat_this_cpu.irqs[vector]++;
- } else if (unlikely(IS_RESCHEDULE(vector)))
- kstat_this_cpu.irqs[vector]++;
- else {
+ kstat_incr_irq_this_cpu(irq);
+ } else if (unlikely(IS_RESCHEDULE(vector))) {
+ kstat_incr_irq_this_cpu(irq);
+ } else {
struct pt_regs *old_regs = set_irq_regs(NULL);
- int irq = local_vector_to_irq(vector);
ia64_setreg(_IA64_REG_CR_TPR, vector);
ia64_srlz_d();
@@ -505,7 +559,7 @@ void ia64_process_pending_intr(void)
if (unlikely(irq < 0)) {
printk(KERN_ERR "%s: Unexpected interrupt "
"vector %d on CPU %d not being mapped "
- "to any IRQ!!\n", __FUNCTION__, vector,
+ "to any IRQ!!\n", __func__, vector,
smp_processor_id());
} else {
vectors_in_migration[irq]=0;
@@ -533,52 +587,65 @@ static irqreturn_t dummy_handler (int irq, void *dev_id)
{
BUG();
}
-extern irqreturn_t handle_IPI (int irq, void *dev_id);
static struct irqaction ipi_irqaction = {
.handler = handle_IPI,
- .flags = IRQF_DISABLED,
.name = "IPI"
};
+/*
+ * KVM uses this interrupt to force a cpu out of guest mode
+ */
static struct irqaction resched_irqaction = {
.handler = dummy_handler,
- .flags = IRQF_DISABLED,
.name = "resched"
};
static struct irqaction tlb_irqaction = {
.handler = dummy_handler,
- .flags = IRQF_DISABLED,
.name = "tlb_flush"
};
#endif
void
-register_percpu_irq (ia64_vector vec, struct irqaction *action)
+ia64_native_register_percpu_irq (ia64_vector vec, struct irqaction *action)
{
- irq_desc_t *desc;
unsigned int irq;
irq = vec;
BUG_ON(bind_irq_vector(irq, vec, CPU_MASK_ALL));
- desc = irq_desc + irq;
- desc->status |= IRQ_PER_CPU;
- desc->chip = &irq_type_ia64_lsapic;
+ irq_set_status_flags(irq, IRQ_PER_CPU);
+ irq_set_chip(irq, &irq_type_ia64_lsapic);
if (action)
setup_irq(irq, action);
+ irq_set_handler(irq, handle_percpu_irq);
}
void __init
-init_IRQ (void)
+ia64_native_register_ipi(void)
{
- register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL);
#ifdef CONFIG_SMP
register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction);
register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction);
register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &tlb_irqaction);
#endif
+}
+
+void __init
+init_IRQ (void)
+{
+#ifdef CONFIG_ACPI
+ acpi_boot_init();
+#endif
+ ia64_register_ipi();
+ register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL);
+#ifdef CONFIG_SMP
+#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG)
+ if (vector_domain_type != VECTOR_DOMAIN_NONE)
+ register_percpu_irq(IA64_IRQ_MOVE_VECTOR, &irq_move_irqaction);
+#endif
+#endif
#ifdef CONFIG_PERFMON
pfm_init_percpu();
#endif
@@ -592,11 +659,7 @@ ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect)
unsigned long ipi_data;
unsigned long phys_cpu_id;
-#ifdef CONFIG_SMP
phys_cpu_id = cpu_physical_id(cpu);
-#else
- phys_cpu_id = (ia64_getreg(_IA64_REG_CR_LID) >> 16) & 0xffff;
-#endif
/*
* cpu number is in 8bit ID and 8bit EID
diff --git a/arch/ia64/kernel/irq_lsapic.c b/arch/ia64/kernel/irq_lsapic.c
index e56a7a36aca..1b3a776e516 100644
--- a/arch/ia64/kernel/irq_lsapic.c
+++ b/arch/ia64/kernel/irq_lsapic.c
@@ -15,31 +15,30 @@
#include <linux/irq.h>
static unsigned int
-lsapic_noop_startup (unsigned int irq)
+lsapic_noop_startup (struct irq_data *data)
{
return 0;
}
static void
-lsapic_noop (unsigned int irq)
+lsapic_noop (struct irq_data *data)
{
/* nothing to do... */
}
-static int lsapic_retrigger(unsigned int irq)
+static int lsapic_retrigger(struct irq_data *data)
{
- ia64_resend_irq(irq);
+ ia64_resend_irq(data->irq);
return 1;
}
-struct hw_interrupt_type irq_type_ia64_lsapic = {
- .name = "LSAPIC",
- .startup = lsapic_noop_startup,
- .shutdown = lsapic_noop,
- .enable = lsapic_noop,
- .disable = lsapic_noop,
- .ack = lsapic_noop,
- .end = lsapic_noop,
- .retrigger = lsapic_retrigger,
+struct irq_chip irq_type_ia64_lsapic = {
+ .name = "LSAPIC",
+ .irq_startup = lsapic_noop_startup,
+ .irq_shutdown = lsapic_noop,
+ .irq_enable = lsapic_noop,
+ .irq_disable = lsapic_noop,
+ .irq_ack = lsapic_noop,
+ .irq_retrigger = lsapic_retrigger,
};
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 34f44d8be00..18e794a5724 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -12,6 +12,14 @@
*
* 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling for SMP
* 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB handler now uses virtual PT.
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co
+ * Dan Magenheimer <dan.magenheimer@hp.com>
+ * Xen paravirtualization
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ * pv_ops.
+ * Yaozu (Eddie) Dong <eddie.dong@intel.com>
*/
/*
* This file defines the interruption vector table used by the CPU.
@@ -41,18 +49,16 @@
#include <asm/asmmacro.h>
#include <asm/break.h>
-#include <asm/ia32.h>
#include <asm/kregs.h>
#include <asm/asm-offsets.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
-#include <asm/system.h>
#include <asm/thread_info.h>
#include <asm/unistd.h>
#include <asm/errno.h>
-#if 1
+#if 0
# define PSR_DEFAULT_BITS psr.ac
#else
# define PSR_DEFAULT_BITS 0
@@ -75,7 +81,7 @@
mov r19=n;; /* prepare to save predicates */ \
br.sptk.many dispatch_to_fault_handler
- .section .text.ivt,"ax"
+ .section .text..ivt,"ax"
.align 32768 // align on 32KB boundary
.global ia64_ivt
@@ -102,13 +108,13 @@ ENTRY(vhpt_miss)
* - the faulting virtual address uses unimplemented address bits
* - the faulting virtual address has no valid page table mapping
*/
- mov r16=cr.ifa // get address that caused the TLB miss
+ MOV_FROM_IFA(r16) // get address that caused the TLB miss
#ifdef CONFIG_HUGETLB_PAGE
movl r18=PAGE_SHIFT
- mov r25=cr.itir
+ MOV_FROM_ITIR(r25)
#endif
;;
- rsm psr.dt // use physical addressing for data
+ RSM_PSR_DT // use physical addressing for data
mov r31=pr // save the predicate registers
mov r19=IA64_KR(PT_BASE) // get page table base address
shl r21=r16,3 // shift bit 60 into sign bit
@@ -168,21 +174,21 @@ ENTRY(vhpt_miss)
dep r21=r19,r20,3,(PAGE_SHIFT-3) // r21=pte_offset(pmd,addr)
;;
(p7) ld8 r18=[r21] // read *pte
- mov r19=cr.isr // cr.isr bit 32 tells us if this is an insn miss
+ MOV_FROM_ISR(r19) // cr.isr bit 32 tells us if this is an insn miss
;;
(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared?
- mov r22=cr.iha // get the VHPT address that caused the TLB miss
+ MOV_FROM_IHA(r22) // get the VHPT address that caused the TLB miss
;; // avoid RAW on p7
(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss?
dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address
;;
-(p10) itc.i r18 // insert the instruction TLB entry
-(p11) itc.d r18 // insert the data TLB entry
+ ITC_I_AND_D(p10, p11, r18, r24) // insert the instruction TLB entry and
+ // insert the data TLB entry
(p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault)
- mov cr.ifa=r22
+ MOV_TO_IFA(r22, r24)
#ifdef CONFIG_HUGETLB_PAGE
-(p8) mov cr.itir=r25 // change to default page-size for VHPT
+ MOV_TO_ITIR(p8, r25, r24) // change to default page-size for VHPT
#endif
/*
@@ -192,7 +198,7 @@ ENTRY(vhpt_miss)
*/
adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23
;;
-(p7) itc.d r24
+ ITC_D(p7, r24, r25)
;;
#ifdef CONFIG_SMP
/*
@@ -234,7 +240,7 @@ ENTRY(vhpt_miss)
#endif
mov pr=r31,-1 // restore predicate registers
- rfi
+ RFI
END(vhpt_miss)
.org ia64_ivt+0x400
@@ -248,11 +254,11 @@ ENTRY(itlb_miss)
* mode, walk the page table, and then re-execute the PTE read and
* go on normally after that.
*/
- mov r16=cr.ifa // get virtual address
+ MOV_FROM_IFA(r16) // get virtual address
mov r29=b0 // save b0
mov r31=pr // save predicates
.itlb_fault:
- mov r17=cr.iha // get virtual address of PTE
+ MOV_FROM_IHA(r17) // get virtual address of PTE
movl r30=1f // load nested fault continuation point
;;
1: ld8 r18=[r17] // read *pte
@@ -261,7 +267,7 @@ ENTRY(itlb_miss)
tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
(p6) br.cond.spnt page_fault
;;
- itc.i r18
+ ITC_I(p0, r18, r19)
;;
#ifdef CONFIG_SMP
/*
@@ -278,7 +284,7 @@ ENTRY(itlb_miss)
(p7) ptc.l r16,r20
#endif
mov pr=r31,-1
- rfi
+ RFI
END(itlb_miss)
.org ia64_ivt+0x0800
@@ -292,11 +298,11 @@ ENTRY(dtlb_miss)
* mode, walk the page table, and then re-execute the PTE read and
* go on normally after that.
*/
- mov r16=cr.ifa // get virtual address
+ MOV_FROM_IFA(r16) // get virtual address
mov r29=b0 // save b0
mov r31=pr // save predicates
dtlb_fault:
- mov r17=cr.iha // get virtual address of PTE
+ MOV_FROM_IHA(r17) // get virtual address of PTE
movl r30=1f // load nested fault continuation point
;;
1: ld8 r18=[r17] // read *pte
@@ -305,7 +311,7 @@ dtlb_fault:
tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
(p6) br.cond.spnt page_fault
;;
- itc.d r18
+ ITC_D(p0, r18, r19)
;;
#ifdef CONFIG_SMP
/*
@@ -322,7 +328,7 @@ dtlb_fault:
(p7) ptc.l r16,r20
#endif
mov pr=r31,-1
- rfi
+ RFI
END(dtlb_miss)
.org ia64_ivt+0x0c00
@@ -330,9 +336,9 @@ END(dtlb_miss)
// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
ENTRY(alt_itlb_miss)
DBG_FAULT(3)
- mov r16=cr.ifa // get address that caused the TLB miss
+ MOV_FROM_IFA(r16) // get address that caused the TLB miss
movl r17=PAGE_KERNEL
- mov r21=cr.ipsr
+ MOV_FROM_IPSR(p0, r21)
movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
mov r31=pr
;;
@@ -341,9 +347,9 @@ ENTRY(alt_itlb_miss)
;;
cmp.gt p8,p0=6,r22 // user mode
;;
-(p8) thash r17=r16
+ THASH(p8, r17, r16, r23)
;;
-(p8) mov cr.iha=r17
+ MOV_TO_IHA(p8, r17, r23)
(p8) mov r29=b0 // save b0
(p8) br.cond.dptk .itlb_fault
#endif
@@ -358,9 +364,9 @@ ENTRY(alt_itlb_miss)
or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6
(p8) br.cond.spnt page_fault
;;
- itc.i r19 // insert the TLB entry
+ ITC_I(p0, r19, r18) // insert the TLB entry
mov pr=r31,-1
- rfi
+ RFI
END(alt_itlb_miss)
.org ia64_ivt+0x1000
@@ -368,11 +374,11 @@ END(alt_itlb_miss)
// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
ENTRY(alt_dtlb_miss)
DBG_FAULT(4)
- mov r16=cr.ifa // get address that caused the TLB miss
+ MOV_FROM_IFA(r16) // get address that caused the TLB miss
movl r17=PAGE_KERNEL
- mov r20=cr.isr
+ MOV_FROM_ISR(r20)
movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
- mov r21=cr.ipsr
+ MOV_FROM_IPSR(p0, r21)
mov r31=pr
mov r24=PERCPU_ADDR
;;
@@ -381,9 +387,9 @@ ENTRY(alt_dtlb_miss)
;;
cmp.gt p8,p0=6,r22 // access to region 0-5
;;
-(p8) thash r17=r16
+ THASH(p8, r17, r16, r25)
;;
-(p8) mov cr.iha=r17
+ MOV_TO_IHA(p8, r17, r25)
(p8) mov r29=b0 // save b0
(p8) br.cond.dptk dtlb_fault
#endif
@@ -402,7 +408,7 @@ ENTRY(alt_dtlb_miss)
tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on?
;;
(p10) sub r19=r19,r26
-(p10) mov cr.itir=r25
+ MOV_TO_ITIR(p10, r25, r24)
cmp.ne p8,p0=r0,r23
(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field
(p12) dep r17=-1,r17,4,1 // set ma=UC for region 6 addr
@@ -411,11 +417,11 @@ ENTRY(alt_dtlb_miss)
dep r21=-1,r21,IA64_PSR_ED_BIT,1
;;
or r19=r19,r17 // insert PTE control bits into r19
-(p6) mov cr.ipsr=r21
+ MOV_TO_IPSR(p6, r21, r24)
;;
-(p7) itc.d r19 // insert the TLB entry
+ ITC_D(p7, r19, r18) // insert the TLB entry
mov pr=r31,-1
- rfi
+ RFI
END(alt_dtlb_miss)
.org ia64_ivt+0x1400
@@ -444,10 +450,10 @@ ENTRY(nested_dtlb_miss)
*
* Clobbered: b0, r18, r19, r21, r22, psr.dt (cleared)
*/
- rsm psr.dt // switch to using physical data addressing
+ RSM_PSR_DT // switch to using physical data addressing
mov r19=IA64_KR(PT_BASE) // get the page table base address
shl r21=r16,3 // shift bit 60 into sign bit
- mov r18=cr.itir
+ MOV_FROM_ITIR(r18)
;;
shr.u r17=r16,61 // get the region number into r17
extr.u r18=r18,2,6 // get the faulting page size
@@ -507,33 +513,6 @@ ENTRY(ikey_miss)
FAULT(6)
END(ikey_miss)
- //-----------------------------------------------------------------------------------
- // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
-ENTRY(page_fault)
- ssm psr.dt
- ;;
- srlz.i
- ;;
- SAVE_MIN_WITH_COVER
- alloc r15=ar.pfs,0,0,3,0
- mov out0=cr.ifa
- mov out1=cr.isr
- adds r3=8,r2 // set up second base pointer
- ;;
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
- srlz.i // guarantee that interruption collectin is on
- ;;
-(p15) ssm psr.i // restore psr.i
- movl r14=ia64_leave_kernel
- ;;
- SAVE_REST
- mov rp=r14
- ;;
- adds out2=16,r12 // out2 = pointer to pt_regs
- br.call.sptk.many b6=ia64_do_page_fault // ignore return address
-END(page_fault)
-
.org ia64_ivt+0x1c00
/////////////////////////////////////////////////////////////////////////////////////////
// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
@@ -556,10 +535,10 @@ ENTRY(dirty_bit)
* page table TLB entry isn't present, we take a nested TLB miss hit where we look
* up the physical address of the L3 PTE and then continue at label 1 below.
*/
- mov r16=cr.ifa // get the address that caused the fault
+ MOV_FROM_IFA(r16) // get the address that caused the fault
movl r30=1f // load continuation point in case of nested fault
;;
- thash r17=r16 // compute virtual address of L3 PTE
+ THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE
mov r29=b0 // save b0 in case of nested fault
mov r31=pr // save pr
#ifdef CONFIG_SMP
@@ -576,7 +555,7 @@ ENTRY(dirty_bit)
;;
(p6) cmp.eq p6,p7=r26,r18 // Only compare if page is present
;;
-(p6) itc.d r25 // install updated PTE
+ ITC_D(p6, r25, r18) // install updated PTE
;;
/*
* Tell the assemblers dependency-violation checker that the above "itc" instructions
@@ -599,10 +578,10 @@ ENTRY(dirty_bit)
mov b0=r29 // restore b0
;;
st8 [r17]=r18 // store back updated PTE
- itc.d r18 // install updated PTE
+ ITC_D(p0, r18, r16) // install updated PTE
#endif
mov pr=r31,-1 // restore pr
- rfi
+ RFI
END(dirty_bit)
.org ia64_ivt+0x2400
@@ -611,22 +590,22 @@ END(dirty_bit)
ENTRY(iaccess_bit)
DBG_FAULT(9)
// Like Entry 8, except for instruction access
- mov r16=cr.ifa // get the address that caused the fault
+ MOV_FROM_IFA(r16) // get the address that caused the fault
movl r30=1f // load continuation point in case of nested fault
mov r31=pr // save predicates
#ifdef CONFIG_ITANIUM
/*
* Erratum 10 (IFA may contain incorrect address) has "NoFix" status.
*/
- mov r17=cr.ipsr
+ MOV_FROM_IPSR(p0, r17)
;;
- mov r18=cr.iip
+ MOV_FROM_IIP(r18)
tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set?
;;
(p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa
#endif /* CONFIG_ITANIUM */
;;
- thash r17=r16 // compute virtual address of L3 PTE
+ THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE
mov r29=b0 // save b0 in case of nested fault)
#ifdef CONFIG_SMP
mov r28=ar.ccv // save ar.ccv
@@ -642,7 +621,7 @@ ENTRY(iaccess_bit)
;;
(p6) cmp.eq p6,p7=r26,r18 // Only if page present
;;
-(p6) itc.i r25 // install updated PTE
+ ITC_I(p6, r25, r26) // install updated PTE
;;
/*
* Tell the assemblers dependency-violation checker that the above "itc" instructions
@@ -665,10 +644,10 @@ ENTRY(iaccess_bit)
mov b0=r29 // restore b0
;;
st8 [r17]=r18 // store back updated PTE
- itc.i r18 // install updated PTE
+ ITC_I(p0, r18, r16) // install updated PTE
#endif /* !CONFIG_SMP */
mov pr=r31,-1
- rfi
+ RFI
END(iaccess_bit)
.org ia64_ivt+0x2800
@@ -677,10 +656,10 @@ END(iaccess_bit)
ENTRY(daccess_bit)
DBG_FAULT(10)
// Like Entry 8, except for data access
- mov r16=cr.ifa // get the address that caused the fault
+ MOV_FROM_IFA(r16) // get the address that caused the fault
movl r30=1f // load continuation point in case of nested fault
;;
- thash r17=r16 // compute virtual address of L3 PTE
+ THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE
mov r31=pr
mov r29=b0 // save b0 in case of nested fault)
#ifdef CONFIG_SMP
@@ -697,7 +676,7 @@ ENTRY(daccess_bit)
;;
(p6) cmp.eq p6,p7=r26,r18 // Only if page is present
;;
-(p6) itc.d r25 // install updated PTE
+ ITC_D(p6, r25, r26) // install updated PTE
/*
* Tell the assemblers dependency-violation checker that the above "itc" instructions
* cannot possibly affect the following loads:
@@ -717,11 +696,11 @@ ENTRY(daccess_bit)
or r18=_PAGE_A,r18 // set the accessed bit
;;
st8 [r17]=r18 // store back updated PTE
- itc.d r18 // install updated PTE
+ ITC_D(p0, r18, r16) // install updated PTE
#endif
mov b0=r29 // restore b0
mov pr=r31,-1
- rfi
+ RFI
END(daccess_bit)
.org ia64_ivt+0x2c00
@@ -745,10 +724,10 @@ ENTRY(break_fault)
*/
DBG_FAULT(11)
mov.m r16=IA64_KR(CURRENT) // M2 r16 <- current task (12 cyc)
- mov r29=cr.ipsr // M2 (12 cyc)
+ MOV_FROM_IPSR(p0, r29) // M2 (12 cyc)
mov r31=pr // I0 (2 cyc)
- mov r17=cr.iim // M2 (2 cyc)
+ MOV_FROM_IIM(r17) // M2 (2 cyc)
mov.m r27=ar.rsc // M2 (12 cyc)
mov r18=__IA64_BREAK_SYSCALL // A
@@ -767,7 +746,7 @@ ENTRY(break_fault)
nop.m 0
movl r30=sys_call_table // X
- mov r28=cr.iip // M2 (2 cyc)
+ MOV_FROM_IIP(r28) // M2 (2 cyc)
cmp.eq p0,p7=r18,r17 // I0 is this a system call?
(p7) br.cond.spnt non_syscall // B no ->
//
@@ -805,8 +784,13 @@ ENTRY(break_fault)
(p8) adds r28=16,r28 // A switch cr.iip to next bundle
(p9) adds r8=1,r8 // A increment ei to next slot
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ ;;
+ mov b6=r30 // I0 setup syscall handler branch reg early
+#else
nop.i 0
;;
+#endif
mov.m r25=ar.unat // M2 (5 cyc)
dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr
@@ -817,7 +801,11 @@ ENTRY(break_fault)
//
///////////////////////////////////////////////////////////////////////
st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ MOV_FROM_ITC(p0, p14, r30, r18) // M get cycle for accounting
+#else
mov b6=r30 // I0 setup syscall handler branch reg early
+#endif
cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already?
and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit
@@ -829,20 +817,43 @@ ENTRY(break_fault)
cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited?
br.call.sptk.many b7=ia64_syscall_setup // B
1:
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ // mov.m r30=ar.itc is called in advance, and r13 is current
+ add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A
+ add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A
+(pKStk) br.cond.spnt .skip_accounting // B unlikely skip
+ ;;
+ ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // M get last stamp
+ ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // M time at leave
+ ;;
+ ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // M cumulated stime
+ ld8 r21=[r17] // M cumulated utime
+ sub r22=r19,r18 // A stime before leave
+ ;;
+ st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // M update stamp
+ sub r18=r30,r19 // A elapsed time in user
+ ;;
+ add r20=r20,r22 // A sum stime
+ add r21=r21,r18 // A sum utime
+ ;;
+ st8 [r16]=r20 // M update stime
+ st8 [r17]=r21 // M update utime
+ ;;
+.skip_accounting:
+#endif
mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
nop 0
- bsw.1 // B (6 cyc) regs are saved, switch to bank 1
+ BSW_1(r2, r14) // B (6 cyc) regs are saved, switch to bank 1
;;
- ssm psr.ic | PSR_DEFAULT_BITS // M2 now it's safe to re-enable intr.-collection
+ SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r16) // M2 now it's safe to re-enable intr.-collection
+ // M0 ensure interruption collection is on
movl r3=ia64_ret_from_syscall // X
;;
-
- srlz.i // M0 ensure interruption collection is on
mov rp=r3 // I0 set the real return addr
(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT
-(p15) ssm psr.i // M2 restore psr.i
+ SSM_PSR_I(p15, p15, r16) // M2 restore psr.i
(p14) br.call.sptk.many b6=b6 // B invoke syscall-handker (ignore return addr)
br.cond.spnt.many ia64_trace_syscall // B do syscall-tracing thingamagic
// NOT REACHED
@@ -862,27 +873,8 @@ END(break_fault)
/////////////////////////////////////////////////////////////////////////////////////////
// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
ENTRY(interrupt)
- DBG_FAULT(12)
- mov r31=pr // prepare to save predicates
- ;;
- SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
- adds r3=8,r2 // set up second base pointer for SAVE_REST
- srlz.i // ensure everybody knows psr.ic is back on
- ;;
- SAVE_REST
- ;;
- MCA_RECOVER_RANGE(interrupt)
- alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
- mov out0=cr.ivr // pass cr.ivr as first arg
- add out1=16,sp // pass pointer to pt_regs as second arg
- ;;
- srlz.d // make sure we see the effect of cr.ivr
- movl r14=ia64_leave_kernel
- ;;
- mov rp=r14
- br.call.sptk.many b6=ia64_handle_irq
+ /* interrupt handler has become too big to fit this area. */
+ br.sptk.many __interrupt
END(interrupt)
.org ia64_ivt+0x3400
@@ -928,6 +920,7 @@ END(interrupt)
* - r27: saved ar.rsc
* - r28: saved cr.iip
* - r29: saved cr.ipsr
+ * - r30: ar.itc for accounting (don't touch)
* - r31: saved pr
* - b0: original contents (to be saved)
* On exit:
@@ -944,6 +937,7 @@ END(interrupt)
* - ar.fpsr: set to kernel settings
* - b6: preserved (same as on entry)
*/
+#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE
GLOBAL_ENTRY(ia64_syscall_setup)
#if PT(B6) != 0
# error This code assumes that b6 is the first field in pt_regs.
@@ -1035,6 +1029,7 @@ GLOBAL_ENTRY(ia64_syscall_setup)
(p10) mov r8=-EINVAL
br.ret.sptk.many b7
END(ia64_syscall_setup)
+#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
.org ia64_ivt+0x3c00
/////////////////////////////////////////////////////////////////////////////////////////
@@ -1042,53 +1037,46 @@ END(ia64_syscall_setup)
DBG_FAULT(15)
FAULT(15)
+ .org ia64_ivt+0x4000
+/////////////////////////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+ DBG_FAULT(16)
+ FAULT(16)
+
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE)
/*
- * Squatting in this space ...
+ * There is no particular reason for this code to be here, other than
+ * that there happens to be space here that would go unused otherwise.
+ * If this fault ever gets "unreserved", simply move the following
+ * code to a more suitable spot...
*
- * This special case dispatcher for illegal operation faults allows preserved
- * registers to be modified through a callback function (asm only) that is handed
- * back from the fault handler in r8. Up to three arguments can be passed to the
- * callback function by returning an aggregate with the callback as its first
- * element, followed by the arguments.
+ * account_sys_enter is called from SAVE_MIN* macros if accounting is
+ * enabled and if the macro is entered from user mode.
*/
-ENTRY(dispatch_illegal_op_fault)
- .prologue
- .body
- SAVE_MIN_WITH_COVER
- ssm psr.ic | PSR_DEFAULT_BITS
+GLOBAL_ENTRY(account_sys_enter)
+ // mov.m r20=ar.itc is called in advance, and r13 is current
+ add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13
+ add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13
;;
- srlz.i // guarantee that interruption collection is on
+ ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel
+ ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at left from kernel
+ ;;
+ ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime
+ ld8 r21=[r17] // cumulated utime
+ sub r22=r19,r18 // stime before leave kernel
;;
-(p15) ssm psr.i // restore psr.i
- adds r3=8,r2 // set up second base pointer for SAVE_REST
+ st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP // update stamp
+ sub r18=r20,r19 // elapsed time in user mode
;;
- alloc r14=ar.pfs,0,0,1,0 // must be first in insn group
- mov out0=ar.ec
- ;;
- SAVE_REST
- PT_REGS_UNWIND_INFO(0)
- ;;
- br.call.sptk.many rp=ia64_illegal_op_fault
-.ret0: ;;
- alloc r14=ar.pfs,0,0,3,0 // must be first in insn group
- mov out0=r9
- mov out1=r10
- mov out2=r11
- movl r15=ia64_leave_kernel
+ add r23=r23,r22 // sum stime
+ add r21=r21,r18 // sum utime
;;
- mov rp=r15
- mov b6=r8
+ st8 [r16]=r23 // update stime
+ st8 [r17]=r21 // update utime
;;
- cmp.ne p6,p0=0,r8
-(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel
- br.sptk.many ia64_leave_kernel
-END(dispatch_illegal_op_fault)
-
- .org ia64_ivt+0x4000
-/////////////////////////////////////////////////////////////////////////////////////////
-// 0x4000 Entry 16 (size 64 bundles) Reserved
- DBG_FAULT(16)
- FAULT(16)
+ br.ret.sptk.many rp
+END(account_sys_enter)
+#endif
.org ia64_ivt+0x4400
/////////////////////////////////////////////////////////////////////////////////////////
@@ -1096,110 +1084,18 @@ END(dispatch_illegal_op_fault)
DBG_FAULT(17)
FAULT(17)
-ENTRY(non_syscall)
- mov ar.rsc=r27 // restore ar.rsc before SAVE_MIN_WITH_COVER
- ;;
- SAVE_MIN_WITH_COVER
-
- // There is no particular reason for this code to be here, other than that
- // there happens to be space here that would go unused otherwise. If this
- // fault ever gets "unreserved", simply moved the following code to a more
- // suitable spot...
-
- alloc r14=ar.pfs,0,0,2,0
- mov out0=cr.iim
- add out1=16,sp
- adds r3=8,r2 // set up second base pointer for SAVE_REST
-
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
-(p15) ssm psr.i // restore psr.i
- movl r15=ia64_leave_kernel
- ;;
- SAVE_REST
- mov rp=r15
- ;;
- br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr
-END(non_syscall)
-
.org ia64_ivt+0x4800
/////////////////////////////////////////////////////////////////////////////////////////
// 0x4800 Entry 18 (size 64 bundles) Reserved
DBG_FAULT(18)
FAULT(18)
- /*
- * There is no particular reason for this code to be here, other than that
- * there happens to be space here that would go unused otherwise. If this
- * fault ever gets "unreserved", simply moved the following code to a more
- * suitable spot...
- */
-
-ENTRY(dispatch_unaligned_handler)
- SAVE_MIN_WITH_COVER
- ;;
- alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
- mov out0=cr.ifa
- adds out1=16,sp
-
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
-(p15) ssm psr.i // restore psr.i
- adds r3=8,r2 // set up second base pointer
- ;;
- SAVE_REST
- movl r14=ia64_leave_kernel
- ;;
- mov rp=r14
- br.sptk.many ia64_prepare_handle_unaligned
-END(dispatch_unaligned_handler)
-
.org ia64_ivt+0x4c00
/////////////////////////////////////////////////////////////////////////////////////////
// 0x4c00 Entry 19 (size 64 bundles) Reserved
DBG_FAULT(19)
FAULT(19)
- /*
- * There is no particular reason for this code to be here, other than that
- * there happens to be space here that would go unused otherwise. If this
- * fault ever gets "unreserved", simply moved the following code to a more
- * suitable spot...
- */
-
-ENTRY(dispatch_to_fault_handler)
- /*
- * Input:
- * psr.ic: off
- * r19: fault vector number (e.g., 24 for General Exception)
- * r31: contains saved predicates (pr)
- */
- SAVE_MIN_WITH_COVER_R19
- alloc r14=ar.pfs,0,0,5,0
- mov out0=r15
- mov out1=cr.isr
- mov out2=cr.ifa
- mov out3=cr.iim
- mov out4=cr.itir
- ;;
- ssm psr.ic | PSR_DEFAULT_BITS
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
-(p15) ssm psr.i // restore psr.i
- adds r3=8,r2 // set up second base pointer for SAVE_REST
- ;;
- SAVE_REST
- movl r14=ia64_leave_kernel
- ;;
- mov rp=r14
- br.call.sptk.many b6=ia64_fault
-END(dispatch_to_fault_handler)
-
//
// --- End of long entries, Beginning of short entries
//
@@ -1209,8 +1105,8 @@ END(dispatch_to_fault_handler)
// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
ENTRY(page_not_present)
DBG_FAULT(20)
- mov r16=cr.ifa
- rsm psr.dt
+ MOV_FROM_IFA(r16)
+ RSM_PSR_DT
/*
* The Linux page fault handler doesn't expect non-present pages to be in
* the TLB. Flush the existing entry now, so we meet that expectation.
@@ -1229,8 +1125,8 @@ END(page_not_present)
// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52)
ENTRY(key_permission)
DBG_FAULT(21)
- mov r16=cr.ifa
- rsm psr.dt
+ MOV_FROM_IFA(r16)
+ RSM_PSR_DT
mov r31=pr
;;
srlz.d
@@ -1242,8 +1138,8 @@ END(key_permission)
// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
ENTRY(iaccess_rights)
DBG_FAULT(22)
- mov r16=cr.ifa
- rsm psr.dt
+ MOV_FROM_IFA(r16)
+ RSM_PSR_DT
mov r31=pr
;;
srlz.d
@@ -1255,8 +1151,8 @@ END(iaccess_rights)
// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
ENTRY(daccess_rights)
DBG_FAULT(23)
- mov r16=cr.ifa
- rsm psr.dt
+ MOV_FROM_IFA(r16)
+ RSM_PSR_DT
mov r31=pr
;;
srlz.d
@@ -1268,7 +1164,7 @@ END(daccess_rights)
// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
ENTRY(general_exception)
DBG_FAULT(24)
- mov r16=cr.isr
+ MOV_FROM_ISR(r16)
mov r31=pr
;;
cmp4.eq p6,p0=0,r16
@@ -1297,8 +1193,8 @@ END(disabled_fp_reg)
ENTRY(nat_consumption)
DBG_FAULT(26)
- mov r16=cr.ipsr
- mov r17=cr.isr
+ MOV_FROM_IPSR(p0, r16)
+ MOV_FROM_ISR(r17)
mov r31=pr // save PR
;;
and r18=0xf,r17 // r18 = cr.ipsr.code{3:0}
@@ -1308,10 +1204,10 @@ ENTRY(nat_consumption)
dep r16=-1,r16,IA64_PSR_ED_BIT,1
(p6) br.cond.spnt 1f // branch if (cr.ispr.na == 0 || cr.ipsr.code{3:0} != LFETCH)
;;
- mov cr.ipsr=r16 // set cr.ipsr.na
+ MOV_TO_IPSR(p0, r16, r18)
mov pr=r31,-1
;;
- rfi
+ RFI
1: mov pr=r31,-1
;;
@@ -1333,26 +1229,26 @@ ENTRY(speculation_vector)
*
* cr.imm contains zero_ext(imm21)
*/
- mov r18=cr.iim
+ MOV_FROM_IIM(r18)
;;
- mov r17=cr.iip
+ MOV_FROM_IIP(r17)
shl r18=r18,43 // put sign bit in position (43=64-21)
;;
- mov r16=cr.ipsr
+ MOV_FROM_IPSR(p0, r16)
shr r18=r18,39 // sign extend (39=43-4)
;;
add r17=r17,r18 // now add the offset
;;
- mov cr.iip=r17
+ MOV_TO_IIP(r17, r19)
dep r16=0,r16,41,2 // clear EI
;;
- mov cr.ipsr=r16
+ MOV_TO_IPSR(p0, r16, r19)
;;
- rfi // and go back
+ RFI
END(speculation_vector)
.org ia64_ivt+0x5800
@@ -1488,28 +1384,6 @@ END(ia32_exception)
// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
ENTRY(ia32_intercept)
DBG_FAULT(46)
-#ifdef CONFIG_IA32_SUPPORT
- mov r31=pr
- mov r16=cr.isr
- ;;
- extr.u r17=r16,16,8 // get ISR.code
- mov r18=ar.eflag
- mov r19=cr.iim // old eflag value
- ;;
- cmp.ne p6,p0=2,r17
-(p6) br.cond.spnt 1f // not a system flag fault
- xor r16=r18,r19
- ;;
- extr.u r17=r16,18,1 // get the eflags.ac bit
- ;;
- cmp.eq p6,p0=0,r17
-(p6) br.cond.spnt 1f // eflags.ac bit didn't change
- ;;
- mov pr=r31,-1 // restore predicate registers
- rfi
-
-1:
-#endif // CONFIG_IA32_SUPPORT
FAULT(46)
END(ia32_intercept)
@@ -1518,12 +1392,7 @@ END(ia32_intercept)
// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74)
ENTRY(ia32_interrupt)
DBG_FAULT(47)
-#ifdef CONFIG_IA32_SUPPORT
- mov r31=pr
- br.sptk.many dispatch_to_ia32_handler
-#else
FAULT(47)
-#endif
END(ia32_interrupt)
.org ia64_ivt+0x6c00
@@ -1646,89 +1515,174 @@ END(ia32_interrupt)
DBG_FAULT(67)
FAULT(67)
-#ifdef CONFIG_IA32_SUPPORT
+ //-----------------------------------------------------------------------------------
+ // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
+ENTRY(page_fault)
+ SSM_PSR_DT_AND_SRLZ_I
+ ;;
+ SAVE_MIN_WITH_COVER
+ alloc r15=ar.pfs,0,0,3,0
+ MOV_FROM_IFA(out0)
+ MOV_FROM_ISR(out1)
+ SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3)
+ adds r3=8,r2 // set up second base pointer
+ SSM_PSR_I(p15, p15, r14) // restore psr.i
+ movl r14=ia64_leave_kernel
+ ;;
+ SAVE_REST
+ mov rp=r14
+ ;;
+ adds out2=16,r12 // out2 = pointer to pt_regs
+ br.call.sptk.many b6=ia64_do_page_fault // ignore return address
+END(page_fault)
- /*
- * There is no particular reason for this code to be here, other than that
- * there happens to be space here that would go unused otherwise. If this
- * fault ever gets "unreserved", simply moved the following code to a more
- * suitable spot...
- */
+ENTRY(non_syscall)
+ mov ar.rsc=r27 // restore ar.rsc before SAVE_MIN_WITH_COVER
+ ;;
+ SAVE_MIN_WITH_COVER
- // IA32 interrupt entry point
+ // There is no particular reason for this code to be here, other than that
+ // there happens to be space here that would go unused otherwise. If this
+ // fault ever gets "unreserved", simply move the following code to a more
+ // suitable spot...
-ENTRY(dispatch_to_ia32_handler)
- SAVE_MIN
+ alloc r14=ar.pfs,0,0,2,0
+ MOV_FROM_IIM(out0)
+ add out1=16,sp
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+
+ SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r15, r24)
+ // guarantee that interruption collection is on
+ SSM_PSR_I(p15, p15, r15) // restore psr.i
+ movl r15=ia64_leave_kernel
;;
- mov r14=cr.isr
- ssm psr.ic | PSR_DEFAULT_BITS
+ SAVE_REST
+ mov rp=r15
;;
- srlz.i // guarantee that interruption collection is on
+ br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr
+END(non_syscall)
+
+ENTRY(__interrupt)
+ DBG_FAULT(12)
+ mov r31=pr // prepare to save predicates
;;
-(p15) ssm psr.i
- adds r3=8,r2 // Base pointer for SAVE_REST
+ SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3
+ SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r14)
+ // ensure everybody knows psr.ic is back on
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
;;
SAVE_REST
;;
- mov r15=0x80
- shr r14=r14,16 // Get interrupt number
- ;;
- cmp.ne p6,p0=r14,r15
-(p6) br.call.dpnt.many b6=non_ia32_syscall
-
- adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions
- adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp
+ MCA_RECOVER_RANGE(interrupt)
+ alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
+ MOV_FROM_IVR(out0, r8) // pass cr.ivr as first arg
+ add out1=16,sp // pass pointer to pt_regs as second arg
;;
- cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
- ld8 r8=[r14] // get r8
+ srlz.d // make sure we see the effect of cr.ivr
+ movl r14=ia64_leave_kernel
;;
- st8 [r15]=r8 // save original EAX in r1 (IA32 procs don't use the GP)
+ mov rp=r14
+ br.call.sptk.many b6=ia64_handle_irq
+END(__interrupt)
+
+ /*
+ * There is no particular reason for this code to be here, other than that
+ * there happens to be space here that would go unused otherwise. If this
+ * fault ever gets "unreserved", simply move the following code to a more
+ * suitable spot...
+ */
+
+ENTRY(dispatch_unaligned_handler)
+ SAVE_MIN_WITH_COVER
;;
- alloc r15=ar.pfs,0,0,6,0 // must first in an insn group
+ alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
+ MOV_FROM_IFA(out0)
+ adds out1=16,sp
+
+ SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24)
+ // guarantee that interruption collection is on
+ SSM_PSR_I(p15, p15, r3) // restore psr.i
+ adds r3=8,r2 // set up second base pointer
;;
- ld4 r8=[r14],8 // r8 == eax (syscall number)
- mov r15=IA32_NR_syscalls
+ SAVE_REST
+ movl r14=ia64_leave_kernel
;;
- cmp.ltu.unc p6,p7=r8,r15
- ld4 out1=[r14],8 // r9 == ecx
+ mov rp=r14
+ br.sptk.many ia64_prepare_handle_unaligned
+END(dispatch_unaligned_handler)
+
+ /*
+ * There is no particular reason for this code to be here, other than that
+ * there happens to be space here that would go unused otherwise. If this
+ * fault ever gets "unreserved", simply move the following code to a more
+ * suitable spot...
+ */
+
+ENTRY(dispatch_to_fault_handler)
+ /*
+ * Input:
+ * psr.ic: off
+ * r19: fault vector number (e.g., 24 for General Exception)
+ * r31: contains saved predicates (pr)
+ */
+ SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,5,0
+ MOV_FROM_ISR(out1)
+ MOV_FROM_IFA(out2)
+ MOV_FROM_IIM(out3)
+ MOV_FROM_ITIR(out4)
;;
- ld4 out2=[r14],8 // r10 == edx
+ SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, out0)
+ // guarantee that interruption collection is on
+ mov out0=r15
;;
- ld4 out0=[r14] // r11 == ebx
- adds r14=(IA64_PT_REGS_R13_OFFSET) + 16,sp
+ SSM_PSR_I(p15, p15, r3) // restore psr.i
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
;;
- ld4 out5=[r14],PT(R14)-PT(R13) // r13 == ebp
+ SAVE_REST
+ movl r14=ia64_leave_kernel
;;
- ld4 out3=[r14],PT(R15)-PT(R14) // r14 == esi
- adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
+ mov rp=r14
+ br.call.sptk.many b6=ia64_fault
+END(dispatch_to_fault_handler)
+
+ /*
+ * Squatting in this space ...
+ *
+ * This special case dispatcher for illegal operation faults allows preserved
+ * registers to be modified through a callback function (asm only) that is handed
+ * back from the fault handler in r8. Up to three arguments can be passed to the
+ * callback function by returning an aggregate with the callback as its first
+ * element, followed by the arguments.
+ */
+ENTRY(dispatch_illegal_op_fault)
+ .prologue
+ .body
+ SAVE_MIN_WITH_COVER
+ SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24)
+ // guarantee that interruption collection is on
;;
- ld4 out4=[r14] // r15 == edi
- movl r16=ia32_syscall_table
+ SSM_PSR_I(p15, p15, r3) // restore psr.i
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
;;
-(p6) shladd r16=r8,3,r16 // force ni_syscall if not valid syscall number
- ld4 r2=[r2] // r2 = current_thread_info()->flags
+ alloc r14=ar.pfs,0,0,1,0 // must be first in insn group
+ mov out0=ar.ec
;;
- ld8 r16=[r16]
- and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit
+ SAVE_REST
+ PT_REGS_UNWIND_INFO(0)
;;
- mov b6=r16
- movl r15=ia32_ret_from_syscall
- cmp.eq p8,p0=r2,r0
+ br.call.sptk.many rp=ia64_illegal_op_fault
+.ret0: ;;
+ alloc r14=ar.pfs,0,0,3,0 // must be first in insn group
+ mov out0=r9
+ mov out1=r10
+ mov out2=r11
+ movl r15=ia64_leave_kernel
;;
mov rp=r15
-(p8) br.call.sptk.many b6=b6
- br.cond.sptk ia32_trace_syscall
-
-non_ia32_syscall:
- alloc r15=ar.pfs,0,0,2,0
- mov out0=r14 // interrupt #
- add out1=16,sp // pointer to pt_regs
- ;; // avoid WAW on CFM
- br.call.sptk.many rp=ia32_bad_interrupt
-.ret1: movl r15=ia64_leave_kernel
+ mov b6=r8
;;
- mov rp=r15
- br.ret.sptk.many rp
-END(dispatch_to_ia32_handler)
-
-#endif /* CONFIG_IA32_SUPPORT */
+ cmp.ne p6,p0=0,r8
+(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel
+ br.sptk.many ia64_leave_kernel
+END(dispatch_illegal_op_fault)
diff --git a/arch/ia64/kernel/jprobes.S b/arch/ia64/kernel/jprobes.S
index 621630256c4..f69389c7be1 100644
--- a/arch/ia64/kernel/jprobes.S
+++ b/arch/ia64/kernel/jprobes.S
@@ -45,7 +45,7 @@
* to the correct location.
*/
#include <asm/asmmacro.h>
-#include <asm-ia64/break.h>
+#include <asm/break.h>
/*
* void jprobe_break(void)
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 5dc98b5abcf..074fde49c9e 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -40,6 +40,8 @@ extern void jprobe_inst_return(void);
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
+
enum instruction_type {A, I, M, F, B, L, X, u};
static enum instruction_type bundle_encoding[32][3] = {
{ M, I, I }, /* 00 */
@@ -76,6 +78,20 @@ static enum instruction_type bundle_encoding[32][3] = {
{ u, u, u }, /* 1F */
};
+/* Write a long branch (brl) bundle at the bundle containing 'from', targeting 'to' */
+static void __kprobes set_brl_inst(void *from, void *to)
+{
+ s64 rel = ((s64) to - (s64) from) >> 4; /* displacement in 16-byte units */
+ bundle_t *brl;
+ brl = (bundle_t *) ((u64) from & ~0xf); /* round down to a 16-byte boundary */
+ brl->quad0.template = 0x05; /* [MLX](stop) */
+ brl->quad0.slot0 = NOP_M_INST; /* nop.m 0x0 */
+ brl->quad0.slot1_p0 = ((rel >> 20) & 0x7fffffffff) << 2;
+ brl->quad1.slot1_p1 = (((rel >> 20) & 0x7fffffffff) << 2) >> (64 - 46);
+ /* brl.cond.sptk.many.clr rel<<4 (qp=0) */
+ brl->quad1.slot2 = BRL_INST(rel >> 59, rel & 0xfffff);
+}
+
/*
* In this function we check to see if the instruction
* is IP relative instruction and update the kprobe
@@ -180,8 +196,8 @@ static int __kprobes unsupported_inst(uint template, uint slot,
qp = kprobe_inst & 0x3f;
if (is_cmp_ctype_unc_inst(template, slot, major_opcode, kprobe_inst)) {
if (slot == 1 && qp) {
- printk(KERN_WARNING "Kprobes on cmp unc"
- "instruction on slot 1 at <0x%lx>"
+ printk(KERN_WARNING "Kprobes on cmp unc "
+ "instruction on slot 1 at <0x%lx> "
"is not supported\n", addr);
return -EINVAL;
@@ -219,8 +235,8 @@ static int __kprobes unsupported_inst(uint template, uint slot,
* bit 12 to be equal to 1
*/
if (slot == 1 && qp) {
- printk(KERN_WARNING "Kprobes on test bit"
- "instruction on slot at <0x%lx>"
+ printk(KERN_WARNING "Kprobes on test bit "
+ "instruction on slot at <0x%lx> "
"is not supported\n", addr);
return -EINVAL;
}
@@ -240,7 +256,7 @@ static int __kprobes unsupported_inst(uint template, uint slot,
*/
int x6=(kprobe_inst >> 27) & 0x3F;
if ((x6 == 0x10) || (x6 == 0x11)) {
- printk(KERN_WARNING "Kprobes on"
+ printk(KERN_WARNING "Kprobes on "
"Indirect Predict is not supported\n");
return -EINVAL;
}
@@ -379,9 +395,10 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
unsigned int i;
- i = atomic_sub_return(1, &kcb->prev_kprobe_index);
- __get_cpu_var(current_kprobe) = kcb->prev_kprobe[i].kp;
- kcb->kprobe_status = kcb->prev_kprobe[i].status;
+ i = atomic_read(&kcb->prev_kprobe_index);
+ __get_cpu_var(current_kprobe) = kcb->prev_kprobe[i-1].kp;
+ kcb->kprobe_status = kcb->prev_kprobe[i-1].status;
+ atomic_sub(1, &kcb->prev_kprobe_index);
}
static void __kprobes set_current_kprobe(struct kprobe *p,
@@ -406,19 +423,18 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kretprobe_instance *ri = NULL;
struct hlist_head *head, empty_rp;
- struct hlist_node *node, *tmp;
+ struct hlist_node *tmp;
unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address =
((struct fnptr *)kretprobe_trampoline)->ip;
INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ kretprobe_hash_lock(current, &head, &flags);
/*
* It is possible to have multiple instances associated with a given
* task either because an multiple functions in the call path
- * have a return probe installed on them, and/or more then one return
+ * have a return probe installed on them, and/or more than one
* return probe was registered for a target function.
*
* We can handle this because:
@@ -428,7 +444,24 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
* real return address, and all the rest will point to
* kretprobe_trampoline
*/
- hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ regs->cr_iip = orig_ret_address;
+
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
if (ri->task != current)
/* another task is sharing our hash bucket */
continue;
@@ -450,13 +483,11 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
kretprobe_assert(ri, orig_ret_address, trampoline_address);
- regs->cr_iip = orig_ret_address;
-
reset_current_kprobe();
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ kretprobe_hash_unlock(current, &flags);
preempt_enable_no_resched();
- hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+ hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
kfree(ri);
}
@@ -468,7 +499,6 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
return 1;
}
-/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
@@ -478,6 +508,77 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip;
}
+/* Return 1 if the instruction in the given slot of the bundle is a break, else 0 */
+static int __kprobes __is_ia64_break_inst(bundle_t *bundle, uint slot)
+{
+ unsigned int major_opcode;
+ unsigned int template = bundle->quad0.template;
+ unsigned long kprobe_inst;
+
+ /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
+ if (slot == 1 && bundle_encoding[template][1] == L)
+ slot++;
+
+ /* Get the instruction at the given slot */
+ get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode);
+
+ /* For break instruction,
+ * Bits 37:40 Major opcode to be zero
+ * Bits 27:32 X6 to be zero
+ * Bits 33:35 X3 to be zero
+ */
+ if (major_opcode || ((kprobe_inst >> 27) & 0x1FF)) {
+ /* Not a break instruction */
+ return 0;
+ }
+
+ /* Is a break instruction */
+ return 1;
+}
+
+/*
+ * Check whether the target bundle modifies IP or may trigger an exception.
+ * Returns 0 in that case (the probe cannot be boosted), 1 otherwise.
+ */
+static int __kprobes can_boost(bundle_t *bundle, uint slot,
+ unsigned long bundle_addr)
+{
+ unsigned int template = bundle->quad0.template;
+
+ do {
+ if (search_exception_tables(bundle_addr + slot) ||
+ __is_ia64_break_inst(bundle, slot))
+ return 0; /* exception may occur in this bundle */
+ } while ((++slot) < 3);
+ template &= 0x1e; /* ignore bit 0 of the template */
+ if (template >= 0x10 /* including B unit */ ||
+ template == 0x04 /* including X unit */ ||
+ template == 0x06) /* undefined */
+ return 0;
+
+ return 1;
+}
+
+/* Prepare the long branch bundle and disable other boosters if needed */
+static void __kprobes prepare_booster(struct kprobe *p)
+{
+ unsigned long addr = (unsigned long)p->addr & ~0xFULL;
+ unsigned int slot = (unsigned long)p->addr & 0xf;
+ struct kprobe *other_kp;
+
+ if (can_boost(&p->ainsn.insn[0].bundle, slot, addr)) {
+ set_brl_inst(&p->ainsn.insn[1].bundle, (bundle_t *)addr + 1);
+ p->ainsn.inst_flag |= INST_FLAG_BOOSTABLE;
+ }
+
+ /* disable boosters of probes at earlier slot addresses in the same bundle */
+ for (; addr < (unsigned long)p->addr; addr++) {
+ other_kp = get_kprobe((void *)addr);
+ if (other_kp)
+ other_kp->ainsn.inst_flag &= ~INST_FLAG_BOOSTABLE;
+ }
+}
+
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
unsigned long addr = (unsigned long) p->addr;
@@ -512,6 +613,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
prepare_break_inst(template, slot, major_opcode, kprobe_inst, p, qp);
+ prepare_booster(p);
+
return 0;
}
@@ -525,7 +628,9 @@ void __kprobes arch_arm_kprobe(struct kprobe *p)
src = &p->opcode.bundle;
flush_icache_range((unsigned long)p->ainsn.insn,
- (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
+ (unsigned long)p->ainsn.insn +
+ sizeof(kprobe_opcode_t) * MAX_INSN_SIZE);
+
switch (p->ainsn.slot) {
case 0:
dest->quad0.slot0 = src->quad0.slot0;
@@ -565,14 +670,16 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
- mutex_lock(&kprobe_mutex);
- free_insn_slot(p->ainsn.insn, 0);
- mutex_unlock(&kprobe_mutex);
+ if (p->ainsn.insn) {
+ free_insn_slot(p->ainsn.insn,
+ p->ainsn.inst_flag & INST_FLAG_BOOSTABLE);
+ p->ainsn.insn = NULL;
+ }
}
/*
* We are resuming execution after a single step fault, so the pt_regs
* structure reflects the register state after we executed the instruction
- * located in the kprobe (p->ainsn.insn.bundle). We still need to adjust
+ * located in the kprobe (p->ainsn.insn->bundle). We still need to adjust
* the ip to point back to the original stack address. To set the IP address
* to original stack address, handle the case where we need to fixup the
* relative IP address and/or fixup branch register.
@@ -589,7 +696,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
if (slot == 1 && bundle_encoding[template][1] == L)
slot = 2;
- if (p->ainsn.inst_flag) {
+ if (p->ainsn.inst_flag & ~INST_FLAG_BOOSTABLE) {
if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) {
/* Fix relative IP address */
@@ -668,33 +775,12 @@ static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs)
static int __kprobes is_ia64_break_inst(struct pt_regs *regs)
{
unsigned int slot = ia64_psr(regs)->ri;
- unsigned int template, major_opcode;
- unsigned long kprobe_inst;
unsigned long *kprobe_addr = (unsigned long *)regs->cr_iip;
bundle_t bundle;
memcpy(&bundle, kprobe_addr, sizeof(bundle_t));
- template = bundle.quad0.template;
-
- /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
- if (slot == 1 && bundle_encoding[template][1] == L)
- slot++;
-
- /* Get Kprobe probe instruction at given slot*/
- get_kprobe_inst(&bundle, slot, &kprobe_inst, &major_opcode);
-
- /* For break instruction,
- * Bits 37:40 Major opcode to be zero
- * Bits 27:32 X6 to be zero
- * Bits 32:35 X3 to be zero
- */
- if (major_opcode || ((kprobe_inst >> 27) & 0x1FF) ) {
- /* Not a break instruction */
- return 0;
- }
- /* Is a break instruction */
- return 1;
+ return __is_ia64_break_inst(&bundle, slot);
}
static int __kprobes pre_kprobes_handler(struct die_args *args)
@@ -784,6 +870,19 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
return 1;
ss_probe:
+#if !defined(CONFIG_PREEMPT)
+ if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) {
+ /* Boost up -- we can execute copied instructions directly */
+ ia64_psr(regs)->ri = p->ainsn.slot;
+ regs->cr_iip = (unsigned long)&p->ainsn.insn->bundle & ~0xFULL;
+ /* turn single stepping off */
+ ia64_psr(regs)->ss = 0;
+
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ return 1;
+ }
+#endif
prepare_ss(p, regs);
kcb->kprobe_status = KPROBE_HIT_SS;
return 1;
@@ -820,7 +919,7 @@ out:
return 1;
}
-int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr)
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -848,7 +947,7 @@ int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr)
case KPROBE_HIT_SSDONE:
/*
* We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accouting
+ * we can also use npre/npostfault count for accounting
* these specific fault cases.
*/
kprobes_inc_nmissed_count(cur);
@@ -983,6 +1082,11 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
return 1;
}
+/* Intentionally empty: ia64 does not need this */
+void __kprobes jprobe_return(void)
+{
+}
+
int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
index 4f0f3b8c1ee..5151a649c96 100644
--- a/arch/ia64/kernel/machine_kexec.c
+++ b/arch/ia64/kernel/machine_kexec.c
@@ -15,16 +15,23 @@
#include <linux/cpu.h>
#include <linux/irq.h>
#include <linux/efi.h>
+#include <linux/numa.h>
+#include <linux/mmzone.h>
+
+#include <asm/numa.h>
#include <asm/mmu_context.h>
#include <asm/setup.h>
#include <asm/delay.h>
#include <asm/meminit.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/mca.h>
-typedef NORET_TYPE void (*relocate_new_kernel_t)(
+typedef void (*relocate_new_kernel_t)(
unsigned long indirection_page,
unsigned long start_address,
struct ia64_boot_param *boot_param,
- unsigned long pal_addr) ATTRIB_NORET;
+ unsigned long pal_addr) __noreturn;
struct kimage *ia64_kimage;
@@ -78,16 +85,29 @@ static void ia64_machine_kexec(struct unw_frame_info *info, void *arg)
struct kimage *image = arg;
relocate_new_kernel_t rnk;
void *pal_addr = efi_get_pal_addr();
- unsigned long code_addr = (unsigned long)page_address(image->control_code_page);
- unsigned long vector;
+ unsigned long code_addr;
int ii;
+ u64 fp, gp;
+ ia64_fptr_t *init_handler = (ia64_fptr_t *)ia64_os_init_on_kdump;
BUG_ON(!image);
+ code_addr = (unsigned long)page_address(image->control_code_page);
if (image->type == KEXEC_TYPE_CRASH) {
crash_save_this_cpu();
current->thread.ksp = (__u64)info->sw - 16;
+
+ /* Register noop init handler */
+ fp = ia64_tpa(init_handler->fp);
+ gp = ia64_tpa(ia64_getreg(_IA64_REG_GP));
+ ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, fp, gp, 0, fp, gp, 0);
+ } else {
+ /* Unregister init handlers of current kernel */
+ ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, 0, 0, 0, 0, 0, 0);
}
+ /* Unregister mca handler - No more recovery on current kernel */
+ ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, 0, 0, 0, 0, 0, 0);
+
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
@@ -107,11 +127,8 @@ static void ia64_machine_kexec(struct unw_frame_info *info, void *arg)
/* unmask TPR and clear any pending interrupts */
ia64_setreg(_IA64_REG_CR_TPR, 0);
ia64_srlz_d();
- vector = ia64_get_ivr();
- while (vector != IA64_SPURIOUS_INT_VECTOR) {
+ while (ia64_get_ivr() != IA64_SPURIOUS_INT_VECTOR)
ia64_eoi();
- vector = ia64_get_ivr();
- }
platform_kernel_launch_event();
rnk = (relocate_new_kernel_t)&code_addr;
(*rnk)(image->head, image->start, ia64_boot_param,
@@ -125,3 +142,29 @@ void machine_kexec(struct kimage *image)
unw_init_running(ia64_machine_kexec, image);
for(;;);
}
+
+void arch_crash_save_vmcoreinfo(void)
+{
+#if defined(CONFIG_DISCONTIGMEM) || defined(CONFIG_SPARSEMEM)
+ VMCOREINFO_SYMBOL(pgdat_list);
+ VMCOREINFO_LENGTH(pgdat_list, MAX_NUMNODES);
+#endif
+#ifdef CONFIG_NUMA
+ VMCOREINFO_SYMBOL(node_memblk);
+ VMCOREINFO_LENGTH(node_memblk, NR_NODE_MEMBLKS);
+ VMCOREINFO_STRUCT_SIZE(node_memblk_s);
+ VMCOREINFO_OFFSET(node_memblk_s, start_paddr);
+ VMCOREINFO_OFFSET(node_memblk_s, size);
+#endif
+#ifdef CONFIG_PGTABLE_3
+ VMCOREINFO_CONFIG(PGTABLE_3);
+#elif defined(CONFIG_PGTABLE_4)
+ VMCOREINFO_CONFIG(PGTABLE_4);
+#endif
+}
+
+unsigned long paddr_vmcoreinfo_note(void)
+{
+ return ia64_tpa((unsigned long)(char *)&vmcoreinfo_note);
+}
+
diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c
index 7ccb228ceed..f5a1e5246b3 100644
--- a/arch/ia64/kernel/machvec.c
+++ b/arch/ia64/kernel/machvec.c
@@ -1,7 +1,6 @@
#include <linux/module.h>
-
+#include <linux/dma-mapping.h>
#include <asm/machvec.h>
-#include <asm/system.h>
#ifdef CONFIG_IA64_GENERIC
@@ -75,14 +74,16 @@ machvec_timer_interrupt (int irq, void *dev_id)
EXPORT_SYMBOL(machvec_timer_interrupt);
void
-machvec_dma_sync_single (struct device *hwdev, dma_addr_t dma_handle, size_t size, int dir)
+machvec_dma_sync_single(struct device *hwdev, dma_addr_t dma_handle, size_t size,
+ enum dma_data_direction dir)
{
mb();
}
EXPORT_SYMBOL(machvec_dma_sync_single);
void
-machvec_dma_sync_sg (struct device *hwdev, struct scatterlist *sg, int n, int dir)
+machvec_dma_sync_sg(struct device *hwdev, struct scatterlist *sg, int n,
+ enum dma_data_direction dir)
{
mb();
}
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 63b73f3d4c9..db7b36bb068 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -2,65 +2,74 @@
* File: mca.c
* Purpose: Generic MCA handling layer
*
- * Updated for latest kernel
* Copyright (C) 2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
*
* Copyright (C) 2002 Dell Inc.
- * Copyright (C) Matt Domsch (Matt_Domsch@dell.com)
+ * Copyright (C) Matt Domsch <Matt_Domsch@dell.com>
*
* Copyright (C) 2002 Intel
- * Copyright (C) Jenna Hall (jenna.s.hall@intel.com)
+ * Copyright (C) Jenna Hall <jenna.s.hall@intel.com>
*
* Copyright (C) 2001 Intel
- * Copyright (C) Fred Lewis (frederick.v.lewis@intel.com)
+ * Copyright (C) Fred Lewis <frederick.v.lewis@intel.com>
*
* Copyright (C) 2000 Intel
- * Copyright (C) Chuck Fleckenstein (cfleck@co.intel.com)
+ * Copyright (C) Chuck Fleckenstein <cfleck@co.intel.com>
*
- * Copyright (C) 1999, 2004 Silicon Graphics, Inc.
- * Copyright (C) Vijay Chander(vijay@engr.sgi.com)
+ * Copyright (C) 1999, 2004-2008 Silicon Graphics, Inc.
+ * Copyright (C) Vijay Chander <vijay@engr.sgi.com>
*
- * 03/04/15 D. Mosberger Added INIT backtrace support.
- * 02/03/25 M. Domsch GUID cleanups
+ * Copyright (C) 2006 FUJITSU LIMITED
+ * Copyright (C) Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
*
- * 02/01/04 J. Hall Aligned MCA stack to 16 bytes, added platform vs. CPU
- * error flag, set SAL default return values, changed
- * error record structure to linked list, added init call
- * to sal_get_state_info_size().
+ * 2000-03-29 Chuck Fleckenstein <cfleck@co.intel.com>
+ * Fixed PAL/SAL update issues, began MCA bug fixes, logging issues,
+ * added min save state dump, added INIT handler.
*
- * 01/01/03 F. Lewis Added setup of CMCI and CPEI IRQs, logging of corrected
- * platform errors, completed code for logging of
- * corrected & uncorrected machine check errors, and
- * updated for conformance with Nov. 2000 revision of the
- * SAL 3.0 spec.
- * 00/03/29 C. Fleckenstein Fixed PAL/SAL update issues, began MCA bug fixes, logging issues,
- * added min save state dump, added INIT handler.
+ * 2001-01-03 Fred Lewis <frederick.v.lewis@intel.com>
+ * Added setup of CMCI and CPEI IRQs, logging of corrected platform
+ * errors, completed code for logging of corrected & uncorrected
+ * machine check errors, and updated for conformance with Nov. 2000
+ * revision of the SAL 3.0 spec.
+ *
+ * 2002-01-04 Jenna Hall <jenna.s.hall@intel.com>
+ * Aligned MCA stack to 16 bytes, added platform vs. CPU error flag,
+ * set SAL default return values, changed error record structure to
+ * linked list, added init call to sal_get_state_info_size().
+ *
+ * 2002-03-25 Matt Domsch <Matt_Domsch@dell.com>
+ * GUID cleanups.
+ *
+ * 2003-04-15 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Added INIT backtrace support.
*
* 2003-12-08 Keith Owens <kaos@sgi.com>
- * smp_call_function() must not be called from interrupt context (can
- * deadlock on tasklist_lock). Use keventd to call smp_call_function().
+ * smp_call_function() must not be called from interrupt context
+ * (can deadlock on tasklist_lock).
+ * Use keventd to call smp_call_function().
*
* 2004-02-01 Keith Owens <kaos@sgi.com>
- * Avoid deadlock when using printk() for MCA and INIT records.
- * Delete all record printing code, moved to salinfo_decode in user space.
- * Mark variables and functions static where possible.
- * Delete dead variables and functions.
- * Reorder to remove the need for forward declarations and to consolidate
- * related code.
+ * Avoid deadlock when using printk() for MCA and INIT records.
+ * Delete all record printing code, moved to salinfo_decode in user
+ * space. Mark variables and functions static where possible.
+ * Delete dead variables and functions. Reorder to remove the need
+ * for forward declarations and to consolidate related code.
*
* 2005-08-12 Keith Owens <kaos@sgi.com>
- * Convert MCA/INIT handlers to use per event stacks and SAL/OS state.
+ * Convert MCA/INIT handlers to use per event stacks and SAL/OS
+ * state.
*
* 2005-10-07 Keith Owens <kaos@sgi.com>
* Add notify_die() hooks.
*
* 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
- * Add printing support for MCA/INIT.
+ * Add printing support for MCA/INIT.
*
* 2007-04-27 Russ Anderson <rja@sgi.com>
* Support multiple cpus going through OS_MCA in the same event.
*/
+#include <linux/jiffies.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/sched.h>
@@ -75,19 +84,21 @@
#include <linux/workqueue.h>
#include <linux/cpumask.h>
#include <linux/kdebug.h>
+#include <linux/cpu.h>
+#include <linux/gfp.h>
#include <asm/delay.h>
#include <asm/machvec.h>
#include <asm/meminit.h>
#include <asm/page.h>
#include <asm/ptrace.h>
-#include <asm/system.h>
#include <asm/sal.h>
#include <asm/mca.h>
#include <asm/kexec.h>
#include <asm/irq.h>
#include <asm/hw_irq.h>
+#include <asm/tlb.h>
#include "mca_drv.h"
#include "entry.h"
@@ -98,11 +109,26 @@
# define IA64_MCA_DEBUG(fmt...)
#endif
+/*
+ * Run the die-notifier chain for an INIT or MCA event.  "tag" is the
+ * string handed to notify_die() ("INIT" or "MCA").  When the chain
+ * returns NOTIFY_STOP and the caller passed spin == 1, control is given
+ * to ia64_mca_spin(); for any other spin value the result is ignored.
+ * __func__ expands in the function that uses the macro.
+ */
+#define NOTIFY_DIE_MAYBE_SPIN(tag, event, regs, arg, spin)		\
+do {									\
+	if (notify_die((event), tag, (regs), (arg), 0, 0)		\
+			== NOTIFY_STOP) {				\
+		if ((spin) == 1)					\
+			ia64_mca_spin(__func__);			\
+	}								\
+} while (0)
+
+#define NOTIFY_INIT(event, regs, arg, spin)				\
+	NOTIFY_DIE_MAYBE_SPIN("INIT", event, regs, arg, spin)
+
+#define NOTIFY_MCA(event, regs, arg, spin)				\
+	NOTIFY_DIE_MAYBE_SPIN("MCA", event, regs, arg, spin)
+
/* Used by mca_asm.S */
DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */
DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */
+DEFINE_PER_CPU(u64, ia64_mca_tr_reload); /* Flag for TR reload */
unsigned long __per_cpu_mca[NR_CPUS];
@@ -191,7 +217,7 @@ void ia64_mca_printk(const char *fmt, ...)
/* Copy the output into mlogbuf */
if (oops_in_progress) {
/* mlogbuf was abandoned, use printk directly instead. */
- printk(temp_buf);
+ printk("%s", temp_buf);
} else {
spin_lock(&mlogbuf_wlock);
for (p = temp_buf; *p; p++) {
@@ -242,7 +268,7 @@ void ia64_mlogbuf_dump(void)
}
*p = '\0';
if (temp_buf[0])
- printk(temp_buf);
+ printk("%s", temp_buf);
mlogbuf_start = index;
mlogbuf_timestamp = 0;
@@ -284,7 +310,8 @@ static void ia64_mlogbuf_dump_from_init(void)
if (mlogbuf_finished)
return;
- if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) {
+ if (mlogbuf_timestamp &&
+ time_before(jiffies, mlogbuf_timestamp + 30 * HZ)) {
printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT "
" and the system seems to be messed up.\n");
ia64_mlogbuf_finish(0);
@@ -404,8 +431,8 @@ ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe)
IA64_LOG_INDEX_INC(sal_info_type);
IA64_LOG_UNLOCK(sal_info_type);
if (irq_safe) {
- IA64_MCA_DEBUG("%s: SAL error record type %d retrieved. "
- "Record length = %ld\n", __FUNCTION__, sal_info_type, total_len);
+ IA64_MCA_DEBUG("%s: SAL error record type %d retrieved. Record length = %ld\n",
+ __func__, sal_info_type, total_len);
}
*buffer = (u8 *) log_buffer;
return total_len;
@@ -509,7 +536,7 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg)
static DEFINE_SPINLOCK(cpe_history_lock);
IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",
- __FUNCTION__, cpe_irq, smp_processor_id());
+ __func__, cpe_irq, smp_processor_id());
/* SAL spec states this should run w/ interrupts enabled */
local_irq_enable();
@@ -554,6 +581,8 @@ out:
/* Get the CPE error record and log it */
ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE);
+ local_irq_disable();
+
return IRQ_HANDLED;
}
@@ -571,7 +600,7 @@ out:
* Outputs
* None
*/
-static void __init
+void
ia64_mca_register_cpev (int cpev)
{
/* Register the CPE interrupt vector with SAL */
@@ -585,7 +614,7 @@ ia64_mca_register_cpev (int cpev)
}
IA64_MCA_DEBUG("%s: corrected platform error "
- "vector %#x registered\n", __FUNCTION__, cpev);
+ "vector %#x registered\n", __func__, cpev);
}
#endif /* CONFIG_ACPI */
@@ -602,7 +631,7 @@ ia64_mca_register_cpev (int cpev)
* Outputs
* None
*/
-void __cpuinit
+void
ia64_mca_cmc_vector_setup (void)
{
cmcv_reg_t cmcv;
@@ -612,12 +641,11 @@ ia64_mca_cmc_vector_setup (void)
cmcv.cmcv_vector = IA64_CMC_VECTOR;
ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
- IA64_MCA_DEBUG("%s: CPU %d corrected "
- "machine check vector %#x registered.\n",
- __FUNCTION__, smp_processor_id(), IA64_CMC_VECTOR);
+ IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x registered.\n",
+ __func__, smp_processor_id(), IA64_CMC_VECTOR);
IA64_MCA_DEBUG("%s: CPU %d CMCV = %#016lx\n",
- __FUNCTION__, smp_processor_id(), ia64_getreg(_IA64_REG_CR_CMCV));
+ __func__, smp_processor_id(), ia64_getreg(_IA64_REG_CR_CMCV));
}
/*
@@ -642,9 +670,8 @@ ia64_mca_cmc_vector_disable (void *dummy)
cmcv.cmcv_mask = 1; /* Mask/disable interrupt */
ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
- IA64_MCA_DEBUG("%s: CPU %d corrected "
- "machine check vector %#x disabled.\n",
- __FUNCTION__, smp_processor_id(), cmcv.cmcv_vector);
+ IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x disabled.\n",
+ __func__, smp_processor_id(), cmcv.cmcv_vector);
}
/*
@@ -669,9 +696,8 @@ ia64_mca_cmc_vector_enable (void *dummy)
cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */
ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
- IA64_MCA_DEBUG("%s: CPU %d corrected "
- "machine check vector %#x enabled.\n",
- __FUNCTION__, smp_processor_id(), cmcv.cmcv_vector);
+ IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x enabled.\n",
+ __func__, smp_processor_id(), cmcv.cmcv_vector);
}
/*
@@ -683,7 +709,7 @@ ia64_mca_cmc_vector_enable (void *dummy)
static void
ia64_mca_cmc_vector_disable_keventd(struct work_struct *unused)
{
- on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 1, 0);
+ on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 0);
}
/*
@@ -695,14 +721,13 @@ ia64_mca_cmc_vector_disable_keventd(struct work_struct *unused)
static void
ia64_mca_cmc_vector_enable_keventd(struct work_struct *unused)
{
- on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 1, 0);
+ on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 0);
}
/*
* ia64_mca_wakeup
*
- * Send an inter-cpu interrupt to wake-up a particular cpu
- * and mark that cpu to be out of rendez.
+ * Send an inter-cpu interrupt to wake-up a particular cpu.
*
* Inputs : cpuid
* Outputs : None
@@ -711,14 +736,12 @@ static void
ia64_mca_wakeup(int cpu)
{
platform_send_ipi(cpu, IA64_MCA_WAKEUP_VECTOR, IA64_IPI_DM_INT, 0);
- ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
-
}
/*
* ia64_mca_wakeup_all
*
- * Wakeup all the cpus which have rendez'ed previously.
+ * Wakeup all the slave cpus which have rendez'ed previously.
*
* Inputs : None
* Outputs : None
@@ -741,7 +764,10 @@ ia64_mca_wakeup_all(void)
*
* This is handler used to put slave processors into spinloop
* while the monarch processor does the mca handling and later
- * wake each slave up once the monarch is done.
+ * wake each slave up once the monarch is done. The state
+ * IA64_MCA_RENDEZ_CHECKIN_DONE indicates the cpu is rendez'ed
+ * in SAL. The state IA64_MCA_RENDEZ_CHECKIN_NOTDONE indicates
+ * the cpu has come out of OS rendezvous.
*
* Inputs : None
* Outputs : None
@@ -756,9 +782,8 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg)
/* Mask all interrupts */
local_irq_save(flags);
- if (notify_die(DIE_MCA_RENDZVOUS_ENTER, "MCA", get_irq_regs(),
- (long)&nd, 0, 0) == NOTIFY_STOP)
- ia64_mca_spin(__FUNCTION__);
+
+ NOTIFY_MCA(DIE_MCA_RENDZVOUS_ENTER, get_irq_regs(), (long)&nd, 1);
ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE;
/* Register with the SAL monarch that the slave has
@@ -766,18 +791,15 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg)
*/
ia64_sal_mc_rendez();
- if (notify_die(DIE_MCA_RENDZVOUS_PROCESS, "MCA", get_irq_regs(),
- (long)&nd, 0, 0) == NOTIFY_STOP)
- ia64_mca_spin(__FUNCTION__);
+ NOTIFY_MCA(DIE_MCA_RENDZVOUS_PROCESS, get_irq_regs(), (long)&nd, 1);
/* Wait for the monarch cpu to exit. */
while (monarch_cpu != -1)
cpu_relax(); /* spin until monarch leaves */
- if (notify_die(DIE_MCA_RENDZVOUS_LEAVE, "MCA", get_irq_regs(),
- (long)&nd, 0, 0) == NOTIFY_STOP)
- ia64_mca_spin(__FUNCTION__);
+ NOTIFY_MCA(DIE_MCA_RENDZVOUS_LEAVE, get_irq_regs(), (long)&nd, 1);
+ ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
/* Enable all interrupts */
local_irq_restore(flags);
return IRQ_HANDLED;
@@ -830,7 +852,7 @@ EXPORT_SYMBOL(ia64_unreg_MCA_extension);
static inline void
-copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat)
+copy_reg(const u64 *fr, u64 fnat, unsigned long *tr, unsigned long *tnat)
{
u64 fslot, tslot, nat;
*tr = *fr;
@@ -867,6 +889,65 @@ ia64_mca_modify_comm(const struct task_struct *previous_current)
memcpy(current->comm, comm, sizeof(current->comm));
}
+static void
+finish_pt_regs(struct pt_regs *regs, struct ia64_sal_os_state *sos,
+ unsigned long *nat)
+{
+ const pal_min_state_area_t *ms = sos->pal_min_state;
+ const u64 *bank;
+
+ /* Fill in the fields of regs that are taken from the min-state
+ * area referenced by sos->pal_min_state: iip/ipsr/ifs, pr, b0,
+ * ar.rsc, r1-r3, r8-r15 and the banked r16-r31.
+ *
+ * If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use
+ * pmsa_{xip,xpsr,xfs} and keep pmsa_{iip,ipsr,ifs} in
+ * sos->{iip,ipsr,ifs}.
+ *
+ * Every general register goes through copy_reg(), which is
+ * given ms->pmsa_nat_bits and the caller's nat word; presumably
+ * it carries the matching NaT bit across as well - confirm
+ * against copy_reg().
+ */
+ if (ia64_psr(regs)->ic) {
+ regs->cr_iip = ms->pmsa_iip;
+ regs->cr_ipsr = ms->pmsa_ipsr;
+ regs->cr_ifs = ms->pmsa_ifs;
+ } else {
+ regs->cr_iip = ms->pmsa_xip;
+ regs->cr_ipsr = ms->pmsa_xpsr;
+ regs->cr_ifs = ms->pmsa_xfs;
+
+ sos->iip = ms->pmsa_iip;
+ sos->ipsr = ms->pmsa_ipsr;
+ sos->ifs = ms->pmsa_ifs;
+ }
+ regs->pr = ms->pmsa_pr;
+ regs->b0 = ms->pmsa_br0;
+ regs->ar_rsc = ms->pmsa_rsc;
+ copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, &regs->r1, nat); /* rN <- pmsa_gr[N-1] */
+ copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, &regs->r2, nat);
+ copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, &regs->r3, nat);
+ copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, &regs->r8, nat);
+ copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, &regs->r9, nat);
+ copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, &regs->r10, nat);
+ copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, &regs->r11, nat);
+ copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, &regs->r12, nat);
+ copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, &regs->r13, nat);
+ copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, &regs->r14, nat);
+ copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, &regs->r15, nat);
+ if (ia64_psr(regs)->bn) /* psr.bn picks which saved bank supplies r16-r31 */
+ bank = ms->pmsa_bank1_gr;
+ else
+ bank = ms->pmsa_bank0_gr;
+ copy_reg(&bank[16-16], ms->pmsa_nat_bits, &regs->r16, nat); /* rN <- bank[N-16] */
+ copy_reg(&bank[17-16], ms->pmsa_nat_bits, &regs->r17, nat);
+ copy_reg(&bank[18-16], ms->pmsa_nat_bits, &regs->r18, nat);
+ copy_reg(&bank[19-16], ms->pmsa_nat_bits, &regs->r19, nat);
+ copy_reg(&bank[20-16], ms->pmsa_nat_bits, &regs->r20, nat);
+ copy_reg(&bank[21-16], ms->pmsa_nat_bits, &regs->r21, nat);
+ copy_reg(&bank[22-16], ms->pmsa_nat_bits, &regs->r22, nat);
+ copy_reg(&bank[23-16], ms->pmsa_nat_bits, &regs->r23, nat);
+ copy_reg(&bank[24-16], ms->pmsa_nat_bits, &regs->r24, nat);
+ copy_reg(&bank[25-16], ms->pmsa_nat_bits, &regs->r25, nat);
+ copy_reg(&bank[26-16], ms->pmsa_nat_bits, &regs->r26, nat);
+ copy_reg(&bank[27-16], ms->pmsa_nat_bits, &regs->r27, nat);
+ copy_reg(&bank[28-16], ms->pmsa_nat_bits, &regs->r28, nat);
+ copy_reg(&bank[29-16], ms->pmsa_nat_bits, &regs->r29, nat);
+ copy_reg(&bank[30-16], ms->pmsa_nat_bits, &regs->r30, nat);
+ copy_reg(&bank[31-16], ms->pmsa_nat_bits, &regs->r31, nat);
+}
+
/* On entry to this routine, we are running on the per cpu stack, see
* mca_asm.h. The original stack has not been touched by this event. Some of
* the original stack's registers will be in the RBS on this stack. This stack
@@ -894,14 +975,13 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
struct switch_stack *old_sw;
unsigned size = sizeof(struct pt_regs) +
sizeof(struct switch_stack) + 16;
- u64 *old_bspstore, *old_bsp;
- u64 *new_bspstore, *new_bsp;
- u64 old_unat, old_rnat, new_rnat, nat;
+ unsigned long *old_bspstore, *old_bsp;
+ unsigned long *new_bspstore, *new_bsp;
+ unsigned long old_unat, old_rnat, new_rnat, nat;
u64 slots, loadrs = regs->loadrs;
u64 r12 = ms->pmsa_gr[12-1], r13 = ms->pmsa_gr[13-1];
u64 ar_bspstore = regs->ar_bspstore;
u64 ar_bsp = regs->ar_bspstore + (loadrs >> 16);
- const u64 *bank;
const char *msg;
int cpu = smp_processor_id();
@@ -948,10 +1028,10 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
* loadrs for the new stack and save it in the new pt_regs, where
* ia64_old_stack() can get it.
*/
- old_bspstore = (u64 *)ar_bspstore;
- old_bsp = (u64 *)ar_bsp;
+ old_bspstore = (unsigned long *)ar_bspstore;
+ old_bsp = (unsigned long *)ar_bsp;
slots = ia64_rse_num_regs(old_bspstore, old_bsp);
- new_bspstore = (u64 *)((u64)current + IA64_RBS_OFFSET);
+ new_bspstore = (unsigned long *)((u64)current + IA64_RBS_OFFSET);
new_bsp = ia64_rse_skip_regs(new_bspstore, slots);
regs->loadrs = (new_bsp - new_bspstore) * 8 << 16;
@@ -1004,54 +1084,9 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
p = (char *)r12 - sizeof(*regs);
old_regs = (struct pt_regs *)p;
memcpy(old_regs, regs, sizeof(*regs));
- /* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use
- * pmsa_{xip,xpsr,xfs}
- */
- if (ia64_psr(regs)->ic) {
- old_regs->cr_iip = ms->pmsa_iip;
- old_regs->cr_ipsr = ms->pmsa_ipsr;
- old_regs->cr_ifs = ms->pmsa_ifs;
- } else {
- old_regs->cr_iip = ms->pmsa_xip;
- old_regs->cr_ipsr = ms->pmsa_xpsr;
- old_regs->cr_ifs = ms->pmsa_xfs;
- }
- old_regs->pr = ms->pmsa_pr;
- old_regs->b0 = ms->pmsa_br0;
old_regs->loadrs = loadrs;
- old_regs->ar_rsc = ms->pmsa_rsc;
old_unat = old_regs->ar_unat;
- copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, &old_regs->r1, &old_unat);
- copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, &old_regs->r2, &old_unat);
- copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, &old_regs->r3, &old_unat);
- copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, &old_regs->r8, &old_unat);
- copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, &old_regs->r9, &old_unat);
- copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, &old_regs->r10, &old_unat);
- copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, &old_regs->r11, &old_unat);
- copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, &old_regs->r12, &old_unat);
- copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, &old_regs->r13, &old_unat);
- copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, &old_regs->r14, &old_unat);
- copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, &old_regs->r15, &old_unat);
- if (ia64_psr(old_regs)->bn)
- bank = ms->pmsa_bank1_gr;
- else
- bank = ms->pmsa_bank0_gr;
- copy_reg(&bank[16-16], ms->pmsa_nat_bits, &old_regs->r16, &old_unat);
- copy_reg(&bank[17-16], ms->pmsa_nat_bits, &old_regs->r17, &old_unat);
- copy_reg(&bank[18-16], ms->pmsa_nat_bits, &old_regs->r18, &old_unat);
- copy_reg(&bank[19-16], ms->pmsa_nat_bits, &old_regs->r19, &old_unat);
- copy_reg(&bank[20-16], ms->pmsa_nat_bits, &old_regs->r20, &old_unat);
- copy_reg(&bank[21-16], ms->pmsa_nat_bits, &old_regs->r21, &old_unat);
- copy_reg(&bank[22-16], ms->pmsa_nat_bits, &old_regs->r22, &old_unat);
- copy_reg(&bank[23-16], ms->pmsa_nat_bits, &old_regs->r23, &old_unat);
- copy_reg(&bank[24-16], ms->pmsa_nat_bits, &old_regs->r24, &old_unat);
- copy_reg(&bank[25-16], ms->pmsa_nat_bits, &old_regs->r25, &old_unat);
- copy_reg(&bank[26-16], ms->pmsa_nat_bits, &old_regs->r26, &old_unat);
- copy_reg(&bank[27-16], ms->pmsa_nat_bits, &old_regs->r27, &old_unat);
- copy_reg(&bank[28-16], ms->pmsa_nat_bits, &old_regs->r28, &old_unat);
- copy_reg(&bank[29-16], ms->pmsa_nat_bits, &old_regs->r29, &old_unat);
- copy_reg(&bank[30-16], ms->pmsa_nat_bits, &old_regs->r30, &old_unat);
- copy_reg(&bank[31-16], ms->pmsa_nat_bits, &old_regs->r31, &old_unat);
+ finish_pt_regs(old_regs, sos, &old_unat);
/* Next stack a struct switch_stack. mca_asm.S built a partial
* switch_stack, copy it and fill in the blanks using pt_regs and
@@ -1119,8 +1154,10 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
return previous_current;
no_mod:
- printk(KERN_INFO "cpu %d, %s %s, original stack not modified\n",
+ mprintk(KERN_INFO "cpu %d, %s %s, original stack not modified\n",
smp_processor_id(), type, msg);
+ old_unat = regs->ar_unat;
+ finish_pt_regs(regs, sos, &old_unat);
return previous_current;
}
@@ -1135,30 +1172,27 @@ no_mod:
static void
ia64_wait_for_slaves(int monarch, const char *type)
{
- int c, wait = 0, missing = 0;
- for_each_online_cpu(c) {
- if (c == monarch)
- continue;
- if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
- udelay(1000); /* short wait first */
- wait = 1;
- break;
- }
- }
- if (!wait)
- goto all_in;
- for_each_online_cpu(c) {
- if (c == monarch)
- continue;
- if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
- udelay(5*1000000); /* wait 5 seconds for slaves (arbitrary) */
- if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
- missing = 1;
- break;
+ int c, i , wait;
+
+ /*
+ * wait 5 seconds total for slaves (arbitrary)
+ */
+ for (i = 0; i < 5000; i++) {
+ wait = 0;
+ for_each_online_cpu(c) {
+ if (c == monarch)
+ continue;
+ if (ia64_mc_info.imi_rendez_checkin[c]
+ == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
+ udelay(1000); /* short wait */
+ wait = 1;
+ break;
+ }
}
+ if (!wait)
+ goto all_in;
}
- if (!missing)
- goto all_in;
+
/*
* Maybe slave(s) dead. Print buffered messages immediately.
*/
@@ -1178,6 +1212,52 @@ all_in:
return;
}
+/*
+ * mca_insert_tr
+ *
+ * Re-insert this cpu's dynamically allocated translation registers from
+ * the software copies kept in ia64_idtrs[cpu].  For each valid entry the
+ * old translation is purged and the recorded one inserted again; the
+ * region register is switched temporarily when the recorded rr value
+ * differs from the current one, and put back afterwards.
+ *
+ * iord: 1: itr, 2: dtr
+ *
+ * The whole reload is bracketed by ia64_clear_ic()/ia64_set_psr().
+ */
+static void mca_insert_tr(u64 iord)
+{
+	int i;
+	u64 old_rr;
+	struct ia64_tr_entry *p;
+	unsigned long psr;
+	int cpu = smp_processor_id();
+
+	/* No table was ever set up for this cpu: nothing to reload. */
+	if (!ia64_idtrs[cpu])
+		return;
+
+	psr = ia64_clear_ic();
+	for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) {
+		/*
+		 * Select the entry that belongs to TR slot i.  The table
+		 * is split in two halves of IA64_TR_ALLOC_MAX entries,
+		 * chosen by iord.  Without the "+ i" the same entry was
+		 * examined and re-inserted for every slot.
+		 * NOTE(review): assumes entries are indexed by slot number
+		 * within each half - confirm against the code that fills
+		 * ia64_idtrs[].
+		 */
+		p = ia64_idtrs[cpu] + (iord - 1) * IA64_TR_ALLOC_MAX + i;
+		if (p->pte & 0x1) {
+			/* Temporarily switch rid if the entry needs it. */
+			old_rr = ia64_get_rr(p->ifa);
+			if (old_rr != p->rr) {
+				ia64_set_rr(p->ifa, p->rr);
+				ia64_srlz_d();
+			}
+			/* Purge before inserting the recorded translation. */
+			ia64_ptr(iord, p->ifa, p->itir >> 2);
+			ia64_srlz_i();
+			if (iord & 0x1) {
+				ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2);
+				ia64_srlz_i();
+			}
+			if (iord & 0x2) {
+				ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2);
+				ia64_srlz_i();
+			}
+			/* Put the original region register value back. */
+			if (old_rr != p->rr) {
+				ia64_set_rr(p->ifa, old_rr);
+				ia64_srlz_d();
+			}
+		}
+	}
+	ia64_set_psr(psr);
+}
+
/*
* ia64_mca_handler
*
@@ -1205,7 +1285,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
int recover, cpu = smp_processor_id();
struct task_struct *previous_current;
struct ia64_mca_notify_die nd =
- { .sos = sos, .monarch_cpu = &monarch_cpu };
+ { .sos = sos, .monarch_cpu = &monarch_cpu, .data = &recover };
static atomic_t mca_count;
static cpumask_t mca_cpu;
@@ -1221,28 +1301,26 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
- if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, (long)&nd, 0, 0)
- == NOTIFY_STOP)
- ia64_mca_spin(__FUNCTION__);
+ NOTIFY_MCA(DIE_MCA_MONARCH_ENTER, regs, (long)&nd, 1);
+
+ ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA;
if (sos->monarch) {
ia64_wait_for_slaves(cpu, "MCA");
+
+ /* Wakeup all the processors which are spinning in the
+ * rendezvous loop. They will leave SAL, then spin in the OS
+ * with interrupts disabled until this monarch cpu leaves the
+ * MCA handler. That gets control back to the OS so we can
+ * backtrace the other cpus, backtrace when spinning in SAL
+ * does not work.
+ */
+ ia64_mca_wakeup_all();
} else {
- ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA;
while (cpu_isset(cpu, mca_cpu))
cpu_relax(); /* spin until monarch wakes us */
- ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
- }
+ }
- /* Wakeup all the processors which are spinning in the rendezvous loop.
- * They will leave SAL, then spin in the OS with interrupts disabled
- * until this monarch cpu leaves the MCA handler. That gets control
- * back to the OS so we can backtrace the other cpus, backtrace when
- * spinning in SAL does not work.
- */
- ia64_mca_wakeup_all();
- if (notify_die(DIE_MCA_MONARCH_PROCESS, "MCA", regs, (long)&nd, 0, 0)
- == NOTIFY_STOP)
- ia64_mca_spin(__FUNCTION__);
+ NOTIFY_MCA(DIE_MCA_MONARCH_PROCESS, regs, (long)&nd, 1);
/* Get the MCA error record and log it */
ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
@@ -1261,15 +1339,14 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
} else {
/* Dump buffered message to console */
ia64_mlogbuf_finish(1);
-#ifdef CONFIG_KEXEC
- atomic_set(&kdump_in_progress, 1);
- monarch_cpu = -1;
-#endif
}
- if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover)
- == NOTIFY_STOP)
- ia64_mca_spin(__FUNCTION__);
+ if (__get_cpu_var(ia64_mca_tr_reload)) {
+ mca_insert_tr(0x1); /*Reload dynamic itrs*/
+ mca_insert_tr(0x2); /*Reload dynamic dtrs*/
+ }
+
+ NOTIFY_MCA(DIE_MCA_MONARCH_LEAVE, regs, (long)&nd, 1);
if (atomic_dec_return(&mca_count) > 0) {
int i;
@@ -1277,21 +1354,22 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
/* wake up the next monarch cpu,
* and put this cpu in the rendez loop.
*/
- ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA;
for_each_online_cpu(i) {
if (cpu_isset(i, mca_cpu)) {
monarch_cpu = i;
cpu_clear(i, mca_cpu); /* wake next cpu */
while (monarch_cpu != -1)
cpu_relax(); /* spin until last cpu leaves */
- ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
set_curr_task(cpu, previous_current);
+ ia64_mc_info.imi_rendez_checkin[cpu]
+ = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
return;
}
}
}
set_curr_task(cpu, previous_current);
- monarch_cpu = -1;
+ ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+ monarch_cpu = -1; /* This frees the slaves and previous monarchs */
}
static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd);
@@ -1319,7 +1397,7 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg)
static DEFINE_SPINLOCK(cmc_history_lock);
IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",
- __FUNCTION__, cmc_irq, smp_processor_id());
+ __func__, cmc_irq, smp_processor_id());
/* SAL spec states this should run w/ interrupts enabled */
local_irq_enable();
@@ -1368,6 +1446,8 @@ out:
/* Get the CMC error record and log it */
ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC);
+ local_irq_disable();
+
return IRQ_HANDLED;
}
@@ -1398,9 +1478,9 @@ ia64_mca_cmc_int_caller(int cmc_irq, void *arg)
ia64_mca_cmc_int_handler(cmc_irq, arg);
- for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++);
+ cpuid = cpumask_next(cpuid+1, cpu_online_mask);
- if (cpuid < NR_CPUS) {
+ if (cpuid < nr_cpu_ids) {
platform_send_ipi(cpuid, IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0);
} else {
/* If no log record, switch out of polling mode */
@@ -1434,7 +1514,8 @@ static void
ia64_mca_cmc_poll (unsigned long dummy)
{
/* Trigger a CMC interrupt cascade */
- platform_send_ipi(first_cpu(cpu_online_map), IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0);
+ platform_send_ipi(cpumask_first(cpu_online_mask), IA64_CMCP_VECTOR,
+ IA64_IPI_DM_INT, 0);
}
/*
@@ -1467,7 +1548,7 @@ ia64_mca_cpe_int_caller(int cpe_irq, void *arg)
ia64_mca_cpe_int_handler(cpe_irq, arg);
- for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++);
+ cpuid = cpumask_next(cpuid+1, cpu_online_mask);
if (cpuid < NR_CPUS) {
platform_send_ipi(cpuid, IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
@@ -1510,7 +1591,8 @@ static void
ia64_mca_cpe_poll (unsigned long dummy)
{
/* Trigger a CPE interrupt cascade */
- platform_send_ipi(first_cpu(cpu_online_map), IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
+ platform_send_ipi(cpumask_first(cpu_online_mask), IA64_CPEP_VECTOR,
+ IA64_IPI_DM_INT, 0);
}
#endif /* CONFIG_ACPI */
@@ -1589,7 +1671,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
struct ia64_mca_notify_die nd =
{ .sos = sos, .monarch_cpu = &monarch_cpu };
- (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0);
+ NOTIFY_INIT(DIE_INIT_ENTER, regs, (long)&nd, 0);
mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
sos->proc_state_param, cpu, sos->monarch);
@@ -1605,7 +1687,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
*/
if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) {
mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n",
- __FUNCTION__, cpu);
+ __func__, cpu);
atomic_dec(&slaves);
sos->monarch = 1;
}
@@ -1617,26 +1699,35 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
*/
if (sos->monarch && atomic_add_return(1, &monarchs) > 1) {
mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n",
- __FUNCTION__, cpu);
+ __func__, cpu);
atomic_dec(&monarchs);
sos->monarch = 0;
}
if (!sos->monarch) {
ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT;
+
+#ifdef CONFIG_KEXEC
+ while (monarch_cpu == -1 && !atomic_read(&kdump_in_progress))
+ udelay(1000);
+#else
while (monarch_cpu == -1)
- cpu_relax(); /* spin until monarch enters */
- if (notify_die(DIE_INIT_SLAVE_ENTER, "INIT", regs, (long)&nd, 0, 0)
- == NOTIFY_STOP)
- ia64_mca_spin(__FUNCTION__);
- if (notify_die(DIE_INIT_SLAVE_PROCESS, "INIT", regs, (long)&nd, 0, 0)
- == NOTIFY_STOP)
- ia64_mca_spin(__FUNCTION__);
+ cpu_relax(); /* spin until monarch enters */
+#endif
+
+ NOTIFY_INIT(DIE_INIT_SLAVE_ENTER, regs, (long)&nd, 1);
+ NOTIFY_INIT(DIE_INIT_SLAVE_PROCESS, regs, (long)&nd, 1);
+
+#ifdef CONFIG_KEXEC
+ while (monarch_cpu != -1 && !atomic_read(&kdump_in_progress))
+ udelay(1000);
+#else
while (monarch_cpu != -1)
- cpu_relax(); /* spin until monarch leaves */
- if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0)
- == NOTIFY_STOP)
- ia64_mca_spin(__FUNCTION__);
+ cpu_relax(); /* spin until monarch leaves */
+#endif
+
+ NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1);
+
mprintk("Slave on cpu %d returning to normal service.\n", cpu);
set_curr_task(cpu, previous_current);
ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
@@ -1645,9 +1736,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
}
monarch_cpu = cpu;
- if (notify_die(DIE_INIT_MONARCH_ENTER, "INIT", regs, (long)&nd, 0, 0)
- == NOTIFY_STOP)
- ia64_mca_spin(__FUNCTION__);
+ NOTIFY_INIT(DIE_INIT_MONARCH_ENTER, regs, (long)&nd, 1);
/*
* Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000, INIT can be
@@ -1662,12 +1751,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
* to default_monarch_init_process() above and just print all the
* tasks.
*/
- if (notify_die(DIE_INIT_MONARCH_PROCESS, "INIT", regs, (long)&nd, 0, 0)
- == NOTIFY_STOP)
- ia64_mca_spin(__FUNCTION__);
- if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0)
- == NOTIFY_STOP)
- ia64_mca_spin(__FUNCTION__);
+ NOTIFY_INIT(DIE_INIT_MONARCH_PROCESS, regs, (long)&nd, 1);
+ NOTIFY_INIT(DIE_INIT_MONARCH_LEAVE, regs, (long)&nd, 1);
+
mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu);
atomic_dec(&monarchs);
set_curr_task(cpu, previous_current);
@@ -1686,38 +1772,32 @@ __setup("disable_cpe_poll", ia64_mca_disable_cpe_polling);
static struct irqaction cmci_irqaction = {
.handler = ia64_mca_cmc_int_handler,
- .flags = IRQF_DISABLED,
.name = "cmc_hndlr"
};
static struct irqaction cmcp_irqaction = {
.handler = ia64_mca_cmc_int_caller,
- .flags = IRQF_DISABLED,
.name = "cmc_poll"
};
static struct irqaction mca_rdzv_irqaction = {
.handler = ia64_mca_rendez_int_handler,
- .flags = IRQF_DISABLED,
.name = "mca_rdzv"
};
static struct irqaction mca_wkup_irqaction = {
.handler = ia64_mca_wakeup_int_handler,
- .flags = IRQF_DISABLED,
.name = "mca_wkup"
};
#ifdef CONFIG_ACPI
static struct irqaction mca_cpe_irqaction = {
.handler = ia64_mca_cpe_int_handler,
- .flags = IRQF_DISABLED,
.name = "cpe_hndlr"
};
static struct irqaction mca_cpep_irqaction = {
.handler = ia64_mca_cpe_int_caller,
- .flags = IRQF_DISABLED,
.name = "cpe_poll"
};
#endif /* CONFIG_ACPI */
@@ -1728,7 +1808,7 @@ static struct irqaction mca_cpep_irqaction = {
* format most of the fields.
*/
-static void __cpuinit
+static void
format_mca_init_stack(void *mca_data, unsigned long offset,
const char *type, int cpu)
{
@@ -1753,45 +1833,42 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
/* Caller prevents this from being called after init */
static void * __init_refok mca_bootmem(void)
{
- void *p;
-
- p = alloc_bootmem(sizeof(struct ia64_mca_cpu) * NR_CPUS +
- KERNEL_STACK_SIZE);
- return (void *)ALIGN((unsigned long)p, KERNEL_STACK_SIZE);
+ return __alloc_bootmem(sizeof(struct ia64_mca_cpu),
+ KERNEL_STACK_SIZE, 0);
}
/* Do per-CPU MCA-related initialization. */
-void __cpuinit
+void
ia64_mca_cpu_init(void *cpu_data)
{
void *pal_vaddr;
+ void *data;
+ long sz = sizeof(struct ia64_mca_cpu);
+ int cpu = smp_processor_id();
static int first_time = 1;
- if (first_time) {
- void *mca_data;
- int cpu;
-
- first_time = 0;
- mca_data = mca_bootmem();
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
- format_mca_init_stack(mca_data,
- offsetof(struct ia64_mca_cpu, mca_stack),
- "MCA", cpu);
- format_mca_init_stack(mca_data,
- offsetof(struct ia64_mca_cpu, init_stack),
- "INIT", cpu);
- __per_cpu_mca[cpu] = __pa(mca_data);
- mca_data += sizeof(struct ia64_mca_cpu);
- }
- }
-
/*
- * The MCA info structure was allocated earlier and its
- * physical address saved in __per_cpu_mca[cpu]. Copy that
- * address * to ia64_mca_data so we can access it as a per-CPU
- * variable.
+ * Structure will already be allocated if cpu has been online,
+ * then offlined.
*/
- __get_cpu_var(ia64_mca_data) = __per_cpu_mca[smp_processor_id()];
+ if (__per_cpu_mca[cpu]) {
+ data = __va(__per_cpu_mca[cpu]);
+ } else {
+ if (first_time) {
+ data = mca_bootmem();
+ first_time = 0;
+ } else
+ data = (void *)__get_free_pages(GFP_KERNEL,
+ get_order(sz));
+ if (!data)
+ panic("Could not allocate MCA memory for cpu %d\n",
+ cpu);
+ }
+ format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, mca_stack),
+ "MCA", cpu);
+ format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, init_stack),
+ "INIT", cpu);
+ __get_cpu_var(ia64_mca_data) = __per_cpu_mca[cpu] = __pa(data);
/*
* Stash away a copy of the PTE needed to map the per-CPU page.
@@ -1813,6 +1890,36 @@ ia64_mca_cpu_init(void *cpu_data)
PAGE_KERNEL));
}
+/*
+ * Enable the CMC vector on the calling cpu unless CMC polling is
+ * currently enabled.  Local interrupts are saved/disabled around the
+ * check and restored afterwards.  The argument is unused.
+ */
+static void ia64_mca_cmc_vector_adjust(void *dummy)
+{
+	unsigned long irq_state;
+
+	local_irq_save(irq_state);
+	if (cmc_polling_enabled == 0)
+		ia64_mca_cmc_vector_enable(NULL);
+	local_irq_restore(irq_state);
+}
+
+/*
+ * CPU notifier callback.  For CPU_ONLINE and CPU_ONLINE_FROZEN it asks
+ * the cpu named by hcpu to run ia64_mca_cmc_vector_adjust() (without
+ * waiting for completion); every other action is ignored.  Always
+ * returns NOTIFY_OK.
+ */
+static int mca_cpu_callback(struct notifier_block *nfb,
+			    unsigned long action,
+			    void *hcpu)
+{
+	int cpu = (unsigned long) hcpu;
+
+	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
+		smp_call_function_single(cpu, ia64_mca_cmc_vector_adjust,
+					 NULL, 0);
+
+	return NOTIFY_OK;
+}
+
+/* Notifier block whose callback is mca_cpu_callback(). */
+static struct notifier_block mca_cpu_notifier = {
+	.notifier_call	= mca_cpu_callback,
+};
+
/*
* ia64_mca_init
*
@@ -1840,15 +1947,15 @@ ia64_mca_init(void)
ia64_fptr_t *init_hldlr_ptr_slave = (ia64_fptr_t *)ia64_os_init_dispatch_slave;
ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch;
int i;
- s64 rc;
+ long rc;
struct ia64_sal_retval isrv;
- u64 timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */
+ unsigned long timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */
static struct notifier_block default_init_monarch_nb = {
.notifier_call = default_monarch_init_process,
.priority = 0/* we need to notified last */
};
- IA64_MCA_DEBUG("%s: begin\n", __FUNCTION__);
+ IA64_MCA_DEBUG("%s: begin\n", __func__);
/* Clear the Rendez checkin flag for all cpus */
for(i = 0 ; i < NR_CPUS; i++)
@@ -1872,7 +1979,7 @@ ia64_mca_init(void)
printk(KERN_INFO "Increasing MCA rendezvous timeout from "
"%ld to %ld milliseconds\n", timeout, isrv.v0);
timeout = isrv.v0;
- (void) notify_die(DIE_MCA_NEW_TIMEOUT, "MCA", NULL, timeout, 0, 0);
+ NOTIFY_MCA(DIE_MCA_NEW_TIMEOUT, NULL, timeout, 0);
continue;
}
printk(KERN_ERR "Failed to register rendezvous interrupt "
@@ -1892,7 +1999,7 @@ ia64_mca_init(void)
return;
}
- IA64_MCA_DEBUG("%s: registered MCA rendezvous spinloop and wakeup mech.\n", __FUNCTION__);
+ IA64_MCA_DEBUG("%s: registered MCA rendezvous spinloop and wakeup mech.\n", __func__);
ia64_mc_info.imi_mca_handler = ia64_tpa(mca_hldlr_ptr->fp);
/*
@@ -1913,7 +2020,7 @@ ia64_mca_init(void)
return;
}
- IA64_MCA_DEBUG("%s: registered OS MCA handler with SAL at 0x%lx, gp = 0x%lx\n", __FUNCTION__,
+ IA64_MCA_DEBUG("%s: registered OS MCA handler with SAL at 0x%lx, gp = 0x%lx\n", __func__,
ia64_mc_info.imi_mca_handler, ia64_tpa(mca_hldlr_ptr->gp));
/*
@@ -1925,7 +2032,7 @@ ia64_mca_init(void)
ia64_mc_info.imi_slave_init_handler = ia64_tpa(init_hldlr_ptr_slave->fp);
ia64_mc_info.imi_slave_init_handler_size = 0;
- IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __FUNCTION__,
+ IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __func__,
ia64_mc_info.imi_monarch_init_handler);
/* Register the os init handler with SAL */
@@ -1946,8 +2053,31 @@ ia64_mca_init(void)
return;
}
- IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __FUNCTION__);
+ IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __func__);
+
+ /* Initialize the areas set aside by the OS to buffer the
+ * platform/processor error states for MCA/INIT/CMC
+ * handling.
+ */
+ ia64_log_init(SAL_INFO_TYPE_MCA);
+ ia64_log_init(SAL_INFO_TYPE_INIT);
+ ia64_log_init(SAL_INFO_TYPE_CMC);
+ ia64_log_init(SAL_INFO_TYPE_CPE);
+
+ mca_init = 1;
+ printk(KERN_INFO "MCA related initialization done\n");
+}
+
+/*
+ * These pieces cannot be done in ia64_mca_init() because it is called before
+ * early_irq_init() which would wipe out our percpu irq registrations. But we
+ * cannot leave them until ia64_mca_late_init() because by then all the other
+ * processors have been brought online and have set their own CMC vectors to
+ * point at a non-existent action. Called from arch_early_irq_init().
+ */
+void __init ia64_mca_irq_init(void)
+{
/*
* Configure the CMCI/P vector and handler. Interrupts for CMC are
* per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c).
@@ -1966,18 +2096,6 @@ ia64_mca_init(void)
/* Setup the CPEI/P handler */
register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction);
#endif
-
- /* Initialize the areas set aside by the OS to buffer the
- * platform/processor error states for MCA/INIT/CMC
- * handling.
- */
- ia64_log_init(SAL_INFO_TYPE_MCA);
- ia64_log_init(SAL_INFO_TYPE_INIT);
- ia64_log_init(SAL_INFO_TYPE_CMC);
- ia64_log_init(SAL_INFO_TYPE_CPE);
-
- mca_init = 1;
- printk(KERN_INFO "MCA related initialization done\n");
}
/*
@@ -1996,6 +2114,8 @@ ia64_mca_late_init(void)
if (!mca_init)
return 0;
+ register_hotcpu_notifier(&mca_cpu_notifier);
+
/* Setup the CMCI/P vector and handler */
init_timer(&cmc_poll_timer);
cmc_poll_timer.function = ia64_mca_cmc_poll;
@@ -2004,7 +2124,7 @@ ia64_mca_late_init(void)
cmc_polling_enabled = 0;
schedule_work(&cmc_enable_work);
- IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __FUNCTION__);
+ IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __func__);
#ifdef CONFIG_ACPI
/* Setup the CPEI/P vector and handler */
@@ -2013,7 +2133,6 @@ ia64_mca_late_init(void)
cpe_poll_timer.function = ia64_mca_cpe_poll;
{
- irq_desc_t *desc;
unsigned int irq;
if (cpe_vector >= 0) {
@@ -2021,23 +2140,22 @@ ia64_mca_late_init(void)
irq = local_vector_to_irq(cpe_vector);
if (irq > 0) {
cpe_poll_enabled = 0;
- desc = irq_desc + irq;
- desc->status |= IRQ_PER_CPU;
+ irq_set_status_flags(irq, IRQ_PER_CPU);
setup_irq(irq, &mca_cpe_irqaction);
ia64_cpe_irq = irq;
ia64_mca_register_cpev(cpe_vector);
IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n",
- __FUNCTION__);
+ __func__);
return 0;
}
printk(KERN_ERR "%s: Failed to find irq for CPE "
"interrupt handler, vector %d\n",
- __FUNCTION__, cpe_vector);
+ __func__, cpe_vector);
}
/* If platform doesn't support CPEI, get the timer going. */
if (cpe_poll_enabled) {
ia64_mca_cpe_poll(0UL);
- IA64_MCA_DEBUG("%s: CPEP setup and enabled.\n", __FUNCTION__);
+ IA64_MCA_DEBUG("%s: CPEP setup and enabled.\n", __func__);
}
}
#endif
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index 0f5965fcdf8..d5bdf9de36b 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -1,24 +1,28 @@
-//
-// assembly portion of the IA64 MCA handling
-//
-// Mods by cfleck to integrate into kernel build
-// 00/03/15 davidm Added various stop bits to get a clean compile
-//
-// 00/03/29 cfleck Added code to save INIT handoff state in pt_regs format, switch to temp
-// kstack, switch modes, jump to C INIT handler
-//
-// 02/01/04 J.Hall <jenna.s.hall@intel.com>
-// Before entering virtual mode code:
-// 1. Check for TLB CPU error
-// 2. Restore current thread pointer to kr6
-// 3. Move stack ptr 16 bytes to conform to C calling convention
-//
-// 04/11/12 Russ Anderson <rja@sgi.com>
-// Added per cpu MCA/INIT stack save areas.
-//
-// 12/08/05 Keith Owens <kaos@sgi.com>
-// Use per cpu MCA/INIT stacks for all data.
-//
+/*
+ * File: mca_asm.S
+ * Purpose: assembly portion of the IA64 MCA handling
+ *
+ * Mods by cfleck to integrate into kernel build
+ *
+ * 2000-03-15 David Mosberger-Tang <davidm@hpl.hp.com>
+ * Added various stop bits to get a clean compile
+ *
+ * 2000-03-29 Chuck Fleckenstein <cfleck@co.intel.com>
+ * Added code to save INIT handoff state in pt_regs format,
+ * switch to temp kstack, switch modes, jump to C INIT handler
+ *
+ * 2002-01-04 J.Hall <jenna.s.hall@intel.com>
+ * Before entering virtual mode code:
+ * 1. Check for TLB CPU error
+ * 2. Restore current thread pointer to kr6
+ * 3. Move stack ptr 16 bytes to conform to C calling convention
+ *
+ * 2004-11-12 Russ Anderson <rja@sgi.com>
+ * Added per cpu MCA/INIT stack save areas.
+ *
+ * 2005-12-08 Keith Owens <kaos@sgi.com>
+ * Use per cpu MCA/INIT stacks for all data.
+ */
#include <linux/threads.h>
#include <asm/asmmacro.h>
@@ -36,6 +40,7 @@
.global ia64_do_tlb_purge
.global ia64_os_mca_dispatch
+ .global ia64_os_init_on_kdump
.global ia64_os_init_dispatch_monarch
.global ia64_os_init_dispatch_slave
@@ -54,7 +59,7 @@
ia64_do_tlb_purge:
#define O(member) IA64_CPUINFO_##member##_OFFSET
- GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2
+ GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2
;;
addl r17=O(PTCE_STRIDE),r2
addl r2=O(PTCE_BASE),r2
@@ -215,8 +220,13 @@ ia64_reload_tr:
mov r20=IA64_TR_CURRENT_STACK
;;
itr.d dtr[r20]=r16
+ GET_THIS_PADDR(r2, ia64_mca_tr_reload)
+ mov r18 = 1
;;
srlz.d
+ ;;
+ st8 [r2] =r18
+ ;;
done_tlb_purge_and_reload:
@@ -290,6 +300,25 @@ END(ia64_os_mca_virtual_begin)
//StartMain////////////////////////////////////////////////////////////////////
//
+// NOP init handler for kdump. In a panic situation, we may receive an INIT
+// during the kernel transition. Since we initialize registers on leaving the
+// current kernel, the monarch/slave handlers of the current kernel can no
+// longer be called safely in virtual mode.
+// We could unregister these init handlers from SAL, but then the INIT
+// would result in a warmboot by SAL and we could not retrieve the crashdump.
+// Therefore register this NOP function with SAL, to prevent entering virtual
+// mode and the resulting warmboot by SAL.
+//
+ia64_os_init_on_kdump:
+ mov r8=r0 // IA64_INIT_RESUME
+ mov r9=r10 // SAL_GP
+ mov r22=r17 // *minstate
+ ;;
+ mov r10=r0 // return to same context
+ mov b0=r12 // SAL_CHECK return address
+ br b0 // leave through the address just loaded into b0
+
+//
// SAL to OS entry point for INIT on all processors. This has been defined for
// registration purposes with SAL as a part of ia64_mca_init. Monarch and
// slave INIT have identical processing, except for the value of the
@@ -1064,3 +1093,30 @@ GLOBAL_ENTRY(ia64_get_rnat)
mov ar.rsc=3
br.ret.sptk.many rp
END(ia64_get_rnat)
+
+
+// void ia64_set_psr_mc(void)
+//
+// Set psr.mc bit to mask MCA/INIT.
+GLOBAL_ENTRY(ia64_set_psr_mc)
+ rsm psr.i | psr.ic // disable interrupts
+ ;;
+ srlz.d
+ ;;
+ mov r14 = psr // get psr{36:35,31:0}
+ movl r15 = 1f // address of local label 1 below
+ ;;
+ dep r14 = -1, r14, PSR_MC, 1 // set psr.mc
+ ;;
+ dep r14 = -1, r14, PSR_IC, 1 // set psr.ic
+ ;;
+ dep r14 = -1, r14, PSR_BN, 1 // keep bank1 in use
+ ;;
+ mov cr.ipsr = r14 // PSR image built above, to be installed by rfi
+ mov cr.ifs = r0 // zero cr.ifs
+ mov cr.iip = r15 // resume point: label 1
+ ;;
+ rfi // switch to the staged psr and continue at cr.iip
+1:
+ br.ret.sptk.many rp // back to the caller
+END(ia64_set_psr_mc)
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index aba813c2c15..94f8bf777af 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -3,7 +3,7 @@
* Purpose: Generic MCA handling layer
*
* Copyright (C) 2004 FUJITSU LIMITED
- * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
+ * Copyright (C) 2004 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
* Copyright (C) 2005 Silicon Graphics, Inc
* Copyright (C) 2005 Keith Owens <kaos@sgi.com>
* Copyright (C) 2006 Russ Anderson <rja@sgi.com>
@@ -22,12 +22,12 @@
#include <linux/smp.h>
#include <linux/workqueue.h>
#include <linux/mm.h>
+#include <linux/slab.h>
#include <asm/delay.h>
#include <asm/machvec.h>
#include <asm/page.h>
#include <asm/ptrace.h>
-#include <asm/system.h>
#include <asm/sal.h>
#include <asm/mca.h>
@@ -158,7 +158,8 @@ mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
ia64_mlogbuf_dump();
printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, "
"iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n",
- raw_smp_processor_id(), current->pid, current->uid,
+ raw_smp_processor_id(), current->pid,
+ from_kuid(&init_user_ns, current_uid()),
iip, ipsr, paddr, current->comm);
spin_lock(&mca_bh_lock);
@@ -348,7 +349,7 @@ init_record_index_pools(void)
/* - 3 - */
slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1;
- slidx_pool.buffer = (slidx_list_t *)
+ slidx_pool.buffer =
kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL);
return slidx_pool.buffer ? 0 : -ENOMEM;
diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h
index c85e943ba5f..53b8ecb5b4b 100644
--- a/arch/ia64/kernel/mca_drv.h
+++ b/arch/ia64/kernel/mca_drv.h
@@ -3,7 +3,7 @@
* Purpose: Define helpers for Generic MCA handling
*
* Copyright (C) 2004 FUJITSU LIMITED
- * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
+ * Copyright (C) 2004 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
*/
/*
* Processor error section:
@@ -118,7 +118,5 @@ struct mca_table_entry {
extern const struct mca_table_entry *search_mca_tables (unsigned long addr);
extern int mca_recover_range(unsigned long);
-extern void ia64_mca_printk(const char * fmt, ...)
- __attribute__ ((format (printf, 1, 2)));
extern void ia64_mlogbuf_dump(void);
diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S
index 3bccb06c8d2..767ac2c20d1 100644
--- a/arch/ia64/kernel/mca_drv_asm.S
+++ b/arch/ia64/kernel/mca_drv_asm.S
@@ -3,7 +3,7 @@
* Purpose: Assembly portion of Generic MCA handling
*
* Copyright (C) 2004 FUJITSU LIMITED
- * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
+ * Copyright (C) 2004 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
*/
#include <linux/threads.h>
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index c9ac8bada78..cc82a7d744c 100644
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@@ -2,6 +2,22 @@
#include <asm/cache.h>
#include "entry.h"
+#include "paravirt_inst.h"
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+/* read ar.itc in advance, and use it before leaving bank 0 */
+#define ACCOUNT_GET_STAMP \
+(pUStk) mov.m r20=ar.itc;
+#define ACCOUNT_SYS_ENTER \
+(pUStk) br.call.spnt rp=account_sys_enter \
+ ;;
+#else
+#define ACCOUNT_GET_STAMP
+#define ACCOUNT_SYS_ENTER
+#endif
+
+.section ".data..patch.rse", "a"
+.previous
/*
* DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
@@ -28,16 +44,16 @@
* Note that psr.ic is NOT turned on by this macro. This is so that
* we can pass interruption state as arguments to a handler.
*/
-#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \
+#define IA64_NATIVE_DO_SAVE_MIN(__COVER,SAVE_IFS,EXTRA,WORKAROUND) \
mov r16=IA64_KR(CURRENT); /* M */ \
mov r27=ar.rsc; /* M */ \
mov r20=r1; /* A */ \
mov r25=ar.unat; /* M */ \
- mov r29=cr.ipsr; /* M */ \
+ MOV_FROM_IPSR(p0,r29); /* M */ \
mov r26=ar.pfs; /* I */ \
- mov r28=cr.iip; /* M */ \
+ MOV_FROM_IIP(r28); /* M */ \
mov r21=ar.fpsr; /* M */ \
- COVER; /* B;; (or nothing) */ \
+ __COVER; /* B;; (or nothing) */ \
;; \
adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \
;; \
@@ -75,6 +91,7 @@
tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \
mov r29=b0 \
;; \
+ WORKAROUND; \
adds r16=PT(R8),r1; /* initialize first base pointer */ \
adds r17=PT(R9),r1; /* initialize second base pointer */ \
(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \
@@ -122,11 +139,13 @@
;; \
.mem.offset 0,0; st8.spill [r16]=r2,16; \
.mem.offset 8,0; st8.spill [r17]=r3,16; \
+ ACCOUNT_GET_STAMP \
adds r2=IA64_PT_REGS_R16_OFFSET,r1; \
;; \
EXTRA; \
movl r1=__gp; /* establish kernel global pointer */ \
;; \
+ ACCOUNT_SYS_ENTER \
bsw.1; /* switch back to bank 1 (must be last in insn group) */ \
;;
@@ -192,6 +211,40 @@
st8 [r25]=r10; /* ar.ssd */ \
;;
-#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover, mov r30=cr.ifs,)
-#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19)
-#define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, )
+#define RSE_WORKAROUND \
+(pUStk) extr.u r17=r18,3,6; \
+(pUStk) sub r16=r18,r22; \
+[1:](pKStk) br.cond.sptk.many 1f; /* pKStk set: jump to the final 1: label, skipping the rest */ \
+ .xdata4 ".data..patch.rse",1b-. /* record the offset of the tagged branch above in .data..patch.rse */ \
+ ;; \
+ cmp.ge p6,p7 = 33,r17; \
+ ;; \
+(p6) mov r17=0x310; \
+(p7) mov r17=0x308; \
+ ;; \
+ cmp.leu p1,p0=r16,r17; \
+(p1) br.cond.sptk.many 1f; \
+ dep.z r17=r26,0,62; \
+ movl r16=2f; \
+ ;; \
+ mov ar.pfs=r17; \
+ dep r27=r0,r27,16,14; \
+ mov b0=r16; \
+ ;; \
+ br.ret.sptk b0; \
+ ;; \
+2: \
+ mov ar.rsc=r0 \
+ ;; \
+ flushrs; \
+ ;; \
+ mov ar.bspstore=r22 \
+ ;; \
+ mov r18=ar.bsp; \
+ ;; \
+1: \
+ .pred.rel "mutex", pKStk, pUStk
+
+#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(COVER, mov r30=cr.ifs, , RSE_WORKAROUND)
+#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(COVER, mov r30=cr.ifs, mov r15=r19, RSE_WORKAROUND)
+#define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, , )
diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
index 196287928ba..24603be24c1 100644
--- a/arch/ia64/kernel/module.c
+++ b/arch/ia64/kernel/module.c
@@ -135,15 +135,6 @@ static const char *reloc_name[256] = {
#undef N
-struct got_entry {
- uint64_t val;
-};
-
-struct fdesc {
- uint64_t ip;
- uint64_t gp;
-};
-
/* Opaque struct for insns, to protect against derefs. */
struct insn;
@@ -180,7 +171,8 @@ apply_imm60 (struct module *mod, struct insn *insn, uint64_t val)
return 0;
}
if (val + ((uint64_t) 1 << 59) >= (1UL << 60)) {
- printk(KERN_ERR "%s: value %ld out of IMM60 range\n", mod->name, (int64_t) val);
+ printk(KERN_ERR "%s: value %ld out of IMM60 range\n",
+ mod->name, (long) val);
return 0;
}
ia64_patch_imm60((u64) insn, val);
@@ -191,7 +183,8 @@ static int
apply_imm22 (struct module *mod, struct insn *insn, uint64_t val)
{
if (val + (1 << 21) >= (1 << 22)) {
- printk(KERN_ERR "%s: value %li out of IMM22 range\n", mod->name, (int64_t)val);
+ printk(KERN_ERR "%s: value %li out of IMM22 range\n",
+ mod->name, (long)val);
return 0;
}
ia64_patch((u64) insn, 0x01fffcfe000UL, ( ((val & 0x200000UL) << 15) /* bit 21 -> 36 */
@@ -205,7 +198,8 @@ static int
apply_imm21b (struct module *mod, struct insn *insn, uint64_t val)
{
if (val + (1 << 20) >= (1 << 21)) {
- printk(KERN_ERR "%s: value %li out of IMM21b range\n", mod->name, (int64_t)val);
+ printk(KERN_ERR "%s: value %li out of IMM21b range\n",
+ mod->name, (long)val);
return 0;
}
ia64_patch((u64) insn, 0x11ffffe000UL, ( ((val & 0x100000UL) << 16) /* bit 20 -> 36 */
@@ -310,18 +304,11 @@ plt_target (struct plt_entry *plt)
#endif /* !USE_BRL */
-void *
-module_alloc (unsigned long size)
-{
- if (!size)
- return NULL;
- return vmalloc(size);
-}
-
void
module_free (struct module *mod, void *module_region)
{
- if (mod->arch.init_unw_table && module_region == mod->module_init) {
+ if (mod && mod->arch.init_unw_table &&
+ module_region == mod->module_init) {
unw_remove_unwind_table(mod->arch.init_unw_table);
mod->arch.init_unw_table = NULL;
}
@@ -454,6 +441,14 @@ module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings,
mod->arch.opd = s;
else if (strcmp(".IA_64.unwind", secstrings + s->sh_name) == 0)
mod->arch.unwind = s;
+#ifdef CONFIG_PARAVIRT
+ else if (strcmp(".paravirt_bundles",
+ secstrings + s->sh_name) == 0)
+ mod->arch.paravirt_bundles = s;
+ else if (strcmp(".paravirt_insts",
+ secstrings + s->sh_name) == 0)
+ mod->arch.paravirt_insts = s;
+#endif
if (!mod->arch.core_plt || !mod->arch.init_plt || !mod->arch.got || !mod->arch.opd) {
printk(KERN_ERR "%s: sections missing\n", mod->name);
@@ -493,7 +488,7 @@ module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings,
mod->arch.opd->sh_addralign = 8;
mod->arch.opd->sh_size = fdescs * sizeof(struct fdesc);
DEBUGP("%s: core.plt=%lx, init.plt=%lx, got=%lx, fdesc=%lx\n",
- __FUNCTION__, mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size,
+ __func__, mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size,
mod->arch.got->sh_size, mod->arch.opd->sh_size);
return 0;
}
@@ -533,8 +528,7 @@ get_ltoff (struct module *mod, uint64_t value, int *okp)
goto found;
/* Not enough GOT entries? */
- if (e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size))
- BUG();
+ BUG_ON(e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size));
e->val = value;
++mod->arch.next_got_entry;
@@ -585,7 +579,7 @@ get_plt (struct module *mod, const struct insn *insn, uint64_t value, int *okp)
#if ARCH_MODULE_DEBUG
if (plt_target(plt) != target_ip) {
printk("%s: mistargeted PLT: wanted %lx, got %lx\n",
- __FUNCTION__, target_ip, plt_target(plt));
+ __func__, target_ip, plt_target(plt));
*okp = 0;
return 0;
}
@@ -702,8 +696,9 @@ do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend,
case RV_PCREL2:
if (r_type == R_IA64_PCREL21BI) {
if (!is_internal(mod, val)) {
- printk(KERN_ERR "%s: %s reloc against non-local symbol (%lx)\n",
- __FUNCTION__, reloc_name[r_type], val);
+ printk(KERN_ERR "%s: %s reloc against "
+ "non-local symbol (%lx)\n", __func__,
+ reloc_name[r_type], (unsigned long)val);
return -ENOEXEC;
}
format = RF_INSN21B;
@@ -737,7 +732,7 @@ do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend,
case R_IA64_LDXMOV:
if (gp_addressable(mod, val)) {
/* turn "ld8" into "mov": */
- DEBUGP("%s: patching ld8 at %p to mov\n", __FUNCTION__, location);
+ DEBUGP("%s: patching ld8 at %p to mov\n", __func__, location);
ia64_patch((u64) location, 0x1fff80fe000UL, 0x10000000000UL);
}
return 0;
@@ -771,7 +766,7 @@ do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend,
if (!ok)
return -ENOEXEC;
- DEBUGP("%s: [%p]<-%016lx = %s(%lx)\n", __FUNCTION__, location, val,
+ DEBUGP("%s: [%p]<-%016lx = %s(%lx)\n", __func__, location, val,
reloc_name[r_type] ? reloc_name[r_type] : "?", sym->st_value + addend);
switch (format) {
@@ -807,7 +802,7 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind
Elf64_Shdr *target_sec;
int ret;
- DEBUGP("%s: applying section %u (%u relocs) to %u\n", __FUNCTION__,
+ DEBUGP("%s: applying section %u (%u relocs) to %u\n", __func__,
relsec, n, sechdrs[relsec].sh_info);
target_sec = sechdrs + sechdrs[relsec].sh_info;
@@ -835,7 +830,7 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind
gp = mod->core_size / 2;
gp = (uint64_t) mod->module_core + ((gp + 7) & -8);
mod->arch.gp = gp;
- DEBUGP("%s: placing gp at 0x%lx\n", __FUNCTION__, gp);
+ DEBUGP("%s: placing gp at 0x%lx\n", __func__, gp);
}
for (i = 0; i < n; i++) {
@@ -850,14 +845,6 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind
return 0;
}
-int
-apply_relocate (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex,
- unsigned int relsec, struct module *mod)
-{
- printk(KERN_ERR "module %s: REL relocs in section %u unsupported\n", mod->name, relsec);
- return -ENOEXEC;
-}
-
/*
* Modules contain a single unwind table which covers both the core and the init text
* sections but since the two are not contiguous, we need to split this table up such that
@@ -903,7 +890,7 @@ register_unwind_table (struct module *mod)
init = start + num_core;
}
- DEBUGP("%s: name=%s, gp=%lx, num_init=%lu, num_core=%lu\n", __FUNCTION__,
+ DEBUGP("%s: name=%s, gp=%lx, num_init=%lu, num_core=%lu\n", __func__,
mod->name, mod->arch.gp, num_init, num_core);
/*
@@ -912,13 +899,13 @@ register_unwind_table (struct module *mod)
if (num_core > 0) {
mod->arch.core_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp,
core, core + num_core);
- DEBUGP("%s: core: handle=%p [%p-%p)\n", __FUNCTION__,
+ DEBUGP("%s: core: handle=%p [%p-%p)\n", __func__,
mod->arch.core_unw_table, core, core + num_core);
}
if (num_init > 0) {
mod->arch.init_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp,
init, init + num_init);
- DEBUGP("%s: init: handle=%p [%p-%p)\n", __FUNCTION__,
+ DEBUGP("%s: init: handle=%p [%p-%p)\n", __func__,
mod->arch.init_unw_table, init, init + num_init);
}
}
@@ -926,9 +913,33 @@ register_unwind_table (struct module *mod)
int
module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod)
{
- DEBUGP("%s: init: entry=%p\n", __FUNCTION__, mod->init);
+ DEBUGP("%s: init: entry=%p\n", __func__, mod->init);
if (mod->arch.unwind)
register_unwind_table(mod);
+#ifdef CONFIG_PARAVIRT
+ if (mod->arch.paravirt_bundles) {
+ struct paravirt_patch_site_bundle *start =
+ (struct paravirt_patch_site_bundle *)
+ mod->arch.paravirt_bundles->sh_addr;
+ struct paravirt_patch_site_bundle *end =
+ (struct paravirt_patch_site_bundle *)
+ (mod->arch.paravirt_bundles->sh_addr +
+ mod->arch.paravirt_bundles->sh_size);
+
+ paravirt_patch_apply_bundle(start, end);
+ }
+ if (mod->arch.paravirt_insts) {
+ struct paravirt_patch_site_inst *start =
+ (struct paravirt_patch_site_inst *)
+ mod->arch.paravirt_insts->sh_addr;
+ struct paravirt_patch_site_inst *end =
+ (struct paravirt_patch_site_inst *)
+ (mod->arch.paravirt_insts->sh_addr +
+ mod->arch.paravirt_insts->sh_size);
+
+ paravirt_patch_apply_inst(start, end);
+ }
+#endif
return 0;
}
@@ -940,14 +951,3 @@ module_arch_cleanup (struct module *mod)
if (mod->arch.core_unw_table)
unw_remove_unwind_table(mod->arch.core_unw_table);
}
-
-#ifdef CONFIG_SMP
-void
-percpu_modcopy (void *pcpudst, const void *src, unsigned long size)
-{
- unsigned int i;
- for_each_possible_cpu(i) {
- memcpy(pcpudst + __per_cpu_offset[i], src, size);
- }
-}
-#endif /* CONFIG_SMP */
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 2fdbd5c3f21..c430f9198d1 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -5,66 +5,29 @@
#include <linux/pci.h>
#include <linux/irq.h>
#include <linux/msi.h>
+#include <linux/dmar.h>
#include <asm/smp.h>
-
-/*
- * Shifts for APIC-based data
- */
-
-#define MSI_DATA_VECTOR_SHIFT 0
-#define MSI_DATA_VECTOR(v) (((u8)v) << MSI_DATA_VECTOR_SHIFT)
-#define MSI_DATA_VECTOR_MASK 0xffffff00
-
-#define MSI_DATA_DELIVERY_SHIFT 8
-#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_SHIFT)
-#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_SHIFT)
-
-#define MSI_DATA_LEVEL_SHIFT 14
-#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT)
-#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT)
-
-#define MSI_DATA_TRIGGER_SHIFT 15
-#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT)
-#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT)
-
-/*
- * Shift/mask fields for APIC-based bus address
- */
-
-#define MSI_TARGET_CPU_SHIFT 4
-#define MSI_ADDR_HEADER 0xfee00000
-
-#define MSI_ADDR_DESTID_MASK 0xfff0000f
-#define MSI_ADDR_DESTID_CPU(cpu) ((cpu) << MSI_TARGET_CPU_SHIFT)
-
-#define MSI_ADDR_DESTMODE_SHIFT 2
-#define MSI_ADDR_DESTMODE_PHYS (0 << MSI_ADDR_DESTMODE_SHIFT)
-#define MSI_ADDR_DESTMODE_LOGIC (1 << MSI_ADDR_DESTMODE_SHIFT)
-
-#define MSI_ADDR_REDIRECTION_SHIFT 3
-#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT)
-#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT)
+#include <asm/msidef.h>
static struct irq_chip ia64_msi_chip;
#ifdef CONFIG_SMP
-static void ia64_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
+static int ia64_set_msi_irq_affinity(struct irq_data *idata,
+ const cpumask_t *cpu_mask, bool force)
{
struct msi_msg msg;
u32 addr, data;
- int cpu = first_cpu(cpu_mask);
-
- if (!cpu_online(cpu))
- return;
+ int cpu = cpumask_first_and(cpu_mask, cpu_online_mask);
+ unsigned int irq = idata->irq;
- if (reassign_irq_vector(irq, cpu))
- return;
+ if (irq_prepare_move(irq, cpu))
+ return -1;
- read_msi_msg(irq, &msg);
+ get_cached_msi_msg(irq, &msg);
addr = msg.address_lo;
- addr &= MSI_ADDR_DESTID_MASK;
- addr |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
+ addr &= MSI_ADDR_DEST_ID_MASK;
+ addr |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
msg.address_lo = addr;
data = msg.data;
@@ -73,7 +36,9 @@ static void ia64_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
msg.data = data;
write_msi_msg(irq, &msg);
- irq_desc[irq].affinity = cpumask_of_cpu(cpu);
+ cpumask_copy(idata->affinity, cpumask_of(cpu));
+
+ return 0;
}
#endif /* CONFIG_SMP */
@@ -88,17 +53,17 @@ int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
if (irq < 0)
return irq;
- set_irq_msi(irq, desc);
- cpus_and(mask, irq_to_domain(irq), cpu_online_map);
+ irq_set_msi_desc(irq, desc);
+ cpumask_and(&mask, &(irq_to_domain(irq)), cpu_online_mask);
dest_phys_id = cpu_physical_id(first_cpu(mask));
vector = irq_to_vector(irq);
msg.address_hi = 0;
msg.address_lo =
MSI_ADDR_HEADER |
- MSI_ADDR_DESTMODE_PHYS |
+ MSI_ADDR_DEST_MODE_PHYS |
MSI_ADDR_REDIRECTION_CPU |
- MSI_ADDR_DESTID_CPU(dest_phys_id);
+ MSI_ADDR_DEST_ID_CPU(dest_phys_id);
msg.data =
MSI_DATA_TRIGGER_EDGE |
@@ -107,9 +72,9 @@ int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
MSI_DATA_VECTOR(vector);
write_msi_msg(irq, &msg);
- set_irq_chip_and_handler(irq, &ia64_msi_chip, handle_edge_irq);
+ irq_set_chip_and_handler(irq, &ia64_msi_chip, handle_edge_irq);
- return irq;
+ return 0;
}
void ia64_teardown_msi_irq(unsigned int irq)
@@ -117,15 +82,16 @@ void ia64_teardown_msi_irq(unsigned int irq)
destroy_irq(irq);
}
-static void ia64_ack_msi_irq(unsigned int irq)
+static void ia64_ack_msi_irq(struct irq_data *data)
{
- move_native_irq(irq);
+ irq_complete_move(data->irq);
+ irq_move_irq(data);
ia64_eoi();
}
-static int ia64_msi_retrigger_irq(unsigned int irq)
+static int ia64_msi_retrigger_irq(struct irq_data *data)
{
- unsigned int vector = irq_to_vector(irq);
+ unsigned int vector = irq_to_vector(data->irq);
ia64_resend_irq(vector);
return 1;
@@ -135,14 +101,14 @@ static int ia64_msi_retrigger_irq(unsigned int irq)
* Generic ops used on most IA64 platforms.
*/
static struct irq_chip ia64_msi_chip = {
- .name = "PCI-MSI",
- .mask = mask_msi_irq,
- .unmask = unmask_msi_irq,
- .ack = ia64_ack_msi_irq,
+ .name = "PCI-MSI",
+ .irq_mask = mask_msi_irq,
+ .irq_unmask = unmask_msi_irq,
+ .irq_ack = ia64_ack_msi_irq,
#ifdef CONFIG_SMP
- .set_affinity = ia64_set_msi_irq_affinity,
+ .irq_set_affinity = ia64_set_msi_irq_affinity,
#endif
- .retrigger = ia64_msi_retrigger_irq,
+ .irq_retrigger = ia64_msi_retrigger_irq,
};
@@ -161,3 +127,82 @@ void arch_teardown_msi_irq(unsigned int irq)
return ia64_teardown_msi_irq(irq);
}
+
+#ifdef CONFIG_INTEL_IOMMU
+#ifdef CONFIG_SMP
+/*
+ * Retarget a DMAR MSI interrupt.
+ *
+ * Picks the first online CPU in @mask, asks irq_prepare_move() to prepare
+ * the move, then rewrites the vector and destination-ID fields of the DMAR
+ * MSI message and records @mask as the irq's affinity.
+ * Returns 0 on success, -1 if irq_prepare_move() fails. @force is unused.
+ *
+ * The MSI_*_MASK constants are "keep" masks here (the set bits are the ones
+ * to preserve), which is how ia64_set_msi_irq_affinity() in this file uses
+ * them, so they are ANDed in directly. Complementing them would wipe the
+ * fields meant to survive and leave the stale vector/destination bits in
+ * place to be ORed with the new ones.
+ * NOTE(review): confirm against the definitions in <asm/msidef.h>.
+ */
+static int dmar_msi_set_affinity(struct irq_data *data,
+ const struct cpumask *mask, bool force)
+{
+ unsigned int irq = data->irq;
+ struct irq_cfg *cfg = irq_cfg + irq;
+ struct msi_msg msg;
+ int cpu = cpumask_first_and(mask, cpu_online_mask);
+
+ if (irq_prepare_move(irq, cpu))
+ return -1;
+
+ dmar_msi_read(irq, &msg);
+
+ /* drop the old vector, keep every other data bit */
+ msg.data &= MSI_DATA_VECTOR_MASK;
+ msg.data |= MSI_DATA_VECTOR(cfg->vector);
+ /* drop the old destination ID, keep the rest of the address */
+ msg.address_lo &= MSI_ADDR_DEST_ID_MASK;
+ msg.address_lo |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
+
+ dmar_msi_write(irq, &msg);
+ cpumask_copy(data->affinity, mask);
+
+ return 0;
+}
+#endif /* CONFIG_SMP */
+
+static struct irq_chip dmar_msi_type = {
+ .name = "DMAR_MSI",
+ .irq_unmask = dmar_msi_unmask,
+ .irq_mask = dmar_msi_mask,
+ .irq_ack = ia64_ack_msi_irq, /* same ack handler as ia64_msi_chip */
+#ifdef CONFIG_SMP
+ .irq_set_affinity = dmar_msi_set_affinity, /* only built under CONFIG_SMP */
+#endif
+ .irq_retrigger = ia64_msi_retrigger_irq, /* same retrigger handler as ia64_msi_chip */
+};
+
+/*
+ * Build the MSI message for @irq into @msg.
+ *
+ * The destination is the physical id of the first CPU that is both in the
+ * irq's domain and online; the data word carries the irq's configured
+ * vector. @pdev is not consulted. Always returns 0.
+ */
+static int
+msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+{
+ cpumask_t online_targets;
+ unsigned phys_id;
+
+ cpumask_and(&online_targets, &(irq_to_domain(irq)), cpu_online_mask);
+ phys_id = cpu_physical_id(first_cpu(online_targets));
+
+ msg->address_hi = 0;
+ msg->address_lo = MSI_ADDR_HEADER | MSI_ADDR_DEST_MODE_PHYS |
+ MSI_ADDR_REDIRECTION_CPU | MSI_ADDR_DEST_ID_CPU(phys_id);
+
+ msg->data = MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT |
+ MSI_DATA_DELIVERY_FIXED | MSI_DATA_VECTOR(irq_cfg[irq].vector);
+
+ return 0;
+}
+
+/*
+ * Program the DMAR MSI for @irq: compose the message, write it out and
+ * install dmar_msi_type with the edge flow handler.
+ * Returns 0 on success or the negative value from msi_compose_msg().
+ */
+int arch_setup_dmar_msi(unsigned int irq)
+{
+ struct msi_msg msg;
+ int err = msi_compose_msg(NULL, irq, &msg);
+
+ if (err < 0)
+ return err;
+
+ dmar_msi_write(irq, &msg);
+ irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, "edge");
+
+ return 0;
+}
+#endif /* CONFIG_INTEL_IOMMU */
+
diff --git a/arch/ia64/kernel/nr-irqs.c b/arch/ia64/kernel/nr-irqs.c
new file mode 100644
index 00000000000..f6769cd54bd
--- /dev/null
+++ b/arch/ia64/kernel/nr-irqs.c
@@ -0,0 +1,21 @@
+/*
+ * calculate
+ * NR_IRQS = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, FOO_NR_IRQS...)
+ * depending on config.
+ * This must be calculated before processing asm-offsets.c.
+ */
+
+#define ASM_OFFSETS_C 1
+
+#include <linux/kbuild.h>
+#include <linux/threads.h>
+#include <asm/native/irq.h>
+
+void foo(void)
+{
+ union paravirt_nr_irqs_max { /* one char array per candidate count; a union is as large as its largest member */
+ char ia64_native_nr_irqs[IA64_NATIVE_NR_IRQS];
+ };
+
+ DEFINE(NR_IRQS, sizeof (union paravirt_nr_irqs_max)); /* emit NR_IRQS as the size of that union */
+}
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index a78b45f5fe2..d288cde9360 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -30,7 +30,7 @@ EXPORT_SYMBOL(cpu_to_node_map);
cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
EXPORT_SYMBOL(node_to_cpu_mask);
-void __cpuinit map_cpu_to_node(int cpu, int nid)
+void map_cpu_to_node(int cpu, int nid)
{
int oldnid;
if (nid < 0) { /* just initialize by zero */
@@ -51,7 +51,7 @@ void __cpuinit map_cpu_to_node(int cpu, int nid)
return;
}
-void __cpuinit unmap_cpu_from_node(int cpu, int nid)
+void unmap_cpu_from_node(int cpu, int nid)
{
WARN_ON(!cpu_isset(cpu, node_to_cpu_mask[nid]));
WARN_ON(cpu_to_node_map[cpu] != nid);
@@ -73,7 +73,7 @@ void __init build_cpu_to_node_map(void)
for(node=0; node < MAX_NUMNODES; node++)
cpus_clear(node_to_cpu_mask[node]);
- for(cpu = 0; cpu < NR_CPUS; ++cpu) {
+ for_each_possible_early_cpu(cpu) {
node = -1;
for (i = 0; i < NR_CPUS; ++i)
if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
index 85829e27785..c39c3cd3ac3 100644
--- a/arch/ia64/kernel/palinfo.c
+++ b/arch/ia64/kernel/palinfo.c
@@ -22,6 +22,7 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/efi.h>
@@ -41,7 +42,7 @@ MODULE_LICENSE("GPL");
#define PALINFO_VERSION "0.5"
-typedef int (*palinfo_func_t)(char*);
+typedef int (*palinfo_func_t)(struct seq_file *);
typedef struct {
const char *name; /* name of the proc entry */
@@ -54,7 +55,7 @@ typedef struct {
* A bunch of string array to get pretty printing
*/
-static char *cache_types[] = {
+static const char *cache_types[] = {
"", /* not used */
"Instruction",
"Data",
@@ -122,19 +123,16 @@ static const char *mem_attrib[]={
* - a pointer to the end of the buffer
*
*/
-static char *
-bitvector_process(char *p, u64 vector)
+static void bitvector_process(struct seq_file *m, u64 vector)
{
int i,j;
- const char *units[]={ "", "K", "M", "G", "T" };
+ static const char *units[]={ "", "K", "M", "G", "T" };
for (i=0, j=0; i < 64; i++ , j=i/10) {
- if (vector & 0x1) {
- p += sprintf(p, "%d%s ", 1 << (i-j*10), units[j]);
- }
+ if (vector & 0x1)
+ seq_printf(m, "%d%s ", 1 << (i-j*10), units[j]);
vector >>= 1;
}
- return p;
}
/*
@@ -149,8 +147,7 @@ bitvector_process(char *p, u64 vector)
* - a pointer to the end of the buffer
*
*/
-static char *
-bitregister_process(char *p, u64 *reg_info, int max)
+static void bitregister_process(struct seq_file *m, u64 *reg_info, int max)
{
int i, begin, skip = 0;
u64 value = reg_info[0];
@@ -163,9 +160,9 @@ bitregister_process(char *p, u64 *reg_info, int max)
if ((value & 0x1) == 0 && skip == 0) {
if (begin <= i - 2)
- p += sprintf(p, "%d-%d ", begin, i-1);
+ seq_printf(m, "%d-%d ", begin, i-1);
else
- p += sprintf(p, "%d ", i-1);
+ seq_printf(m, "%d ", i-1);
skip = 1;
begin = -1;
} else if ((value & 0x1) && skip == 1) {
@@ -176,19 +173,15 @@ bitregister_process(char *p, u64 *reg_info, int max)
}
if (begin > -1) {
if (begin < 127)
- p += sprintf(p, "%d-127", begin);
+ seq_printf(m, "%d-127", begin);
else
- p += sprintf(p, "127");
+ seq_puts(m, "127");
}
-
- return p;
}
-static int
-power_info(char *page)
+static int power_info(struct seq_file *m)
{
s64 status;
- char *p = page;
u64 halt_info_buffer[8];
pal_power_mgmt_info_u_t *halt_info =(pal_power_mgmt_info_u_t *)halt_info_buffer;
int i;
@@ -198,103 +191,103 @@ power_info(char *page)
for (i=0; i < 8 ; i++ ) {
if (halt_info[i].pal_power_mgmt_info_s.im == 1) {
- p += sprintf(p, "Power level %d:\n"
- "\tentry_latency : %d cycles\n"
- "\texit_latency : %d cycles\n"
- "\tpower consumption : %d mW\n"
- "\tCache+TLB coherency : %s\n", i,
- halt_info[i].pal_power_mgmt_info_s.entry_latency,
- halt_info[i].pal_power_mgmt_info_s.exit_latency,
- halt_info[i].pal_power_mgmt_info_s.power_consumption,
- halt_info[i].pal_power_mgmt_info_s.co ? "Yes" : "No");
+ seq_printf(m,
+ "Power level %d:\n"
+ "\tentry_latency : %d cycles\n"
+ "\texit_latency : %d cycles\n"
+ "\tpower consumption : %d mW\n"
+ "\tCache+TLB coherency : %s\n", i,
+ halt_info[i].pal_power_mgmt_info_s.entry_latency,
+ halt_info[i].pal_power_mgmt_info_s.exit_latency,
+ halt_info[i].pal_power_mgmt_info_s.power_consumption,
+ halt_info[i].pal_power_mgmt_info_s.co ? "Yes" : "No");
} else {
- p += sprintf(p,"Power level %d: not implemented\n",i);
+ seq_printf(m,"Power level %d: not implemented\n", i);
}
}
- return p - page;
+ return 0;
}
-static int
-cache_info(char *page)
+static int cache_info(struct seq_file *m)
{
- char *p = page;
- u64 i, levels, unique_caches;
+ unsigned long i, levels, unique_caches;
pal_cache_config_info_t cci;
int j, k;
- s64 status;
+ long status;
if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) {
printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status);
return 0;
}
- p += sprintf(p, "Cache levels : %ld\nUnique caches : %ld\n\n", levels, unique_caches);
+ seq_printf(m, "Cache levels : %ld\nUnique caches : %ld\n\n",
+ levels, unique_caches);
for (i=0; i < levels; i++) {
-
for (j=2; j >0 ; j--) {
-
/* even without unification some level may not be present */
- if ((status=ia64_pal_cache_config_info(i,j, &cci)) != 0) {
+ if ((status=ia64_pal_cache_config_info(i,j, &cci)) != 0)
continue;
- }
- p += sprintf(p,
- "%s Cache level %lu:\n"
- "\tSize : %u bytes\n"
- "\tAttributes : ",
- cache_types[j+cci.pcci_unified], i+1,
- cci.pcci_cache_size);
-
- if (cci.pcci_unified) p += sprintf(p, "Unified ");
-
- p += sprintf(p, "%s\n", cache_mattrib[cci.pcci_cache_attr]);
-
- p += sprintf(p,
- "\tAssociativity : %d\n"
- "\tLine size : %d bytes\n"
- "\tStride : %d bytes\n",
- cci.pcci_assoc, 1<<cci.pcci_line_size, 1<<cci.pcci_stride);
+
+ seq_printf(m,
+ "%s Cache level %lu:\n"
+ "\tSize : %u bytes\n"
+ "\tAttributes : ",
+ cache_types[j+cci.pcci_unified], i+1,
+ cci.pcci_cache_size);
+
+ if (cci.pcci_unified)
+ seq_puts(m, "Unified ");
+
+ seq_printf(m, "%s\n", cache_mattrib[cci.pcci_cache_attr]);
+
+ seq_printf(m,
+ "\tAssociativity : %d\n"
+ "\tLine size : %d bytes\n"
+ "\tStride : %d bytes\n",
+ cci.pcci_assoc,
+ 1<<cci.pcci_line_size,
+ 1<<cci.pcci_stride);
if (j == 1)
- p += sprintf(p, "\tStore latency : N/A\n");
+ seq_puts(m, "\tStore latency : N/A\n");
else
- p += sprintf(p, "\tStore latency : %d cycle(s)\n",
- cci.pcci_st_latency);
+ seq_printf(m, "\tStore latency : %d cycle(s)\n",
+ cci.pcci_st_latency);
- p += sprintf(p,
- "\tLoad latency : %d cycle(s)\n"
- "\tStore hints : ", cci.pcci_ld_latency);
+ seq_printf(m,
+ "\tLoad latency : %d cycle(s)\n"
+ "\tStore hints : ", cci.pcci_ld_latency);
for(k=0; k < 8; k++ ) {
if ( cci.pcci_st_hints & 0x1)
- p += sprintf(p, "[%s]", cache_st_hints[k]);
+ seq_printf(m, "[%s]", cache_st_hints[k]);
cci.pcci_st_hints >>=1;
}
- p += sprintf(p, "\n\tLoad hints : ");
+ seq_puts(m, "\n\tLoad hints : ");
for(k=0; k < 8; k++ ) {
if (cci.pcci_ld_hints & 0x1)
- p += sprintf(p, "[%s]", cache_ld_hints[k]);
+ seq_printf(m, "[%s]", cache_ld_hints[k]);
cci.pcci_ld_hints >>=1;
}
- p += sprintf(p,
- "\n\tAlias boundary : %d byte(s)\n"
- "\tTag LSB : %d\n"
- "\tTag MSB : %d\n",
- 1<<cci.pcci_alias_boundary, cci.pcci_tag_lsb,
- cci.pcci_tag_msb);
+ seq_printf(m,
+ "\n\tAlias boundary : %d byte(s)\n"
+ "\tTag LSB : %d\n"
+ "\tTag MSB : %d\n",
+ 1<<cci.pcci_alias_boundary, cci.pcci_tag_lsb,
+ cci.pcci_tag_msb);
/* when unified, data(j=2) is enough */
- if (cci.pcci_unified) break;
+ if (cci.pcci_unified)
+ break;
}
}
- return p - page;
+ return 0;
}
-static int
-vm_info(char *page)
+static int vm_info(struct seq_file *m)
{
- char *p = page;
u64 tr_pages =0, vw_pages=0, tc_pages;
u64 attrib;
pal_vm_info_1_u_t vm_info_1;
@@ -303,13 +296,13 @@ vm_info(char *page)
ia64_ptce_info_t ptce;
const char *sep;
int i, j;
- s64 status;
+ long status;
if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) {
printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
} else {
- p += sprintf(p,
+ seq_printf(m,
"Physical Address Space : %d bits\n"
"Virtual Address Space : %d bits\n"
"Protection Key Registers(PKR) : %d\n"
@@ -324,49 +317,49 @@ vm_info(char *page)
vm_info_1.pal_vm_info_1_s.hash_tag_id,
vm_info_2.pal_vm_info_2_s.rid_size);
if (vm_info_2.pal_vm_info_2_s.max_purges == PAL_MAX_PURGES)
- p += sprintf(p, "unlimited\n");
+ seq_puts(m, "unlimited\n");
else
- p += sprintf(p, "%d\n",
+ seq_printf(m, "%d\n",
vm_info_2.pal_vm_info_2_s.max_purges ?
vm_info_2.pal_vm_info_2_s.max_purges : 1);
}
if (ia64_pal_mem_attrib(&attrib) == 0) {
- p += sprintf(p, "Supported memory attributes : ");
+ seq_puts(m, "Supported memory attributes : ");
sep = "";
for (i = 0; i < 8; i++) {
if (attrib & (1 << i)) {
- p += sprintf(p, "%s%s", sep, mem_attrib[i]);
+ seq_printf(m, "%s%s", sep, mem_attrib[i]);
sep = ", ";
}
}
- p += sprintf(p, "\n");
+ seq_putc(m, '\n');
}
if ((status = ia64_pal_vm_page_size(&tr_pages, &vw_pages)) !=0) {
printk(KERN_ERR "ia64_pal_vm_page_size=%ld\n", status);
} else {
- p += sprintf(p,
- "\nTLB walker : %simplemented\n"
- "Number of DTR : %d\n"
- "Number of ITR : %d\n"
- "TLB insertable page sizes : ",
- vm_info_1.pal_vm_info_1_s.vw ? "" : "not ",
- vm_info_1.pal_vm_info_1_s.max_dtr_entry+1,
- vm_info_1.pal_vm_info_1_s.max_itr_entry+1);
-
+ seq_printf(m,
+ "\nTLB walker : %simplemented\n"
+ "Number of DTR : %d\n"
+ "Number of ITR : %d\n"
+ "TLB insertable page sizes : ",
+ vm_info_1.pal_vm_info_1_s.vw ? "" : "not ",
+ vm_info_1.pal_vm_info_1_s.max_dtr_entry+1,
+ vm_info_1.pal_vm_info_1_s.max_itr_entry+1);
- p = bitvector_process(p, tr_pages);
+ bitvector_process(m, tr_pages);
- p += sprintf(p, "\nTLB purgeable page sizes : ");
+ seq_puts(m, "\nTLB purgeable page sizes : ");
- p = bitvector_process(p, vw_pages);
+ bitvector_process(m, vw_pages);
}
- if ((status=ia64_get_ptce(&ptce)) != 0) {
+
+ if ((status = ia64_get_ptce(&ptce)) != 0) {
printk(KERN_ERR "ia64_get_ptce=%ld\n", status);
} else {
- p += sprintf(p,
+ seq_printf(m,
"\nPurge base address : 0x%016lx\n"
"Purge outer loop count : %d\n"
"Purge inner loop count : %d\n"
@@ -375,7 +368,7 @@ vm_info(char *page)
ptce.base, ptce.count[0], ptce.count[1],
ptce.stride[0], ptce.stride[1]);
- p += sprintf(p,
+ seq_printf(m,
"TC Levels : %d\n"
"Unique TC(s) : %d\n",
vm_info_1.pal_vm_info_1_s.num_tc_levels,
@@ -385,13 +378,11 @@ vm_info(char *page)
for (j=2; j>0 ; j--) {
tc_pages = 0; /* just in case */
-
/* even without unification, some levels may not be present */
- if ((status=ia64_pal_vm_info(i,j, &tc_info, &tc_pages)) != 0) {
+ if ((status=ia64_pal_vm_info(i,j, &tc_info, &tc_pages)) != 0)
continue;
- }
- p += sprintf(p,
+ seq_printf(m,
"\n%s Translation Cache Level %d:\n"
"\tHash sets : %d\n"
"\tAssociativity : %d\n"
@@ -403,15 +394,15 @@ vm_info(char *page)
tc_info.tc_num_entries);
if (tc_info.tc_pf)
- p += sprintf(p, "PreferredPageSizeOptimized ");
+ seq_puts(m, "PreferredPageSizeOptimized ");
if (tc_info.tc_unified)
- p += sprintf(p, "Unified ");
+ seq_puts(m, "Unified ");
if (tc_info.tc_reduce_tr)
- p += sprintf(p, "TCReduction");
+ seq_puts(m, "TCReduction");
- p += sprintf(p, "\n\tSupported page sizes: ");
+ seq_puts(m, "\n\tSupported page sizes: ");
- p = bitvector_process(p, tc_pages);
+ bitvector_process(m, tc_pages);
/* when unified, data (j=2) is enough */
if (tc_info.tc_unified)
@@ -419,22 +410,20 @@ vm_info(char *page)
}
}
}
- p += sprintf(p, "\n");
- return p - page;
+ seq_putc(m, '\n');
+ return 0;
}
-static int
-register_info(char *page)
+static int register_info(struct seq_file *m)
{
- char *p = page;
u64 reg_info[2];
u64 info;
- u64 phys_stacked;
+ unsigned long phys_stacked;
pal_hints_u_t hints;
- u64 iregs, dregs;
- char *info_type[]={
+ unsigned long iregs, dregs;
+ static const char * const info_type[] = {
"Implemented AR(s)",
"AR(s) with read side-effects",
"Implemented CR(s)",
@@ -442,35 +431,31 @@ register_info(char *page)
};
for(info=0; info < 4; info++) {
-
- if (ia64_pal_register_info(info, &reg_info[0], &reg_info[1]) != 0) return 0;
-
- p += sprintf(p, "%-32s : ", info_type[info]);
-
- p = bitregister_process(p, reg_info, 128);
-
- p += sprintf(p, "\n");
+ if (ia64_pal_register_info(info, &reg_info[0], &reg_info[1]) != 0)
+ return 0;
+ seq_printf(m, "%-32s : ", info_type[info]);
+ bitregister_process(m, reg_info, 128);
+ seq_putc(m, '\n');
}
- if (ia64_pal_rse_info(&phys_stacked, &hints) == 0) {
+ if (ia64_pal_rse_info(&phys_stacked, &hints) == 0)
+ seq_printf(m,
+ "RSE stacked physical registers : %ld\n"
+ "RSE load/store hints : %ld (%s)\n",
+ phys_stacked, hints.ph_data,
+ hints.ph_data < RSE_HINTS_COUNT ? rse_hints[hints.ph_data]: "(??)");
- p += sprintf(p,
- "RSE stacked physical registers : %ld\n"
- "RSE load/store hints : %ld (%s)\n",
- phys_stacked, hints.ph_data,
- hints.ph_data < RSE_HINTS_COUNT ? rse_hints[hints.ph_data]: "(??)");
- }
if (ia64_pal_debug_info(&iregs, &dregs))
return 0;
- p += sprintf(p,
- "Instruction debug register pairs : %ld\n"
- "Data debug register pairs : %ld\n", iregs, dregs);
+ seq_printf(m,
+ "Instruction debug register pairs : %ld\n"
+ "Data debug register pairs : %ld\n", iregs, dregs);
- return p - page;
+ return 0;
}
-static const char *proc_features[]={
+static const char *const proc_features_0[]={ /* Feature set 0 */
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
@@ -502,29 +487,91 @@ static const char *proc_features[]={
"Enable BERR promotion"
};
+static const char *const proc_features_16[]={ /* Feature set 16 */
+ "Disable ETM",
+ "Enable ETM",
+ "Enable MCA on half-way timer",
+ "Enable snoop WC",
+ NULL,
+ "Enable Fast Deferral",
+ "Disable MCA on memory aliasing",
+ "Enable RSB",
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ "DP system processor",
+ "Low Voltage",
+ "HT supported",
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL
+};
-static int
-processor_info(char *page)
+static const char *const *const proc_features[]={
+ proc_features_0,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ proc_features_16,
+ NULL, NULL, NULL, NULL,
+};
+
+static void feature_set_info(struct seq_file *m, u64 avail, u64 status, u64 control,
+ unsigned long set)
{
- char *p = page;
- const char **v = proc_features;
- u64 avail=1, status=1, control=1;
+ const char *const *vf, *const *v;
int i;
- s64 ret;
- if ((ret=ia64_pal_proc_get_features(&avail, &status, &control)) != 0) return 0;
+ vf = v = proc_features[set];
+ for(i=0; i < 64; i++, avail >>=1, status >>=1, control >>=1) {
- for(i=0; i < 64; i++, v++,avail >>=1, status >>=1, control >>=1) {
- if ( ! *v ) continue;
- p += sprintf(p, "%-40s : %s%s %s\n", *v,
- avail & 0x1 ? "" : "NotImpl",
- avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "",
- avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): "");
+ if (!(control)) /* No remaining bits set */
+ break;
+ if (!(avail & 0x1)) /* Print only bits that are available */
+ continue;
+ if (vf)
+ v = vf + i;
+ if ( v && *v ) {
+ seq_printf(m, "%-40s : %s %s\n", *v,
+ avail & 0x1 ? (status & 0x1 ?
+ "On " : "Off"): "",
+ avail & 0x1 ? (control & 0x1 ?
+ "Ctrl" : "NoCtrl"): "");
+ } else {
+ seq_printf(m, "Feature set %2ld bit %2d\t\t\t"
+ " : %s %s\n",
+ set, i,
+ avail & 0x1 ? (status & 0x1 ?
+ "On " : "Off"): "",
+ avail & 0x1 ? (control & 0x1 ?
+ "Ctrl" : "NoCtrl"): "");
+ }
}
- return p - page;
}
-static const char *bus_features[]={
+static int processor_info(struct seq_file *m)
+{
+ u64 avail=1, status=1, control=1, feature_set=0;
+ s64 ret;
+
+ do {
+ ret = ia64_pal_proc_get_features(&avail, &status, &control,
+ feature_set);
+ if (ret < 0)
+ return 0;
+
+ if (ret == 1) {
+ feature_set++;
+ continue;
+ }
+
+ feature_set_info(m, avail, status, control, feature_set);
+ feature_set++;
+ } while(1);
+
+ return 0;
+}
+
+static const char *const bus_features[]={
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
@@ -550,162 +597,155 @@ static const char *bus_features[]={
};
-static int
-bus_info(char *page)
+static int bus_info(struct seq_file *m)
{
- char *p = page;
- const char **v = bus_features;
+ const char *const *v = bus_features;
pal_bus_features_u_t av, st, ct;
u64 avail, status, control;
int i;
s64 ret;
- if ((ret=ia64_pal_bus_get_features(&av, &st, &ct)) != 0) return 0;
+ if ((ret=ia64_pal_bus_get_features(&av, &st, &ct)) != 0)
+ return 0;
avail = av.pal_bus_features_val;
status = st.pal_bus_features_val;
control = ct.pal_bus_features_val;
for(i=0; i < 64; i++, v++, avail >>=1, status >>=1, control >>=1) {
- if ( ! *v ) continue;
- p += sprintf(p, "%-48s : %s%s %s\n", *v,
- avail & 0x1 ? "" : "NotImpl",
- avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "",
- avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): "");
+ if ( ! *v )
+ continue;
+ seq_printf(m, "%-48s : %s%s %s\n", *v,
+ avail & 0x1 ? "" : "NotImpl",
+ avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "",
+ avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): "");
}
- return p - page;
+ return 0;
}
-static int
-version_info(char *page)
+static int version_info(struct seq_file *m)
{
pal_version_u_t min_ver, cur_ver;
- char *p = page;
if (ia64_pal_version(&min_ver, &cur_ver) != 0)
return 0;
- p += sprintf(p,
- "PAL_vendor : 0x%02x (min=0x%02x)\n"
- "PAL_A : %02x.%02x (min=%02x.%02x)\n"
- "PAL_B : %02x.%02x (min=%02x.%02x)\n",
- cur_ver.pal_version_s.pv_pal_vendor,
- min_ver.pal_version_s.pv_pal_vendor,
- cur_ver.pal_version_s.pv_pal_a_model,
- cur_ver.pal_version_s.pv_pal_a_rev,
- min_ver.pal_version_s.pv_pal_a_model,
- min_ver.pal_version_s.pv_pal_a_rev,
- cur_ver.pal_version_s.pv_pal_b_model,
- cur_ver.pal_version_s.pv_pal_b_rev,
- min_ver.pal_version_s.pv_pal_b_model,
- min_ver.pal_version_s.pv_pal_b_rev);
- return p - page;
+ seq_printf(m,
+ "PAL_vendor : 0x%02x (min=0x%02x)\n"
+ "PAL_A : %02x.%02x (min=%02x.%02x)\n"
+ "PAL_B : %02x.%02x (min=%02x.%02x)\n",
+ cur_ver.pal_version_s.pv_pal_vendor,
+ min_ver.pal_version_s.pv_pal_vendor,
+ cur_ver.pal_version_s.pv_pal_a_model,
+ cur_ver.pal_version_s.pv_pal_a_rev,
+ min_ver.pal_version_s.pv_pal_a_model,
+ min_ver.pal_version_s.pv_pal_a_rev,
+ cur_ver.pal_version_s.pv_pal_b_model,
+ cur_ver.pal_version_s.pv_pal_b_rev,
+ min_ver.pal_version_s.pv_pal_b_model,
+ min_ver.pal_version_s.pv_pal_b_rev);
+ return 0;
}
-static int
-perfmon_info(char *page)
+static int perfmon_info(struct seq_file *m)
{
- char *p = page;
u64 pm_buffer[16];
pal_perf_mon_info_u_t pm_info;
- if (ia64_pal_perf_mon_info(pm_buffer, &pm_info) != 0) return 0;
-
- p += sprintf(p,
- "PMC/PMD pairs : %d\n"
- "Counter width : %d bits\n"
- "Cycle event number : %d\n"
- "Retired event number : %d\n"
- "Implemented PMC : ",
- pm_info.pal_perf_mon_info_s.generic, pm_info.pal_perf_mon_info_s.width,
- pm_info.pal_perf_mon_info_s.cycles, pm_info.pal_perf_mon_info_s.retired);
+ if (ia64_pal_perf_mon_info(pm_buffer, &pm_info) != 0)
+ return 0;
- p = bitregister_process(p, pm_buffer, 256);
- p += sprintf(p, "\nImplemented PMD : ");
- p = bitregister_process(p, pm_buffer+4, 256);
- p += sprintf(p, "\nCycles count capable : ");
- p = bitregister_process(p, pm_buffer+8, 256);
- p += sprintf(p, "\nRetired bundles count capable : ");
+ seq_printf(m,
+ "PMC/PMD pairs : %d\n"
+ "Counter width : %d bits\n"
+ "Cycle event number : %d\n"
+ "Retired event number : %d\n"
+ "Implemented PMC : ",
+ pm_info.pal_perf_mon_info_s.generic,
+ pm_info.pal_perf_mon_info_s.width,
+ pm_info.pal_perf_mon_info_s.cycles,
+ pm_info.pal_perf_mon_info_s.retired);
+
+ bitregister_process(m, pm_buffer, 256);
+ seq_puts(m, "\nImplemented PMD : ");
+ bitregister_process(m, pm_buffer+4, 256);
+ seq_puts(m, "\nCycles count capable : ");
+ bitregister_process(m, pm_buffer+8, 256);
+ seq_puts(m, "\nRetired bundles count capable : ");
#ifdef CONFIG_ITANIUM
/*
* PAL_PERF_MON_INFO reports that only PMC4 can be used to count CPU_CYCLES
* which is wrong, both PMC4 and PMD5 support it.
*/
- if (pm_buffer[12] == 0x10) pm_buffer[12]=0x30;
+ if (pm_buffer[12] == 0x10)
+ pm_buffer[12]=0x30;
#endif
- p = bitregister_process(p, pm_buffer+12, 256);
-
- p += sprintf(p, "\n");
-
- return p - page;
+ bitregister_process(m, pm_buffer+12, 256);
+ seq_putc(m, '\n');
+ return 0;
}
-static int
-frequency_info(char *page)
+static int frequency_info(struct seq_file *m)
{
- char *p = page;
struct pal_freq_ratio proc, itc, bus;
- u64 base;
+ unsigned long base;
if (ia64_pal_freq_base(&base) == -1)
- p += sprintf(p, "Output clock : not implemented\n");
+ seq_puts(m, "Output clock : not implemented\n");
else
- p += sprintf(p, "Output clock : %ld ticks/s\n", base);
+ seq_printf(m, "Output clock : %ld ticks/s\n", base);
if (ia64_pal_freq_ratios(&proc, &bus, &itc) != 0) return 0;
- p += sprintf(p,
+ seq_printf(m,
"Processor/Clock ratio : %d/%d\n"
"Bus/Clock ratio : %d/%d\n"
"ITC/Clock ratio : %d/%d\n",
proc.num, proc.den, bus.num, bus.den, itc.num, itc.den);
-
- return p - page;
+ return 0;
}
-static int
-tr_info(char *page)
+static int tr_info(struct seq_file *m)
{
- char *p = page;
- s64 status;
+ long status;
pal_tr_valid_u_t tr_valid;
u64 tr_buffer[4];
pal_vm_info_1_u_t vm_info_1;
pal_vm_info_2_u_t vm_info_2;
- u64 i, j;
- u64 max[3], pgm;
+ unsigned long i, j;
+ unsigned long max[3], pgm;
struct ifa_reg {
- u64 valid:1;
- u64 ig:11;
- u64 vpn:52;
+ unsigned long valid:1;
+ unsigned long ig:11;
+ unsigned long vpn:52;
} *ifa_reg;
struct itir_reg {
- u64 rv1:2;
- u64 ps:6;
- u64 key:24;
- u64 rv2:32;
+ unsigned long rv1:2;
+ unsigned long ps:6;
+ unsigned long key:24;
+ unsigned long rv2:32;
} *itir_reg;
struct gr_reg {
- u64 p:1;
- u64 rv1:1;
- u64 ma:3;
- u64 a:1;
- u64 d:1;
- u64 pl:2;
- u64 ar:3;
- u64 ppn:38;
- u64 rv2:2;
- u64 ed:1;
- u64 ig:11;
+ unsigned long p:1;
+ unsigned long rv1:1;
+ unsigned long ma:3;
+ unsigned long a:1;
+ unsigned long d:1;
+ unsigned long pl:2;
+ unsigned long ar:3;
+ unsigned long ppn:38;
+ unsigned long rv2:2;
+ unsigned long ed:1;
+ unsigned long ig:11;
} *gr_reg;
struct rid_reg {
- u64 ig1:1;
- u64 rv1:1;
- u64 ig2:6;
- u64 rid:24;
- u64 rv2:32;
+ unsigned long ig1:1;
+ unsigned long rv1:1;
+ unsigned long ig2:6;
+ unsigned long rid:24;
+ unsigned long rv2:32;
} *rid_reg;
if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) {
@@ -727,39 +767,40 @@ tr_info(char *page)
ifa_reg = (struct ifa_reg *)&tr_buffer[2];
- if (ifa_reg->valid == 0) continue;
+ if (ifa_reg->valid == 0)
+ continue;
gr_reg = (struct gr_reg *)tr_buffer;
itir_reg = (struct itir_reg *)&tr_buffer[1];
rid_reg = (struct rid_reg *)&tr_buffer[3];
pgm = -1 << (itir_reg->ps - 12);
- p += sprintf(p,
- "%cTR%lu: av=%d pv=%d dv=%d mv=%d\n"
- "\tppn : 0x%lx\n"
- "\tvpn : 0x%lx\n"
- "\tps : ",
- "ID"[i], j,
- tr_valid.pal_tr_valid_s.access_rights_valid,
- tr_valid.pal_tr_valid_s.priv_level_valid,
- tr_valid.pal_tr_valid_s.dirty_bit_valid,
- tr_valid.pal_tr_valid_s.mem_attr_valid,
- (gr_reg->ppn & pgm)<< 12, (ifa_reg->vpn & pgm)<< 12);
-
- p = bitvector_process(p, 1<< itir_reg->ps);
-
- p += sprintf(p,
- "\n\tpl : %d\n"
- "\tar : %d\n"
- "\trid : %x\n"
- "\tp : %d\n"
- "\tma : %d\n"
- "\td : %d\n",
- gr_reg->pl, gr_reg->ar, rid_reg->rid, gr_reg->p, gr_reg->ma,
- gr_reg->d);
+ seq_printf(m,
+ "%cTR%lu: av=%d pv=%d dv=%d mv=%d\n"
+ "\tppn : 0x%lx\n"
+ "\tvpn : 0x%lx\n"
+ "\tps : ",
+ "ID"[i], j,
+ tr_valid.pal_tr_valid_s.access_rights_valid,
+ tr_valid.pal_tr_valid_s.priv_level_valid,
+ tr_valid.pal_tr_valid_s.dirty_bit_valid,
+ tr_valid.pal_tr_valid_s.mem_attr_valid,
+ (gr_reg->ppn & pgm)<< 12, (ifa_reg->vpn & pgm)<< 12);
+
+ bitvector_process(m, 1<< itir_reg->ps);
+
+ seq_printf(m,
+ "\n\tpl : %d\n"
+ "\tar : %d\n"
+ "\trid : %x\n"
+ "\tp : %d\n"
+ "\tma : %d\n"
+ "\td : %d\n",
+ gr_reg->pl, gr_reg->ar, rid_reg->rid, gr_reg->p, gr_reg->ma,
+ gr_reg->d);
}
}
- return p - page;
+ return 0;
}
@@ -767,7 +808,7 @@ tr_info(char *page)
/*
* List {name,function} pairs for every entry in /proc/pal/cpu*
*/
-static palinfo_entry_t palinfo_entries[]={
+static const palinfo_entry_t palinfo_entries[]={
{ "version_info", version_info, },
{ "vm_info", vm_info, },
{ "cache_info", cache_info, },
@@ -782,17 +823,6 @@ static palinfo_entry_t palinfo_entries[]={
#define NR_PALINFO_ENTRIES (int) ARRAY_SIZE(palinfo_entries)
-/*
- * this array is used to keep track of the proc entries we create. This is
- * required in the module mode when we need to remove all entries. The procfs code
- * does not do recursion of deletion
- *
- * Notes:
- * - +1 accounts for the cpuN directory entry in /proc/pal
- */
-#define NR_PALINFO_PROC_ENTRIES (NR_CPUS*(NR_PALINFO_ENTRIES+1))
-
-static struct proc_dir_entry *palinfo_proc_entries[NR_PALINFO_PROC_ENTRIES];
static struct proc_dir_entry *palinfo_dir;
/*
@@ -820,7 +850,7 @@ typedef union {
*/
typedef struct {
palinfo_func_t func; /* pointer to function to call */
- char *page; /* buffer to store results */
+ struct seq_file *m; /* seq_file that receives the output */
int ret; /* return value from call */
} palinfo_smp_data_t;
@@ -833,13 +863,7 @@ static void
palinfo_smp_call(void *info)
{
palinfo_smp_data_t *data = (palinfo_smp_data_t *)info;
- if (data == NULL) {
- printk(KERN_ERR "palinfo: data pointer is NULL\n");
- data->ret = 0; /* no output */
- return;
- }
- /* does this actual call */
- data->ret = (*data->func)(data->page);
+ data->ret = (*data->func)(data->m);
}
/*
@@ -849,18 +873,18 @@ palinfo_smp_call(void *info)
* otherwise how many bytes in the "page" buffer were written
*/
static
-int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page)
+int palinfo_handle_smp(struct seq_file *m, pal_func_cpu_u_t *f)
{
palinfo_smp_data_t ptr;
int ret;
ptr.func = palinfo_entries[f->func_id].proc_read;
- ptr.page = page;
+ ptr.m = m;
ptr.ret = 0; /* just in case */
/* will send IPI to other CPU and wait for completion of remote call */
- if ((ret=smp_call_function_single(f->req_cpu, palinfo_smp_call, &ptr, 0, 1))) {
+ if ((ret=smp_call_function_single(f->req_cpu, palinfo_smp_call, &ptr, 1))) {
printk(KERN_ERR "palinfo: remote CPU call from %d to %d on function %d: "
"error %d\n", smp_processor_id(), f->req_cpu, f->func_id, ret);
return 0;
@@ -869,7 +893,7 @@ int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page)
}
#else /* ! CONFIG_SMP */
static
-int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page)
+int palinfo_handle_smp(struct seq_file *m, pal_func_cpu_u_t *f)
{
printk(KERN_ERR "palinfo: should not be called with non SMP kernel\n");
return 0;
@@ -879,93 +903,63 @@ int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page)
/*
* Entry point routine: all calls go through this function
*/
-static int
-palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data)
+static int proc_palinfo_show(struct seq_file *m, void *v)
{
- int len=0;
- pal_func_cpu_u_t *f = (pal_func_cpu_u_t *)&data;
+ pal_func_cpu_u_t *f = (pal_func_cpu_u_t *)&m->private;
/*
* in SMP mode, we may need to call another CPU to get correct
* information. PAL, by definition, is processor specific
*/
if (f->req_cpu == get_cpu())
- len = (*palinfo_entries[f->func_id].proc_read)(page);
+ (*palinfo_entries[f->func_id].proc_read)(m);
else
- len = palinfo_handle_smp(f, page);
+ palinfo_handle_smp(m, f);
put_cpu();
+ return 0;
+}
- if (len <= off+count) *eof = 1;
-
- *start = page + off;
- len -= off;
-
- if (len>count) len = count;
- if (len<0) len = 0;
-
- return len;
+static int proc_palinfo_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, proc_palinfo_show, PDE_DATA(inode));
}
+static const struct file_operations proc_palinfo_fops = {
+ .open = proc_palinfo_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static void
create_palinfo_proc_entries(unsigned int cpu)
{
-# define CPUSTR "cpu%d"
-
pal_func_cpu_u_t f;
- struct proc_dir_entry **pdir;
struct proc_dir_entry *cpu_dir;
int j;
- char cpustr[sizeof(CPUSTR)];
-
-
- /*
- * we keep track of created entries in a depth-first order for
- * cleanup purposes. Each entry is stored into palinfo_proc_entries
- */
- sprintf(cpustr,CPUSTR, cpu);
+ char cpustr[3+4+1]; /* cpu numbers are up to 4095 on itanic */
+ sprintf(cpustr, "cpu%d", cpu);
cpu_dir = proc_mkdir(cpustr, palinfo_dir);
+ if (!cpu_dir)
+ return;
f.req_cpu = cpu;
- /*
- * Compute the location to store per cpu entries
- * We dont store the top level entry in this list, but
- * remove it finally after removing all cpu entries.
- */
- pdir = &palinfo_proc_entries[cpu*(NR_PALINFO_ENTRIES+1)];
- *pdir++ = cpu_dir;
for (j=0; j < NR_PALINFO_ENTRIES; j++) {
f.func_id = j;
- *pdir = create_proc_read_entry(
- palinfo_entries[j].name, 0, cpu_dir,
- palinfo_read_entry, (void *)f.value);
- if (*pdir)
- (*pdir)->owner = THIS_MODULE;
- pdir++;
+ proc_create_data(palinfo_entries[j].name, 0, cpu_dir,
+ &proc_palinfo_fops, (void *)f.value);
}
}
static void
remove_palinfo_proc_entries(unsigned int hcpu)
{
- int j;
- struct proc_dir_entry *cpu_dir, **pdir;
-
- pdir = &palinfo_proc_entries[hcpu*(NR_PALINFO_ENTRIES+1)];
- cpu_dir = *pdir;
- *pdir++=NULL;
- for (j=0; j < (NR_PALINFO_ENTRIES); j++) {
- if ((*pdir)) {
- remove_proc_entry ((*pdir)->name, cpu_dir);
- *pdir ++= NULL;
- }
- }
-
- if (cpu_dir) {
- remove_proc_entry(cpu_dir->name, palinfo_dir);
- }
+ char cpustr[3+4+1]; /* cpu numbers are up to 4095 on itanic */
+ sprintf(cpustr, "cpu%d", hcpu);
+ remove_proc_subtree(cpustr, palinfo_dir);
}
static int palinfo_cpu_callback(struct notifier_block *nfb,
@@ -986,7 +980,7 @@ static int palinfo_cpu_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
-static struct notifier_block palinfo_cpu_notifier =
+static struct notifier_block __refdata palinfo_cpu_notifier =
{
.notifier_call = palinfo_cpu_callback,
.priority = 0,
@@ -999,6 +993,10 @@ palinfo_init(void)
printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION);
palinfo_dir = proc_mkdir("pal", NULL);
+ if (!palinfo_dir)
+ return -ENOMEM;
+
+ cpu_notifier_register_begin();
/* Create palinfo dirs in /proc for all online cpus */
for_each_online_cpu(i) {
@@ -1006,7 +1004,9 @@ palinfo_init(void)
}
/* Register for future delivery via notify registration */
- register_hotcpu_notifier(&palinfo_cpu_notifier);
+ __register_hotcpu_notifier(&palinfo_cpu_notifier);
+
+ cpu_notifier_register_done();
return 0;
}
@@ -1014,22 +1014,8 @@ palinfo_init(void)
static void __exit
palinfo_exit(void)
{
- int i = 0;
-
- /* remove all nodes: depth first pass. Could optimize this */
- for_each_online_cpu(i) {
- remove_palinfo_proc_entries(i);
- }
-
- /*
- * Remove the top level entry finally
- */
- remove_proc_entry(palinfo_dir->name, NULL);
-
- /*
- * Unregister from cpu notifier callbacks
- */
unregister_hotcpu_notifier(&palinfo_cpu_notifier);
+ remove_proc_subtree("pal", NULL);
}
module_init(palinfo_init);
diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c
new file mode 100644
index 00000000000..1b22f6de293
--- /dev/null
+++ b/arch/ia64/kernel/paravirt.c
@@ -0,0 +1,902 @@
+/******************************************************************************
+ * arch/ia64/kernel/paravirt.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ * Yaozu (Eddie) Dong <eddie.dong@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+
+#include <linux/compiler.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+#include <asm/iosapic.h>
+#include <asm/paravirt.h>
+
+/***************************************************************************
+ * general info
+ */
+struct pv_info pv_info = {
+ .kernel_rpl = 0,
+ .paravirt_enabled = 0,
+ .name = "bare hardware"
+};
+
+/***************************************************************************
+ * pv_init_ops
+ * initialization hooks.
+ */
+
+static void __init
+ia64_native_patch_branch(unsigned long tag, unsigned long type);
+
+struct pv_init_ops pv_init_ops =
+{
+#ifdef ASM_SUPPORTED
+ .patch_bundle = ia64_native_patch_bundle,
+#endif
+ .patch_branch = ia64_native_patch_branch,
+};
+
+/***************************************************************************
+ * pv_cpu_ops
+ * intrinsics hooks.
+ */
+
+#ifndef ASM_SUPPORTED
+/* ia64_native_xxx are macros, so we have to wrap them in real functions */
+
+#define DEFINE_VOID_FUNC1(name) \
+ static void \
+ ia64_native_ ## name ## _func(unsigned long arg) \
+ { \
+ ia64_native_ ## name(arg); \
+ }
+
+#define DEFINE_VOID_FUNC1_VOID(name) \
+ static void \
+ ia64_native_ ## name ## _func(void *arg) \
+ { \
+ ia64_native_ ## name(arg); \
+ }
+
+#define DEFINE_VOID_FUNC2(name) \
+ static void \
+ ia64_native_ ## name ## _func(unsigned long arg0, \
+ unsigned long arg1) \
+ { \
+ ia64_native_ ## name(arg0, arg1); \
+ }
+
+#define DEFINE_FUNC0(name) \
+ static unsigned long \
+ ia64_native_ ## name ## _func(void) \
+ { \
+ return ia64_native_ ## name(); \
+ }
+
+#define DEFINE_FUNC1(name, type) \
+ static unsigned long \
+ ia64_native_ ## name ## _func(type arg) \
+ { \
+ return ia64_native_ ## name(arg); \
+ } \
+
+DEFINE_VOID_FUNC1_VOID(fc);
+DEFINE_VOID_FUNC1(intrin_local_irq_restore);
+
+DEFINE_VOID_FUNC2(ptcga);
+DEFINE_VOID_FUNC2(set_rr);
+
+DEFINE_FUNC0(get_psr_i);
+
+DEFINE_FUNC1(thash, unsigned long);
+DEFINE_FUNC1(get_cpuid, int);
+DEFINE_FUNC1(get_pmd, int);
+DEFINE_FUNC1(get_rr, unsigned long);
+
+static void
+ia64_native_ssm_i_func(void)
+{
+ ia64_native_ssm(IA64_PSR_I);
+}
+
+static void
+ia64_native_rsm_i_func(void)
+{
+ ia64_native_rsm(IA64_PSR_I);
+}
+
+static void
+ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1,
+ unsigned long val2, unsigned long val3,
+ unsigned long val4)
+{
+ ia64_native_set_rr0_to_rr4(val0, val1, val2, val3, val4);
+}
+
+#define CASE_GET_REG(id) \
+ case _IA64_REG_ ## id: \
+ res = ia64_native_getreg(_IA64_REG_ ## id); \
+ break;
+#define CASE_GET_AR(id) CASE_GET_REG(AR_ ## id)
+#define CASE_GET_CR(id) CASE_GET_REG(CR_ ## id)
+
+unsigned long
+ia64_native_getreg_func(int regnum)
+{
+ unsigned long res = -1;
+ switch (regnum) {
+ CASE_GET_REG(GP);
+ /*CASE_GET_REG(IP);*/ /* returned ip value shouldn't be constant */
+ CASE_GET_REG(PSR);
+ CASE_GET_REG(TP);
+ CASE_GET_REG(SP);
+
+ CASE_GET_AR(KR0);
+ CASE_GET_AR(KR1);
+ CASE_GET_AR(KR2);
+ CASE_GET_AR(KR3);
+ CASE_GET_AR(KR4);
+ CASE_GET_AR(KR5);
+ CASE_GET_AR(KR6);
+ CASE_GET_AR(KR7);
+ CASE_GET_AR(RSC);
+ CASE_GET_AR(BSP);
+ CASE_GET_AR(BSPSTORE);
+ CASE_GET_AR(RNAT);
+ CASE_GET_AR(FCR);
+ CASE_GET_AR(EFLAG);
+ CASE_GET_AR(CSD);
+ CASE_GET_AR(SSD);
+ CASE_GET_AR(CFLAG);
+ CASE_GET_AR(FSR);
+ CASE_GET_AR(FIR);
+ CASE_GET_AR(FDR);
+ CASE_GET_AR(CCV);
+ CASE_GET_AR(UNAT);
+ CASE_GET_AR(FPSR);
+ CASE_GET_AR(ITC);
+ CASE_GET_AR(PFS);
+ CASE_GET_AR(LC);
+ CASE_GET_AR(EC);
+
+ CASE_GET_CR(DCR);
+ CASE_GET_CR(ITM);
+ CASE_GET_CR(IVA);
+ CASE_GET_CR(PTA);
+ CASE_GET_CR(IPSR);
+ CASE_GET_CR(ISR);
+ CASE_GET_CR(IIP);
+ CASE_GET_CR(IFA);
+ CASE_GET_CR(ITIR);
+ CASE_GET_CR(IIPA);
+ CASE_GET_CR(IFS);
+ CASE_GET_CR(IIM);
+ CASE_GET_CR(IHA);
+ CASE_GET_CR(LID);
+ CASE_GET_CR(IVR);
+ CASE_GET_CR(TPR);
+ CASE_GET_CR(EOI);
+ CASE_GET_CR(IRR0);
+ CASE_GET_CR(IRR1);
+ CASE_GET_CR(IRR2);
+ CASE_GET_CR(IRR3);
+ CASE_GET_CR(ITV);
+ CASE_GET_CR(PMV);
+ CASE_GET_CR(CMCV);
+ CASE_GET_CR(LRR0);
+ CASE_GET_CR(LRR1);
+
+ default:
+ printk(KERN_CRIT "wrong_getreg %d\n", regnum);
+ break;
+ }
+ return res;
+}
+
+#define CASE_SET_REG(id) \
+ case _IA64_REG_ ## id: \
+ ia64_native_setreg(_IA64_REG_ ## id, val); \
+ break;
+#define CASE_SET_AR(id) CASE_SET_REG(AR_ ## id)
+#define CASE_SET_CR(id) CASE_SET_REG(CR_ ## id)
+
+void
+ia64_native_setreg_func(int regnum, unsigned long val)
+{
+ switch (regnum) {
+ case _IA64_REG_PSR_L:
+ ia64_native_setreg(_IA64_REG_PSR_L, val);
+ ia64_dv_serialize_data();
+ break;
+ CASE_SET_REG(SP);
+ CASE_SET_REG(GP);
+
+ CASE_SET_AR(KR0);
+ CASE_SET_AR(KR1);
+ CASE_SET_AR(KR2);
+ CASE_SET_AR(KR3);
+ CASE_SET_AR(KR4);
+ CASE_SET_AR(KR5);
+ CASE_SET_AR(KR6);
+ CASE_SET_AR(KR7);
+ CASE_SET_AR(RSC);
+ CASE_SET_AR(BSP);
+ CASE_SET_AR(BSPSTORE);
+ CASE_SET_AR(RNAT);
+ CASE_SET_AR(FCR);
+ CASE_SET_AR(EFLAG);
+ CASE_SET_AR(CSD);
+ CASE_SET_AR(SSD);
+ CASE_SET_AR(CFLAG);
+ CASE_SET_AR(FSR);
+ CASE_SET_AR(FIR);
+ CASE_SET_AR(FDR);
+ CASE_SET_AR(CCV);
+ CASE_SET_AR(UNAT);
+ CASE_SET_AR(FPSR);
+ CASE_SET_AR(ITC);
+ CASE_SET_AR(PFS);
+ CASE_SET_AR(LC);
+ CASE_SET_AR(EC);
+
+ CASE_SET_CR(DCR);
+ CASE_SET_CR(ITM);
+ CASE_SET_CR(IVA);
+ CASE_SET_CR(PTA);
+ CASE_SET_CR(IPSR);
+ CASE_SET_CR(ISR);
+ CASE_SET_CR(IIP);
+ CASE_SET_CR(IFA);
+ CASE_SET_CR(ITIR);
+ CASE_SET_CR(IIPA);
+ CASE_SET_CR(IFS);
+ CASE_SET_CR(IIM);
+ CASE_SET_CR(IHA);
+ CASE_SET_CR(LID);
+ CASE_SET_CR(IVR);
+ CASE_SET_CR(TPR);
+ CASE_SET_CR(EOI);
+ CASE_SET_CR(IRR0);
+ CASE_SET_CR(IRR1);
+ CASE_SET_CR(IRR2);
+ CASE_SET_CR(IRR3);
+ CASE_SET_CR(ITV);
+ CASE_SET_CR(PMV);
+ CASE_SET_CR(CMCV);
+ CASE_SET_CR(LRR0);
+ CASE_SET_CR(LRR1);
+ default:
+ printk(KERN_CRIT "wrong setreg %d\n", regnum);
+ break;
+ }
+}
+#else
+
+#define __DEFINE_FUNC(name, code) \
+ extern const char ia64_native_ ## name ## _direct_start[]; \
+ extern const char ia64_native_ ## name ## _direct_end[]; \
+ asm (".align 32\n" \
+ ".proc ia64_native_" #name "_func\n" \
+ "ia64_native_" #name "_func:\n" \
+ "ia64_native_" #name "_direct_start:\n" \
+ code \
+ "ia64_native_" #name "_direct_end:\n" \
+ "br.cond.sptk.many b6\n" \
+ ".endp ia64_native_" #name "_func\n")
+
+#define DEFINE_VOID_FUNC0(name, code) \
+ extern void \
+ ia64_native_ ## name ## _func(void); \
+ __DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1(name, code) \
+ extern void \
+ ia64_native_ ## name ## _func(unsigned long arg); \
+ __DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC1_VOID(name, code) \
+ extern void \
+ ia64_native_ ## name ## _func(void *arg); \
+ __DEFINE_FUNC(name, code)
+
+#define DEFINE_VOID_FUNC2(name, code) \
+ extern void \
+ ia64_native_ ## name ## _func(unsigned long arg0, \
+ unsigned long arg1); \
+ __DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC0(name, code) \
+ extern unsigned long \
+ ia64_native_ ## name ## _func(void); \
+ __DEFINE_FUNC(name, code)
+
+#define DEFINE_FUNC1(name, type, code) \
+ extern unsigned long \
+ ia64_native_ ## name ## _func(type arg); \
+ __DEFINE_FUNC(name, code)
+
+DEFINE_VOID_FUNC1_VOID(fc,
+ "fc r8\n");
+DEFINE_VOID_FUNC1(intrin_local_irq_restore,
+ ";;\n"
+ " cmp.ne p6, p7 = r8, r0\n"
+ ";;\n"
+ "(p6) ssm psr.i\n"
+ "(p7) rsm psr.i\n"
+ ";;\n"
+ "(p6) srlz.d\n");
+
+DEFINE_VOID_FUNC2(ptcga,
+ "ptc.ga r8, r9\n");
+DEFINE_VOID_FUNC2(set_rr,
+ "mov rr[r8] = r9\n");
+
+/* ia64_native_getreg(_IA64_REG_PSR) & IA64_PSR_I */
+DEFINE_FUNC0(get_psr_i,
+ "mov r2 = " __stringify(1 << IA64_PSR_I_BIT) "\n"
+ "mov r8 = psr\n"
+ ";;\n"
+ "and r8 = r2, r8\n");
+
+DEFINE_FUNC1(thash, unsigned long,
+ "thash r8 = r8\n");
+DEFINE_FUNC1(get_cpuid, int,
+ "mov r8 = cpuid[r8]\n");
+DEFINE_FUNC1(get_pmd, int,
+ "mov r8 = pmd[r8]\n");
+DEFINE_FUNC1(get_rr, unsigned long,
+ "mov r8 = rr[r8]\n");
+
+DEFINE_VOID_FUNC0(ssm_i,
+ "ssm psr.i\n");
+DEFINE_VOID_FUNC0(rsm_i,
+ "rsm psr.i\n");
+
+extern void
+ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1,
+ unsigned long val2, unsigned long val3,
+ unsigned long val4);
+__DEFINE_FUNC(set_rr0_to_rr4,
+ "mov rr[r0] = r8\n"
+ "movl r2 = 0x2000000000000000\n"
+ ";;\n"
+ "mov rr[r2] = r9\n"
+ "shl r3 = r2, 1\n" /* movl r3 = 0x4000000000000000 */
+ ";;\n"
+ "add r2 = r2, r3\n" /* movl r2 = 0x6000000000000000 */
+ "mov rr[r3] = r10\n"
+ ";;\n"
+ "mov rr[r2] = r11\n"
+ "shl r3 = r3, 1\n" /* movl r3 = 0x8000000000000000 */
+ ";;\n"
+ "mov rr[r3] = r14\n");
+
+extern unsigned long ia64_native_getreg_func(int regnum);
+asm(".global ia64_native_getreg_func\n");
+#define __DEFINE_GET_REG(id, reg) \
+ "mov r2 = " __stringify(_IA64_REG_ ## id) "\n" \
+ ";;\n" \
+ "cmp.eq p6, p0 = r2, r8\n" \
+ ";;\n" \
+ "(p6) mov r8 = " #reg "\n" \
+ "(p6) br.cond.sptk.many b6\n" \
+ ";;\n"
+#define __DEFINE_GET_AR(id, reg) __DEFINE_GET_REG(AR_ ## id, ar.reg)
+#define __DEFINE_GET_CR(id, reg) __DEFINE_GET_REG(CR_ ## id, cr.reg)
+
+__DEFINE_FUNC(getreg,
+ __DEFINE_GET_REG(GP, gp)
+ /*__DEFINE_GET_REG(IP, ip)*/ /* returned ip value shouldn't be constant */
+ __DEFINE_GET_REG(PSR, psr)
+ __DEFINE_GET_REG(TP, tp)
+ __DEFINE_GET_REG(SP, sp)
+
+ __DEFINE_GET_REG(AR_KR0, ar0)
+ __DEFINE_GET_REG(AR_KR1, ar1)
+ __DEFINE_GET_REG(AR_KR2, ar2)
+ __DEFINE_GET_REG(AR_KR3, ar3)
+ __DEFINE_GET_REG(AR_KR4, ar4)
+ __DEFINE_GET_REG(AR_KR5, ar5)
+ __DEFINE_GET_REG(AR_KR6, ar6)
+ __DEFINE_GET_REG(AR_KR7, ar7)
+ __DEFINE_GET_AR(RSC, rsc)
+ __DEFINE_GET_AR(BSP, bsp)
+ __DEFINE_GET_AR(BSPSTORE, bspstore)
+ __DEFINE_GET_AR(RNAT, rnat)
+ __DEFINE_GET_AR(FCR, fcr)
+ __DEFINE_GET_AR(EFLAG, eflag)
+ __DEFINE_GET_AR(CSD, csd)
+ __DEFINE_GET_AR(SSD, ssd)
+ __DEFINE_GET_REG(AR_CFLAG, ar27)
+ __DEFINE_GET_AR(FSR, fsr)
+ __DEFINE_GET_AR(FIR, fir)
+ __DEFINE_GET_AR(FDR, fdr)
+ __DEFINE_GET_AR(CCV, ccv)
+ __DEFINE_GET_AR(UNAT, unat)
+ __DEFINE_GET_AR(FPSR, fpsr)
+ __DEFINE_GET_AR(ITC, itc)
+ __DEFINE_GET_AR(PFS, pfs)
+ __DEFINE_GET_AR(LC, lc)
+ __DEFINE_GET_AR(EC, ec)
+
+ __DEFINE_GET_CR(DCR, dcr)
+ __DEFINE_GET_CR(ITM, itm)
+ __DEFINE_GET_CR(IVA, iva)
+ __DEFINE_GET_CR(PTA, pta)
+ __DEFINE_GET_CR(IPSR, ipsr)
+ __DEFINE_GET_CR(ISR, isr)
+ __DEFINE_GET_CR(IIP, iip)
+ __DEFINE_GET_CR(IFA, ifa)
+ __DEFINE_GET_CR(ITIR, itir)
+ __DEFINE_GET_CR(IIPA, iipa)
+ __DEFINE_GET_CR(IFS, ifs)
+ __DEFINE_GET_CR(IIM, iim)
+ __DEFINE_GET_CR(IHA, iha)
+ __DEFINE_GET_CR(LID, lid)
+ __DEFINE_GET_CR(IVR, ivr)
+ __DEFINE_GET_CR(TPR, tpr)
+ __DEFINE_GET_CR(EOI, eoi)
+ __DEFINE_GET_CR(IRR0, irr0)
+ __DEFINE_GET_CR(IRR1, irr1)
+ __DEFINE_GET_CR(IRR2, irr2)
+ __DEFINE_GET_CR(IRR3, irr3)
+ __DEFINE_GET_CR(ITV, itv)
+ __DEFINE_GET_CR(PMV, pmv)
+ __DEFINE_GET_CR(CMCV, cmcv)
+ __DEFINE_GET_CR(LRR0, lrr0)
+ __DEFINE_GET_CR(LRR1, lrr1)
+
+ "mov r8 = -1\n" /* unsupported case */
+ );
+
+extern void ia64_native_setreg_func(int regnum, unsigned long val);
+asm(".global ia64_native_setreg_func\n");
+#define __DEFINE_SET_REG(id, reg) \
+ "mov r2 = " __stringify(_IA64_REG_ ## id) "\n" \
+ ";;\n" \
+ "cmp.eq p6, p0 = r2, r9\n" \
+ ";;\n" \
+ "(p6) mov " #reg " = r8\n" \
+ "(p6) br.cond.sptk.many b6\n" \
+ ";;\n"
+#define __DEFINE_SET_AR(id, reg) __DEFINE_SET_REG(AR_ ## id, ar.reg)
+#define __DEFINE_SET_CR(id, reg) __DEFINE_SET_REG(CR_ ## id, cr.reg)
+__DEFINE_FUNC(setreg,
+ "mov r2 = " __stringify(_IA64_REG_PSR_L) "\n"
+ ";;\n"
+ "cmp.eq p6, p0 = r2, r9\n"
+ ";;\n"
+ "(p6) mov psr.l = r8\n"
+#ifdef HAVE_SERIALIZE_DIRECTIVE
+ ".serialize.data\n"
+#endif
+ "(p6) br.cond.sptk.many b6\n"
+ __DEFINE_SET_REG(GP, gp)
+ __DEFINE_SET_REG(SP, sp)
+
+ __DEFINE_SET_REG(AR_KR0, ar0)
+ __DEFINE_SET_REG(AR_KR1, ar1)
+ __DEFINE_SET_REG(AR_KR2, ar2)
+ __DEFINE_SET_REG(AR_KR3, ar3)
+ __DEFINE_SET_REG(AR_KR4, ar4)
+ __DEFINE_SET_REG(AR_KR5, ar5)
+ __DEFINE_SET_REG(AR_KR6, ar6)
+ __DEFINE_SET_REG(AR_KR7, ar7)
+ __DEFINE_SET_AR(RSC, rsc)
+ __DEFINE_SET_AR(BSP, bsp)
+ __DEFINE_SET_AR(BSPSTORE, bspstore)
+ __DEFINE_SET_AR(RNAT, rnat)
+ __DEFINE_SET_AR(FCR, fcr)
+ __DEFINE_SET_AR(EFLAG, eflag)
+ __DEFINE_SET_AR(CSD, csd)
+ __DEFINE_SET_AR(SSD, ssd)
+ __DEFINE_SET_REG(AR_CFLAG, ar27)
+ __DEFINE_SET_AR(FSR, fsr)
+ __DEFINE_SET_AR(FIR, fir)
+ __DEFINE_SET_AR(FDR, fdr)
+ __DEFINE_SET_AR(CCV, ccv)
+ __DEFINE_SET_AR(UNAT, unat)
+ __DEFINE_SET_AR(FPSR, fpsr)
+ __DEFINE_SET_AR(ITC, itc)
+ __DEFINE_SET_AR(PFS, pfs)
+ __DEFINE_SET_AR(LC, lc)
+ __DEFINE_SET_AR(EC, ec)
+
+ __DEFINE_SET_CR(DCR, dcr)
+ __DEFINE_SET_CR(ITM, itm)
+ __DEFINE_SET_CR(IVA, iva)
+ __DEFINE_SET_CR(PTA, pta)
+ __DEFINE_SET_CR(IPSR, ipsr)
+ __DEFINE_SET_CR(ISR, isr)
+ __DEFINE_SET_CR(IIP, iip)
+ __DEFINE_SET_CR(IFA, ifa)
+ __DEFINE_SET_CR(ITIR, itir)
+ __DEFINE_SET_CR(IIPA, iipa)
+ __DEFINE_SET_CR(IFS, ifs)
+ __DEFINE_SET_CR(IIM, iim)
+ __DEFINE_SET_CR(IHA, iha)
+ __DEFINE_SET_CR(LID, lid)
+ __DEFINE_SET_CR(IVR, ivr)
+ __DEFINE_SET_CR(TPR, tpr)
+ __DEFINE_SET_CR(EOI, eoi)
+ __DEFINE_SET_CR(IRR0, irr0)
+ __DEFINE_SET_CR(IRR1, irr1)
+ __DEFINE_SET_CR(IRR2, irr2)
+ __DEFINE_SET_CR(IRR3, irr3)
+ __DEFINE_SET_CR(ITV, itv)
+ __DEFINE_SET_CR(PMV, pmv)
+ __DEFINE_SET_CR(CMCV, cmcv)
+ __DEFINE_SET_CR(LRR0, lrr0)
+ __DEFINE_SET_CR(LRR1, lrr1)
+ );
+#endif
+
+struct pv_cpu_ops pv_cpu_ops = {
+ .fc = ia64_native_fc_func,
+ .thash = ia64_native_thash_func,
+ .get_cpuid = ia64_native_get_cpuid_func,
+ .get_pmd = ia64_native_get_pmd_func,
+ .ptcga = ia64_native_ptcga_func,
+ .get_rr = ia64_native_get_rr_func,
+ .set_rr = ia64_native_set_rr_func,
+ .set_rr0_to_rr4 = ia64_native_set_rr0_to_rr4_func,
+ .ssm_i = ia64_native_ssm_i_func,
+ .getreg = ia64_native_getreg_func,
+ .setreg = ia64_native_setreg_func,
+ .rsm_i = ia64_native_rsm_i_func,
+ .get_psr_i = ia64_native_get_psr_i_func,
+ .intrin_local_irq_restore
+ = ia64_native_intrin_local_irq_restore_func,
+};
+EXPORT_SYMBOL(pv_cpu_ops);
+
+/******************************************************************************
+ * replacement of hand-written assembly code.
+ */
+
+void
+paravirt_cpu_asm_init(const struct pv_cpu_asm_switch *cpu_asm_switch)
+{
+ extern unsigned long paravirt_switch_to_targ;
+ extern unsigned long paravirt_leave_syscall_targ;
+ extern unsigned long paravirt_work_processed_syscall_targ;
+ extern unsigned long paravirt_leave_kernel_targ;
+
+ paravirt_switch_to_targ = cpu_asm_switch->switch_to;
+ paravirt_leave_syscall_targ = cpu_asm_switch->leave_syscall;
+ paravirt_work_processed_syscall_targ =
+ cpu_asm_switch->work_processed_syscall;
+ paravirt_leave_kernel_targ = cpu_asm_switch->leave_kernel;
+}
+
+/***************************************************************************
+ * pv_iosapic_ops
+ * iosapic read/write hooks.
+ */
+
+static unsigned int
+ia64_native_iosapic_read(char __iomem *iosapic, unsigned int reg)
+{
+ return __ia64_native_iosapic_read(iosapic, reg);
+}
+
+static void
+ia64_native_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
+{
+ __ia64_native_iosapic_write(iosapic, reg, val);
+}
+
+struct pv_iosapic_ops pv_iosapic_ops = {
+ .pcat_compat_init = ia64_native_iosapic_pcat_compat_init,
+ .__get_irq_chip = ia64_native_iosapic_get_irq_chip,
+
+ .__read = ia64_native_iosapic_read,
+ .__write = ia64_native_iosapic_write,
+};
+
+/***************************************************************************
+ * pv_irq_ops
+ * irq operations
+ */
+
+struct pv_irq_ops pv_irq_ops = {
+ .register_ipi = ia64_native_register_ipi,
+
+ .assign_irq_vector = ia64_native_assign_irq_vector,
+ .free_irq_vector = ia64_native_free_irq_vector,
+ .register_percpu_irq = ia64_native_register_percpu_irq,
+
+ .resend_irq = ia64_native_resend_irq,
+};
+
+/***************************************************************************
+ * pv_time_ops
+ * time operations
+ */
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static int
+ia64_native_do_steal_accounting(unsigned long *new_itm)
+{
+ return 0;
+}
+
+struct pv_time_ops pv_time_ops = {
+ .do_steal_accounting = ia64_native_do_steal_accounting,
+ .sched_clock = ia64_native_sched_clock,
+};
+
+/***************************************************************************
+ * binary patching
+ * pv_init_ops.patch_bundle
+ */
+
+#ifdef ASM_SUPPORTED
+#define IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg) \
+ __DEFINE_FUNC(get_ ## name, \
+ ";;\n" \
+ "mov r8 = " #reg "\n" \
+ ";;\n")
+
+#define IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg) \
+ __DEFINE_FUNC(set_ ## name, \
+ ";;\n" \
+ "mov " #reg " = r8\n" \
+ ";;\n")
+
+#define IA64_NATIVE_PATCH_DEFINE_REG(name, reg) \
+ IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg); \
+ IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg) \
+
+#define IA64_NATIVE_PATCH_DEFINE_AR(name, reg) \
+ IA64_NATIVE_PATCH_DEFINE_REG(ar_ ## name, ar.reg)
+
+#define IA64_NATIVE_PATCH_DEFINE_CR(name, reg) \
+ IA64_NATIVE_PATCH_DEFINE_REG(cr_ ## name, cr.reg)
+
+
+IA64_NATIVE_PATCH_DEFINE_GET_REG(psr, psr);
+IA64_NATIVE_PATCH_DEFINE_GET_REG(tp, tp);
+
+/* IA64_NATIVE_PATCH_DEFINE_SET_REG(psr_l, psr.l); */
+__DEFINE_FUNC(set_psr_l,
+ ";;\n"
+ "mov psr.l = r8\n"
+#ifdef HAVE_SERIALIZE_DIRECTIVE
+ ".serialize.data\n"
+#endif
+ ";;\n");
+
+IA64_NATIVE_PATCH_DEFINE_REG(gp, gp);
+IA64_NATIVE_PATCH_DEFINE_REG(sp, sp);
+
+IA64_NATIVE_PATCH_DEFINE_REG(kr0, ar0);
+IA64_NATIVE_PATCH_DEFINE_REG(kr1, ar1);
+IA64_NATIVE_PATCH_DEFINE_REG(kr2, ar2);
+IA64_NATIVE_PATCH_DEFINE_REG(kr3, ar3);
+IA64_NATIVE_PATCH_DEFINE_REG(kr4, ar4);
+IA64_NATIVE_PATCH_DEFINE_REG(kr5, ar5);
+IA64_NATIVE_PATCH_DEFINE_REG(kr6, ar6);
+IA64_NATIVE_PATCH_DEFINE_REG(kr7, ar7);
+
+IA64_NATIVE_PATCH_DEFINE_AR(rsc, rsc);
+IA64_NATIVE_PATCH_DEFINE_AR(bsp, bsp);
+IA64_NATIVE_PATCH_DEFINE_AR(bspstore, bspstore);
+IA64_NATIVE_PATCH_DEFINE_AR(rnat, rnat);
+IA64_NATIVE_PATCH_DEFINE_AR(fcr, fcr);
+IA64_NATIVE_PATCH_DEFINE_AR(eflag, eflag);
+IA64_NATIVE_PATCH_DEFINE_AR(csd, csd);
+IA64_NATIVE_PATCH_DEFINE_AR(ssd, ssd);
+IA64_NATIVE_PATCH_DEFINE_REG(ar27, ar27);
+IA64_NATIVE_PATCH_DEFINE_AR(fsr, fsr);
+IA64_NATIVE_PATCH_DEFINE_AR(fir, fir);
+IA64_NATIVE_PATCH_DEFINE_AR(fdr, fdr);
+IA64_NATIVE_PATCH_DEFINE_AR(ccv, ccv);
+IA64_NATIVE_PATCH_DEFINE_AR(unat, unat);
+IA64_NATIVE_PATCH_DEFINE_AR(fpsr, fpsr);
+IA64_NATIVE_PATCH_DEFINE_AR(itc, itc);
+IA64_NATIVE_PATCH_DEFINE_AR(pfs, pfs);
+IA64_NATIVE_PATCH_DEFINE_AR(lc, lc);
+IA64_NATIVE_PATCH_DEFINE_AR(ec, ec);
+
+IA64_NATIVE_PATCH_DEFINE_CR(dcr, dcr);
+IA64_NATIVE_PATCH_DEFINE_CR(itm, itm);
+IA64_NATIVE_PATCH_DEFINE_CR(iva, iva);
+IA64_NATIVE_PATCH_DEFINE_CR(pta, pta);
+IA64_NATIVE_PATCH_DEFINE_CR(ipsr, ipsr);
+IA64_NATIVE_PATCH_DEFINE_CR(isr, isr);
+IA64_NATIVE_PATCH_DEFINE_CR(iip, iip);
+IA64_NATIVE_PATCH_DEFINE_CR(ifa, ifa);
+IA64_NATIVE_PATCH_DEFINE_CR(itir, itir);
+IA64_NATIVE_PATCH_DEFINE_CR(iipa, iipa);
+IA64_NATIVE_PATCH_DEFINE_CR(ifs, ifs);
+IA64_NATIVE_PATCH_DEFINE_CR(iim, iim);
+IA64_NATIVE_PATCH_DEFINE_CR(iha, iha);
+IA64_NATIVE_PATCH_DEFINE_CR(lid, lid);
+IA64_NATIVE_PATCH_DEFINE_CR(ivr, ivr);
+IA64_NATIVE_PATCH_DEFINE_CR(tpr, tpr);
+IA64_NATIVE_PATCH_DEFINE_CR(eoi, eoi);
+IA64_NATIVE_PATCH_DEFINE_CR(irr0, irr0);
+IA64_NATIVE_PATCH_DEFINE_CR(irr1, irr1);
+IA64_NATIVE_PATCH_DEFINE_CR(irr2, irr2);
+IA64_NATIVE_PATCH_DEFINE_CR(irr3, irr3);
+IA64_NATIVE_PATCH_DEFINE_CR(itv, itv);
+IA64_NATIVE_PATCH_DEFINE_CR(pmv, pmv);
+IA64_NATIVE_PATCH_DEFINE_CR(cmcv, cmcv);
+IA64_NATIVE_PATCH_DEFINE_CR(lrr0, lrr0);
+IA64_NATIVE_PATCH_DEFINE_CR(lrr1, lrr1);
+
+static const struct paravirt_patch_bundle_elem ia64_native_patch_bundle_elems[]
+__initdata_or_module =
+{
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM(name, type) \
+ { \
+ (void*)ia64_native_ ## name ## _direct_start, \
+ (void*)ia64_native_ ## name ## _direct_end, \
+ PARAVIRT_PATCH_TYPE_ ## type, \
+ }
+
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(fc, FC),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(thash, THASH),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(get_cpuid, GET_CPUID),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(get_pmd, GET_PMD),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(ptcga, PTCGA),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(get_rr, GET_RR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr, SET_RR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr0_to_rr4, SET_RR0_TO_RR4),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(ssm_i, SSM_I),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(rsm_i, RSM_I),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(get_psr_i, GET_PSR_I),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM(intrin_local_irq_restore,
+ INTRIN_LOCAL_IRQ_RESTORE),
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg) \
+ { \
+ (void*)ia64_native_get_ ## name ## _direct_start, \
+ (void*)ia64_native_get_ ## name ## _direct_end, \
+ PARAVIRT_PATCH_TYPE_GETREG + _IA64_REG_ ## reg, \
+ }
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg) \
+ { \
+ (void*)ia64_native_set_ ## name ## _direct_start, \
+ (void*)ia64_native_set_ ## name ## _direct_end, \
+ PARAVIRT_PATCH_TYPE_SETREG + _IA64_REG_ ## reg, \
+ }
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(name, reg) \
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg), \
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg) \
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(name, reg) \
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar_ ## name, AR_ ## reg)
+
+#define IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(name, reg) \
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(cr_ ## name, CR_ ## reg)
+
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(psr, PSR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(tp, TP),
+
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(psr_l, PSR_L),
+
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(gp, GP),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(sp, SP),
+
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr0, AR_KR0),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr1, AR_KR1),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr2, AR_KR2),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr3, AR_KR3),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr4, AR_KR4),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr5, AR_KR5),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr6, AR_KR6),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr7, AR_KR7),
+
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rsc, RSC),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bsp, BSP),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bspstore, BSPSTORE),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rnat, RNAT),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fcr, FCR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(eflag, EFLAG),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(csd, CSD),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ssd, SSD),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar27, AR_CFLAG),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fsr, FSR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fir, FIR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fdr, FDR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ccv, CCV),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(unat, UNAT),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fpsr, FPSR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(itc, ITC),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(pfs, PFS),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(lc, LC),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ec, EC),
+
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(dcr, DCR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itm, ITM),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iva, IVA),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pta, PTA),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ipsr, IPSR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(isr, ISR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iip, IIP),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifa, IFA),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itir, ITIR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iipa, IIPA),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifs, IFS),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iim, IIM),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iha, IHA),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lid, LID),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ivr, IVR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(tpr, TPR),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(eoi, EOI),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr0, IRR0),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr1, IRR1),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr2, IRR2),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr3, IRR3),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itv, ITV),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pmv, PMV),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(cmcv, CMCV),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr0, LRR0),
+ IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr1, LRR1),
+};
+
+unsigned long __init_or_module
+ia64_native_patch_bundle(void *sbundle, void *ebundle, unsigned long type)
+{
+ const unsigned long nelems = sizeof(ia64_native_patch_bundle_elems) /
+ sizeof(ia64_native_patch_bundle_elems[0]);
+
+ return __paravirt_patch_apply_bundle(sbundle, ebundle, type,
+ ia64_native_patch_bundle_elems,
+ nelems, NULL);
+}
+#endif /* ASM_SUPPORTED */
+
+extern const char ia64_native_switch_to[];
+extern const char ia64_native_leave_syscall[];
+extern const char ia64_native_work_processed_syscall[];
+extern const char ia64_native_leave_kernel[];
+
+const struct paravirt_patch_branch_target ia64_native_branch_target[]
+__initconst = {
+#define PARAVIRT_BR_TARGET(name, type) \
+ { \
+ ia64_native_ ## name, \
+ PARAVIRT_PATCH_TYPE_BR_ ## type, \
+ }
+ PARAVIRT_BR_TARGET(switch_to, SWITCH_TO),
+ PARAVIRT_BR_TARGET(leave_syscall, LEAVE_SYSCALL),
+ PARAVIRT_BR_TARGET(work_processed_syscall, WORK_PROCESSED_SYSCALL),
+ PARAVIRT_BR_TARGET(leave_kernel, LEAVE_KERNEL),
+};
+
+static void __init
+ia64_native_patch_branch(unsigned long tag, unsigned long type)
+{
+ const unsigned long nelem =
+ sizeof(ia64_native_branch_target) /
+ sizeof(ia64_native_branch_target[0]);
+ __paravirt_patch_apply_branch(tag, type,
+ ia64_native_branch_target, nelem);
+}
diff --git a/arch/ia64/kernel/paravirt_inst.h b/arch/ia64/kernel/paravirt_inst.h
new file mode 100644
index 00000000000..1ad7512b5f6
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_inst.h
@@ -0,0 +1,28 @@
+/******************************************************************************
+ * arch/ia64/kernel/paravirt_inst.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifdef __IA64_ASM_PARAVIRTUALIZED_PVCHECK
+#include <asm/native/pvchk_inst.h>
+#else
+#include <asm/native/inst.h>
+#endif
+
diff --git a/arch/ia64/kernel/paravirt_patch.c b/arch/ia64/kernel/paravirt_patch.c
new file mode 100644
index 00000000000..bfdfef1b1ff
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_patch.c
@@ -0,0 +1,514 @@
+/******************************************************************************
+ * linux/arch/ia64/kernel/paravirt_patch.c
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <asm/intrinsics.h>
+#include <asm/kprobes.h>
+#include <asm/paravirt.h>
+#include <asm/paravirt_patch.h>
+
+/*
+ * One instruction slot as read from / written to a bundle.
+ * 'l' is the raw value; 'generic' names the qp (6 bits) and opcode
+ * (4 bits) fields, separated by 31 unnamed bits, for 41 bits in total.
+ * The remaining 23 bits of the 64-bit container are marked reserved.
+ */
+typedef union ia64_inst {
+ struct {
+ unsigned long long qp : 6;
+ unsigned long long : 31;
+ unsigned long long opcode : 4;
+ unsigned long long reserved : 23;
+ } generic;
+ unsigned long long l;
+} ia64_inst_t;
+
+/*
+ * Flush the instruction cache for 'size' bytes starting at 'instr',
+ * stepping one bundle (sizeof(bundle_t)) at a time.
+ *
+ * flush_icache_range() can't be used here: we run before cpu_init(),
+ * which initializes ia64_i_cache_stride_shift, and flush_icache_range()
+ * depends on that value.
+ */
+void __init_or_module
+paravirt_flush_i_cache_range(const void *instr, unsigned long size)
+{
+	extern void paravirt_fc_i(const void *addr);
+	unsigned long offset = 0;
+
+	while (offset < size) {
+		paravirt_fc_i(instr + offset);
+		offset += sizeof(bundle_t);
+	}
+}
+
+/* Return the bundle address encoded in a tag (the tag with its low two bits cleared). */
+bundle_t* __init_or_module
+paravirt_get_bundle(unsigned long tag)
+{
+	const unsigned long slot_bits = 3UL;
+
+	return (bundle_t *)(tag & ~slot_bits);
+}
+
+/* Return the slot number encoded in the low two bits of a tag. */
+unsigned long __init_or_module
+paravirt_get_slot(unsigned long tag)
+{
+	const unsigned long slot_bits = 3UL;
+
+	return tag & slot_bits;
+}
+
+/*
+ * Count the instruction slots from 'stag' through 'etag' inclusive,
+ * at three slots per bundle.
+ */
+unsigned long __init_or_module
+paravirt_get_num_inst(unsigned long stag, unsigned long etag)
+{
+	unsigned long first_slot = paravirt_get_slot(stag);
+	unsigned long last_slot = paravirt_get_slot(etag);
+	long nr_bundles = paravirt_get_bundle(etag) - paravirt_get_bundle(stag);
+
+	return nr_bundles * 3 + last_slot - first_slot + 1;
+}
+
+/*
+ * Return the tag of the slot following 'tag': the next slot in the same
+ * bundle for slots 0 and 1, or slot 0 of the next bundle for slot 2.
+ * Any other slot value is a BUG().
+ */
+unsigned long __init_or_module
+paravirt_get_next_tag(unsigned long tag)
+{
+	unsigned long slot = paravirt_get_slot(tag);
+
+	if (slot < 2)
+		return tag + 1;
+	if (slot == 2)
+		return (unsigned long)(paravirt_get_bundle(tag) + 1);
+
+	BUG();
+	/* NOTREACHED */
+}
+
+/* Fetch the slot 0 instruction of a bundle. */
+ia64_inst_t __init_or_module
+paravirt_read_slot0(const bundle_t *bundle)
+{
+	ia64_inst_t slot = { .l = bundle->quad0.slot0 };
+
+	return slot;
+}
+
+/*
+ * Fetch the slot 1 instruction of a bundle.  Slot 1 is split across the
+ * two quads: slot1_p0 supplies the low 18 bits, slot1_p1 the rest.
+ */
+ia64_inst_t __init_or_module
+paravirt_read_slot1(const bundle_t *bundle)
+{
+	unsigned long long low = bundle->quad0.slot1_p0;
+	unsigned long long high = bundle->quad1.slot1_p1;
+	ia64_inst_t slot = { .l = low | (high << 18UL) };
+
+	return slot;
+}
+
+/* Fetch the slot 2 instruction of a bundle. */
+ia64_inst_t __init_or_module
+paravirt_read_slot2(const bundle_t *bundle)
+{
+	ia64_inst_t slot = { .l = bundle->quad1.slot2 };
+
+	return slot;
+}
+
+/*
+ * Read the instruction a tag refers to, dispatching on the slot number
+ * encoded in the tag.  A slot value outside 0..2 is a BUG().
+ */
+ia64_inst_t __init_or_module
+paravirt_read_inst(unsigned long tag)
+{
+	const bundle_t *where = paravirt_get_bundle(tag);
+	unsigned long slot = paravirt_get_slot(tag);
+
+	if (slot == 0)
+		return paravirt_read_slot0(where);
+	if (slot == 1)
+		return paravirt_read_slot1(where);
+	if (slot == 2)
+		return paravirt_read_slot2(where);
+
+	BUG();
+	/* NOTREACHED */
+}
+
+/* Store an instruction into slot 0 of a bundle (no cache flush). */
+void __init_or_module
+paravirt_write_slot0(bundle_t *bundle, ia64_inst_t inst)
+{
+	const unsigned long long bits = inst.l;
+
+	bundle->quad0.slot0 = bits;
+}
+
+/*
+ * Store an instruction into slot 1 of a bundle (no cache flush).
+ * The low bits go to quad0.slot1_p0 and the bits from 18 upwards to
+ * quad1.slot1_p1; each bit-field keeps only as many bits as it holds.
+ */
+void __init_or_module
+paravirt_write_slot1(bundle_t *bundle, ia64_inst_t inst)
+{
+	const unsigned long long bits = inst.l;
+
+	bundle->quad0.slot1_p0 = bits;
+	bundle->quad1.slot1_p1 = bits >> 18UL;
+}
+
+/* Store an instruction into slot 2 of a bundle (no cache flush). */
+void __init_or_module
+paravirt_write_slot2(bundle_t *bundle, ia64_inst_t inst)
+{
+	const unsigned long long bits = inst.l;
+
+	bundle->quad1.slot2 = bits;
+}
+
+/*
+ * Write 'inst' into the slot a tag refers to, then flush the containing
+ * bundle from the instruction cache.  A slot value outside 0..2 is a
+ * BUG().
+ */
+void __init_or_module
+paravirt_write_inst(unsigned long tag, ia64_inst_t inst)
+{
+	bundle_t *where = paravirt_get_bundle(tag);
+	unsigned long slot = paravirt_get_slot(tag);
+
+	if (slot == 0)
+		paravirt_write_slot0(where, inst);
+	else if (slot == 1)
+		paravirt_write_slot1(where, inst);
+	else if (slot == 2)
+		paravirt_write_slot2(where, inst);
+	else
+		BUG();
+
+	paravirt_flush_i_cache_range(where, sizeof(*where));
+}
+
+/*
+ * for debug: print a bundle's two raw quads, its template field, the raw
+ * slot bit-fields and the three reassembled slot values at KERN_DEBUG.
+ */
+void
+paravirt_print_bundle(const bundle_t *bundle)
+{
+	const unsigned long *raw = (const unsigned long *)bundle;
+	const ia64_inst_t s0 = paravirt_read_slot0(bundle);
+	const ia64_inst_t s1 = paravirt_read_slot1(bundle);
+	const ia64_inst_t s2 = paravirt_read_slot2(bundle);
+
+	printk(KERN_DEBUG "bundle 0x%p 0x%016lx 0x%016lx\n",
+	       bundle, raw[0], raw[1]);
+	printk(KERN_DEBUG "bundle template 0x%x\n", bundle->quad0.template);
+	printk(KERN_DEBUG
+	       "slot0 0x%lx slot1_p0 0x%lx slot1_p1 0x%lx slot2 0x%lx\n",
+	       (unsigned long)bundle->quad0.slot0,
+	       (unsigned long)bundle->quad0.slot1_p0,
+	       (unsigned long)bundle->quad1.slot1_p1,
+	       (unsigned long)bundle->quad1.slot2);
+	printk(KERN_DEBUG
+	       "slot0 0x%016llx slot1 0x%016llx slot2 0x%016llx\n",
+	       s0.l, s1.l, s2.l);
+}
+
+/*
+ * Non-zero when the "noreplace-paravirt" boot option was given; the
+ * paravirt_patch_apply_*() routines then leave the kernel text untouched.
+ *
+ * This is a variable, so it takes the data-section annotation
+ * __initdata_or_module.  __init_or_module is the annotation for
+ * functions and would place this int in an init text section when
+ * modules are disabled.
+ */
+static int noreplace_paravirt __initdata_or_module;
+
+/*
+ * Handler registered below for the "noreplace-paravirt" boot parameter:
+ * sets noreplace_paravirt so the patch-apply routines return early.
+ * The argument string is ignored.
+ */
+static int __init setup_noreplace_paravirt(char *str)
+{
+ noreplace_paravirt = 1;
+ return 1;
+}
+__setup("noreplace-paravirt", setup_noreplace_paravirt);
+
+#ifdef ASM_SUPPORTED
+/*
+ * Overwrite [sbundle, ebundle) with copies of paravirt_nop_bundle.
+ * Both ends must be bundle aligned, otherwise BUG().
+ */
+static void __init_or_module
+fill_nop_bundle(void *sbundle, void *ebundle)
+{
+	extern const char paravirt_nop_bundle[];
+	extern const unsigned long paravirt_nop_bundle_size;
+	void *cursor;
+
+	BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0);
+	BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0);
+
+	for (cursor = sbundle; cursor < ebundle;
+	     cursor += paravirt_nop_bundle_size)
+		memcpy(cursor, paravirt_nop_bundle, paravirt_nop_bundle_size);
+}
+
+/*
+ * helper function
+ *
+ * Look up the first entry of elems[0..nelems) whose type equals 'type'
+ * and, if its replacement code fits into [sbundle, ebundle), copy it to
+ * sbundle.
+ *
+ * @found: optional (may be NULL).  On return *found points at the
+ *         matching entry, or is NULL when no entry has the requested
+ *         type.  It is set even when the entry did not fit.
+ *
+ * Returns the number of bytes copied, or 0 when nothing was patched
+ * (no matching entry, or not enough room).  Both bundle pointers must
+ * be bundle aligned, otherwise BUG().
+ */
+unsigned long __init_or_module
+__paravirt_patch_apply_bundle(void *sbundle, void *ebundle, unsigned long type,
+			      const struct paravirt_patch_bundle_elem *elems,
+			      unsigned long nelems,
+			      const struct paravirt_patch_bundle_elem **found)
+{
+	unsigned long used = 0;
+	unsigned long i;
+
+	BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0);
+	BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0);
+
+	/*
+	 * Reset the caller's result, not our local copy of the pointer:
+	 * assigning NULL to 'found' itself would leave *found untouched
+	 * and make the store below unreachable.
+	 */
+	if (found != NULL)
+		*found = NULL;
+
+	for (i = 0; i < nelems; i++) {
+		const struct paravirt_patch_bundle_elem *p = &elems[i];
+		if (p->type == type) {
+			unsigned long need = p->ebundle - p->sbundle;
+			unsigned long room = ebundle - sbundle;
+
+			if (found != NULL)
+				*found = p;
+
+			if (room < need) {
+				/* no room to replace. skip it */
+				printk(KERN_DEBUG
+				       "the space is too small to put "
+				       "bundles. type %ld need %ld room %ld\n",
+				       type, need, room);
+				break;
+			}
+
+			used = need;
+			memcpy(sbundle, p->sbundle, used);
+			break;
+		}
+	}
+
+	return used;
+}
+
+/*
+ * Run pv_init_ops.patch_bundle over every bundle patch site in
+ * [start, end).  For each site that was patched, the unused tail is
+ * filled with nop bundles and the whole site is flushed from the
+ * instruction cache.  Does nothing when patching is disabled via
+ * noreplace_paravirt or no patch_bundle operation is installed.
+ */
+void __init_or_module
+paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start,
+			    const struct paravirt_patch_site_bundle *end)
+{
+	const struct paravirt_patch_site_bundle *site;
+
+	if (noreplace_paravirt || pv_init_ops.patch_bundle == NULL)
+		return;
+
+	for (site = start; site < end; site++) {
+		unsigned long used =
+			(*pv_init_ops.patch_bundle)(site->sbundle,
+						    site->ebundle,
+						    site->type);
+
+		if (used != 0) {
+			fill_nop_bundle(site->sbundle + used, site->ebundle);
+			paravirt_flush_i_cache_range(site->sbundle,
+						     site->ebundle -
+						     site->sbundle);
+		}
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+/*
+ * Fill the slots from 'stag' up to, but not including, 'etag' with a nop
+ * taken from slot 0 of paravirt_nop_mfi_inst_bundle.
+ *
+ * nop.i, nop.m and nop.f share the same format, but nop.b has a
+ * different one.  nop.b is not supported for now.
+ */
+static void __init_or_module
+fill_nop_inst(unsigned long stag, unsigned long etag)
+{
+	extern const bundle_t paravirt_nop_mfi_inst_bundle[];
+	const ia64_inst_t nop =
+		paravirt_read_slot0(paravirt_nop_mfi_inst_bundle);
+	unsigned long cur = stag;
+
+	while (cur < etag) {
+		paravirt_write_inst(cur, nop);
+		cur = paravirt_get_next_tag(cur);
+	}
+}
+
+/*
+ * Run pv_init_ops.patch_inst over every instruction patch site in
+ * [start, end).  patch_inst returns the tag following what it patched;
+ * a return equal to the site's stag means nothing was done.  Otherwise
+ * the slots from that tag up to etag are filled with nops and every
+ * bundle the site touches is flushed from the instruction cache.
+ * Does nothing when patching is disabled via noreplace_paravirt or no
+ * patch_inst operation is installed.
+ */
+void __init_or_module
+paravirt_patch_apply_inst(const struct paravirt_patch_site_inst *start,
+			  const struct paravirt_patch_site_inst *end)
+{
+	const struct paravirt_patch_site_inst *site;
+
+	if (noreplace_paravirt || pv_init_ops.patch_inst == NULL)
+		return;
+
+	for (site = start; site < end; site++) {
+		unsigned long next = (*pv_init_ops.patch_inst)(site->stag,
+							       site->etag,
+							       site->type);
+
+		if (next != site->stag) {
+			bundle_t *first;
+			bundle_t *past_last;
+
+			fill_nop_inst(next, site->etag);
+			first = paravirt_get_bundle(site->stag);
+			past_last = paravirt_get_bundle(site->etag) + 1;
+			paravirt_flush_i_cache_range(first,
+						     (past_last - first) *
+						     sizeof(bundle_t));
+		}
+	}
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+#endif /* ASM_SUPPORTED */
+
+/* brl.cond.sptk.many <target64> X3 */
+/*
+ * View of the slot that holds the brl opcode.  paravirt_patch_reloc_brl()
+ * stores bit 59 of the 60-bit displacement in 'i' and bits 19:0 in
+ * 'imm20b'.  The named fields add up to 41 bits; 'l' and 'inst' alias
+ * the same storage.
+ */
+typedef union inst_x3_op {
+ ia64_inst_t inst;
+ struct {
+ unsigned long qp: 6;
+ unsigned long btyp: 3;
+ unsigned long unused: 3;
+ unsigned long p: 1;
+ unsigned long imm20b: 20;
+ unsigned long wh: 2;
+ unsigned long d: 1;
+ unsigned long i: 1;
+ unsigned long opcode: 4;
+ };
+ unsigned long l;
+} inst_x3_op_t;
+
+/*
+ * View of the slot that carries the immediate part of a brl.
+ * paravirt_patch_reloc_brl() stores bits 58:20 of the 60-bit
+ * displacement in 'imm39'; the two low bits are unused.
+ */
+typedef union inst_x3_imm {
+ ia64_inst_t inst;
+ struct {
+ unsigned long unused: 2;
+ unsigned long imm39: 39;
+ };
+ unsigned long l;
+} inst_x3_imm_t;
+
+/*
+ * Re-target the brl whose immediate slot is identified by 'tag' so that
+ * it branches to 'target'.
+ *
+ * 'tag' must refer to slot 1 (immediate part); the opcode part is read
+ * from the following slot.  'target' must be bundle aligned.  Either
+ * violation is a BUG().  The displacement is taken relative to the
+ * bundle containing 'tag', in units of 16 bytes, and split into the
+ * i / imm39 / imm20b fields.  Both slots are rewritten with
+ * paravirt_write_inst(), which also flushes the bundle.
+ */
+void __init_or_module
+paravirt_patch_reloc_brl(unsigned long tag, const void *target)
+{
+ unsigned long tag_op = paravirt_get_next_tag(tag);
+ unsigned long tag_imm = tag;
+ bundle_t *bundle = paravirt_get_bundle(tag);
+
+ ia64_inst_t inst_op = paravirt_read_inst(tag_op);
+ ia64_inst_t inst_imm = paravirt_read_inst(tag_imm);
+
+ inst_x3_op_t inst_x3_op = { .l = inst_op.l };
+ inst_x3_imm_t inst_x3_imm = { .l = inst_imm.l };
+
+ /* bundle-relative displacement in 16-byte units */
+ unsigned long imm60 =
+ ((unsigned long)target - (unsigned long)bundle) >> 4;
+
+ BUG_ON(paravirt_get_slot(tag) != 1); /* MLX */
+ BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0);
+
+ /* imm60[59] 1bit */
+ inst_x3_op.i = (imm60 >> 59) & 1;
+ /* imm60[19:0] 20bit */
+ inst_x3_op.imm20b = imm60 & ((1UL << 20) - 1);
+ /* imm60[58:20] 39bit */
+ inst_x3_imm.imm39 = (imm60 >> 20) & ((1UL << 39) - 1);
+
+ inst_op.l = inst_x3_op.l;
+ inst_imm.l = inst_x3_imm.l;
+
+ paravirt_write_inst(tag_op, inst_op);
+ paravirt_write_inst(tag_imm, inst_imm);
+}
+
+/* br.cond.sptk.many <target25> B1 */
+/*
+ * View of a slot holding an IP-relative br.  paravirt_patch_reloc_br()
+ * stores the displacement (in 16-byte units) in 'imm20b' and its sign
+ * in 's'.  The named fields add up to 41 bits; 'l' and 'inst' alias the
+ * same storage.
+ */
+typedef union inst_b1 {
+ ia64_inst_t inst;
+ struct {
+ unsigned long qp: 6;
+ unsigned long btype: 3;
+ unsigned long unused: 3;
+ unsigned long p: 1;
+ unsigned long imm20b: 20;
+ unsigned long wh: 2;
+ unsigned long d: 1;
+ unsigned long s: 1;
+ unsigned long opcode: 4;
+ };
+ unsigned long l;
+} inst_b1_t;
+
+/*
+ * Re-target the br instruction identified by 'tag' so that it branches
+ * to 'target', which must be bundle aligned (BUG() otherwise).
+ *
+ * The displacement is taken relative to the bundle containing 'tag':
+ * its top bit becomes the sign field 's' and the displacement shifted
+ * right by 4 is stored in 'imm20b' (the bit-field keeps the low 20
+ * bits).  The slot is rewritten with paravirt_write_inst().
+ */
+void __init
+paravirt_patch_reloc_br(unsigned long tag, const void *target)
+{
+	bundle_t *bundle = paravirt_get_bundle(tag);
+	ia64_inst_t inst = paravirt_read_inst(tag);
+	unsigned long disp = (unsigned long)target - (unsigned long)bundle;
+	inst_b1_t b1;
+
+	BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0);
+
+	b1.l = inst.l;
+	b1.s = (disp >> 63) & 1;
+	b1.imm20b = disp >> 4;
+	inst.l = b1.l;
+
+	paravirt_write_inst(tag, inst);
+}
+
+/*
+ * Find the first of entries[0..nr_entries) whose type equals 'type' and
+ * re-target the br at 'tag' to that entry's address.  Nothing is done
+ * when no entry matches.
+ */
+void __init
+__paravirt_patch_apply_branch(
+	unsigned long tag, unsigned long type,
+	const struct paravirt_patch_branch_target *entries,
+	unsigned int nr_entries)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < nr_entries; idx++) {
+		if (entries[idx].type != type)
+			continue;
+
+		paravirt_patch_reloc_br(tag, entries[idx].entry);
+		return;
+	}
+}
+
+/*
+ * Run pv_init_ops.patch_branch over every branch patch site in
+ * [start, end), then synchronize the instruction stream.  Does nothing
+ * when patching is disabled via noreplace_paravirt or no patch_branch
+ * operation is installed.
+ */
+static void __init
+paravirt_patch_apply_branch(const struct paravirt_patch_site_branch *start,
+			    const struct paravirt_patch_site_branch *end)
+{
+	const struct paravirt_patch_site_branch *site = start;
+
+	if (noreplace_paravirt || pv_init_ops.patch_branch == NULL)
+		return;
+
+	while (site < end) {
+		(*pv_init_ops.patch_branch)(site->tag, site->type);
+		site++;
+	}
+
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+/*
+ * Apply all three kinds of paravirt patches, in order: bundles,
+ * instructions, branches.  Each patch-site table is delimited by a
+ * __start_* / __stop_* symbol pair.
+ */
+void __init
+paravirt_patch_apply(void)
+{
+	extern const char __start_paravirt_bundles[];
+	extern const char __stop_paravirt_bundles[];
+	extern const char __start_paravirt_insts[];
+	extern const char __stop_paravirt_insts[];
+	extern const char __start_paravirt_branches[];
+	extern const char __stop_paravirt_branches[];
+
+	const struct paravirt_patch_site_bundle *bundle_first =
+		(const struct paravirt_patch_site_bundle *)
+		__start_paravirt_bundles;
+	const struct paravirt_patch_site_bundle *bundle_end =
+		(const struct paravirt_patch_site_bundle *)
+		__stop_paravirt_bundles;
+	const struct paravirt_patch_site_inst *inst_first =
+		(const struct paravirt_patch_site_inst *)
+		__start_paravirt_insts;
+	const struct paravirt_patch_site_inst *inst_end =
+		(const struct paravirt_patch_site_inst *)
+		__stop_paravirt_insts;
+	const struct paravirt_patch_site_branch *branch_first =
+		(const struct paravirt_patch_site_branch *)
+		__start_paravirt_branches;
+	const struct paravirt_patch_site_branch *branch_end =
+		(const struct paravirt_patch_site_branch *)
+		__stop_paravirt_branches;
+
+	paravirt_patch_apply_bundle(bundle_first, bundle_end);
+	paravirt_patch_apply_inst(inst_first, inst_end);
+	paravirt_patch_apply_branch(branch_first, branch_end);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "linux"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/arch/ia64/kernel/paravirt_patchlist.c b/arch/ia64/kernel/paravirt_patchlist.c
new file mode 100644
index 00000000000..0a70720662e
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_patchlist.c
@@ -0,0 +1,81 @@
+/******************************************************************************
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/bug.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <asm/paravirt.h>
+
+/*
+ * DECLARE(name) declares the pair of external symbols
+ * __ia64_native_start_gate_<name>_patchlist[] and
+ * __ia64_native_end_gate_<name>_patchlist[] that bracket one native
+ * gate patchlist.  The trailing ';' comes from the use site.
+ */
+#define DECLARE(name) \
+ extern unsigned long \
+ __ia64_native_start_gate_##name##_patchlist[]; \
+ extern unsigned long \
+ __ia64_native_end_gate_##name##_patchlist[]
+
+DECLARE(fsyscall);
+DECLARE(brl_fsys_bubble_down);
+DECLARE(vtop);
+DECLARE(mckinley_e9);
+
+extern unsigned long __start_gate_section[];
+
+/*
+ * ASSIGN(name) expands to the two designated initializers
+ * .start_<name>_patchlist / .end_<name>_patchlist, set to the addresses
+ * of the matching native patchlist symbols.
+ */
+#define ASSIGN(name) \
+ .start_##name##_patchlist = \
+ (unsigned long)__ia64_native_start_gate_##name##_patchlist, \
+ .end_##name##_patchlist = \
+ (unsigned long)__ia64_native_end_gate_##name##_patchlist
+
+/*
+ * Gate patch data, initialized here to the native patchlists and the
+ * native gate section.  It is read through paravirt_get_gate_patchlist()
+ * and paravirt_get_gate_section().
+ */
+struct pv_patchdata pv_patchdata __initdata = {
+ ASSIGN(fsyscall),
+ ASSIGN(brl_fsys_bubble_down),
+ ASSIGN(vtop),
+ ASSIGN(mckinley_e9),
+
+ .gate_section = (void*)__start_gate_section,
+};
+
+
+/*
+ * Return the start or end address of the gate patchlist selected by
+ * 'type', as recorded in pv_patchdata.  An unknown type is a BUG().
+ */
+unsigned long __init
+paravirt_get_gate_patchlist(enum pv_gate_patchlist type)
+{
+
+/* CASE(NAME, name) emits the PV_GATE_START_/PV_GATE_END_ case pair. */
+#define CASE(NAME, name) \
+ case PV_GATE_START_##NAME: \
+ return pv_patchdata.start_##name##_patchlist; \
+ case PV_GATE_END_##NAME: \
+ return pv_patchdata.end_##name##_patchlist; \
+
+ switch (type) {
+ CASE(FSYSCALL, fsyscall);
+ CASE(BRL_FSYS_BUBBLE_DOWN, brl_fsys_bubble_down);
+ CASE(VTOP, vtop);
+ CASE(MCKINLEY_E9, mckinley_e9);
+ default:
+ BUG();
+ break;
+ }
+ return 0;
+}
+
+/* Return the gate section pointer recorded in pv_patchdata. */
+void * __init
+paravirt_get_gate_section(void)
+{
+ return pv_patchdata.gate_section;
+}
diff --git a/arch/ia64/kernel/paravirt_patchlist.h b/arch/ia64/kernel/paravirt_patchlist.h
new file mode 100644
index 00000000000..67cffc3643a
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_patchlist.h
@@ -0,0 +1,24 @@
+/******************************************************************************
+ * linux/arch/ia64/kernel/paravirt_patchlist.h
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <asm/native/patchlist.h>
+
diff --git a/arch/ia64/kernel/paravirtentry.S b/arch/ia64/kernel/paravirtentry.S
new file mode 100644
index 00000000000..92d880c4d3d
--- /dev/null
+++ b/arch/ia64/kernel/paravirtentry.S
@@ -0,0 +1,121 @@
+/******************************************************************************
+ * linux/arch/ia64/kernel/paravirtentry.S
+ *
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <asm/asmmacro.h>
+#include <asm/asm-offsets.h>
+#include <asm/paravirt_privop.h>
+#include <asm/paravirt_patch.h>
+#include "entry.h"
+
+/*
+ * DATA8(sym, init_value): define the global, 8-byte aligned symbol 'sym'
+ * in .data..read_mostly, holding one data8 initialized to 'init_value'.
+ */
+#define DATA8(sym, init_value) \
+ .pushsection .data..read_mostly ; \
+ .align 8 ; \
+ .global sym ; \
+ sym: ; \
+ data8 init_value ; \
+ .popsection
+
+/*
+ * BRANCH(targ, reg, breg, type): indirect branch through the 8-byte
+ * pointer stored at 'targ'.  'reg' is loaded with the address of 'targ',
+ * then with the value stored there, which is moved to 'breg' and
+ * branched to.  The sequence starts with a PARAVIRT_PATCH_SITE_BR entry
+ * of type PARAVIRT_PATCH_TYPE_BR_<type>.
+ */
+#define BRANCH(targ, reg, breg, type) \
+ PARAVIRT_PATCH_SITE_BR(PARAVIRT_PATCH_TYPE_BR_ ## type) ; \
+ ;; \
+ movl reg=targ ; \
+ ;; \
+ ld8 reg=[reg] ; \
+ ;; \
+ mov breg=reg ; \
+ br.cond.sptk.many breg
+
+/*
+ * BRANCH_PROC(sym, reg, breg, type): define the data word
+ * paravirt_<sym>_targ, initialized to ia64_native_<sym>, and the global
+ * entry paravirt_<sym> that branches through it using BRANCH().
+ */
+#define BRANCH_PROC(sym, reg, breg, type) \
+ DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \
+ GLOBAL_ENTRY(paravirt_ ## sym) ; \
+ BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ; \
+ END(paravirt_ ## sym)
+
+/*
+ * BRANCH_PROC_UNWINFO(sym, reg, breg, type): same as BRANCH_PROC(), but
+ * emits PT_REGS_UNWIND_INFO(0) at the start of the entry.
+ */
+#define BRANCH_PROC_UNWINFO(sym, reg, breg, type) \
+ DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \
+ GLOBAL_ENTRY(paravirt_ ## sym) ; \
+ PT_REGS_UNWIND_INFO(0) ; \
+ BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ; \
+ END(paravirt_ ## sym)
+
+
+BRANCH_PROC(switch_to, r22, b7, SWITCH_TO)
+BRANCH_PROC_UNWINFO(leave_syscall, r22, b7, LEAVE_SYSCALL)
+BRANCH_PROC(work_processed_syscall, r2, b7, WORK_PROCESSED_SYSCALL)
+BRANCH_PROC_UNWINFO(leave_kernel, r22, b7, LEAVE_KERNEL)
+
+
+#ifdef CONFIG_MODULES
+#define __INIT_OR_MODULE .text
+#define __INITDATA_OR_MODULE .data
+#else
+#define __INIT_OR_MODULE __INIT
+#define __INITDATA_OR_MODULE __INITDATA
+#endif /* CONFIG_MODULES */
+
+ /*
+ * paravirt_fc_i: issue fc.i on the address in r32 and return.
+ * r32 is presumably the first C argument (addr) -- confirm against
+ * the calling convention.
+ */
+ __INIT_OR_MODULE
+ GLOBAL_ENTRY(paravirt_fc_i)
+ fc.i r32
+ br.ret.sptk.many rp
+ END(paravirt_fc_i)
+ __FINIT
+
+ /* A 32-byte aligned bundle made of three nop.b instructions. */
+ __INIT_OR_MODULE
+ .align 32
+ GLOBAL_ENTRY(paravirt_nop_b_inst_bundle)
+ {
+ nop.b 0
+ nop.b 0
+ nop.b 0
+ }
+ END(paravirt_nop_b_inst_bundle)
+ __FINIT
+
+ /* NOTE: nop.[mfi] has same format */
+ /* A bundle holding nop.m, nop.f and nop.i, in that order. */
+ __INIT_OR_MODULE
+ GLOBAL_ENTRY(paravirt_nop_mfi_inst_bundle)
+ {
+ nop.m 0
+ nop.f 0
+ nop.i 0
+ }
+ END(paravirt_nop_mfi_inst_bundle)
+ __FINIT
+
+ /*
+ * A bundle of three plain nops.  The start/end labels bracket it so
+ * that its size can be computed for paravirt_nop_bundle_size.
+ */
+ __INIT_OR_MODULE
+ GLOBAL_ENTRY(paravirt_nop_bundle)
+paravirt_nop_bundle_start:
+ {
+ nop 0
+ nop 0
+ nop 0
+ }
+paravirt_nop_bundle_end:
+ END(paravirt_nop_bundle)
+ __FINIT
+
+ /* Size in bytes of paravirt_nop_bundle (end label minus start label). */
+ __INITDATA_OR_MODULE
+ .align 8
+ .global paravirt_nop_bundle_size
+paravirt_nop_bundle_size:
+ data8 paravirt_nop_bundle_end - paravirt_nop_bundle_start
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
index e796e29f8e1..1cf09179371 100644
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@@ -7,10 +7,10 @@
#include <linux/init.h>
#include <linux/string.h>
+#include <asm/paravirt.h>
#include <asm/patch.h>
#include <asm/processor.h>
#include <asm/sections.h>
-#include <asm/system.h>
#include <asm/unistd.h>
/*
@@ -115,6 +115,29 @@ ia64_patch_vtop (unsigned long start, unsigned long end)
ia64_srlz_i();
}
+/*
+ * Disable the RSE workaround by turning the conditional branch
+ * that we tagged in each place the workaround was used into an
+ * unconditional branch.
+ *
+ * [start, end) is walked as an array of s32 entries; each entry is an
+ * offset from its own address to the tagged instruction.
+ */
+void __init
+ia64_patch_rse (unsigned long start, unsigned long end)
+{
+ s32 *offp = (s32 *) start;
+ u64 ip, *b;
+
+ while (offp < (s32 *) end) {
+ ip = (u64) offp + *offp;
+
+ /* 16-byte aligned start of the bundle containing ip */
+ b = (u64 *)(ip & -16);
+ /*
+ * Clear bits 23..27 of the bundle's second quadword.
+ * NOTE(review): presumably the branch's predicate field --
+ * confirm against the bundle layout.
+ */
+ b[1] &= ~0xf800000L;
+ ia64_fc((void *) ip);
+ ++offp;
+ }
+ ia64_sync_i();
+ ia64_srlz_i();
+}
+
void __init
ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
{
@@ -129,19 +152,16 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
first_time = 0;
if (need_workaround)
printk(KERN_INFO "Leaving McKinley Errata 9 workaround enabled\n");
- else
- printk(KERN_INFO "McKinley Errata 9 workaround not needed; "
- "disabling it\n");
}
if (need_workaround)
return;
while (offp < (s32 *) end) {
wp = (u64 *) ia64_imva((char *) offp + *offp);
- wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
- wp[1] = 0x0004000000000200UL;
- wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
- wp[3] = 0x0084006880000200UL;
+ wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
+ wp[1] = 0x0084006880000200UL;
+ wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
+ wp[3] = 0x0004000000000200UL;
ia64_fc(wp); ia64_fc(wp + 2);
++offp;
}
@@ -149,16 +169,35 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
ia64_srlz_i();
}
+extern unsigned long ia64_native_fsyscall_table[NR_syscalls];
+extern char ia64_native_fsys_bubble_down[];
+/*
+ * fsyscall table and fsys_bubble_down entry used by the gate patching
+ * code, initialized here to the native implementations.
+ */
+struct pv_fsys_data pv_fsys_data __initdata = {
+ .fsyscall_table = (unsigned long *)ia64_native_fsyscall_table,
+ .fsys_bubble_down = (void *)ia64_native_fsys_bubble_down,
+};
+
+/* Return the fsyscall table recorded in pv_fsys_data. */
+unsigned long * __init
+paravirt_get_fsyscall_table(void)
+{
+ return pv_fsys_data.fsyscall_table;
+}
+
+/* Return the fsys_bubble_down address recorded in pv_fsys_data. */
+char * __init
+paravirt_get_fsys_bubble_down(void)
+{
+ return pv_fsys_data.fsys_bubble_down;
+}
+
static void __init
patch_fsyscall_table (unsigned long start, unsigned long end)
{
- extern unsigned long fsyscall_table[NR_syscalls];
+ u64 fsyscall_table = (u64)paravirt_get_fsyscall_table();
s32 *offp = (s32 *) start;
u64 ip;
while (offp < (s32 *) end) {
ip = (u64) ia64_imva((char *) offp + *offp);
- ia64_patch_imm64(ip, (u64) fsyscall_table);
+ ia64_patch_imm64(ip, fsyscall_table);
ia64_fc((void *) ip);
++offp;
}
@@ -169,7 +208,7 @@ patch_fsyscall_table (unsigned long start, unsigned long end)
static void __init
patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
{
- extern char fsys_bubble_down[];
+ u64 fsys_bubble_down = (u64)paravirt_get_fsys_bubble_down();
s32 *offp = (s32 *) start;
u64 ip;
@@ -187,13 +226,13 @@ patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
void __init
ia64_patch_gate (void)
{
-# define START(name) ((unsigned long) __start_gate_##name##_patchlist)
-# define END(name) ((unsigned long)__end_gate_##name##_patchlist)
+# define START(name) paravirt_get_gate_patchlist(PV_GATE_START_##name)
+# define END(name) paravirt_get_gate_patchlist(PV_GATE_END_##name)
- patch_fsyscall_table(START(fsyscall), END(fsyscall));
- patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down));
- ia64_patch_vtop(START(vtop), END(vtop));
- ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));
+ patch_fsyscall_table(START(FSYSCALL), END(FSYSCALL));
+ patch_brl_fsys_bubble_down(START(BRL_FSYS_BUBBLE_DOWN), END(BRL_FSYS_BUBBLE_DOWN));
+ ia64_patch_vtop(START(VTOP), END(VTOP));
+ ia64_patch_mckinley_e9(START(MCKINLEY_E9), END(MCKINLEY_E9));
}
void ia64_patch_phys_stack_reg(unsigned long val)
@@ -209,7 +248,7 @@ void ia64_patch_phys_stack_reg(unsigned long val)
while (offp < end) {
ip = (u64) offp + *offp;
ia64_patch(ip, mask, imm);
- ia64_fc(ip);
+ ia64_fc((void *)ip);
++offp;
}
ia64_sync_i();
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
new file mode 100644
index 00000000000..992c1098c52
--- /dev/null
+++ b/arch/ia64/kernel/pci-dma.c
@@ -0,0 +1,110 @@
+/*
+ * Dynamic DMA mapping support.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/dmar.h>
+#include <asm/iommu.h>
+#include <asm/machvec.h>
+#include <linux/dma-mapping.h>
+
+
+#ifdef CONFIG_INTEL_IOMMU
+
+#include <linux/kernel.h>
+
+#include <asm/page.h>
+
+dma_addr_t bad_dma_address __read_mostly;
+EXPORT_SYMBOL(bad_dma_address);
+
+static int iommu_sac_force __read_mostly;
+
+int no_iommu __read_mostly;
+#ifdef CONFIG_IOMMU_DEBUG
+int force_iommu __read_mostly = 1;
+#else
+int force_iommu __read_mostly;
+#endif
+
+int iommu_pass_through;
+
+extern struct dma_map_ops intel_dma_ops;
+
+/*
+ * Initcall: initialize the Intel IOMMU when one was detected.
+ * Always returns 0.
+ */
+static int __init pci_iommu_init(void)
+{
+	if (!iommu_detected)
+		return 0;
+
+	intel_iommu_init();
+	return 0;
+}
+
+/* Must execute after PCI subsystem */
+fs_initcall(pci_iommu_init);
+
+/* Intentionally empty: there is nothing to shut down here. */
+void pci_iommu_shutdown(void)
+{
+}
+
+/* Intentionally empty: no DMA setup is done at this point. */
+void __init
+iommu_dma_init(void)
+{
+}
+
+/*
+ * Report whether DMA with the given address mask is supported for 'dev'.
+ * Returns 0 for masks narrower than 24 bits, and also for masks of
+ * 40 bits or more when iommu_sac_force is set; returns 1 otherwise.
+ */
+int iommu_dma_supported(struct device *dev, u64 mask)
+{
+ /* Copied from i386. Doesn't make much sense, because it will
+ only work for pci_alloc_coherent.
+ The caller just has to use GFP_DMA in this case. */
+ if (mask < DMA_BIT_MASK(24))
+ return 0;
+
+ /* Tell the device to use SAC when IOMMU force is on. This
+ allows the driver to use cheaper accesses in some cases.
+
+ Problem with this is that if we overflow the IOMMU area and
+ return DAC as fallback address the device may not handle it
+ correctly.
+
+ As a special case some controllers have a 39bit address
+ mode that is as efficient as 32bit (aic79xx). Don't force
+ SAC for these. Assume all masks <= 40 bits are of this
+ type. Normally this doesn't make any difference, but gives
+ more gentle handling of IOMMU overflow. */
+ /* NOTE(review): the test below also forces SAC for a mask of exactly
+ 40 bits, while the comment above says "<= 40 bits" -- confirm
+ which one is intended. */
+ if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) {
+ dev_info(dev, "Force SAC with mask %llx\n", mask);
+ return 0;
+ }
+
+ return 1;
+}
+EXPORT_SYMBOL(iommu_dma_supported);
+
+/*
+ * Install intel_dma_ops as the DMA operations, replacing its sync
+ * callbacks with the machvec versions and its dma_supported hook with
+ * iommu_dma_supported().  Then probe for an Intel IOMMU and, with
+ * CONFIG_SWIOTLB, call pci_swiotlb_init().
+ */
+void __init pci_iommu_alloc(void)
+{
+ dma_ops = &intel_dma_ops;
+
+ dma_ops->sync_single_for_cpu = machvec_dma_sync_single;
+ dma_ops->sync_sg_for_cpu = machvec_dma_sync_sg;
+ dma_ops->sync_single_for_device = machvec_dma_sync_single;
+ dma_ops->sync_sg_for_device = machvec_dma_sync_sg;
+ dma_ops->dma_supported = iommu_dma_supported;
+
+ /*
+ * The order of these functions is important for
+ * fall-back/fail-over reasons
+ */
+ detect_intel_iommu();
+
+#ifdef CONFIG_SWIOTLB
+ pci_swiotlb_init();
+#endif
+}
+
+#endif
diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c
new file mode 100644
index 00000000000..939260aeac9
--- /dev/null
+++ b/arch/ia64/kernel/pci-swiotlb.c
@@ -0,0 +1,67 @@
+/* Glue code to lib/swiotlb.c */
+
+#include <linux/pci.h>
+#include <linux/gfp.h>
+#include <linux/cache.h>
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/swiotlb.h>
+#include <asm/dma.h>
+#include <asm/iommu.h>
+#include <asm/machvec.h>
+
+int swiotlb __read_mostly;
+EXPORT_SYMBOL(swiotlb);
+
+/*
+ * .alloc hook for swiotlb_dma_ops: adds GFP_DMA to the allocation flags
+ * unless the device's coherent DMA mask is the full 64 bits, then
+ * delegates to swiotlb_alloc_coherent().  'attrs' is not used.
+ */
+static void *ia64_swiotlb_alloc_coherent(struct device *dev, size_t size,
+					 dma_addr_t *dma_handle, gfp_t gfp,
+					 struct dma_attrs *attrs)
+{
+	gfp_t flags = gfp;
+
+	if (dev->coherent_dma_mask != DMA_BIT_MASK(64))
+		flags |= GFP_DMA;
+
+	return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
+}
+
+/*
+ * .free hook for swiotlb_dma_ops: delegates to swiotlb_free_coherent().
+ * 'attrs' is not used.
+ */
+static void ia64_swiotlb_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_addr,
+ struct dma_attrs *attrs)
+{
+ swiotlb_free_coherent(dev, size, vaddr, dma_addr);
+}
+
+/*
+ * DMA operations backed by swiotlb.  Allocation and free go through the
+ * ia64 wrappers above; every other hook is the swiotlb function itself.
+ */
+struct dma_map_ops swiotlb_dma_ops = {
+ .alloc = ia64_swiotlb_alloc_coherent,
+ .free = ia64_swiotlb_free_coherent,
+ .map_page = swiotlb_map_page,
+ .unmap_page = swiotlb_unmap_page,
+ .map_sg = swiotlb_map_sg_attrs,
+ .unmap_sg = swiotlb_unmap_sg_attrs,
+ .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+ .sync_single_for_device = swiotlb_sync_single_for_device,
+ .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+ .sync_sg_for_device = swiotlb_sync_sg_for_device,
+ .dma_supported = swiotlb_dma_supported,
+ .mapping_error = swiotlb_dma_mapping_error,
+};
+
+/* Select swiotlb_dma_ops as the DMA operations and initialize swiotlb. */
+void __init swiotlb_dma_init(void)
+{
+ dma_ops = &swiotlb_dma_ops;
+ swiotlb_init(1);
+}
+
+/*
+ * Fall back to swiotlb when no IOMMU was detected.  On generic kernels
+ * (CONFIG_IA64_GENERIC) this re-initializes the machine vector to "dig",
+ * initializes swiotlb and installs swiotlb_dma_ops; otherwise it panics.
+ * Does nothing when an IOMMU was detected.
+ */
+void __init pci_swiotlb_init(void)
+{
+	if (iommu_detected)
+		return;
+
+#ifdef CONFIG_IA64_GENERIC
+	swiotlb = 1;
+	printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n");
+	machvec_init("dig");
+	swiotlb_init(1);
+	dma_ops = &swiotlb_dma_ops;
+#else
+	panic("Unable to find Intel IOMMU");
+#endif
+}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 14b8e5a6222..5845ffea67c 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -40,6 +40,9 @@
#include <linux/capability.h>
#include <linux/rcupdate.h>
#include <linux/completion.h>
+#include <linux/tracehook.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
#include <asm/errno.h>
#include <asm/intrinsics.h>
@@ -47,7 +50,6 @@
#include <asm/perfmon.h>
#include <asm/processor.h>
#include <asm/signal.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/delay.h>
@@ -158,14 +160,14 @@
*/
#define PROTECT_CTX(c, f) \
do { \
- DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \
+ DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, task_pid_nr(current))); \
spin_lock_irqsave(&(c)->ctx_lock, f); \
- DPRINT(("spinlocked ctx %p by [%d]\n", c, current->pid)); \
+ DPRINT(("spinlocked ctx %p by [%d]\n", c, task_pid_nr(current))); \
} while(0)
#define UNPROTECT_CTX(c, f) \
do { \
- DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \
+ DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, task_pid_nr(current))); \
spin_unlock_irqrestore(&(c)->ctx_lock, f); \
} while(0)
@@ -227,12 +229,12 @@
#ifdef PFM_DEBUGGING
#define DPRINT(a) \
do { \
- if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
+ if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
} while (0)
#define DPRINT_ovfl(a) \
do { \
- if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
+ if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
} while (0)
#endif
@@ -311,7 +313,7 @@ typedef struct pfm_context {
unsigned long th_pmcs[PFM_NUM_PMC_REGS]; /* PMC thread save state */
unsigned long th_pmds[PFM_NUM_PMD_REGS]; /* PMD thread save state */
- u64 ctx_saved_psr_up; /* only contains psr.up value */
+ unsigned long ctx_saved_psr_up; /* only contains psr.up value */
unsigned long ctx_last_activation; /* context last activation number for last_cpu */
unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */
@@ -519,55 +521,49 @@ static pmu_config_t *pmu_conf;
pfm_sysctl_t pfm_sysctl;
EXPORT_SYMBOL(pfm_sysctl);
-static ctl_table pfm_ctl_table[]={
+static struct ctl_table pfm_ctl_table[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "debug",
.data = &pfm_sysctl.debug,
.maxlen = sizeof(int),
.mode = 0666,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "debug_ovfl",
.data = &pfm_sysctl.debug_ovfl,
.maxlen = sizeof(int),
.mode = 0666,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "fastctxsw",
.data = &pfm_sysctl.fastctxsw,
.maxlen = sizeof(int),
.mode = 0600,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "expert_mode",
.data = &pfm_sysctl.expert_mode,
.maxlen = sizeof(int),
.mode = 0600,
- .proc_handler = &proc_dointvec,
+ .proc_handler = proc_dointvec,
},
{}
};
-static ctl_table pfm_sysctl_dir[] = {
+static struct ctl_table pfm_sysctl_dir[] = {
{
- .ctl_name = CTL_UNNUMBERED,
.procname = "perfmon",
- .mode = 0755,
+ .mode = 0555,
.child = pfm_ctl_table,
},
{}
};
-static ctl_table pfm_sysctl_root[] = {
+static struct ctl_table pfm_sysctl_root[] = {
{
- .ctl_name = CTL_KERN,
.procname = "kernel",
- .mode = 0755,
+ .mode = 0555,
.child = pfm_sysctl_dir,
},
{}
@@ -586,21 +582,6 @@ pfm_put_task(struct task_struct *task)
}
static inline void
-pfm_set_task_notify(struct task_struct *task)
-{
- struct thread_info *info;
-
- info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE);
- set_bit(TIF_PERFMON_WORK, &info->flags);
-}
-
-static inline void
-pfm_clear_task_notify(void)
-{
- clear_thread_flag(TIF_PERFMON_WORK);
-}
-
-static inline void
pfm_reserve_page(unsigned long a)
{
SetPageReserved(vmalloc_to_page((void *)a));
@@ -624,31 +605,22 @@ pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
spin_unlock(&(x)->ctx_lock);
}
-static inline unsigned int
-pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct)
-{
- return do_munmap(mm, addr, len);
-}
-
-static inline unsigned long
-pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec)
-{
- return get_unmapped_area(file, addr, len, pgoff, flags);
-}
-
+/* forward declaration */
+static const struct dentry_operations pfmfs_dentry_operations;
-static int
-pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data,
- struct vfsmount *mnt)
+static struct dentry *
+pfmfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
{
- return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt);
+ return mount_pseudo(fs_type, "pfm:", NULL, &pfmfs_dentry_operations,
+ PFMFS_MAGIC);
}
static struct file_system_type pfm_fs_type = {
.name = "pfmfs",
- .get_sb = pfmfs_get_sb,
+ .mount = pfmfs_mount,
.kill_sb = kill_anon_super,
};
+MODULE_ALIAS_FS("pfmfs");
DEFINE_PER_CPU(unsigned long, pfm_syst_info);
DEFINE_PER_CPU(struct task_struct *, pmu_owner);
@@ -849,10 +821,9 @@ pfm_rvmalloc(unsigned long size)
unsigned long addr;
size = PAGE_ALIGN(size);
- mem = vmalloc(size);
+ mem = vzalloc(size);
if (mem) {
//printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
- memset(mem, 0, size);
addr = (unsigned long)mem;
while (size > 0) {
pfm_reserve_page(addr);
@@ -882,7 +853,7 @@ pfm_rvfree(void *mem, unsigned long size)
}
static pfm_context_t *
-pfm_context_alloc(void)
+pfm_context_alloc(int ctx_flags)
{
pfm_context_t *ctx;
@@ -893,6 +864,46 @@ pfm_context_alloc(void)
ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL);
if (ctx) {
DPRINT(("alloc ctx @%p\n", ctx));
+
+ /*
+ * init context protection lock
+ */
+ spin_lock_init(&ctx->ctx_lock);
+
+ /*
+ * context is unloaded
+ */
+ ctx->ctx_state = PFM_CTX_UNLOADED;
+
+ /*
+ * initialization of context's flags
+ */
+ ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
+ ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
+ ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
+ /*
+ * will move to set properties
+ * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
+ */
+
+ /*
+ * init restart semaphore to locked
+ */
+ init_completion(&ctx->ctx_restart_done);
+
+ /*
+ * activation is used in SMP only
+ */
+ ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
+ SET_LAST_CPU(ctx, -1);
+
+ /*
+ * initialize notification message queue
+ */
+ ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
+ init_waitqueue_head(&ctx->ctx_msgq_wait);
+ init_waitqueue_head(&ctx->ctx_zombieq);
+
}
return ctx;
}
@@ -913,7 +924,7 @@ pfm_mask_monitoring(struct task_struct *task)
unsigned long mask, val, ovfl_mask;
int i;
- DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid));
+ DPRINT_ovfl(("masking monitoring for [%d]\n", task_pid_nr(task)));
ovfl_mask = pmu_conf->ovfl_val;
/*
@@ -992,12 +1003,12 @@ pfm_restore_monitoring(struct task_struct *task)
ovfl_mask = pmu_conf->ovfl_val;
if (task != current) {
- printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid);
+ printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task_pid_nr(task), task_pid_nr(current));
return;
}
if (ctx->ctx_state != PFM_CTX_MASKED) {
printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
- task->pid, current->pid, ctx->ctx_state);
+ task_pid_nr(task), task_pid_nr(current), ctx->ctx_state);
return;
}
psr = pfm_get_psr();
@@ -1051,7 +1062,8 @@ pfm_restore_monitoring(struct task_struct *task)
if ((mask & 0x1) == 0UL) continue;
ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
ia64_set_pmc(i, ctx->th_pmcs[i]);
- DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, ctx->th_pmcs[i]));
+ DPRINT(("[%d] pmc[%d]=0x%lx\n",
+ task_pid_nr(task), i, ctx->th_pmcs[i]));
}
ia64_srlz_d();
@@ -1311,8 +1323,6 @@ out:
}
EXPORT_SYMBOL(pfm_unregister_buffer_fmt);
-extern void update_pal_halt_status(int);
-
static int
pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
{
@@ -1360,9 +1370,9 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
cpu));
/*
- * disable default_idle() to go to PAL_HALT
+ * Force idle() into poll mode
*/
- update_pal_halt_status(0);
+ cpu_idle_poll_ctrl(true);
UNLOCK_PFS(flags);
@@ -1370,7 +1380,7 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
error_conflict:
DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
- pfm_sessions.pfs_sys_session[cpu]->pid,
+ task_pid_nr(pfm_sessions.pfs_sys_session[cpu]),
cpu));
abort:
UNLOCK_PFS(flags);
@@ -1419,11 +1429,8 @@ pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
is_syswide,
cpu));
- /*
- * if possible, enable default_idle() to go into PAL_HALT
- */
- if (pfm_sessions.pfs_task_sessions == 0 && pfm_sessions.pfs_sys_sessions == 0)
- update_pal_halt_status(1);
+ /* Undo forced polling. Last session reenables pal_halt */
+ cpu_idle_poll_ctrl(false);
UNLOCK_PFS(flags);
@@ -1436,13 +1443,14 @@ pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
* a PROTECT_CTX() section.
*/
static int
-pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long size)
+pfm_remove_smpl_mapping(void *vaddr, unsigned long size)
{
+ struct task_struct *task = current;
int r;
/* sanity checks */
if (task->mm == NULL || size == 0UL || vaddr == NULL) {
- printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm);
+ printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task_pid_nr(task), task->mm);
return -EINVAL;
}
@@ -1451,15 +1459,10 @@ pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long siz
/*
* does the actual unmapping
*/
- down_write(&task->mm->mmap_sem);
-
- DPRINT(("down_write done smpl_vaddr=%p size=%lu\n", vaddr, size));
+ r = vm_munmap((unsigned long)vaddr, size);
- r = pfm_do_munmap(task->mm, (unsigned long)vaddr, size, 0);
-
- up_write(&task->mm->mmap_sem);
if (r !=0) {
- printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size);
+ printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task_pid_nr(task), vaddr, size);
}
DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r));
@@ -1501,7 +1504,7 @@ pfm_free_smpl_buffer(pfm_context_t *ctx)
return 0;
invalid_free:
- printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid);
+ printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", task_pid_nr(current));
return -EINVAL;
}
#endif
@@ -1521,7 +1524,7 @@ pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt)
* any operations on the root directory. However, we need a non-trivial
* d_name - pfm: will go nicely and kill the special-casing in procfs.
*/
-static struct vfsmount *pfmfs_mnt;
+static struct vfsmount *pfmfs_mnt __read_mostly;
static int __init
init_pfm_fs(void)
@@ -1538,13 +1541,6 @@ init_pfm_fs(void)
return err;
}
-static void __exit
-exit_pfm_fs(void)
-{
- unregister_filesystem(&pfm_fs_type);
- mntput(pfmfs_mnt);
-}
-
static ssize_t
pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
{
@@ -1554,13 +1550,13 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
unsigned long flags;
DECLARE_WAITQUEUE(wait, current);
if (PFM_IS_FILE(filp) == 0) {
- printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
+ printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current));
return -EINVAL;
}
- ctx = (pfm_context_t *)filp->private_data;
+ ctx = filp->private_data;
if (ctx == NULL) {
- printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid);
+ printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", task_pid_nr(current));
return -EINVAL;
}
@@ -1614,7 +1610,7 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
PROTECT_CTX(ctx, flags);
}
- DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret));
+ DPRINT(("[%d] back to running ret=%ld\n", task_pid_nr(current), ret));
set_current_state(TASK_RUNNING);
remove_wait_queue(&ctx->ctx_msgq_wait, &wait);
@@ -1623,7 +1619,7 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
ret = -EINVAL;
msg = pfm_get_next_msg(ctx);
if (msg == NULL) {
- printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid);
+ printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, task_pid_nr(current));
goto abort_locked;
}
@@ -1654,13 +1650,13 @@ pfm_poll(struct file *filp, poll_table * wait)
unsigned int mask = 0;
if (PFM_IS_FILE(filp) == 0) {
- printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
+ printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current));
return 0;
}
- ctx = (pfm_context_t *)filp->private_data;
+ ctx = filp->private_data;
if (ctx == NULL) {
- printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid);
+ printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", task_pid_nr(current));
return 0;
}
@@ -1681,8 +1677,8 @@ pfm_poll(struct file *filp, poll_table * wait)
return mask;
}
-static int
-pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+static long
+pfm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
DPRINT(("pfm_ioctl called\n"));
return -EINVAL;
@@ -1699,7 +1695,7 @@ pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on)
ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue);
DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
- current->pid,
+ task_pid_nr(current),
fd,
on,
ctx->ctx_async_queue, ret));
@@ -1714,13 +1710,13 @@ pfm_fasync(int fd, struct file *filp, int on)
int ret;
if (PFM_IS_FILE(filp) == 0) {
- printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid);
+ printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", task_pid_nr(current));
return -EBADF;
}
- ctx = (pfm_context_t *)filp->private_data;
+ ctx = filp->private_data;
if (ctx == NULL) {
- printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid);
+ printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", task_pid_nr(current));
return -EBADF;
}
/*
@@ -1766,7 +1762,7 @@ pfm_syswide_force_stop(void *info)
if (owner != ctx->ctx_task) {
printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n",
smp_processor_id(),
- owner->pid, ctx->ctx_task->pid);
+ task_pid_nr(owner), task_pid_nr(ctx->ctx_task));
return;
}
if (GET_PMU_CTX() != ctx) {
@@ -1776,7 +1772,7 @@ pfm_syswide_force_stop(void *info)
return;
}
- DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), ctx->ctx_task->pid));
+ DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), task_pid_nr(ctx->ctx_task)));
/*
* the context is already protected in pfm_close(), we simply
* need to mask interrupts to avoid a PMU interrupt race on
@@ -1801,7 +1797,7 @@ pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx)
int ret;
DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu));
- ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 0, 1);
+ ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 1);
DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret));
}
#endif /* CONFIG_SMP */
@@ -1826,9 +1822,9 @@ pfm_flush(struct file *filp, fl_owner_t id)
return -EBADF;
}
- ctx = (pfm_context_t *)filp->private_data;
+ ctx = filp->private_data;
if (ctx == NULL) {
- printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", current->pid);
+ printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", task_pid_nr(current));
return -EBADF;
}
@@ -1845,11 +1841,6 @@ pfm_flush(struct file *filp, fl_owner_t id)
* invoked after, it will find an empty queue and no
* signal will be sent. In both cases, we are safe
*/
- if (filp->f_flags & FASYNC) {
- DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue));
- pfm_do_fasync (-1, filp, ctx, 0);
- }
-
PROTECT_CTX(ctx, flags);
state = ctx->ctx_state;
@@ -1935,7 +1926,7 @@ pfm_flush(struct file *filp, fl_owner_t id)
* because some VM function reenables interrupts.
*
*/
- if (smpl_buf_vaddr) pfm_remove_smpl_mapping(current, smpl_buf_vaddr, smpl_buf_size);
+ if (smpl_buf_vaddr) pfm_remove_smpl_mapping(smpl_buf_vaddr, smpl_buf_size);
return 0;
}
@@ -1974,9 +1965,9 @@ pfm_close(struct inode *inode, struct file *filp)
return -EBADF;
}
- ctx = (pfm_context_t *)filp->private_data;
+ ctx = filp->private_data;
if (ctx == NULL) {
- printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", current->pid);
+ printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", task_pid_nr(current));
return -EBADF;
}
@@ -2073,7 +2064,7 @@ pfm_close(struct inode *inode, struct file *filp)
*/
ctx->ctx_state = PFM_CTX_ZOMBIE;
- DPRINT(("zombie ctx for [%d]\n", task->pid));
+ DPRINT(("zombie ctx for [%d]\n", task_pid_nr(task)));
/*
* cannot free the context on the spot. deferred until
* the task notices the ZOMBIE state
@@ -2164,115 +2155,72 @@ pfm_no_open(struct inode *irrelevant, struct file *dontcare)
static const struct file_operations pfm_file_ops = {
- .llseek = no_llseek,
- .read = pfm_read,
- .write = pfm_write,
- .poll = pfm_poll,
- .ioctl = pfm_ioctl,
- .open = pfm_no_open, /* special open code to disallow open via /proc */
- .fasync = pfm_fasync,
- .release = pfm_close,
- .flush = pfm_flush
+ .llseek = no_llseek,
+ .read = pfm_read,
+ .write = pfm_write,
+ .poll = pfm_poll,
+ .unlocked_ioctl = pfm_ioctl,
+ .open = pfm_no_open, /* special open code to disallow open via /proc */
+ .fasync = pfm_fasync,
+ .release = pfm_close,
+ .flush = pfm_flush
};
-static int
-pfmfs_delete_dentry(struct dentry *dentry)
+static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen)
{
- return 1;
+ return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]",
+ dentry->d_inode->i_ino);
}
-static struct dentry_operations pfmfs_dentry_operations = {
- .d_delete = pfmfs_delete_dentry,
+static const struct dentry_operations pfmfs_dentry_operations = {
+ .d_delete = always_delete_dentry,
+ .d_dname = pfmfs_dname,
};
-static int
-pfm_alloc_fd(struct file **cfile)
+static struct file *
+pfm_alloc_file(pfm_context_t *ctx)
{
- int fd, ret = 0;
- struct file *file = NULL;
- struct inode * inode;
- char name[32];
- struct qstr this;
-
- fd = get_unused_fd();
- if (fd < 0) return -ENFILE;
-
- ret = -ENFILE;
-
- file = get_empty_filp();
- if (!file) goto out;
+ struct file *file;
+ struct inode *inode;
+ struct path path;
+ struct qstr this = { .name = "" };
/*
* allocate a new inode
*/
inode = new_inode(pfmfs_mnt->mnt_sb);
- if (!inode) goto out;
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode));
inode->i_mode = S_IFCHR|S_IRUGO;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
-
- sprintf(name, "[%lu]", inode->i_ino);
- this.name = name;
- this.len = strlen(name);
- this.hash = inode->i_ino;
-
- ret = -ENOMEM;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
/*
* allocate a new dcache entry
*/
- file->f_path.dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
- if (!file->f_path.dentry) goto out;
+ path.dentry = d_alloc(pfmfs_mnt->mnt_root, &this);
+ if (!path.dentry) {
+ iput(inode);
+ return ERR_PTR(-ENOMEM);
+ }
+ path.mnt = mntget(pfmfs_mnt);
- file->f_path.dentry->d_op = &pfmfs_dentry_operations;
+ d_add(path.dentry, inode);
- d_add(file->f_path.dentry, inode);
- file->f_path.mnt = mntget(pfmfs_mnt);
- file->f_mapping = inode->i_mapping;
+ file = alloc_file(&path, FMODE_READ, &pfm_file_ops);
+ if (IS_ERR(file)) {
+ path_put(&path);
+ return file;
+ }
- file->f_op = &pfm_file_ops;
- file->f_mode = FMODE_READ;
file->f_flags = O_RDONLY;
- file->f_pos = 0;
+ file->private_data = ctx;
- /*
- * may have to delay until context is attached?
- */
- fd_install(fd, file);
-
- /*
- * the file structure we will use
- */
- *cfile = file;
-
- return fd;
-out:
- if (file) put_filp(file);
- put_unused_fd(fd);
- return ret;
-}
-
-static void
-pfm_free_fd(int fd, struct file *file)
-{
- struct files_struct *files = current->files;
- struct fdtable *fdt;
-
- /*
- * there ie no fd_uninstall(), so we do it here
- */
- spin_lock(&files->file_lock);
- fdt = files_fdtable(files);
- rcu_assign_pointer(fdt->fd[fd], NULL);
- spin_unlock(&files->file_lock);
-
- if (file)
- put_filp(file);
- put_unused_fd(fd);
+ return file;
}
static int
@@ -2321,7 +2269,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
* if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur)
* return -ENOMEM;
*/
- if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
+ if (size > task_rlimit(task, RLIMIT_MEMLOCK))
return -ENOMEM;
/*
@@ -2343,13 +2291,14 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
DPRINT(("Cannot allocate vma\n"));
goto error_kmem;
}
+ INIT_LIST_HEAD(&vma->anon_vma_chain);
/*
* partially initialize the vma for the sampling buffer
*/
vma->vm_mm = mm;
- vma->vm_file = filp;
- vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED;
+ vma->vm_file = get_file(filp);
+ vma->vm_flags = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP;
vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */
/*
@@ -2369,8 +2318,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
down_write(&task->mm->mmap_sem);
/* find some free area in address space, must have mmap sem held */
- vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0);
- if (vma->vm_start == 0UL) {
+ vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS);
+ if (IS_ERR_VALUE(vma->vm_start)) {
DPRINT(("Cannot find unmapped area for size %ld\n", size));
up_write(&task->mm->mmap_sem);
goto error;
@@ -2387,15 +2336,12 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
goto error;
}
- get_file(filp);
-
/*
* now insert the vma in the vm list for the process, must be
* done with mmap lock held
*/
insert_vm_struct(mm, vma);
- mm->total_vm += size >> PAGE_SHIFT;
vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
vma_pages(vma));
up_write(&task->mm->mmap_sem);
@@ -2422,22 +2368,33 @@ error_kmem:
static int
pfm_bad_permissions(struct task_struct *task)
{
+ const struct cred *tcred;
+ kuid_t uid = current_uid();
+ kgid_t gid = current_gid();
+ int ret;
+
+ rcu_read_lock();
+ tcred = __task_cred(task);
+
/* inspired by ptrace_attach() */
DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n",
- current->uid,
- current->gid,
- task->euid,
- task->suid,
- task->uid,
- task->egid,
- task->sgid));
-
- return ((current->uid != task->euid)
- || (current->uid != task->suid)
- || (current->uid != task->uid)
- || (current->gid != task->egid)
- || (current->gid != task->sgid)
- || (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE);
+ from_kuid(&init_user_ns, uid),
+ from_kgid(&init_user_ns, gid),
+ from_kuid(&init_user_ns, tcred->euid),
+ from_kuid(&init_user_ns, tcred->suid),
+ from_kuid(&init_user_ns, tcred->uid),
+ from_kgid(&init_user_ns, tcred->egid),
+ from_kgid(&init_user_ns, tcred->sgid)));
+
+ ret = ((!uid_eq(uid, tcred->euid))
+ || (!uid_eq(uid, tcred->suid))
+ || (!uid_eq(uid, tcred->uid))
+ || (!gid_eq(gid, tcred->egid))
+ || (!gid_eq(gid, tcred->sgid))
+ || (!gid_eq(gid, tcred->gid))) && !capable(CAP_SYS_PTRACE);
+
+ rcu_read_unlock();
+ return ret;
}
static int
@@ -2479,7 +2436,7 @@ pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t
/* invoke and lock buffer format, if found */
fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id);
if (fmt == NULL) {
- DPRINT(("[%d] cannot find buffer format\n", task->pid));
+ DPRINT(("[%d] cannot find buffer format\n", task_pid_nr(task)));
return -EINVAL;
}
@@ -2490,12 +2447,13 @@ pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t
ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg);
- DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task->pid, ctx_flags, cpu, fmt_arg, ret));
+ DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task_pid_nr(task), ctx_flags, cpu, fmt_arg, ret));
if (ret) goto error;
/* link buffer format and context */
ctx->ctx_buf_fmt = fmt;
+ ctx->ctx_fl_is_sampling = 1; /* assume record() is defined */
/*
* check if buffer format wants to use perfmon buffer allocation/mapping service
@@ -2612,23 +2570,23 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
* no kernel task or task not owned by caller
*/
if (task->mm == NULL) {
- DPRINT(("task [%d] has not memory context (kernel thread)\n", task->pid));
+ DPRINT(("task [%d] has not memory context (kernel thread)\n", task_pid_nr(task)));
return -EPERM;
}
if (pfm_bad_permissions(task)) {
- DPRINT(("no permission to attach to [%d]\n", task->pid));
+ DPRINT(("no permission to attach to [%d]\n", task_pid_nr(task)));
return -EPERM;
}
/*
* cannot block in self-monitoring mode
*/
if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) {
- DPRINT(("cannot load a blocking context on self for [%d]\n", task->pid));
+ DPRINT(("cannot load a blocking context on self for [%d]\n", task_pid_nr(task)));
return -EINVAL;
}
if (task->exit_state == EXIT_ZOMBIE) {
- DPRINT(("cannot attach to zombie task [%d]\n", task->pid));
+ DPRINT(("cannot attach to zombie task [%d]\n", task_pid_nr(task)));
return -EBUSY;
}
@@ -2637,14 +2595,14 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
*/
if (task == current) return 0;
- if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
- DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task->pid, task->state));
+ if (!task_is_stopped_or_traced(task)) {
+ DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task_pid_nr(task), task->state));
return -EBUSY;
}
/*
* make sure the task is off any CPU
*/
- wait_task_inactive(task);
+ wait_task_inactive(task, 0);
/* more to come... */
@@ -2660,11 +2618,11 @@ pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task)
/* XXX: need to add more checks here */
if (pid < 2) return -EPERM;
- if (pid != current->pid) {
+ if (pid != task_pid_vnr(current)) {
read_lock(&tasklist_lock);
- p = find_task_by_pid(pid);
+ p = find_task_by_vpid(pid);
/* make sure task cannot go away while we operate on it */
if (p) get_task_struct(p);
@@ -2690,79 +2648,46 @@ pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
{
pfarg_context_t *req = (pfarg_context_t *)arg;
struct file *filp;
+ struct path path;
int ctx_flags;
+ int fd;
int ret;
/* let's check the arguments first */
ret = pfarg_is_sane(current, req);
- if (ret < 0) return ret;
+ if (ret < 0)
+ return ret;
ctx_flags = req->ctx_flags;
ret = -ENOMEM;
- ctx = pfm_context_alloc();
- if (!ctx) goto error;
+ fd = get_unused_fd();
+ if (fd < 0)
+ return fd;
- ret = pfm_alloc_fd(&filp);
- if (ret < 0) goto error_file;
+ ctx = pfm_context_alloc(ctx_flags);
+ if (!ctx)
+ goto error;
- req->ctx_fd = ctx->ctx_fd = ret;
+ filp = pfm_alloc_file(ctx);
+ if (IS_ERR(filp)) {
+ ret = PTR_ERR(filp);
+ goto error_file;
+ }
- /*
- * attach context to file
- */
- filp->private_data = ctx;
+ req->ctx_fd = ctx->ctx_fd = fd;
/*
* does the user want to sample?
*/
if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) {
ret = pfm_setup_buffer_fmt(current, filp, ctx, ctx_flags, 0, req);
- if (ret) goto buffer_error;
+ if (ret)
+ goto buffer_error;
}
- /*
- * init context protection lock
- */
- spin_lock_init(&ctx->ctx_lock);
-
- /*
- * context is unloaded
- */
- ctx->ctx_state = PFM_CTX_UNLOADED;
-
- /*
- * initialization of context's flags
- */
- ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
- ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
- ctx->ctx_fl_is_sampling = ctx->ctx_buf_fmt ? 1 : 0; /* assume record() is defined */
- ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
- /*
- * will move to set properties
- * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
- */
-
- /*
- * init restart semaphore to locked
- */
- init_completion(&ctx->ctx_restart_done);
-
- /*
- * activation is used in SMP only
- */
- ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
- SET_LAST_CPU(ctx, -1);
-
- /*
- * initialize notification message queue
- */
- ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
- init_waitqueue_head(&ctx->ctx_msgq_wait);
- init_waitqueue_head(&ctx->ctx_zombieq);
-
- DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d \n",
+ DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d\n",
ctx,
ctx_flags,
ctx->ctx_fl_system,
@@ -2776,10 +2701,14 @@ pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
*/
pfm_reset_pmu_state(ctx);
+ fd_install(fd, filp);
+
return 0;
buffer_error:
- pfm_free_fd(ctx->ctx_fd, filp);
+ path = filp->f_path;
+ put_filp(filp);
+ path_put(&path);
if (ctx->ctx_buf_fmt) {
pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs);
@@ -2788,6 +2717,7 @@ error_file:
pfm_context_free(ctx);
error:
+ put_unused_fd(fd);
return ret;
}
@@ -3519,7 +3449,7 @@ pfm_use_debug_registers(struct task_struct *task)
if (pmu_conf->use_rr_dbregs == 0) return 0;
- DPRINT(("called for [%d]\n", task->pid));
+ DPRINT(("called for [%d]\n", task_pid_nr(task)));
/*
* do it only once
@@ -3550,7 +3480,7 @@ pfm_use_debug_registers(struct task_struct *task)
DPRINT(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n",
pfm_sessions.pfs_ptrace_use_dbregs,
pfm_sessions.pfs_sys_use_dbregs,
- task->pid, ret));
+ task_pid_nr(task), ret));
UNLOCK_PFS(flags);
@@ -3562,7 +3492,7 @@ pfm_use_debug_registers(struct task_struct *task)
* IA64_THREAD_DBG_VALID set. This indicates a task which was
* able to use the debug registers for debugging purposes via
* ptrace(). Therefore we know it was not using them for
- * perfmormance monitoring, so we only decrement the number
+ * performance monitoring, so we only decrement the number
* of "ptraced" debug register users to keep the count up to date
*/
int
@@ -3575,7 +3505,7 @@ pfm_release_debug_registers(struct task_struct *task)
LOCK_PFS(flags);
if (pfm_sessions.pfs_ptrace_use_dbregs == 0) {
- printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid);
+ printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task_pid_nr(task));
ret = -1;
} else {
pfm_sessions.pfs_ptrace_use_dbregs--;
@@ -3627,7 +3557,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
/* sanity check */
if (unlikely(task == NULL)) {
- printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", current->pid);
+ printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", task_pid_nr(current));
return -EINVAL;
}
@@ -3636,7 +3566,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
fmt = ctx->ctx_buf_fmt;
DPRINT(("restarting self %d ovfl=0x%lx\n",
- task->pid,
+ task_pid_nr(task),
ctx->ctx_ovfl_regs[0]));
if (CTX_HAS_SMPL(ctx)) {
@@ -3660,11 +3590,11 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
if (rst_ctrl.bits.mask_monitoring == 0) {
- DPRINT(("resuming monitoring for [%d]\n", task->pid));
+ DPRINT(("resuming monitoring for [%d]\n", task_pid_nr(task)));
if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task);
} else {
- DPRINT(("keeping monitoring stopped for [%d]\n", task->pid));
+ DPRINT(("keeping monitoring stopped for [%d]\n", task_pid_nr(task)));
// cannot use pfm_stop_monitoring(task, regs);
}
@@ -3721,16 +3651,16 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
* "self-monitoring".
*/
if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) {
- DPRINT(("unblocking [%d] \n", task->pid));
+ DPRINT(("unblocking [%d]\n", task_pid_nr(task)));
complete(&ctx->ctx_restart_done);
} else {
- DPRINT(("[%d] armed exit trap\n", task->pid));
+ DPRINT(("[%d] armed exit trap\n", task_pid_nr(task)));
ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET;
PFM_SET_WORK_PENDING(task, 1);
- pfm_set_task_notify(task);
+ set_notify_resume(task);
/*
* XXX: send reschedule if task runs on another CPU
@@ -3812,7 +3742,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_
* don't bother if we are loaded and task is being debugged
*/
if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) {
- DPRINT(("debug registers already in use for [%d]\n", task->pid));
+ DPRINT(("debug registers already in use for [%d]\n", task_pid_nr(task)));
return -EBUSY;
}
@@ -3853,7 +3783,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_
* is shared by all processes running on it
*/
if (first_time && can_access_pmu) {
- DPRINT(("[%d] clearing ibrs, dbrs\n", task->pid));
+ DPRINT(("[%d] clearing ibrs, dbrs\n", task_pid_nr(task)));
for (i=0; i < pmu_conf->num_ibrs; i++) {
ia64_set_ibr(i, 0UL);
ia64_dv_serialize_instruction();
@@ -4042,7 +3972,7 @@ pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
return -EBUSY;
}
DPRINT(("task [%d] ctx_state=%d is_system=%d\n",
- PFM_CTX_TASK(ctx)->pid,
+ task_pid_nr(PFM_CTX_TASK(ctx)),
state,
is_system));
/*
@@ -4100,7 +4030,7 @@ pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
* monitoring disabled in kernel at next reschedule
*/
ctx->ctx_saved_psr_up = 0;
- DPRINT(("task=[%d]\n", task->pid));
+ DPRINT(("task=[%d]\n", task_pid_nr(task)));
}
return 0;
}
@@ -4225,10 +4155,10 @@ pfm_check_task_exist(pfm_context_t *ctx)
do_each_thread (g, t) {
if (t->thread.pfm_context == ctx) {
ret = 0;
- break;
+ goto out;
}
} while_each_thread (g, t);
-
+out:
read_unlock(&tasklist_lock);
DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx));
@@ -4305,11 +4235,12 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
if (is_system) {
if (pfm_sessions.pfs_ptrace_use_dbregs) {
- DPRINT(("cannot load [%d] dbregs in use\n", task->pid));
+ DPRINT(("cannot load [%d] dbregs in use\n",
+ task_pid_nr(task)));
ret = -EBUSY;
} else {
pfm_sessions.pfs_sys_use_dbregs++;
- DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task->pid, pfm_sessions.pfs_sys_use_dbregs));
+ DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task_pid_nr(task), pfm_sessions.pfs_sys_use_dbregs));
set_dbregs = 1;
}
}
@@ -4401,7 +4332,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
/* allow user level control */
ia64_psr(regs)->sp = 0;
- DPRINT(("clearing psr.sp for [%d]\n", task->pid));
+ DPRINT(("clearing psr.sp for [%d]\n", task_pid_nr(task)));
SET_LAST_CPU(ctx, smp_processor_id());
INC_ACTIVATION();
@@ -4436,7 +4367,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
*/
SET_PMU_OWNER(task, ctx);
- DPRINT(("context loaded on PMU for [%d]\n", task->pid));
+ DPRINT(("context loaded on PMU for [%d]\n", task_pid_nr(task)));
} else {
/*
* when not current, task MUST be stopped, so this is safe
@@ -4500,7 +4431,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
int prev_state, is_system;
int ret;
- DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task->pid : -1));
+ DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task_pid_nr(task) : -1));
prev_state = ctx->ctx_state;
is_system = ctx->ctx_fl_system;
@@ -4575,7 +4506,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
*/
ia64_psr(regs)->sp = 1;
- DPRINT(("setting psr.sp for [%d]\n", task->pid));
+ DPRINT(("setting psr.sp for [%d]\n", task_pid_nr(task)));
}
/*
* save PMDs to context
@@ -4615,7 +4546,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
ctx->ctx_fl_can_restart = 0;
ctx->ctx_fl_going_zombie = 0;
- DPRINT(("disconnected [%d] from context\n", task->pid));
+ DPRINT(("disconnected [%d] from context\n", task_pid_nr(task)));
return 0;
}
@@ -4638,7 +4569,7 @@ pfm_exit_thread(struct task_struct *task)
PROTECT_CTX(ctx, flags);
- DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task->pid));
+ DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task_pid_nr(task)));
state = ctx->ctx_state;
switch(state) {
@@ -4647,13 +4578,13 @@ pfm_exit_thread(struct task_struct *task)
* only comes to this function if pfm_context is not NULL, i.e., cannot
* be in unloaded state
*/
- printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid);
+ printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task_pid_nr(task));
break;
case PFM_CTX_LOADED:
case PFM_CTX_MASKED:
ret = pfm_context_unload(ctx, NULL, 0, regs);
if (ret) {
- printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
+ printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret);
}
DPRINT(("ctx unloaded for current state was %d\n", state));
@@ -4662,12 +4593,12 @@ pfm_exit_thread(struct task_struct *task)
case PFM_CTX_ZOMBIE:
ret = pfm_context_unload(ctx, NULL, 0, regs);
if (ret) {
- printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
+ printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret);
}
free_ok = 1;
break;
default:
- printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task->pid, state);
+ printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task_pid_nr(task), state);
break;
}
UNPROTECT_CTX(ctx, flags);
@@ -4751,7 +4682,7 @@ recheck:
DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
ctx->ctx_fd,
state,
- task->pid,
+ task_pid_nr(task),
task->state, PFM_CMD_STOPPED(cmd)));
/*
@@ -4797,8 +4728,8 @@ recheck:
* the task must be stopped.
*/
if (PFM_CMD_STOPPED(cmd)) {
- if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
- DPRINT(("[%d] task not in stopped state\n", task->pid));
+ if (!task_is_stopped_or_traced(task)) {
+ DPRINT(("[%d] task not in stopped state\n", task_pid_nr(task)));
return -EBUSY;
}
/*
@@ -4819,7 +4750,7 @@ recheck:
UNPROTECT_CTX(ctx, flags);
- wait_task_inactive(task);
+ wait_task_inactive(task, 0);
PROTECT_CTX(ctx, flags);
@@ -4840,7 +4771,7 @@ recheck:
asmlinkage long
sys_perfmonctl (int fd, int cmd, void __user *arg, int count)
{
- struct file *file = NULL;
+ struct fd f = {NULL, 0};
pfm_context_t *ctx = NULL;
unsigned long flags = 0UL;
void *args_k = NULL;
@@ -4891,7 +4822,7 @@ restart_args:
* limit abuse to min page size
*/
if (unlikely(sz > PFM_MAX_ARGSIZE)) {
- printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", current->pid, sz);
+ printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", task_pid_nr(current), sz);
return -E2BIG;
}
@@ -4937,17 +4868,17 @@ restart_args:
ret = -EBADF;
- file = fget(fd);
- if (unlikely(file == NULL)) {
+ f = fdget(fd);
+ if (unlikely(f.file == NULL)) {
DPRINT(("invalid fd %d\n", fd));
goto error_args;
}
- if (unlikely(PFM_IS_FILE(file) == 0)) {
+ if (unlikely(PFM_IS_FILE(f.file) == 0)) {
DPRINT(("fd %d not related to perfmon\n", fd));
goto error_args;
}
- ctx = (pfm_context_t *)file->private_data;
+ ctx = f.file->private_data;
if (unlikely(ctx == NULL)) {
DPRINT(("no context for fd %d\n", fd));
goto error_args;
@@ -4977,8 +4908,8 @@ abort_locked:
if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
error_args:
- if (file)
- fput(file);
+ if (f.file)
+ fdput(f);
kfree(args_k);
@@ -5038,11 +4969,11 @@ pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
{
int ret;
- DPRINT(("entering for [%d]\n", current->pid));
+ DPRINT(("entering for [%d]\n", task_pid_nr(current)));
ret = pfm_context_unload(ctx, NULL, 0, regs);
if (ret) {
- printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", current->pid, ret);
+ printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", task_pid_nr(current), ret);
}
/*
@@ -5058,12 +4989,13 @@ pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
}
static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds);
+
/*
* pfm_handle_work() can be called with interrupts enabled
* (TIF_NEED_RESCHED) or disabled. The down_interruptible
* call may sleep, therefore we must re-enable interrupts
* to avoid deadlocks. It is safe to do so because this function
- * is called ONLY when returning to user level (PUStk=1), in which case
+ * is called ONLY when returning to user level (pUStk=1), in which case
* there is no risk of kernel stack overflow due to deep
* interrupt nesting.
*/
@@ -5079,7 +5011,8 @@ pfm_handle_work(void)
ctx = PFM_GET_CTX(current);
if (ctx == NULL) {
- printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid);
+ printk(KERN_ERR "perfmon: [%d] has no PFM context\n",
+ task_pid_nr(current));
return;
}
@@ -5087,8 +5020,6 @@ pfm_handle_work(void)
PFM_SET_WORK_PENDING(current, 0);
- pfm_clear_task_notify();
-
regs = task_pt_regs(current);
/*
@@ -5103,11 +5034,12 @@ pfm_handle_work(void)
/*
* must be done before we check for simple-reset mode
*/
- if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) goto do_zombie;
-
+ if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE)
+ goto do_zombie;
//if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking;
- if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking;
+ if (reason == PFM_TRAP_REASON_RESET)
+ goto skip_blocking;
/*
* restore interrupt mask to what it was on entry.
@@ -5155,7 +5087,8 @@ do_zombie:
/*
* in case of interruption of down() we don't restart anything
*/
- if (ret < 0) goto nothing_to_do;
+ if (ret < 0)
+ goto nothing_to_do;
skip_blocking:
pfm_resume_after_ovfl(ctx, ovfl_regs, regs);
@@ -5249,8 +5182,8 @@ pfm_end_notify_user(pfm_context_t *ctx)
* main overflow processing routine.
* it can be called from the interrupt path or explicitly during the context switch code
*/
-static void
-pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
+static void pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx,
+ unsigned long pmc0, struct pt_regs *regs)
{
pfm_ovfl_arg_t *ovfl_arg;
unsigned long mask;
@@ -5276,7 +5209,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s "
"used_pmds=0x%lx\n",
pmc0,
- task ? task->pid: -1,
+ task ? task_pid_nr(task): -1,
(regs ? regs->cr_iip : 0),
CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
ctx->ctx_used_pmds[0]));
@@ -5455,7 +5388,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
* when coming from ctxsw, current still points to the
* previous task, therefore we must work with task and not current.
*/
- pfm_set_task_notify(task);
+ set_notify_resume(task);
}
/*
* defer until state is changed (shorten spin window). the context is locked
@@ -5465,7 +5398,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
}
DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n",
- GET_PMU_OWNER() ? GET_PMU_OWNER()->pid : -1,
+ GET_PMU_OWNER() ? task_pid_nr(GET_PMU_OWNER()) : -1,
PFM_GET_WORK_PENDING(task),
ctx->ctx_fl_trap_reason,
ovfl_pmds,
@@ -5490,7 +5423,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
sanity_check:
printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n",
smp_processor_id(),
- task ? task->pid : -1,
+ task ? task_pid_nr(task) : -1,
pmc0);
return;
@@ -5523,7 +5456,7 @@ stop_monitoring:
*
* Overall pretty hairy stuff....
*/
- DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task->pid: -1));
+ DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task_pid_nr(task): -1));
pfm_clear_psr_up();
ia64_psr(regs)->up = 0;
ia64_psr(regs)->sp = 1;
@@ -5531,7 +5464,7 @@ stop_monitoring:
}
static int
-pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
+pfm_do_interrupt_handler(void *arg, struct pt_regs *regs)
{
struct task_struct *task;
pfm_context_t *ctx;
@@ -5584,13 +5517,13 @@ pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
report_spurious1:
printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n",
- this_cpu, task->pid);
+ this_cpu, task_pid_nr(task));
pfm_unfreeze_pmu();
return -1;
report_spurious2:
printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n",
this_cpu,
- task->pid);
+ task_pid_nr(task));
pfm_unfreeze_pmu();
return -1;
}
@@ -5611,7 +5544,7 @@ pfm_interrupt_handler(int irq, void *arg)
start_cycles = ia64_get_itc();
- ret = pfm_do_interrupt_handler(irq, arg, regs);
+ ret = pfm_do_interrupt_handler(arg, regs);
total_cycles = ia64_get_itc();
@@ -5631,7 +5564,7 @@ pfm_interrupt_handler(int irq, void *arg)
(*pfm_alt_intr_handler->handler)(irq, arg, regs);
}
- put_cpu_no_resched();
+ put_cpu();
return IRQ_HANDLED;
}
@@ -5639,7 +5572,7 @@ pfm_interrupt_handler(int irq, void *arg)
* /proc/perfmon interface, for debug only
*/
-#define PFM_PROC_SHOW_HEADER ((void *)NR_CPUS+1)
+#define PFM_PROC_SHOW_HEADER ((void *)(long)nr_cpu_ids+1)
static void *
pfm_proc_start(struct seq_file *m, loff_t *pos)
@@ -5648,7 +5581,7 @@ pfm_proc_start(struct seq_file *m, loff_t *pos)
return PFM_PROC_SHOW_HEADER;
}
- while (*pos <= NR_CPUS) {
+ while (*pos <= nr_cpu_ids) {
if (cpu_online(*pos - 1)) {
return (void *)*pos;
}
@@ -5708,24 +5641,8 @@ pfm_proc_show_header(struct seq_file *m)
list_for_each(pos, &pfm_buffer_fmt_list) {
entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
- seq_printf(m, "format : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n",
- entry->fmt_uuid[0],
- entry->fmt_uuid[1],
- entry->fmt_uuid[2],
- entry->fmt_uuid[3],
- entry->fmt_uuid[4],
- entry->fmt_uuid[5],
- entry->fmt_uuid[6],
- entry->fmt_uuid[7],
- entry->fmt_uuid[8],
- entry->fmt_uuid[9],
- entry->fmt_uuid[10],
- entry->fmt_uuid[11],
- entry->fmt_uuid[12],
- entry->fmt_uuid[13],
- entry->fmt_uuid[14],
- entry->fmt_uuid[15],
- entry->fmt_name);
+ seq_printf(m, "format : %16phD %s\n",
+ entry->fmt_uuid, entry->fmt_name);
}
spin_unlock(&pfm_buffer_fmt_lock);
@@ -5800,7 +5717,7 @@ pfm_proc_show(struct seq_file *m, void *v)
return 0;
}
-struct seq_operations pfm_seq_ops = {
+const struct seq_operations pfm_seq_ops = {
.start = pfm_proc_start,
.next = pfm_proc_next,
.stop = pfm_proc_stop,
@@ -5877,7 +5794,8 @@ pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
ia64_psr(regs)->sp = 1;
if (GET_PMU_OWNER() == task) {
- DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid));
+ DPRINT(("cleared ownership for [%d]\n",
+ task_pid_nr(ctx->ctx_task)));
SET_PMU_OWNER(NULL, NULL);
}
@@ -5889,7 +5807,7 @@ pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
task->thread.pfm_context = NULL;
task->thread.flags &= ~IA64_THREAD_PM_VALID;
- DPRINT(("force cleanup for [%d]\n", task->pid));
+ DPRINT(("force cleanup for [%d]\n", task_pid_nr(task)));
}
@@ -6433,7 +6351,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
if (PMD_IS_COUNTING(i)) {
DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n",
- task->pid,
+ task_pid_nr(task),
i,
ctx->ctx_pmds[i].val,
val & ovfl_val));
@@ -6455,11 +6373,11 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
*/
if (pmc0 & (1UL << i)) {
val += 1 + ovfl_val;
- DPRINT(("[%d] pmd[%d] overflowed\n", task->pid, i));
+ DPRINT(("[%d] pmd[%d] overflowed\n", task_pid_nr(task), i));
}
}
- DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task->pid, i, val, pmd_val));
+ DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task_pid_nr(task), i, val, pmd_val));
if (is_self) ctx->th_pmds[i] = pmd_val;
@@ -6469,7 +6387,6 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
static struct irqaction perfmon_irqaction = {
.handler = pfm_interrupt_handler,
- .flags = IRQF_DISABLED,
.name = "perfmon"
};
@@ -6548,7 +6465,7 @@ pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
}
/* save the current system wide pmu states */
- ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 0, 1);
+ ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 1);
if (ret) {
DPRINT(("on_each_cpu() failed: %d\n", ret));
goto cleanup_reserve;
@@ -6593,7 +6510,7 @@ pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
pfm_alt_intr_handler = NULL;
- ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 0, 1);
+ ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1);
if (ret) {
DPRINT(("on_each_cpu() failed: %d\n", ret));
}
@@ -6714,16 +6631,12 @@ pfm_init(void)
/*
* create /proc/perfmon (mostly for debugging purposes)
*/
- perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL);
+ perfmon_dir = proc_create("perfmon", S_IRUGO, NULL, &pfm_proc_fops);
if (perfmon_dir == NULL) {
printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
pmu_conf = NULL;
return -1;
}
- /*
- * install customized file operations for /proc/perfmon entry
- */
- perfmon_dir->proc_fops = &pfm_proc_fops;
/*
* create /proc/sys/kernel/perfmon (for debugging purposes)
@@ -6800,14 +6713,14 @@ dump_pmu_state(const char *from)
printk("CPU%d from %s() current [%d] iip=0x%lx %s\n",
this_cpu,
from,
- current->pid,
+ task_pid_nr(current),
regs->cr_iip,
current->comm);
task = GET_PMU_OWNER();
ctx = GET_PMU_CTX();
- printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task->pid : -1, ctx);
+ printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task_pid_nr(task) : -1, ctx);
psr = pfm_get_psr();
@@ -6855,7 +6768,7 @@ pfm_inherit(struct task_struct *task, struct pt_regs *regs)
{
struct thread_struct *thread;
- DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task->pid));
+ DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task_pid_nr(task)));
thread = &task->thread;
diff --git a/arch/ia64/kernel/perfmon_default_smpl.c b/arch/ia64/kernel/perfmon_default_smpl.c
index ff80eab83b3..30c644ea44c 100644
--- a/arch/ia64/kernel/perfmon_default_smpl.c
+++ b/arch/ia64/kernel/perfmon_default_smpl.c
@@ -24,12 +24,12 @@ MODULE_LICENSE("GPL");
#ifdef DEFAULT_DEBUG
#define DPRINT(a) \
do { \
- if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
+ if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \
} while (0)
#define DPRINT_ovfl(a) \
do { \
- if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
+ if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \
} while (0)
#else
@@ -44,11 +44,11 @@ default_validate(struct task_struct *task, unsigned int flags, int cpu, void *da
int ret = 0;
if (data == NULL) {
- DPRINT(("[%d] no argument passed\n", task->pid));
+ DPRINT(("[%d] no argument passed\n", task_pid_nr(task)));
return -EINVAL;
}
- DPRINT(("[%d] validate flags=0x%x CPU%d\n", task->pid, flags, cpu));
+ DPRINT(("[%d] validate flags=0x%x CPU%d\n", task_pid_nr(task), flags, cpu));
/*
* must hold at least the buffer header + one minimally sized entry
@@ -88,7 +88,7 @@ default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, v
hdr->hdr_count = 0UL;
DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n",
- task->pid,
+ task_pid_nr(task),
buf,
hdr->hdr_buf_size,
sizeof(*hdr),
@@ -150,7 +150,7 @@ default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct
* current = task running at the time of the overflow.
*
* per-task mode:
- * - this is ususally the task being monitored.
+ * - this is usually the task being monitored.
* Under certain conditions, it might be a different task
*
* system-wide:
@@ -245,7 +245,7 @@ default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, stru
static int
default_exit(struct task_struct *task, void *buf, struct pt_regs *regs)
{
- DPRINT(("[%d] exit(%p)\n", task->pid, buf));
+ DPRINT(("[%d] exit(%p)\n", task_pid_nr(task), buf));
return 0;
}
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index c613fc0e91c..55d4ba47a90 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -15,11 +15,11 @@
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/slab.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/personality.h>
#include <linux/sched.h>
-#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/thread_info.h>
#include <linux/unistd.h>
@@ -27,16 +27,19 @@
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/kdebug.h>
+#include <linux/utsname.h>
+#include <linux/tracehook.h>
+#include <linux/rcupdate.h>
#include <asm/cpu.h>
#include <asm/delay.h>
#include <asm/elf.h>
-#include <asm/ia32.h>
#include <asm/irq.h>
#include <asm/kexec.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
#include <asm/sal.h>
+#include <asm/switch_to.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/unwind.h>
@@ -51,10 +54,11 @@
#include "sigframe.h"
void (*ia64_mark_idle)(int);
-static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
-unsigned long boot_option_idle_override = 0;
+unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(boot_option_idle_override);
+void (*pm_power_off) (void);
+EXPORT_SYMBOL(pm_power_off);
void
ia64_do_show_stack (struct unw_frame_info *info, void *arg)
@@ -92,22 +96,16 @@ show_stack (struct task_struct *task, unsigned long *sp)
}
void
-dump_stack (void)
-{
- show_stack(NULL, NULL);
-}
-
-EXPORT_SYMBOL(dump_stack);
-
-void
show_regs (struct pt_regs *regs)
{
unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
print_modules();
- printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm);
- printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s\n",
- regs->cr_ipsr, regs->cr_ifs, ip, print_tainted());
+ printk("\n");
+ show_regs_print_info(KERN_DEFAULT);
+ printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s (%s)\n",
+ regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(),
+ init_utsname()->release);
print_symbol("ip is at %s\n", ip);
printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
@@ -154,11 +152,21 @@ show_regs (struct pt_regs *regs)
show_stack(NULL, NULL);
}
+/* local support for deprecated console_print */
+void
+console_print(const char *s)
+{
+ printk(KERN_EMERG "%s", s);
+}
+
void
-do_notify_resume_user (sigset_t *unused, struct sigscratch *scr, long in_syscall)
+do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
{
if (fsys_mode(current, &scr->pt)) {
- /* defer signal-handling etc. until we return to privilege-level 0. */
+ /*
+ * defer signal-handling etc. until we return to
+ * privilege-level 0.
+ */
if (!ia64_psr(&scr->pt)->lp)
ia64_psr(&scr->pt)->lp = 1;
return;
@@ -166,54 +174,44 @@ do_notify_resume_user (sigset_t *unused, struct sigscratch *scr, long in_syscall
#ifdef CONFIG_PERFMON
if (current->thread.pfm_needs_checking)
+ /*
+ * Note: pfm_handle_work() allow us to call it with interrupts
+ * disabled, and may enable interrupts within the function.
+ */
pfm_handle_work();
#endif
/* deal with pending signal delivery */
- if (test_thread_flag(TIF_SIGPENDING)||test_thread_flag(TIF_RESTORE_SIGMASK))
+ if (test_thread_flag(TIF_SIGPENDING)) {
+ local_irq_enable(); /* force interrupt enable */
ia64_do_signal(scr, in_syscall);
-}
+ }
-static int pal_halt = 1;
-static int can_do_pal_halt = 1;
+ if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME)) {
+ local_irq_enable(); /* force interrupt enable */
+ tracehook_notify_resume(&scr->pt);
+ }
-static int __init nohalt_setup(char * str)
-{
- pal_halt = can_do_pal_halt = 0;
- return 1;
-}
-__setup("nohalt", nohalt_setup);
+ /* copy user rbs to kernel rbs */
+ if (unlikely(test_thread_flag(TIF_RESTORE_RSE))) {
+ local_irq_enable(); /* force interrupt enable */
+ ia64_sync_krbs();
+ }
-void
-update_pal_halt_status(int status)
-{
- can_do_pal_halt = pal_halt && status;
+ local_irq_disable(); /* force interrupt disable */
}
-/*
- * We use this if we don't have any better idle routine..
- */
-void
-default_idle (void)
+static int __init nohalt_setup(char * str)
{
- local_irq_enable();
- while (!need_resched()) {
- if (can_do_pal_halt) {
- local_irq_disable();
- if (!need_resched()) {
- safe_halt();
- }
- local_irq_enable();
- } else
- cpu_relax();
- }
+ cpu_idle_poll_ctrl(true);
+ return 1;
}
+__setup("nohalt", nohalt_setup);
#ifdef CONFIG_HOTPLUG_CPU
/* We don't actually take CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
- extern void ia64_cpu_local_tick (void);
unsigned int this_cpu = smp_processor_id();
/* Ack it */
@@ -236,84 +234,29 @@ static inline void play_dead(void)
}
#endif /* CONFIG_HOTPLUG_CPU */
-void cpu_idle_wait(void)
+void arch_cpu_idle_dead(void)
{
- unsigned int cpu, this_cpu = get_cpu();
- cpumask_t map;
- cpumask_t tmp = current->cpus_allowed;
-
- set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
- put_cpu();
-
- cpus_clear(map);
- for_each_online_cpu(cpu) {
- per_cpu(cpu_idle_state, cpu) = 1;
- cpu_set(cpu, map);
- }
-
- __get_cpu_var(cpu_idle_state) = 0;
-
- wmb();
- do {
- ssleep(1);
- for_each_online_cpu(cpu) {
- if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
- cpu_clear(cpu, map);
- }
- cpus_and(map, map, cpu_online_map);
- } while (!cpus_empty(map));
- set_cpus_allowed(current, tmp);
+ play_dead();
}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-void __attribute__((noreturn))
-cpu_idle (void)
+void arch_cpu_idle(void)
{
void (*mark_idle)(int) = ia64_mark_idle;
- int cpu = smp_processor_id();
-
- /* endless idle loop with no priority at all */
- while (1) {
- if (can_do_pal_halt) {
- current_thread_info()->status &= ~TS_POLLING;
- /*
- * TS_POLLING-cleared state must be visible before we
- * test NEED_RESCHED:
- */
- smp_mb();
- } else {
- current_thread_info()->status |= TS_POLLING;
- }
- if (!need_resched()) {
- void (*idle)(void);
#ifdef CONFIG_SMP
- min_xtp();
+ min_xtp();
#endif
- if (__get_cpu_var(cpu_idle_state))
- __get_cpu_var(cpu_idle_state) = 0;
-
- rmb();
- if (mark_idle)
- (*mark_idle)(1);
-
- idle = pm_idle;
- if (!idle)
- idle = default_idle;
- (*idle)();
- if (mark_idle)
- (*mark_idle)(0);
+ rmb();
+ if (mark_idle)
+ (*mark_idle)(1);
+
+ safe_halt();
+
+ if (mark_idle)
+ (*mark_idle)(0);
#ifdef CONFIG_SMP
- normal_xtp();
+ normal_xtp();
#endif
- }
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
- check_pgt_cache();
- if (cpu_is_offline(cpu))
- play_dead();
- }
}
void
@@ -334,11 +277,6 @@ ia64_save_extra (struct task_struct *task)
if (info & PFM_CPUINFO_SYST_WIDE)
pfm_syst_wide_update_task(task, info, 0);
#endif
-
-#ifdef CONFIG_IA32_SUPPORT
- if (IS_IA32_PROCESS(task_pt_regs(task)))
- ia32_save_state(task);
-#endif
}
void
@@ -359,11 +297,6 @@ ia64_load_extra (struct task_struct *task)
if (info & PFM_CPUINFO_SYST_WIDE)
pfm_syst_wide_update_task(task, info, 1);
#endif
-
-#ifdef CONFIG_IA32_SUPPORT
- if (IS_IA32_PROCESS(task_pt_regs(task)))
- ia32_load_state(task);
-#endif
}
/*
@@ -398,77 +331,26 @@ ia64_load_extra (struct task_struct *task)
* so there is nothing to worry about.
*/
int
-copy_thread (int nr, unsigned long clone_flags,
+copy_thread(unsigned long clone_flags,
unsigned long user_stack_base, unsigned long user_stack_size,
- struct task_struct *p, struct pt_regs *regs)
+ struct task_struct *p)
{
- extern char ia64_ret_from_clone, ia32_ret_from_clone;
+ extern char ia64_ret_from_clone;
struct switch_stack *child_stack, *stack;
unsigned long rbs, child_rbs, rbs_size;
struct pt_regs *child_ptregs;
+ struct pt_regs *regs = current_pt_regs();
int retval = 0;
-#ifdef CONFIG_SMP
- /*
- * For SMP idle threads, fork_by_hand() calls do_fork with
- * NULL regs.
- */
- if (!regs)
- return 0;
-#endif
-
- stack = ((struct switch_stack *) regs) - 1;
-
child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1;
child_stack = (struct switch_stack *) child_ptregs - 1;
- /* copy parent's switch_stack & pt_regs to child: */
- memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack));
-
rbs = (unsigned long) current + IA64_RBS_OFFSET;
child_rbs = (unsigned long) p + IA64_RBS_OFFSET;
- rbs_size = stack->ar_bspstore - rbs;
-
- /* copy the parent's register backing store to the child: */
- memcpy((void *) child_rbs, (void *) rbs, rbs_size);
-
- if (likely(user_mode(child_ptregs))) {
- if ((clone_flags & CLONE_SETTLS) && !IS_IA32_PROCESS(regs))
- child_ptregs->r13 = regs->r16; /* see sys_clone2() in entry.S */
- if (user_stack_base) {
- child_ptregs->r12 = user_stack_base + user_stack_size - 16;
- child_ptregs->ar_bspstore = user_stack_base;
- child_ptregs->ar_rnat = 0;
- child_ptregs->loadrs = 0;
- }
- } else {
- /*
- * Note: we simply preserve the relative position of
- * the stack pointer here. There is no need to
- * allocate a scratch area here, since that will have
- * been taken care of by the caller of sys_clone()
- * already.
- */
- child_ptregs->r12 = (unsigned long) child_ptregs - 16; /* kernel sp */
- child_ptregs->r13 = (unsigned long) p; /* set `current' pointer */
- }
- child_stack->ar_bspstore = child_rbs + rbs_size;
- if (IS_IA32_PROCESS(regs))
- child_stack->b0 = (unsigned long) &ia32_ret_from_clone;
- else
- child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
/* copy parts of thread_struct: */
p->thread.ksp = (unsigned long) child_stack - 16;
- /* stop some PSR bits from being inherited.
- * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
- * therefore we must specify them explicitly here and not include them in
- * IA64_PSR_BITS_TO_CLEAR.
- */
- child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
- & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
-
/*
* NOTE: The calling convention considers all floating point
* registers in the high partition (fph) to be scratch. Since
@@ -490,23 +372,65 @@ copy_thread (int nr, unsigned long clone_flags,
# define THREAD_FLAGS_TO_SET 0
p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR)
| THREAD_FLAGS_TO_SET);
+
ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */
-#ifdef CONFIG_IA32_SUPPORT
- /*
- * If we're cloning an IA32 task then save the IA32 extra
- * state from the current task to the new task
- */
- if (IS_IA32_PROCESS(task_pt_regs(current))) {
- ia32_save_state(p);
- if (clone_flags & CLONE_SETTLS)
- retval = ia32_clone_tls(p, child_ptregs);
-
- /* Copy partially mapped page list */
- if (!retval)
- retval = ia32_copy_ia64_partial_page_list(p,
- clone_flags);
+
+ if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(!user_stack_base)) {
+ /* fork_idle() called us */
+ return 0;
+ }
+ memset(child_stack, 0, sizeof(*child_ptregs) + sizeof(*child_stack));
+ child_stack->r4 = user_stack_base; /* payload */
+ child_stack->r5 = user_stack_size; /* argument */
+ /*
+ * Preserve PSR bits, except for bits 32-34 and 37-45,
+ * which we can't read.
+ */
+ child_ptregs->cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
+ /* mark as valid, empty frame */
+ child_ptregs->cr_ifs = 1UL << 63;
+ child_stack->ar_fpsr = child_ptregs->ar_fpsr
+ = ia64_getreg(_IA64_REG_AR_FPSR);
+ child_stack->pr = (1 << PRED_KERNEL_STACK);
+ child_stack->ar_bspstore = child_rbs;
+ child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
+
+ /* stop some PSR bits from being inherited.
+ * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
+ * therefore we must specify them explicitly here and not include them in
+ * IA64_PSR_BITS_TO_CLEAR.
+ */
+ child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
+ & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
+
+ return 0;
}
-#endif
+ stack = ((struct switch_stack *) regs) - 1;
+ /* copy parent's switch_stack & pt_regs to child: */
+ memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack));
+
+ /* copy the parent's register backing store to the child: */
+ rbs_size = stack->ar_bspstore - rbs;
+ memcpy((void *) child_rbs, (void *) rbs, rbs_size);
+ if (clone_flags & CLONE_SETTLS)
+ child_ptregs->r13 = regs->r16; /* see sys_clone2() in entry.S */
+ if (user_stack_base) {
+ child_ptregs->r12 = user_stack_base + user_stack_size - 16;
+ child_ptregs->ar_bspstore = user_stack_base;
+ child_ptregs->ar_rnat = 0;
+ child_ptregs->loadrs = 0;
+ }
+ child_stack->ar_bspstore = child_rbs + rbs_size;
+ child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
+
+ /* stop some PSR bits from being inherited.
+ * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
+ * therefore we must specify them explicitly here and not include them in
+ * IA64_PSR_BITS_TO_CLEAR.
+ */
+ child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
+ & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
#ifdef CONFIG_PERFMON
if (current->thread.pfm_context)
@@ -621,21 +545,6 @@ do_dump_fpu (struct unw_frame_info *info, void *arg)
do_dump_task_fpu(current, info, arg);
}
-int
-dump_task_regs(struct task_struct *task, elf_gregset_t *regs)
-{
- struct unw_frame_info tcore_info;
-
- if (current == task) {
- unw_init_running(do_copy_regs, regs);
- } else {
- memset(&tcore_info, 0, sizeof(tcore_info));
- unw_init_from_blocked_task(&tcore_info, task);
- do_copy_task_regs(task, &tcore_info, regs);
- }
- return 1;
-}
-
void
ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
{
@@ -643,85 +552,12 @@ ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
}
int
-dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst)
-{
- struct unw_frame_info tcore_info;
-
- if (current == task) {
- unw_init_running(do_dump_fpu, dst);
- } else {
- memset(&tcore_info, 0, sizeof(tcore_info));
- unw_init_from_blocked_task(&tcore_info, task);
- do_dump_task_fpu(task, &tcore_info, dst);
- }
- return 1;
-}
-
-int
dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
{
unw_init_running(do_dump_fpu, dst);
return 1; /* f0-f31 are always valid so we always return 1 */
}
-long
-sys_execve (char __user *filename, char __user * __user *argv, char __user * __user *envp,
- struct pt_regs *regs)
-{
- char *fname;
- int error;
-
- fname = getname(filename);
- error = PTR_ERR(fname);
- if (IS_ERR(fname))
- goto out;
- error = do_execve(fname, argv, envp, regs);
- putname(fname);
-out:
- return error;
-}
-
-pid_t
-kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
-{
- extern void start_kernel_thread (void);
- unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread;
- struct {
- struct switch_stack sw;
- struct pt_regs pt;
- } regs;
-
- memset(&regs, 0, sizeof(regs));
- regs.pt.cr_iip = helper_fptr[0]; /* set entry point (IP) */
- regs.pt.r1 = helper_fptr[1]; /* set GP */
- regs.pt.r9 = (unsigned long) fn; /* 1st argument */
- regs.pt.r11 = (unsigned long) arg; /* 2nd argument */
- /* Preserve PSR bits, except for bits 32-34 and 37-45, which we can't read. */
- regs.pt.cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
- regs.pt.cr_ifs = 1UL << 63; /* mark as valid, empty frame */
- regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR);
- regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET;
- regs.sw.pr = (1 << PRED_KERNEL_STACK);
- return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
-/* This gets called from kernel_thread() via ia64_invoke_thread_helper(). */
-int
-kernel_thread_helper (int (*fn)(void *), void *arg)
-{
-#ifdef CONFIG_IA32_SUPPORT
- if (IS_IA32_PROCESS(task_pt_regs(current))) {
- /* A kernel thread is always a 64-bit process. */
- current->thread.map_base = DEFAULT_MAP_BASE;
- current->thread.task_size = DEFAULT_TASK_SIZE;
- ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob);
- ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1);
- }
-#endif
- return (*fn)(arg);
-}
-
/*
* Flush thread state. This is called when a thread does an execve().
*/
@@ -731,13 +567,6 @@ flush_thread (void)
/* drop floating-point and debug-register state if it exists: */
current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID);
ia64_drop_fpu(current);
-#ifdef CONFIG_IA32_SUPPORT
- if (IS_IA32_PROCESS(task_pt_regs(current))) {
- ia32_drop_ia64_partial_page_list(current);
- current->thread.task_size = IA32_PAGE_OFFSET;
- set_fs(USER_DS);
- }
-#endif
}
/*
@@ -758,8 +587,6 @@ exit_thread (void)
if (current->thread.flags & IA64_THREAD_DBG_VALID)
pfm_release_debug_registers(current);
#endif
- if (IS_IA32_PROCESS(task_pt_regs(current)))
- ia32_drop_ia64_partial_page_list(current);
}
unsigned long
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 2e96f17b2f3..b7a5fffe092 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -3,26 +3,29 @@
*
* Copyright (C) 1999-2005 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2006 Intel Co
+ * 2006-08-12 - IA64 Native Utrace implementation support added by
+ * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
*
* Derived from the x86 and Alpha versions.
*/
#include <linux/kernel.h>
#include <linux/sched.h>
-#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/errno.h>
#include <linux/ptrace.h>
-#include <linux/smp_lock.h>
#include <linux/user.h>
#include <linux/security.h>
#include <linux/audit.h>
#include <linux/signal.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/tracehook.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/ptrace_offsets.h>
#include <asm/rse.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/unwind.h>
#ifdef CONFIG_PERFMON
@@ -547,77 +550,126 @@ ia64_sync_user_rbs (struct task_struct *child, struct switch_stack *sw,
return 0;
}
-static inline int
-thread_matches (struct task_struct *thread, unsigned long addr)
+static long
+ia64_sync_kernel_rbs (struct task_struct *child, struct switch_stack *sw,
+ unsigned long user_rbs_start, unsigned long user_rbs_end)
{
- unsigned long thread_rbs_end;
- struct pt_regs *thread_regs;
+ unsigned long addr, val;
+ long ret;
- if (ptrace_check_attach(thread, 0) < 0)
- /*
- * If the thread is not in an attachable state, we'll
- * ignore it. The net effect is that if ADDR happens
- * to overlap with the portion of the thread's
- * register backing store that is currently residing
- * on the thread's kernel stack, then ptrace() may end
- * up accessing a stale value. But if the thread
- * isn't stopped, that's a problem anyhow, so we're
- * doing as well as we can...
- */
- return 0;
+ /* now copy word for word from user rbs to kernel rbs: */
+ for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) {
+ if (access_process_vm(child, addr, &val, sizeof(val), 0)
+ != sizeof(val))
+ return -EIO;
- thread_regs = task_pt_regs(thread);
- thread_rbs_end = ia64_get_user_rbs_end(thread, thread_regs, NULL);
- if (!on_kernel_rbs(addr, thread_regs->ar_bspstore, thread_rbs_end))
- return 0;
+ ret = ia64_poke(child, sw, user_rbs_end, addr, val);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+typedef long (*syncfunc_t)(struct task_struct *, struct switch_stack *,
+ unsigned long, unsigned long);
+
+static void do_sync_rbs(struct unw_frame_info *info, void *arg)
+{
+ struct pt_regs *pt;
+ unsigned long urbs_end;
+ syncfunc_t fn = arg;
- return 1; /* looks like we've got a winner */
+ if (unw_unwind_to_user(info) < 0)
+ return;
+ pt = task_pt_regs(info->task);
+ urbs_end = ia64_get_user_rbs_end(info->task, pt, NULL);
+
+ fn(info->task, info->sw, pt->ar_bspstore, urbs_end);
}
/*
- * GDB apparently wants to be able to read the register-backing store
- * of any thread when attached to a given process. If we are peeking
- * or poking an address that happens to reside in the kernel-backing
- * store of another thread, we need to attach to that thread, because
- * otherwise we end up accessing stale data.
- *
- * task_list_lock must be read-locked before calling this routine!
+ * When a thread is stopped (ptraced), the debugger might change the thread's
+ * user stack (changing memory directly), and we must keep the RSE stored in
+ * the kernel from overriding the user stack (user space's RSE is newer than
+ * the kernel's in that case). To work around the issue, we copy kernel RSE to
+ * user RSE before the task is stopped, so user RSE has up-to-date data. We
+ * then copy user RSE to kernel after the task is resumed from the traced stop,
+ * and the kernel uses the newer RSE to return to user. TIF_RESTORE_RSE is the
+ * flag indicating that we need to synchronize user RSE to kernel.
*/
-static struct task_struct *
-find_thread_for_addr (struct task_struct *child, unsigned long addr)
+void ia64_ptrace_stop(void)
{
- struct task_struct *p;
- struct mm_struct *mm;
- struct list_head *this, *next;
- int mm_users;
+ if (test_and_set_tsk_thread_flag(current, TIF_RESTORE_RSE))
+ return;
+ set_notify_resume(current);
+ unw_init_running(do_sync_rbs, ia64_sync_user_rbs);
+}
- if (!(mm = get_task_mm(child)))
- return child;
+/*
+ * This is called to read back the register backing store.
+ */
+void ia64_sync_krbs(void)
+{
+ clear_tsk_thread_flag(current, TIF_RESTORE_RSE);
- /* -1 because of our get_task_mm(): */
- mm_users = atomic_read(&mm->mm_users) - 1;
- if (mm_users <= 1)
- goto out; /* not multi-threaded */
+ unw_init_running(do_sync_rbs, ia64_sync_kernel_rbs);
+}
+
+/*
+ * After PTRACE_ATTACH, a thread's register backing store area in user
+ * space is assumed to contain correct data whenever the thread is
+ * stopped. arch_ptrace_stop takes care of this on tracing stops.
+ * But if the child was already stopped for job control when we attach
+ * to it, then it might not ever get into ptrace_stop by the time we
+ * want to examine the user memory containing the RBS.
+ */
+void
+ptrace_attach_sync_user_rbs (struct task_struct *child)
+{
+ int stopped = 0;
+ struct unw_frame_info info;
/*
- * Traverse the current process' children list. Every task that
- * one attaches to becomes a child. And it is only attached children
- * of the debugger that are of interest (ptrace_check_attach checks
- * for this).
+ * If the child is in TASK_STOPPED, we need to change that to
+ * TASK_TRACED momentarily while we operate on it. This ensures
+ * that the child won't be woken up and return to user mode while
+ * we are doing the sync. (It can only be woken up for SIGKILL.)
*/
- list_for_each_safe(this, next, &current->children) {
- p = list_entry(this, struct task_struct, sibling);
- if (p->tgid != child->tgid)
- continue;
- if (thread_matches(p, addr)) {
- child = p;
- goto out;
+
+ read_lock(&tasklist_lock);
+ if (child->sighand) {
+ spin_lock_irq(&child->sighand->siglock);
+ if (child->state == TASK_STOPPED &&
+ !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) {
+ set_notify_resume(child);
+
+ child->state = TASK_TRACED;
+ stopped = 1;
}
+ spin_unlock_irq(&child->sighand->siglock);
}
+ read_unlock(&tasklist_lock);
+
+ if (!stopped)
+ return;
+
+ unw_init_from_blocked_task(&info, child);
+ do_sync_rbs(&info, ia64_sync_user_rbs);
- out:
- mmput(mm);
- return child;
+ /*
+ * Now move the child back into TASK_STOPPED if it should be in a
+ * job control stop, so that SIGCONT can be used to wake it up.
+ */
+ read_lock(&tasklist_lock);
+ if (child->sighand) {
+ spin_lock_irq(&child->sighand->siglock);
+ if (child->state == TASK_TRACED &&
+ (child->signal->flags & SIGNAL_STOP_STOPPED)) {
+ child->state = TASK_STOPPED;
+ }
+ spin_unlock_irq(&child->sighand->siglock);
+ }
+ read_unlock(&tasklist_lock);
}
/*
@@ -663,25 +715,6 @@ ia64_sync_fph (struct task_struct *task)
psr->dfh = 1;
}
-static int
-access_fr (struct unw_frame_info *info, int regnum, int hi,
- unsigned long *data, int write_access)
-{
- struct ia64_fpreg fpval;
- int ret;
-
- ret = unw_get_fr(info, regnum, &fpval);
- if (ret < 0)
- return ret;
-
- if (write_access) {
- fpval.u.bits[hi] = *data;
- ret = unw_set_fr(info, regnum, fpval);
- } else
- *data = fpval.u.bits[hi];
- return ret;
-}
-
/*
* Change the machine-state of CHILD such that it will return via the normal
* kernel exit-path, rather than the syscall-exit path.
@@ -703,14 +736,14 @@ convert_to_non_syscall (struct task_struct *child, struct pt_regs *pt,
if ((long)((unsigned long)child + IA64_STK_OFFSET - sp)
< IA64_PT_REGS_SIZE) {
dprintk("ptrace.%s: ran off the top of the kernel "
- "stack\n", __FUNCTION__);
+ "stack\n", __func__);
return;
}
if (unw_get_pr (&prev_info, &pr) < 0) {
unw_get_rp(&prev_info, &ip);
dprintk("ptrace.%s: failed to read "
"predicate register (ip=0x%lx)\n",
- __FUNCTION__, ip);
+ __func__, ip);
return;
}
if (unw_is_intr_frame(&info)
@@ -783,330 +816,7 @@ access_nat_bits (struct task_struct *child, struct pt_regs *pt,
static int
access_uarea (struct task_struct *child, unsigned long addr,
- unsigned long *data, int write_access)
-{
- unsigned long *ptr, regnum, urbs_end, rnat_addr, cfm;
- struct switch_stack *sw;
- struct pt_regs *pt;
-# define pt_reg_addr(pt, reg) ((void *) \
- ((unsigned long) (pt) \
- + offsetof(struct pt_regs, reg)))
-
-
- pt = task_pt_regs(child);
- sw = (struct switch_stack *) (child->thread.ksp + 16);
-
- if ((addr & 0x7) != 0) {
- dprintk("ptrace: unaligned register address 0x%lx\n", addr);
- return -1;
- }
-
- if (addr < PT_F127 + 16) {
- /* accessing fph */
- if (write_access)
- ia64_sync_fph(child);
- else
- ia64_flush_fph(child);
- ptr = (unsigned long *)
- ((unsigned long) &child->thread.fph + addr);
- } else if ((addr >= PT_F10) && (addr < PT_F11 + 16)) {
- /* scratch registers untouched by kernel (saved in pt_regs) */
- ptr = pt_reg_addr(pt, f10) + (addr - PT_F10);
- } else if (addr >= PT_F12 && addr < PT_F15 + 16) {
- /*
- * Scratch registers untouched by kernel (saved in
- * switch_stack).
- */
- ptr = (unsigned long *) ((long) sw
- + (addr - PT_NAT_BITS - 32));
- } else if (addr < PT_AR_LC + 8) {
- /* preserved state: */
- struct unw_frame_info info;
- char nat = 0;
- int ret;
-
- unw_init_from_blocked_task(&info, child);
- if (unw_unwind_to_user(&info) < 0)
- return -1;
-
- switch (addr) {
- case PT_NAT_BITS:
- return access_nat_bits(child, pt, &info,
- data, write_access);
-
- case PT_R4: case PT_R5: case PT_R6: case PT_R7:
- if (write_access) {
- /* read NaT bit first: */
- unsigned long dummy;
-
- ret = unw_get_gr(&info, (addr - PT_R4)/8 + 4,
- &dummy, &nat);
- if (ret < 0)
- return ret;
- }
- return unw_access_gr(&info, (addr - PT_R4)/8 + 4, data,
- &nat, write_access);
-
- case PT_B1: case PT_B2: case PT_B3:
- case PT_B4: case PT_B5:
- return unw_access_br(&info, (addr - PT_B1)/8 + 1, data,
- write_access);
-
- case PT_AR_EC:
- return unw_access_ar(&info, UNW_AR_EC, data,
- write_access);
-
- case PT_AR_LC:
- return unw_access_ar(&info, UNW_AR_LC, data,
- write_access);
-
- default:
- if (addr >= PT_F2 && addr < PT_F5 + 16)
- return access_fr(&info, (addr - PT_F2)/16 + 2,
- (addr & 8) != 0, data,
- write_access);
- else if (addr >= PT_F16 && addr < PT_F31 + 16)
- return access_fr(&info,
- (addr - PT_F16)/16 + 16,
- (addr & 8) != 0,
- data, write_access);
- else {
- dprintk("ptrace: rejecting access to register "
- "address 0x%lx\n", addr);
- return -1;
- }
- }
- } else if (addr < PT_F9+16) {
- /* scratch state */
- switch (addr) {
- case PT_AR_BSP:
- /*
- * By convention, we use PT_AR_BSP to refer to
- * the end of the user-level backing store.
- * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
- * to get the real value of ar.bsp at the time
- * the kernel was entered.
- *
- * Furthermore, when changing the contents of
- * PT_AR_BSP (or PT_CFM) we MUST copy any
- * users-level stacked registers that are
- * stored on the kernel stack back to
- * user-space because otherwise, we might end
- * up clobbering kernel stacked registers.
- * Also, if this happens while the task is
- * blocked in a system call, which convert the
- * state such that the non-system-call exit
- * path is used. This ensures that the proper
- * state will be picked up when resuming
- * execution. However, it *also* means that
- * once we write PT_AR_BSP/PT_CFM, it won't be
- * possible to modify the syscall arguments of
- * the pending system call any longer. This
- * shouldn't be an issue because modifying
- * PT_AR_BSP/PT_CFM generally implies that
- * we're either abandoning the pending system
- * call or that we defer it's re-execution
- * (e.g., due to GDB doing an inferior
- * function call).
- */
- urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
- if (write_access) {
- if (*data != urbs_end) {
- if (ia64_sync_user_rbs(child, sw,
- pt->ar_bspstore,
- urbs_end) < 0)
- return -1;
- if (in_syscall(pt))
- convert_to_non_syscall(child,
- pt,
- cfm);
- /*
- * Simulate user-level write
- * of ar.bsp:
- */
- pt->loadrs = 0;
- pt->ar_bspstore = *data;
- }
- } else
- *data = urbs_end;
- return 0;
-
- case PT_CFM:
- urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
- if (write_access) {
- if (((cfm ^ *data) & PFM_MASK) != 0) {
- if (ia64_sync_user_rbs(child, sw,
- pt->ar_bspstore,
- urbs_end) < 0)
- return -1;
- if (in_syscall(pt))
- convert_to_non_syscall(child,
- pt,
- cfm);
- pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
- | (*data & PFM_MASK));
- }
- } else
- *data = cfm;
- return 0;
-
- case PT_CR_IPSR:
- if (write_access) {
- unsigned long tmp = *data;
- /* psr.ri==3 is a reserved value: SDM 2:25 */
- if ((tmp & IA64_PSR_RI) == IA64_PSR_RI)
- tmp &= ~IA64_PSR_RI;
- pt->cr_ipsr = ((tmp & IPSR_MASK)
- | (pt->cr_ipsr & ~IPSR_MASK));
- } else
- *data = (pt->cr_ipsr & IPSR_MASK);
- return 0;
-
- case PT_AR_RSC:
- if (write_access)
- pt->ar_rsc = *data | (3 << 2); /* force PL3 */
- else
- *data = pt->ar_rsc;
- return 0;
-
- case PT_AR_RNAT:
- urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
- rnat_addr = (long) ia64_rse_rnat_addr((long *)
- urbs_end);
- if (write_access)
- return ia64_poke(child, sw, urbs_end,
- rnat_addr, *data);
- else
- return ia64_peek(child, sw, urbs_end,
- rnat_addr, data);
-
- case PT_R1:
- ptr = pt_reg_addr(pt, r1);
- break;
- case PT_R2: case PT_R3:
- ptr = pt_reg_addr(pt, r2) + (addr - PT_R2);
- break;
- case PT_R8: case PT_R9: case PT_R10: case PT_R11:
- ptr = pt_reg_addr(pt, r8) + (addr - PT_R8);
- break;
- case PT_R12: case PT_R13:
- ptr = pt_reg_addr(pt, r12) + (addr - PT_R12);
- break;
- case PT_R14:
- ptr = pt_reg_addr(pt, r14);
- break;
- case PT_R15:
- ptr = pt_reg_addr(pt, r15);
- break;
- case PT_R16: case PT_R17: case PT_R18: case PT_R19:
- case PT_R20: case PT_R21: case PT_R22: case PT_R23:
- case PT_R24: case PT_R25: case PT_R26: case PT_R27:
- case PT_R28: case PT_R29: case PT_R30: case PT_R31:
- ptr = pt_reg_addr(pt, r16) + (addr - PT_R16);
- break;
- case PT_B0:
- ptr = pt_reg_addr(pt, b0);
- break;
- case PT_B6:
- ptr = pt_reg_addr(pt, b6);
- break;
- case PT_B7:
- ptr = pt_reg_addr(pt, b7);
- break;
- case PT_F6: case PT_F6+8: case PT_F7: case PT_F7+8:
- case PT_F8: case PT_F8+8: case PT_F9: case PT_F9+8:
- ptr = pt_reg_addr(pt, f6) + (addr - PT_F6);
- break;
- case PT_AR_BSPSTORE:
- ptr = pt_reg_addr(pt, ar_bspstore);
- break;
- case PT_AR_UNAT:
- ptr = pt_reg_addr(pt, ar_unat);
- break;
- case PT_AR_PFS:
- ptr = pt_reg_addr(pt, ar_pfs);
- break;
- case PT_AR_CCV:
- ptr = pt_reg_addr(pt, ar_ccv);
- break;
- case PT_AR_FPSR:
- ptr = pt_reg_addr(pt, ar_fpsr);
- break;
- case PT_CR_IIP:
- ptr = pt_reg_addr(pt, cr_iip);
- break;
- case PT_PR:
- ptr = pt_reg_addr(pt, pr);
- break;
- /* scratch register */
-
- default:
- /* disallow accessing anything else... */
- dprintk("ptrace: rejecting access to register "
- "address 0x%lx\n", addr);
- return -1;
- }
- } else if (addr <= PT_AR_SSD) {
- ptr = pt_reg_addr(pt, ar_csd) + (addr - PT_AR_CSD);
- } else {
- /* access debug registers */
-
- if (addr >= PT_IBR) {
- regnum = (addr - PT_IBR) >> 3;
- ptr = &child->thread.ibr[0];
- } else {
- regnum = (addr - PT_DBR) >> 3;
- ptr = &child->thread.dbr[0];
- }
-
- if (regnum >= 8) {
- dprintk("ptrace: rejecting access to register "
- "address 0x%lx\n", addr);
- return -1;
- }
-#ifdef CONFIG_PERFMON
- /*
- * Check if debug registers are used by perfmon. This
- * test must be done once we know that we can do the
- * operation, i.e. the arguments are all valid, but
- * before we start modifying the state.
- *
- * Perfmon needs to keep a count of how many processes
- * are trying to modify the debug registers for system
- * wide monitoring sessions.
- *
- * We also include read access here, because they may
- * cause the PMU-installed debug register state
- * (dbr[], ibr[]) to be reset. The two arrays are also
- * used by perfmon, but we do not use
- * IA64_THREAD_DBG_VALID. The registers are restored
- * by the PMU context switch code.
- */
- if (pfm_use_debug_registers(child)) return -1;
-#endif
-
- if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
- child->thread.flags |= IA64_THREAD_DBG_VALID;
- memset(child->thread.dbr, 0,
- sizeof(child->thread.dbr));
- memset(child->thread.ibr, 0,
- sizeof(child->thread.ibr));
- }
-
- ptr += regnum;
-
- if ((regnum & 1) && write_access) {
- /* don't let the user set kernel-level breakpoints: */
- *ptr = *data & ~(7UL << 56);
- return 0;
- }
- }
- if (write_access)
- *ptr = *data;
- else
- *data = *ptr;
- return 0;
-}
+ unsigned long *data, int write_access);
static long
ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
@@ -1398,277 +1108,1087 @@ ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
return ret;
}
-/*
- * Called by kernel/ptrace.c when detaching..
- *
- * Make sure the single step bit is not set.
- */
void
-ptrace_disable (struct task_struct *child)
+user_enable_single_step (struct task_struct *child)
{
struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child));
- /* make sure the single step/taken-branch trap bits are not set: */
- clear_tsk_thread_flag(child, TIF_SINGLESTEP);
- child_psr->ss = 0;
- child_psr->tb = 0;
+ set_tsk_thread_flag(child, TIF_SINGLESTEP);
+ child_psr->ss = 1;
}
-asmlinkage long
-sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data)
+void
+user_enable_block_step (struct task_struct *child)
{
- struct pt_regs *pt;
- unsigned long urbs_end, peek_or_poke;
- struct task_struct *child;
- struct switch_stack *sw;
- long ret;
+ struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child));
- lock_kernel();
- ret = -EPERM;
- if (request == PTRACE_TRACEME) {
- ret = ptrace_traceme();
- goto out;
- }
+ set_tsk_thread_flag(child, TIF_SINGLESTEP);
+ child_psr->tb = 1;
+}
- peek_or_poke = (request == PTRACE_PEEKTEXT
- || request == PTRACE_PEEKDATA
- || request == PTRACE_POKETEXT
- || request == PTRACE_POKEDATA);
- ret = -ESRCH;
- read_lock(&tasklist_lock);
- {
- child = find_task_by_pid(pid);
- if (child) {
- if (peek_or_poke)
- child = find_thread_for_addr(child, addr);
- get_task_struct(child);
- }
- }
- read_unlock(&tasklist_lock);
- if (!child)
- goto out;
- ret = -EPERM;
- if (pid == 1) /* no messing around with init! */
- goto out_tsk;
-
- if (request == PTRACE_ATTACH) {
- ret = ptrace_attach(child);
- goto out_tsk;
- }
+void
+user_disable_single_step (struct task_struct *child)
+{
+ struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child));
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret < 0)
- goto out_tsk;
+ /* make sure the single step/taken-branch trap bits are not set: */
+ clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+ child_psr->ss = 0;
+ child_psr->tb = 0;
+}
- pt = task_pt_regs(child);
- sw = (struct switch_stack *) (child->thread.ksp + 16);
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure the single step bit is not set.
+ */
+void
+ptrace_disable (struct task_struct *child)
+{
+ user_disable_single_step(child);
+}
+long
+arch_ptrace (struct task_struct *child, long request,
+ unsigned long addr, unsigned long data)
+{
switch (request) {
- case PTRACE_PEEKTEXT:
- case PTRACE_PEEKDATA:
+ case PTRACE_PEEKTEXT:
+ case PTRACE_PEEKDATA:
/* read word at location addr */
- urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
- ret = ia64_peek(child, sw, urbs_end, addr, &data);
- if (ret == 0) {
- ret = data;
- /* ensure "ret" is not mistaken as an error code: */
- force_successful_syscall_return();
- }
- goto out_tsk;
+ if (access_process_vm(child, addr, &data, sizeof(data), 0)
+ != sizeof(data))
+ return -EIO;
+ /* ensure return value is not mistaken for error code */
+ force_successful_syscall_return();
+ return data;
- case PTRACE_POKETEXT:
- case PTRACE_POKEDATA:
- /* write the word at location addr */
- urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
- ret = ia64_poke(child, sw, urbs_end, addr, data);
- goto out_tsk;
+ /* PTRACE_POKETEXT and PTRACE_POKEDATA are handled
+ * by the generic ptrace_request().
+ */
- case PTRACE_PEEKUSR:
+ case PTRACE_PEEKUSR:
/* read the word at addr in the USER area */
- if (access_uarea(child, addr, &data, 0) < 0) {
- ret = -EIO;
- goto out_tsk;
- }
- ret = data;
- /* ensure "ret" is not mistaken as an error code */
+ if (access_uarea(child, addr, &data, 0) < 0)
+ return -EIO;
+ /* ensure return value is not mistaken for error code */
force_successful_syscall_return();
- goto out_tsk;
+ return data;
- case PTRACE_POKEUSR:
+ case PTRACE_POKEUSR:
/* write the word at addr in the USER area */
- if (access_uarea(child, addr, &data, 1) < 0) {
- ret = -EIO;
- goto out_tsk;
- }
- ret = 0;
- goto out_tsk;
+ if (access_uarea(child, addr, &data, 1) < 0)
+ return -EIO;
+ return 0;
- case PTRACE_OLD_GETSIGINFO:
+ case PTRACE_OLD_GETSIGINFO:
/* for backwards-compatibility */
- ret = ptrace_request(child, PTRACE_GETSIGINFO, addr, data);
- goto out_tsk;
+ return ptrace_request(child, PTRACE_GETSIGINFO, addr, data);
- case PTRACE_OLD_SETSIGINFO:
+ case PTRACE_OLD_SETSIGINFO:
/* for backwards-compatibility */
- ret = ptrace_request(child, PTRACE_SETSIGINFO, addr, data);
- goto out_tsk;
-
- case PTRACE_SYSCALL:
- /* continue and stop at next (return from) syscall */
- case PTRACE_CONT:
- /* restart after signal. */
- ret = -EIO;
- if (!valid_signal(data))
- goto out_tsk;
- if (request == PTRACE_SYSCALL)
- set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- else
- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- child->exit_code = data;
+ return ptrace_request(child, PTRACE_SETSIGINFO, addr, data);
- /*
- * Make sure the single step/taken-branch trap bits
- * are not set:
- */
- clear_tsk_thread_flag(child, TIF_SINGLESTEP);
- ia64_psr(pt)->ss = 0;
- ia64_psr(pt)->tb = 0;
+ case PTRACE_GETREGS:
+ return ptrace_getregs(child,
+ (struct pt_all_user_regs __user *) data);
- wake_up_process(child);
- ret = 0;
- goto out_tsk;
+ case PTRACE_SETREGS:
+ return ptrace_setregs(child,
+ (struct pt_all_user_regs __user *) data);
- case PTRACE_KILL:
- /*
- * Make the child exit. Best I can do is send it a
- * sigkill. Perhaps it should be put in the status
- * that it wants to exit.
- */
- if (child->exit_state == EXIT_ZOMBIE)
- /* already dead */
- goto out_tsk;
- child->exit_code = SIGKILL;
-
- ptrace_disable(child);
- wake_up_process(child);
- ret = 0;
- goto out_tsk;
-
- case PTRACE_SINGLESTEP:
- /* let child execute for one instruction */
- case PTRACE_SINGLEBLOCK:
- ret = -EIO;
- if (!valid_signal(data))
- goto out_tsk;
-
- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
- set_tsk_thread_flag(child, TIF_SINGLESTEP);
- if (request == PTRACE_SINGLESTEP) {
- ia64_psr(pt)->ss = 1;
- } else {
- ia64_psr(pt)->tb = 1;
+ default:
+ return ptrace_request(child, request, addr, data);
+ }
+}
+
+
+/* "asmlinkage" so the input arguments are preserved... */
+
+asmlinkage long
+syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
+ long arg4, long arg5, long arg6, long arg7,
+ struct pt_regs regs)
+{
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ if (tracehook_report_syscall_entry(&regs))
+ return -ENOSYS;
+
+ /* copy user rbs to kernel rbs */
+ if (test_thread_flag(TIF_RESTORE_RSE))
+ ia64_sync_krbs();
+
+
+ audit_syscall_entry(AUDIT_ARCH_IA64, regs.r15, arg0, arg1, arg2, arg3);
+
+ return 0;
+}
+
+/* "asmlinkage" so the input arguments are preserved... */
+
+asmlinkage void
+syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
+ long arg4, long arg5, long arg6, long arg7,
+ struct pt_regs regs)
+{
+ int step;
+
+ audit_syscall_exit(&regs);
+
+ step = test_thread_flag(TIF_SINGLESTEP);
+ if (step || test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall_exit(&regs, step);
+
+ /* copy user rbs to kernel rbs */
+ if (test_thread_flag(TIF_RESTORE_RSE))
+ ia64_sync_krbs();
+}
+
+/* Utrace implementation starts here */
+struct regset_get {
+ void *kbuf;
+ void __user *ubuf;
+};
+
+struct regset_set {
+ const void *kbuf;
+ const void __user *ubuf;
+};
+
+struct regset_getset {
+ struct task_struct *target;
+ const struct user_regset *regset;
+ union {
+ struct regset_get get;
+ struct regset_set set;
+ } u;
+ unsigned int pos;
+ unsigned int count;
+ int ret;
+};
+
+static int
+access_elf_gpreg(struct task_struct *target, struct unw_frame_info *info,
+ unsigned long addr, unsigned long *data, int write_access)
+{
+ struct pt_regs *pt;
+ unsigned long *ptr = NULL;
+ int ret;
+ char nat = 0;
+
+ pt = task_pt_regs(target);
+ switch (addr) {
+ case ELF_GR_OFFSET(1):
+ ptr = &pt->r1;
+ break;
+ case ELF_GR_OFFSET(2):
+ case ELF_GR_OFFSET(3):
+ ptr = (void *)&pt->r2 + (addr - ELF_GR_OFFSET(2));
+ break;
+ case ELF_GR_OFFSET(4) ... ELF_GR_OFFSET(7):
+ if (write_access) {
+ /* read NaT bit first: */
+ unsigned long dummy;
+
+ ret = unw_get_gr(info, addr/8, &dummy, &nat);
+ if (ret < 0)
+ return ret;
}
- child->exit_code = data;
-
- /* give it a chance to run. */
- wake_up_process(child);
- ret = 0;
- goto out_tsk;
-
- case PTRACE_DETACH:
- /* detach a process that was attached. */
- ret = ptrace_detach(child, data);
- goto out_tsk;
-
- case PTRACE_GETREGS:
- ret = ptrace_getregs(child,
- (struct pt_all_user_regs __user *) data);
- goto out_tsk;
-
- case PTRACE_SETREGS:
- ret = ptrace_setregs(child,
- (struct pt_all_user_regs __user *) data);
- goto out_tsk;
-
- default:
- ret = ptrace_request(child, request, addr, data);
- goto out_tsk;
+ return unw_access_gr(info, addr/8, data, &nat, write_access);
+ case ELF_GR_OFFSET(8) ... ELF_GR_OFFSET(11):
+ ptr = (void *)&pt->r8 + addr - ELF_GR_OFFSET(8);
+ break;
+ case ELF_GR_OFFSET(12):
+ case ELF_GR_OFFSET(13):
+ ptr = (void *)&pt->r12 + addr - ELF_GR_OFFSET(12);
+ break;
+ case ELF_GR_OFFSET(14):
+ ptr = &pt->r14;
+ break;
+ case ELF_GR_OFFSET(15):
+ ptr = &pt->r15;
}
- out_tsk:
- put_task_struct(child);
- out:
- unlock_kernel();
- return ret;
+ if (write_access)
+ *ptr = *data;
+ else
+ *data = *ptr;
+ return 0;
}
+static int
+access_elf_breg(struct task_struct *target, struct unw_frame_info *info,
+ unsigned long addr, unsigned long *data, int write_access)
+{
+ struct pt_regs *pt;
+ unsigned long *ptr = NULL;
+
+ pt = task_pt_regs(target);
+ switch (addr) {
+ case ELF_BR_OFFSET(0):
+ ptr = &pt->b0;
+ break;
+ case ELF_BR_OFFSET(1) ... ELF_BR_OFFSET(5):
+ return unw_access_br(info, (addr - ELF_BR_OFFSET(0))/8,
+ data, write_access);
+ case ELF_BR_OFFSET(6):
+ ptr = &pt->b6;
+ break;
+ case ELF_BR_OFFSET(7):
+ ptr = &pt->b7;
+ }
+ if (write_access)
+ *ptr = *data;
+ else
+ *data = *ptr;
+ return 0;
+}
-static void
-syscall_trace (void)
+static int
+access_elf_areg(struct task_struct *target, struct unw_frame_info *info,
+ unsigned long addr, unsigned long *data, int write_access)
+{
+ struct pt_regs *pt;
+ unsigned long cfm, urbs_end;
+ unsigned long *ptr = NULL;
+
+ pt = task_pt_regs(target);
+ if (addr >= ELF_AR_RSC_OFFSET && addr <= ELF_AR_SSD_OFFSET) {
+ switch (addr) {
+ case ELF_AR_RSC_OFFSET:
+ /* force PL3 */
+ if (write_access)
+ pt->ar_rsc = *data | (3 << 2);
+ else
+ *data = pt->ar_rsc;
+ return 0;
+ case ELF_AR_BSP_OFFSET:
+ /*
+ * By convention, we use PT_AR_BSP to refer to
+ * the end of the user-level backing store.
+ * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
+ * to get the real value of ar.bsp at the time
+ * the kernel was entered.
+ *
+ * Furthermore, when changing the contents of
+ * PT_AR_BSP (or PT_CFM) while the task is
+ * blocked in a system call, convert the state
+ * so that the non-system-call exit
+ * path is used. This ensures that the proper
+ * state will be picked up when resuming
+ * execution. However, it *also* means that
+ * once we write PT_AR_BSP/PT_CFM, it won't be
+ * possible to modify the syscall arguments of
+ * the pending system call any longer. This
+ * shouldn't be an issue because modifying
+ * PT_AR_BSP/PT_CFM generally implies that
+ * we're either abandoning the pending system
+ * call or that we defer its re-execution
+ * (e.g., due to GDB doing an inferior
+ * function call).
+ */
+ urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
+ if (write_access) {
+ if (*data != urbs_end) {
+ if (in_syscall(pt))
+ convert_to_non_syscall(target,
+ pt,
+ cfm);
+ /*
+ * Simulate user-level write
+ * of ar.bsp:
+ */
+ pt->loadrs = 0;
+ pt->ar_bspstore = *data;
+ }
+ } else
+ *data = urbs_end;
+ return 0;
+ case ELF_AR_BSPSTORE_OFFSET:
+ ptr = &pt->ar_bspstore;
+ break;
+ case ELF_AR_RNAT_OFFSET:
+ ptr = &pt->ar_rnat;
+ break;
+ case ELF_AR_CCV_OFFSET:
+ ptr = &pt->ar_ccv;
+ break;
+ case ELF_AR_UNAT_OFFSET:
+ ptr = &pt->ar_unat;
+ break;
+ case ELF_AR_FPSR_OFFSET:
+ ptr = &pt->ar_fpsr;
+ break;
+ case ELF_AR_PFS_OFFSET:
+ ptr = &pt->ar_pfs;
+ break;
+ case ELF_AR_LC_OFFSET:
+ return unw_access_ar(info, UNW_AR_LC, data,
+ write_access);
+ case ELF_AR_EC_OFFSET:
+ return unw_access_ar(info, UNW_AR_EC, data,
+ write_access);
+ case ELF_AR_CSD_OFFSET:
+ ptr = &pt->ar_csd;
+ break;
+ case ELF_AR_SSD_OFFSET:
+ ptr = &pt->ar_ssd;
+ }
+ } else if (addr >= ELF_CR_IIP_OFFSET && addr <= ELF_CR_IPSR_OFFSET) {
+ switch (addr) {
+ case ELF_CR_IIP_OFFSET:
+ ptr = &pt->cr_iip;
+ break;
+ case ELF_CFM_OFFSET:
+ urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
+ if (write_access) {
+ if (((cfm ^ *data) & PFM_MASK) != 0) {
+ if (in_syscall(pt))
+ convert_to_non_syscall(target,
+ pt,
+ cfm);
+ pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
+ | (*data & PFM_MASK));
+ }
+ } else
+ *data = cfm;
+ return 0;
+ case ELF_CR_IPSR_OFFSET:
+ if (write_access) {
+ unsigned long tmp = *data;
+ /* psr.ri==3 is a reserved value: SDM 2:25 */
+ if ((tmp & IA64_PSR_RI) == IA64_PSR_RI)
+ tmp &= ~IA64_PSR_RI;
+ pt->cr_ipsr = ((tmp & IPSR_MASK)
+ | (pt->cr_ipsr & ~IPSR_MASK));
+ } else
+ *data = (pt->cr_ipsr & IPSR_MASK);
+ return 0;
+ }
+ } else if (addr == ELF_NAT_OFFSET)
+ return access_nat_bits(target, pt, info,
+ data, write_access);
+ else if (addr == ELF_PR_OFFSET)
+ ptr = &pt->pr;
+ else
+ return -1;
+
+ if (write_access)
+ *ptr = *data;
+ else
+ *data = *ptr;
+
+ return 0;
+}
+
+/*
+ * Dispatch one register access, addressed by its ELF-layout offset, to
+ * the handler for the register class the offset falls in:
+ *   r1-r15          -> access_elf_gpreg()
+ *   b0-b7           -> access_elf_breg()
+ *   everything else -> access_elf_areg()
+ * The selected handler's return value is passed back unchanged.
+ */
+static int
+access_elf_reg(struct task_struct *target, struct unw_frame_info *info,
+	       unsigned long addr, unsigned long *data, int write_access)
{
+	/* general registers r1 .. r15 */
+	if (ELF_GR_OFFSET(1) <= addr && addr <= ELF_GR_OFFSET(15))
+		return access_elf_gpreg(target, info, addr, data, write_access);
+
+	/* branch registers b0 .. b7 */
+	if (ELF_BR_OFFSET(0) <= addr && addr <= ELF_BR_OFFSET(7))
+		return access_elf_breg(target, info, addr, data, write_access);
+
+	/* remaining offsets (NaT bits, predicates, cr.*, ar.*, ...) */
+	return access_elf_areg(target, info, addr, data, write_access);
+}
+
+/*
+ * Unwinder callback that services a read of the general-register regset.
+ *
+ * @info: unwind state; it is first unwound to the user-level frame.
+ * @arg:  struct regset_getset describing the request (target task,
+ *        pos/count window, destination buffers); the outcome is left in
+ *        its ->ret field.
+ *
+ * The requested window is served region by region, in layout order:
+ * the r0 slot is zero-filled, r1-r15 are fetched one register at a time
+ * through access_elf_reg() into a scratch array, r16-r31 are copied
+ * straight out of the task's pt_regs, and the NaT/pr/b0-b7 and
+ * ip..ar.ssd regions again go through access_elf_reg().  A failing
+ * access_elf_reg() sets ->ret to -EIO.  If unwinding to user level
+ * fails the function returns without touching ->ret.
+ *
+ * Statement order matters: the user_regset_copyout*() helpers advance
+ * dst->pos and reduce dst->count, and each region reads those fields
+ * before calling them.
+ */
+void do_gpregs_get(struct unw_frame_info *info, void *arg)
+{
+ struct pt_regs *pt;
+ struct regset_getset *dst = arg;
+ elf_greg_t tmp[16];
+ unsigned int i, index, min_copy;
+
+ if (unw_unwind_to_user(info) < 0)
+ return;
+
/*
- * The 0x80 provides a way for the tracing parent to
- * distinguish between a syscall stop and SIGTRAP delivery.
+ * coredump format:
+ * r0-r31
+ * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
+ * predicate registers (p0-p63)
+ * b0-b7
+ * ip cfm user-mask
+ * ar.rsc ar.bsp ar.bspstore ar.rnat
+ * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
*/
- ptrace_notify(SIGTRAP
- | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0));
- /*
- * This isn't the same as continuing with a signal, but it
- * will do for normal use. strace only continues with a
- * signal if the stopping signal is not SIGTRAP. -brl
+
+ /* Skip r0 */
+ if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
+ dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
+ &dst->u.get.kbuf,
+ &dst->u.get.ubuf,
+ 0, ELF_GR_OFFSET(1));
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* gr1 - gr15 */
+ if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
+ index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
+ min_copy = ELF_GR_OFFSET(16) > (dst->pos + dst->count) ?
+ (dst->pos + dst->count) : ELF_GR_OFFSET(16);
+ for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+ index++)
+ if (access_elf_reg(dst->target, info, i,
+ &tmp[index], 0) < 0) {
+ dst->ret = -EIO;
+ return;
+ }
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+ ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* r16-r31 */
+ if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
+ pt = task_pt_regs(dst->target);
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf, &pt->r16,
+ ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* nat, pr, b0 - b7 */
+ if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
+ index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
+ min_copy = ELF_CR_IIP_OFFSET > (dst->pos + dst->count) ?
+ (dst->pos + dst->count) : ELF_CR_IIP_OFFSET;
+ for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+ index++)
+ if (access_elf_reg(dst->target, info, i,
+ &tmp[index], 0) < 0) {
+ dst->ret = -EIO;
+ return;
+ }
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+ ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
+ * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
*/
- if (current->exit_code) {
- send_sig(current->exit_code, current, 1);
- current->exit_code = 0;
+ if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
+ index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
+ min_copy = ELF_AR_END_OFFSET > (dst->pos + dst->count) ?
+ (dst->pos + dst->count) : ELF_AR_END_OFFSET;
+ for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+ index++)
+ if (access_elf_reg(dst->target, info, i,
+ &tmp[index], 0) < 0) {
+ dst->ret = -EIO;
+ return;
+ }
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+ ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
}
}
-/* "asmlinkage" so the input arguments are preserved... */
+/*
+ * Unwinder callback that services a write to the general-register regset.
+ *
+ * @info: unwind state; it is first unwound to the user-level frame.
+ * @arg:  struct regset_getset describing the request; the outcome is
+ *        left in its ->ret field.
+ *
+ * Mirrors the read path region by region: data aimed at the r0 slot is
+ * consumed and ignored, r1-r15 and the NaT/pr/b0-b7 and ip..ar.ssd
+ * regions are first copied into a scratch array and then stored one
+ * register at a time through access_elf_reg() (-EIO on failure), and
+ * r16-r31 are copied directly into the task's pt_regs.
+ *
+ * In the scratch-array regions the starting offset is saved in 'i'
+ * before user_regset_copyin() advances dst->pos; the store loop then
+ * runs from that saved offset up to the new dst->pos.
+ */
+void do_gpregs_set(struct unw_frame_info *info, void *arg)
+{
+ struct pt_regs *pt;
+ struct regset_getset *dst = arg;
+ elf_greg_t tmp[16];
+ unsigned int i, index;
+
+ if (unw_unwind_to_user(info) < 0)
+ return;
+
+ /* Skip r0 */
+ if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
+ dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
+ &dst->u.set.kbuf,
+ &dst->u.set.ubuf,
+ 0, ELF_GR_OFFSET(1));
+ if (dst->ret || dst->count == 0)
+ return;
+ }
-asmlinkage void
-syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
- long arg4, long arg5, long arg6, long arg7,
- struct pt_regs regs)
+ /* gr1-gr15 */
+ if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
+ i = dst->pos;
+ index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+ ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
+ if (dst->ret)
+ return;
+ for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
+ if (access_elf_reg(dst->target, info, i,
+ &tmp[index], 1) < 0) {
+ dst->ret = -EIO;
+ return;
+ }
+ if (dst->count == 0)
+ return;
+ }
+
+ /* gr16-gr31 */
+ if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
+ pt = task_pt_regs(dst->target);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf, &pt->r16,
+ ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* nat, pr, b0 - b7 */
+ if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
+ i = dst->pos;
+ index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+ ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
+ if (dst->ret)
+ return;
+ for (; i < dst->pos; i += sizeof(elf_greg_t), index++)
+ if (access_elf_reg(dst->target, info, i,
+ &tmp[index], 1) < 0) {
+ dst->ret = -EIO;
+ return;
+ }
+ if (dst->count == 0)
+ return;
+ }
+
+ /* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
+ * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
+ */
+ if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
+ i = dst->pos;
+ index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+ ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
+ if (dst->ret)
+ return;
+ for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
+ if (access_elf_reg(dst->target, info, i,
+ &tmp[index], 1) < 0) {
+ dst->ret = -EIO;
+ return;
+ }
+ }
+}
+
+/*
+ * Byte offset of the i-th elf_fpreg_t slot.  The argument is
+ * parenthesised so that expression arguments such as
+ * ELF_FP_OFFSET(a + b) expand with the intended precedence.
+ */
+#define ELF_FP_OFFSET(i) ((i) * sizeof(elf_fpreg_t))
+
+/*
+ * Unwinder callback that services a read of the floating-point regset.
+ *
+ * @info: unwind state; unwound to the user-level frame first.
+ * @arg:  struct regset_getset describing the request; the outcome is
+ *        stored in its ->ret field.
+ *
+ * Slots 0 and 1 are returned as zeros, fr2-fr31 are fetched with
+ * unw_get_fr() (-EIO on failure) and everything from slot 32 upwards
+ * comes from thread.fph, or is zero-filled when the thread does not
+ * have IA64_THREAD_FPH_VALID set.
+ */
+void do_fpregs_get(struct unw_frame_info *info, void *arg)
+{
+	struct regset_getset *dst = arg;
+	struct task_struct *task = dst->target;
+	elf_fpreg_t tmp[30];
+	int slot, stop, off;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	/* slots 0 and 1: always read back as zero */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
+		dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
+						    &dst->u.get.kbuf,
+						    &dst->u.get.ubuf,
+						    0, ELF_FP_OFFSET(2));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* fr2-fr31: gather into tmp[], then hand the window to the caller */
+	if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
+		slot = (dst->pos - ELF_FP_OFFSET(2)) / sizeof(elf_fpreg_t);
+		stop = min(((unsigned int)ELF_FP_OFFSET(32)),
+			   dst->pos + dst->count);
+
+		off = dst->pos;
+		while (off < stop) {
+			if (unw_get_fr(info, off / sizeof(elf_fpreg_t),
+				       &tmp[slot])) {
+				dst->ret = -EIO;
+				return;
+			}
+			off += sizeof(elf_fpreg_t);
+			slot++;
+		}
+
+		dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+					       &dst->u.get.kbuf,
+					       &dst->u.get.ubuf, tmp,
+					       ELF_FP_OFFSET(2),
+					       ELF_FP_OFFSET(32));
+		if (dst->ret || dst->count == 0)
+			return;
+	}
+
+	/* fph */
+	if (dst->count > 0) {
+		ia64_flush_fph(task);
+		if (!(task->thread.flags & IA64_THREAD_FPH_VALID))
+			/* Zero fill instead. */
+			dst->ret = user_regset_copyout_zero(
+					&dst->pos, &dst->count,
+					&dst->u.get.kbuf, &dst->u.get.ubuf,
+					ELF_FP_OFFSET(32), -1);
+		else
+			dst->ret = user_regset_copyout(
+					&dst->pos, &dst->count,
+					&dst->u.get.kbuf, &dst->u.get.ubuf,
+					&task->thread.fph,
+					ELF_FP_OFFSET(32), -1);
+	}
+}
+
+/*
+ * Unwinder callback that services a write to the floating-point regset.
+ *
+ * @info: unwind state; unwound to the user-level frame first.
+ * @arg:  struct regset_getset describing the request; the outcome is
+ *        stored in its ->ret field.
+ *
+ * Data aimed at slots 0 and 1 is consumed and ignored.  For fr2-fr31
+ * the incoming bytes are copied into a scratch array; when the window
+ * starts or ends in the middle of a 16-byte register, the current
+ * register is read with unw_get_fr() and the half that was not supplied
+ * is taken from it, so that whole registers can be stored with
+ * unw_set_fr().  Anything from slot 32 upwards is copied directly into
+ * thread.fph after ia64_sync_fph().  Failing unw_get_fr()/unw_set_fr()
+ * calls set ->ret to -EIO.
+ */
+void do_fpregs_set(struct unw_frame_info *info, void *arg)
{
- if (test_thread_flag(TIF_SYSCALL_TRACE)
- && (current->ptrace & PT_PTRACED))
- syscall_trace();
-
- if (unlikely(current->audit_context)) {
- long syscall;
- int arch;
-
- if (IS_IA32_PROCESS(&regs)) {
- syscall = regs.r1;
- arch = AUDIT_ARCH_I386;
- } else {
- syscall = regs.r15;
- arch = AUDIT_ARCH_IA64;
+ struct regset_getset *dst = arg;
+ elf_fpreg_t fpreg, tmp[30];
+ int index, start, end;
+
+ if (unw_unwind_to_user(info) < 0)
+ return;
+
+ /* Skip pos 0 and 1 */
+ if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
+ dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
+ &dst->u.set.kbuf,
+ &dst->u.set.ubuf,
+ 0, ELF_FP_OFFSET(2));
+ if (dst->count == 0 || dst->ret)
+ return;
+ }
+
+ /* fr2-fr31 */
+ if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
+ start = dst->pos;
+ end = min(((unsigned int)ELF_FP_OFFSET(32)),
+ dst->pos + dst->count);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+ ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
+ if (dst->ret)
+ return;
+
+ if (start & 0xF) { /* only write high part */
+ if (unw_get_fr(info, start / sizeof(elf_fpreg_t),
+ &fpreg)) {
+ dst->ret = -EIO;
+ return;
+ }
+ tmp[start / sizeof(elf_fpreg_t) - 2].u.bits[0]
+ = fpreg.u.bits[0];
+ start &= ~0xFUL;
+ }
+ if (end & 0xF) { /* only write low part */
+ if (unw_get_fr(info, end / sizeof(elf_fpreg_t),
+ &fpreg)) {
+ dst->ret = -EIO;
+ return;
+ }
+ tmp[end / sizeof(elf_fpreg_t) - 2].u.bits[1]
+ = fpreg.u.bits[1];
+ end = (end + 0xF) & ~0xFUL;
}
- audit_syscall_entry(arch, syscall, arg0, arg1, arg2, arg3);
+ for ( ; start < end ; start += sizeof(elf_fpreg_t)) {
+ index = start / sizeof(elf_fpreg_t);
+ if (unw_set_fr(info, index, tmp[index - 2])) {
+ dst->ret = -EIO;
+ return;
+ }
+ }
+ if (dst->ret || dst->count == 0)
+ return;
}
+ /* fph */
+ if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(128)) {
+ ia64_sync_fph(dst->target);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf,
+ &dst->u.set.ubuf,
+ &dst->target->thread.fph,
+ ELF_FP_OFFSET(32), -1);
+ }
}
-/* "asmlinkage" so the input arguments are preserved... */
+/*
+ * Run a regset unwinder callback against @target and return its result.
+ *
+ * The request (target, regset, pos, count and the two buffers) is packed
+ * into a struct regset_getset whose ->ret starts out as 0; the buffers
+ * are always stored through the .u.set member, for reads as well as
+ * writes.  When @target is the current task the callback is invoked via
+ * unw_init_running(); otherwise a zeroed unw_frame_info is initialised
+ * with unw_init_from_blocked_task() and the callback is called directly.
+ *
+ * Returns the ->ret value left behind by the callback.
+ */
+static int
+do_regset_call(void (*call)(struct unw_frame_info *, void *),
+ struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct regset_getset info = { .target = target, .regset = regset,
+ .pos = pos, .count = count,
+ .u.set = { .kbuf = kbuf, .ubuf = ubuf },
+ .ret = 0 };
-asmlinkage void
-syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
- long arg4, long arg5, long arg6, long arg7,
- struct pt_regs regs)
+ if (target == current)
+ unw_init_running(call, &info);
+ else {
+ struct unw_frame_info ufi;
+ memset(&ufi, 0, sizeof(ufi));
+ unw_init_from_blocked_task(&ufi, target);
+ (*call)(&ufi, &info);
+ }
+
+ return info.ret;
+}
+
+/*
+ * Read hook for the general-register regset: runs do_gpregs_get()
+ * through do_regset_call() and returns the status it reports.
+ */
+static int gpregs_get(struct task_struct *target,
+		      const struct user_regset *regset,
+		      unsigned int pos, unsigned int count,
+		      void *kbuf, void __user *ubuf)
+{
+	int status;
+
+	status = do_regset_call(do_gpregs_get, target, regset,
+				pos, count, kbuf, ubuf);
+	return status;
+}
+
+/*
+ * Write hook for the general-register regset: runs do_gpregs_set()
+ * through do_regset_call() and returns the status it reports.
+ */
+static int
+gpregs_set(struct task_struct *target,
+	   const struct user_regset *regset,
+	   unsigned int pos, unsigned int count,
+	   const void *kbuf, const void __user *ubuf)
+{
+	int status;
+
+	status = do_regset_call(do_gpregs_set, target, regset,
+				pos, count, kbuf, ubuf);
+	return status;
+}
+
+/*
+ * Unwinder callback used by gpregs_writeback(): hands the unwind state
+ * to do_sync_rbs() together with ia64_sync_user_rbs.  @arg is unused.
+ */
+static void do_gpregs_writeback(struct unw_frame_info *info, void *arg)
{
- if (unlikely(current->audit_context)) {
- int success = AUDITSC_RESULT(regs.r10);
- long result = regs.r8;
+ do_sync_rbs(info, ia64_sync_user_rbs);
+}
+
+/*
+ * Writeback hook of the general-register regset: flush the register
+ * backing store.  ptrace does this before it stops, so that a tracer
+ * reading the user memory after the thread stops will get the current
+ * register data.
+ *
+ * If TIF_RESTORE_RSE was already set on the target nothing else is done
+ * and 0 is returned.  Otherwise the flag is now set, a notify-resume is
+ * requested and do_gpregs_writeback() is run via do_regset_call(), whose
+ * result is returned.  @now is not consulted.
+ */
+static int
+gpregs_writeback(struct task_struct *target,
+		 const struct user_regset *regset,
+		 int now)
+{
+	int was_pending;
+
+	was_pending = test_and_set_tsk_thread_flag(target, TIF_RESTORE_RSE);
+	if (!was_pending) {
+		set_notify_resume(target);
+		return do_regset_call(do_gpregs_writeback, target, regset,
+				      0, 0, NULL, NULL);
+	}
+
+	return 0;
+}
+
+/*
+ * Active hook of the floating-point regset: returns 128 when the thread
+ * has IA64_THREAD_FPH_VALID set and 32 otherwise.  @regset is unused.
+ */
+static int
+fpregs_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (target->thread.flags & IA64_THREAD_FPH_VALID)
+		return 128;
+
+	return 32;
+}
+
+/*
+ * Read hook for the floating-point regset: runs do_fpregs_get()
+ * through do_regset_call() and returns the status it reports.
+ */
+static int
+fpregs_get(struct task_struct *target,
+	   const struct user_regset *regset,
+	   unsigned int pos, unsigned int count,
+	   void *kbuf, void __user *ubuf)
+{
+	int status;
+
+	status = do_regset_call(do_fpregs_get, target, regset,
+				pos, count, kbuf, ubuf);
+	return status;
+}
+
+/*
+ * Write hook for the floating-point regset: runs do_fpregs_set()
+ * through do_regset_call() and returns the status it reports.
+ */
+static int
+fpregs_set(struct task_struct *target,
+	   const struct user_regset *regset,
+	   unsigned int pos, unsigned int count,
+	   const void *kbuf, const void __user *ubuf)
+{
+	int status;
+
+	status = do_regset_call(do_fpregs_set, target, regset,
+				pos, count, kbuf, ubuf);
+	return status;
+}
- if (success != AUDITSC_SUCCESS)
- result = -result;
- audit_syscall_exit(success, result);
+/*
+ * Read or write one unsigned-long-sized word of @child's register state,
+ * addressed by a PT_* user-area offset.
+ *
+ * @child:        task whose registers are accessed
+ * @addr:         PT_* offset; must be 8-byte aligned
+ * @data:         value to store (write) or location of the result (read)
+ * @write_access: non-zero for a write, zero for a read
+ *
+ * Unaligned offsets and offsets inside the gaps between the register
+ * groups are rejected.  Floating-point offsets are translated to the
+ * matching ELF_FP_OFFSET() position and served by fpregs_get() or
+ * fpregs_set(); the other named registers are translated to their ELF_*
+ * offset and served by gpregs_get() or gpregs_set().  Whatever is left
+ * is treated as an ibr/dbr debug register slot (8 slots each); writes
+ * to odd-numbered slots have bits 56-58 cleared.
+ *
+ * Returns 0 on success and -1 on any failure.
+ */
+static int
+access_uarea(struct task_struct *child, unsigned long addr,
+ unsigned long *data, int write_access)
+{
+ unsigned int pos = -1; /* an invalid value */
+ int ret;
+ unsigned long *ptr, regnum;
+
+ if ((addr & 0x7) != 0) {
+ dprintk("ptrace: unaligned register address 0x%lx\n", addr);
+ return -1;
+ }
+ if ((addr >= PT_NAT_BITS + 8 && addr < PT_F2) ||
+ (addr >= PT_R7 + 8 && addr < PT_B1) ||
+ (addr >= PT_AR_LC + 8 && addr < PT_CR_IPSR) ||
+ (addr >= PT_AR_SSD + 8 && addr < PT_DBR)) {
+ dprintk("ptrace: rejecting access to register "
+ "address 0x%lx\n", addr);
+ return -1;
+ }
+
+ switch (addr) {
+ case PT_F32 ... (PT_F127 + 15):
+ pos = addr - PT_F32 + ELF_FP_OFFSET(32);
+ break;
+ case PT_F2 ... (PT_F5 + 15):
+ pos = addr - PT_F2 + ELF_FP_OFFSET(2);
+ break;
+ case PT_F10 ... (PT_F31 + 15):
+ pos = addr - PT_F10 + ELF_FP_OFFSET(10);
+ break;
+ case PT_F6 ... (PT_F9 + 15):
+ pos = addr - PT_F6 + ELF_FP_OFFSET(6);
+ break;
+ }
+
+ if (pos != -1) {
+ if (write_access)
+ ret = fpregs_set(child, NULL, pos,
+ sizeof(unsigned long), data, NULL);
+ else
+ ret = fpregs_get(child, NULL, pos,
+ sizeof(unsigned long), data, NULL);
+ if (ret != 0)
+ return -1;
+ return 0;
+ }
+
+ switch (addr) {
+ case PT_NAT_BITS:
+ pos = ELF_NAT_OFFSET;
+ break;
+ case PT_R4 ... PT_R7:
+ pos = addr - PT_R4 + ELF_GR_OFFSET(4);
+ break;
+ case PT_B1 ... PT_B5:
+ pos = addr - PT_B1 + ELF_BR_OFFSET(1);
+ break;
+ case PT_AR_EC:
+ pos = ELF_AR_EC_OFFSET;
+ break;
+ case PT_AR_LC:
+ pos = ELF_AR_LC_OFFSET;
+ break;
+ case PT_CR_IPSR:
+ pos = ELF_CR_IPSR_OFFSET;
+ break;
+ case PT_CR_IIP:
+ pos = ELF_CR_IIP_OFFSET;
+ break;
+ case PT_CFM:
+ pos = ELF_CFM_OFFSET;
+ break;
+ case PT_AR_UNAT:
+ pos = ELF_AR_UNAT_OFFSET;
+ break;
+ case PT_AR_PFS:
+ pos = ELF_AR_PFS_OFFSET;
+ break;
+ case PT_AR_RSC:
+ pos = ELF_AR_RSC_OFFSET;
+ break;
+ case PT_AR_RNAT:
+ pos = ELF_AR_RNAT_OFFSET;
+ break;
+ case PT_AR_BSPSTORE:
+ pos = ELF_AR_BSPSTORE_OFFSET;
+ break;
+ case PT_PR:
+ pos = ELF_PR_OFFSET;
+ break;
+ case PT_B6:
+ pos = ELF_BR_OFFSET(6);
+ break;
+ case PT_AR_BSP:
+ pos = ELF_AR_BSP_OFFSET;
+ break;
+ case PT_R1 ... PT_R3:
+ pos = addr - PT_R1 + ELF_GR_OFFSET(1);
+ break;
+ case PT_R12 ... PT_R15:
+ pos = addr - PT_R12 + ELF_GR_OFFSET(12);
+ break;
+ case PT_R8 ... PT_R11:
+ pos = addr - PT_R8 + ELF_GR_OFFSET(8);
+ break;
+ case PT_R16 ... PT_R31:
+ pos = addr - PT_R16 + ELF_GR_OFFSET(16);
+ break;
+ case PT_AR_CCV:
+ pos = ELF_AR_CCV_OFFSET;
+ break;
+ case PT_AR_FPSR:
+ pos = ELF_AR_FPSR_OFFSET;
+ break;
+ case PT_B0:
+ pos = ELF_BR_OFFSET(0);
+ break;
+ case PT_B7:
+ pos = ELF_BR_OFFSET(7);
+ break;
+ case PT_AR_CSD:
+ pos = ELF_AR_CSD_OFFSET;
+ break;
+ case PT_AR_SSD:
+ pos = ELF_AR_SSD_OFFSET;
+ break;
+ }
+
+ if (pos != -1) {
+ if (write_access)
+ ret = gpregs_set(child, NULL, pos,
+ sizeof(unsigned long), data, NULL);
+ else
+ ret = gpregs_get(child, NULL, pos,
+ sizeof(unsigned long), data, NULL);
+ if (ret != 0)
+ return -1;
+ return 0;
+ }
+
+ /* access debug registers */
+ if (addr >= PT_IBR) {
+ regnum = (addr - PT_IBR) >> 3;
+ ptr = &child->thread.ibr[0];
+ } else {
+ regnum = (addr - PT_DBR) >> 3;
+ ptr = &child->thread.dbr[0];
+ }
+
+ if (regnum >= 8) {
+ dprintk("ptrace: rejecting access to register "
+ "address 0x%lx\n", addr);
+ return -1;
+ }
+#ifdef CONFIG_PERFMON
+ /*
+ * Check if debug registers are used by perfmon. This
+ * test must be done once we know that we can do the
+ * operation, i.e. the arguments are all valid, but
+ * before we start modifying the state.
+ *
+ * Perfmon needs to keep a count of how many processes
+ * are trying to modify the debug registers for system
+ * wide monitoring sessions.
+ *
+ * We also include read access here, because they may
+ * cause the PMU-installed debug register state
+ * (dbr[], ibr[]) to be reset. The two arrays are also
+ * used by perfmon, but we do not use
+ * IA64_THREAD_DBG_VALID. The registers are restored
+ * by the PMU context switch code.
+ */
+ if (pfm_use_debug_registers(child))
+ return -1;
+#endif
+
+ if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
+ child->thread.flags |= IA64_THREAD_DBG_VALID;
+ memset(child->thread.dbr, 0,
+ sizeof(child->thread.dbr));
+ memset(child->thread.ibr, 0,
+ sizeof(child->thread.ibr));
+ }
+
+ ptr += regnum;
+
+ if ((regnum & 1) && write_access) {
+ /* don't let the user set kernel-level breakpoints: */
+ *ptr = *data & ~(7UL << 56);
+ return 0;
}
+ if (write_access)
+ *ptr = *data;
+ else
+ *data = *ptr;
+ return 0;
+}
+
+/*
+ * Native ia64 regsets, in the order they are exported:
+ * entry 0 is the general-register set (NT_PRSTATUS), entry 1 the
+ * floating-point set (NT_PRFPREG).
+ */
+static const struct user_regset native_regsets[] = {
+	{
+		.core_note_type	= NT_PRSTATUS,
+		.n		= ELF_NGREG,
+		.size		= sizeof(elf_greg_t),
+		.align		= sizeof(elf_greg_t),
+		.get		= gpregs_get,
+		.set		= gpregs_set,
+		.writeback	= gpregs_writeback
+	},
+	{
+		.core_note_type	= NT_PRFPREG,
+		.n		= ELF_NFPREG,
+		.size		= sizeof(elf_fpreg_t),
+		.align		= sizeof(elf_fpreg_t),
+		.get		= fpregs_get,
+		.set		= fpregs_set,
+		.active		= fpregs_active
+	},
+};
+
+/* The "ia64" regset view (EM_IA_64), backed by native_regsets[]. */
+static const struct user_regset_view user_ia64_view = {
+	.name		= "ia64",
+	.e_machine	= EM_IA_64,
+	.regsets	= native_regsets,
+	.n		= ARRAY_SIZE(native_regsets)
+};
+
+/*
+ * Return the regset view for @tsk.  The same view, user_ia64_view, is
+ * returned for every task; @tsk is not examined.
+ */
+const struct user_regset_view *
+task_user_regset_view(struct task_struct *tsk)
+{
+	const struct user_regset_view *view = &user_ia64_view;
+
+	return view;
+}
- if ((test_thread_flag(TIF_SYSCALL_TRACE)
- || test_thread_flag(TIF_SINGLESTEP))
- && (current->ptrace & PT_PTRACED))
- syscall_trace();
+/* Request block passed to syscall_get_set_args_cb() through its data pointer. */
+struct syscall_get_set_args {
+ unsigned int i; /* index of the first syscall argument to access */
+ unsigned int n; /* number of arguments to access */
+ unsigned long *args; /* caller buffer: source on write, destination on read */
+ struct pt_regs *regs; /* pt_regs supplying cr_ifs/loadrs and the in_syscall() test */
+ int rw; /* non-zero: store args into the task; zero: fetch them */
+};
+
+/*
+ * Unwinder callback that moves syscall arguments between the caller's
+ * buffer and the kernel register backing store of info->task.
+ *
+ * @info: unwind state; unwound to the user-level frame first.
+ * @data: struct syscall_get_set_args describing the request.
+ *
+ * At most min(n, cr_ifs & 0x7f) arguments are transferred, and none at
+ * all unless in_syscall() holds for the supplied pt_regs.  On a read,
+ * the remaining entries of the caller's buffer, up to n, are set to 0.
+ */
+static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data)
+{
+	struct syscall_get_set_args *req = data;
+	struct pt_regs *pt = req->regs;
+	unsigned long *krbs, *slot, cfm, ndirty;
+	int i, count;
+
+	if (unw_unwind_to_user(info) < 0)
+		return;
+
+	cfm = pt->cr_ifs;
+	krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8;
+	ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19));
+
+	count = in_syscall(pt) ? min_t(int, req->n, cfm & 0x7f) : 0;
+
+	for (i = 0; i < count; i++) {
+		/* backing-store slot holding argument number req->i + i */
+		slot = ia64_rse_skip_regs(krbs, ndirty + i + req->i);
+		if (req->rw)
+			*slot = req->args[i];
+		else
+			req->args[i] = *slot;
+	}
+
+	if (req->rw)
+		return;
+
+	/* read request: zero whatever could not be fetched */
+	for ( ; i < req->n; i++)
+		req->args[i] = 0;
+}
+
+/*
+ * Fetch (rw == 0) or store (rw != 0) @n syscall arguments of @task,
+ * starting at argument index @i, using @args as the buffer.
+ *
+ * The request is packaged for syscall_get_set_args_cb().  For the
+ * current task the callback runs via unw_init_running(); for any other
+ * task a zeroed unw_frame_info is set up with
+ * unw_init_from_blocked_task() and the callback is called directly.
+ */
+void ia64_syscall_get_set_arguments(struct task_struct *task,
+	struct pt_regs *regs, unsigned int i, unsigned int n,
+	unsigned long *args, int rw)
+{
+	struct syscall_get_set_args data = {
+		.i	= i,
+		.n	= n,
+		.args	= args,
+		.regs	= regs,
+		.rw	= rw,
+	};
+
+	if (task != current) {
+		struct unw_frame_info ufi;
+
+		memset(&ufi, 0, sizeof(ufi));
+		unw_init_from_blocked_task(&ufi, task);
+		syscall_get_set_args_cb(&ufi, &data);
+		return;
+	}
+
+	unw_init_running(syscall_get_set_args_cb, &data);
}
diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S
index 903babd22d6..c370e02f006 100644
--- a/arch/ia64/kernel/relocate_kernel.S
+++ b/arch/ia64/kernel/relocate_kernel.S
@@ -52,7 +52,7 @@ GLOBAL_ENTRY(relocate_new_kernel)
srlz.i
;;
mov ar.rnat=r18
- rfi
+ rfi // note: this unmasks MCA/INIT (psr.mc)
;;
1:
//physical mode code begin
@@ -61,7 +61,7 @@ GLOBAL_ENTRY(relocate_new_kernel)
// purge all TC entries
#define O(member) IA64_CPUINFO_##member##_OFFSET
- GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2
+ GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2
;;
addl r17=O(PTCE_STRIDE),r2
addl r2=O(PTCE_BASE),r2
diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c
index 27c2ef445a5..0464173ea56 100644
--- a/arch/ia64/kernel/sal.c
+++ b/arch/ia64/kernel/sal.c
@@ -109,6 +109,13 @@ check_versions (struct ia64_sal_systab *systab)
sal_revision = SAL_VERSION_CODE(2, 8);
sal_version = SAL_VERSION_CODE(0, 0);
}
+
+ if (ia64_platform_is("sn2") && (sal_revision == SAL_VERSION_CODE(2, 9)))
+ /*
+ * SGI Altix has hard-coded version 2.9 in their prom
+ * but they actually implement 3.2, so let's fix it here.
+ */
+ sal_revision = SAL_VERSION_CODE(3, 2);
}
static void __init
@@ -222,6 +229,14 @@ static void __init sal_desc_ap_wakeup(void *p) { }
*/
static int sal_cache_flush_drops_interrupts;
+/*
+ * Handler for the "force_pal_cache_flush" early parameter: sets
+ * sal_cache_flush_drops_interrupts, which makes check_sal_cache_flush()
+ * return without running its probe.  The option's value string is not
+ * examined.  Always returns 0.
+ */
+static int __init force_pal_cache_flush(char *str)
+{
+	sal_cache_flush_drops_interrupts = 1;
+
+	return 0;
+}
+early_param("force_pal_cache_flush", force_pal_cache_flush);
+
void __init
check_sal_cache_flush (void)
{
@@ -230,15 +245,17 @@ check_sal_cache_flush (void)
u64 vector, cache_type = 3;
struct ia64_sal_retval isrv;
+ if (sal_cache_flush_drops_interrupts)
+ return;
+
cpu = get_cpu();
local_irq_save(flags);
/*
- * Schedule a timer interrupt, wait until it's reported, and see if
- * SAL_CACHE_FLUSH drops it.
+ * Send ourselves a timer interrupt, wait until it's reported, and see
+ * if SAL_CACHE_FLUSH drops it.
*/
- ia64_set_itv(IA64_TIMER_VECTOR);
- ia64_set_itm(ia64_get_itc() + 1000);
+ platform_send_ipi(cpu, IA64_TIMER_VECTOR, IA64_IPI_DM_INT, 0);
while (!ia64_get_irr(IA64_TIMER_VECTOR))
cpu_relax();
@@ -284,6 +301,7 @@ ia64_sal_cache_flush (u64 cache_type)
SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0);
return isrv.status;
}
+EXPORT_SYMBOL_GPL(ia64_sal_cache_flush);
void __init
ia64_sal_init (struct ia64_sal_systab *systab)
@@ -372,3 +390,16 @@ ia64_sal_oemcall_reentrant(struct ia64_sal_retval *isrvp, u64 oemfunc,
return 0;
}
EXPORT_SYMBOL(ia64_sal_oemcall_reentrant);
+
+/*
+ * Issue the SAL_FREQ_BASE call for the clock source selected by @which.
+ *
+ * The first two return values of the call are stored through
+ * @ticks_per_second and @drift_info regardless of the call's status,
+ * and the status itself is returned.
+ */
+long ia64_sal_freq_base(unsigned long which,
+			unsigned long *ticks_per_second,
+			unsigned long *drift_info)
+{
+	struct ia64_sal_retval isrv;
+
+	SAL_CALL(isrv, SAL_FREQ_BASE, which, 0, 0, 0, 0, 0, 0);
+
+	*ticks_per_second = isrv.v0;
+	*drift_info = isrv.v1;
+
+	return isrv.status;
+}
+EXPORT_SYMBOL_GPL(ia64_sal_freq_base);
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index 25cd75f50ab..ee9719eebb1 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -40,12 +40,13 @@
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
+#include <linux/semaphore.h>
-#include <asm/semaphore.h>
#include <asm/sal.h>
#include <asm/uaccess.h>
@@ -53,7 +54,7 @@ MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>");
MODULE_DESCRIPTION("/proc interface to IA-64 SAL features");
MODULE_LICENSE("GPL");
-static int salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data);
+static const struct file_operations proc_salinfo_fops;
typedef struct {
const char *name; /* name of the proc entry */
@@ -65,7 +66,7 @@ typedef struct {
* List {name,feature} pairs for every entry in /proc/sal/<feature>
* that this module exports
*/
-static salinfo_entry_t salinfo_entries[]={
+static const salinfo_entry_t salinfo_entries[]={
{ "bus_lock", IA64_SAL_PLATFORM_FEATURE_BUS_LOCK, },
{ "irq_redirection", IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT, },
{ "ipi_redirection", IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT, },
@@ -192,7 +193,7 @@ struct salinfo_platform_oemdata_parms {
+/*
+ * Try to take data->mutex without blocking and then release it with
+ * up().  The result of down_trylock() is deliberately discarded; the
+ * "?: 0" plus (void) cast only exists to consume the return value.
+ * NOTE(review): presumably this is how a reader sleeping on the
+ * semaphore is told that work is pending -- confirm against the callers.
+ */
static void
salinfo_work_to_do(struct salinfo_data *data)
{
- down_trylock(&data->mutex);
+ (void)(down_trylock(&data->mutex) ?: 0);
up(&data->mutex);
}
@@ -301,9 +302,7 @@ salinfo_event_open(struct inode *inode, struct file *file)
static ssize_t
salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
{
- struct inode *inode = file->f_path.dentry->d_inode;
- struct proc_dir_entry *entry = PDE(inode);
- struct salinfo_data *data = entry->data;
+ struct salinfo_data *data = PDE_DATA(file_inode(file));
char cmd[32];
size_t size;
int i, n, cpu = -1;
@@ -317,7 +316,7 @@ retry:
}
n = data->cpu_check;
- for (i = 0; i < NR_CPUS; i++) {
+ for (i = 0; i < nr_cpu_ids; i++) {
if (cpu_isset(n, data->cpu_event)) {
if (!cpu_online(n)) {
cpu_clear(n, data->cpu_event);
@@ -326,7 +325,7 @@ retry:
cpu = n;
break;
}
- if (++n == NR_CPUS)
+ if (++n == nr_cpu_ids)
n = 0;
}
@@ -337,7 +336,7 @@ retry:
/* for next read, start checking at next CPU */
data->cpu_check = cpu;
- if (++data->cpu_check == NR_CPUS)
+ if (++data->cpu_check == nr_cpu_ids)
data->cpu_check = 0;
snprintf(cmd, sizeof(cmd), "read %d\n", cpu);
@@ -354,13 +353,13 @@ retry:
+/* File operations for the per-log "event" proc entries: open and read, with seeks routed to noop_llseek. */
static const struct file_operations salinfo_event_fops = {
.open = salinfo_event_open,
.read = salinfo_event_read,
+ .llseek = noop_llseek,
};
static int
salinfo_log_open(struct inode *inode, struct file *file)
{
- struct proc_dir_entry *entry = PDE(inode);
- struct salinfo_data *data = entry->data;
+ struct salinfo_data *data = PDE_DATA(inode);
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -385,8 +384,7 @@ salinfo_log_open(struct inode *inode, struct file *file)
static int
salinfo_log_release(struct inode *inode, struct file *file)
{
- struct proc_dir_entry *entry = PDE(inode);
- struct salinfo_data *data = entry->data;
+ struct salinfo_data *data = PDE_DATA(inode);
if (data->state == STATE_NO_DATA) {
vfree(data->log_buffer);
@@ -404,10 +402,9 @@ static void
call_on_cpu(int cpu, void (*fn)(void *), void *arg)
{
cpumask_t save_cpus_allowed = current->cpus_allowed;
- cpumask_t new_cpus_allowed = cpumask_of_cpu(cpu);
- set_cpus_allowed(current, new_cpus_allowed);
+ set_cpus_allowed_ptr(current, cpumask_of(cpu));
(*fn)(arg);
- set_cpus_allowed(current, save_cpus_allowed);
+ set_cpus_allowed_ptr(current, &save_cpus_allowed);
}
static void
@@ -463,9 +460,7 @@ retry:
static ssize_t
salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
{
- struct inode *inode = file->f_path.dentry->d_inode;
- struct proc_dir_entry *entry = PDE(inode);
- struct salinfo_data *data = entry->data;
+ struct salinfo_data *data = PDE_DATA(file_inode(file));
u8 *buf;
u64 bufsize;
@@ -524,9 +519,7 @@ salinfo_log_clear(struct salinfo_data *data, int cpu)
static ssize_t
salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
{
- struct inode *inode = file->f_path.dentry->d_inode;
- struct proc_dir_entry *entry = PDE(inode);
- struct salinfo_data *data = entry->data;
+ struct salinfo_data *data = PDE_DATA(file_inode(file));
char cmd[32];
size_t size;
u32 offset;
@@ -572,9 +565,10 @@ static const struct file_operations salinfo_data_fops = {
.release = salinfo_log_release,
.read = salinfo_log_read,
.write = salinfo_log_write,
+ .llseek = default_llseek,
};
-static int __devinit
+static int
salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
{
unsigned int i, cpu = (unsigned long)hcpu;
@@ -636,30 +630,31 @@ salinfo_init(void)
for (i=0; i < NR_SALINFO_ENTRIES; i++) {
/* pass the feature bit in question as misc data */
- *sdir++ = create_proc_read_entry (salinfo_entries[i].name, 0, salinfo_dir,
- salinfo_read, (void *)salinfo_entries[i].feature);
+ *sdir++ = proc_create_data(salinfo_entries[i].name, 0, salinfo_dir,
+ &proc_salinfo_fops,
+ (void *)salinfo_entries[i].feature);
}
+ cpu_notifier_register_begin();
+
for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) {
data = salinfo_data + i;
data->type = i;
- init_MUTEX(&data->mutex);
+ sema_init(&data->mutex, 1);
dir = proc_mkdir(salinfo_log_name[i], salinfo_dir);
if (!dir)
continue;
- entry = create_proc_entry("event", S_IRUSR, dir);
+ entry = proc_create_data("event", S_IRUSR, dir,
+ &salinfo_event_fops, data);
if (!entry)
continue;
- entry->data = data;
- entry->proc_fops = &salinfo_event_fops;
*sdir++ = entry;
- entry = create_proc_entry("data", S_IRUSR | S_IWUSR, dir);
+ entry = proc_create_data("data", S_IRUSR | S_IWUSR, dir,
+ &salinfo_data_fops, data);
if (!entry)
continue;
- entry->data = data;
- entry->proc_fops = &salinfo_data_fops;
*sdir++ = entry;
/* we missed any events before now */
@@ -676,7 +671,9 @@ salinfo_init(void)
salinfo_timer.function = &salinfo_timeout;
add_timer(&salinfo_timer);
- register_hotcpu_notifier(&salinfo_cpu_notifier);
+ __register_hotcpu_notifier(&salinfo_cpu_notifier);
+
+ cpu_notifier_register_done();
return 0;
}
@@ -685,22 +682,23 @@ salinfo_init(void)
* 'data' contains an integer that corresponds to the feature we're
* testing
*/
-static int
-salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data)
+static int proc_salinfo_show(struct seq_file *m, void *v)
{
- int len = 0;
-
- len = sprintf(page, (sal_platform_features & (unsigned long)data) ? "1\n" : "0\n");
-
- if (len <= off+count) *eof = 1;
-
- *start = page + off;
- len -= off;
-
- if (len>count) len = count;
- if (len<0) len = 0;
+ /*
+ * proc_salinfo_open() passes the feature bit to single_open(), which
+ * keeps it in m->private.  'v' is only the seq_file iterator token
+ * and must not be interpreted as the feature mask.
+ */
+ unsigned long data = (unsigned long)m->private;
+ seq_puts(m, (sal_platform_features & data) ? "1\n" : "0\n");
+ return 0;
+}
- return len;
+/*
+ * Open hook for the /proc/sal/<feature> files: binds proc_salinfo_show()
+ * to the file via single_open(), passing along the per-entry data that
+ * PDE_DATA() returns for the inode.
+ */
+static int proc_salinfo_open(struct inode *inode, struct file *file)
+{
+	void *feature = PDE_DATA(inode);
+
+	return single_open(file, proc_salinfo_show, feature);
}
+/* seq_file based file operations for the /proc/sal/<feature> entries. */
+static const struct file_operations proc_salinfo_fops = {
+	.open		= proc_salinfo_open,
+	.release	= single_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
module_init(salinfo_init);
diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c
deleted file mode 100644
index 2724ef3fbae..00000000000
--- a/arch/ia64/kernel/semaphore.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * IA-64 semaphore implementation (derived from x86 version).
- *
- * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co
- * David Mosberger-Tang <davidm@hpl.hp.com>
- */
-
-/*
- * Semaphores are implemented using a two-way counter: The "count"
- * variable is decremented for each process that tries to acquire the
- * semaphore, while the "sleepers" variable is a count of such
- * acquires.
- *
- * Notably, the inline "up()" and "down()" functions can efficiently
- * test if they need to do any extra work (up needs to do something
- * only if count was negative before the increment operation.
- *
- * "sleeping" and the contention routine ordering is protected
- * by the spinlock in the semaphore's waitqueue head.
- *
- * Note that these functions are only called when there is contention
- * on the lock, and as such all this is the "non-critical" part of the
- * whole semaphore business. The critical part is the inline stuff in
- * <asm/semaphore.h> where we want to avoid any extra jumps and calls.
- */
-#include <linux/sched.h>
-#include <linux/init.h>
-
-#include <asm/errno.h>
-#include <asm/semaphore.h>
-
-/*
- * Logic:
- * - Only on a boundary condition do we need to care. When we go
- * from a negative count to a non-negative, we wake people up.
- * - When we go from a non-negative count to a negative do we
- * (a) synchronize with the "sleepers" count and (b) make sure
- * that we're on the wakeup list before we synchronize so that
- * we cannot lose wakeup events.
- */
-
-void
-__up (struct semaphore *sem)
-{
- wake_up(&sem->wait);
-}
-
-void __sched __down (struct semaphore *sem)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- unsigned long flags;
-
- tsk->state = TASK_UNINTERRUPTIBLE;
- spin_lock_irqsave(&sem->wait.lock, flags);
- add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
- sem->sleepers++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock in
- * the wait_queue_head.
- */
- if (!atomic_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- schedule();
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- tsk->state = TASK_UNINTERRUPTIBLE;
- }
- remove_wait_queue_locked(&sem->wait, &wait);
- wake_up_locked(&sem->wait);
- spin_unlock_irqrestore(&sem->wait.lock, flags);
- tsk->state = TASK_RUNNING;
-}
-
-int __sched __down_interruptible (struct semaphore * sem)
-{
- int retval = 0;
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- unsigned long flags;
-
- tsk->state = TASK_INTERRUPTIBLE;
- spin_lock_irqsave(&sem->wait.lock, flags);
- add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
- sem->sleepers ++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * With signals pending, this turns into
- * the trylock failure case - we won't be
- * sleeping, and we* can't get the lock as
- * it has contention. Just correct the count
- * and exit.
- */
- if (signal_pending(current)) {
- retval = -EINTR;
- sem->sleepers = 0;
- atomic_add(sleepers, &sem->count);
- break;
- }
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock in
- * wait_queue_head. The "-1" is because we're
- * still hoping to get the semaphore.
- */
- if (!atomic_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- schedule();
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- tsk->state = TASK_INTERRUPTIBLE;
- }
- remove_wait_queue_locked(&sem->wait, &wait);
- wake_up_locked(&sem->wait);
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- tsk->state = TASK_RUNNING;
- return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for having decremented the
- * count.
- */
-int
-__down_trylock (struct semaphore *sem)
-{
- unsigned long flags;
- int sleepers;
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- sleepers = sem->sleepers + 1;
- sem->sleepers = 0;
-
- /*
- * Add "everybody else" and us into it. They aren't
- * playing, because we own the spinlock in the
- * wait_queue_head.
- */
- if (!atomic_add_negative(sleepers, &sem->count)) {
- wake_up_locked(&sem->wait);
- }
-
- spin_unlock_irqrestore(&sem->wait.lock, flags);
- return 1;
-}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 9e392a30d19..d86669bcdfb 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -46,11 +46,12 @@
#include <linux/kexec.h>
#include <linux/crash_dump.h>
-#include <asm/ia32.h>
#include <asm/machvec.h>
#include <asm/mca.h>
#include <asm/meminit.h>
#include <asm/page.h>
+#include <asm/paravirt.h>
+#include <asm/paravirt_patch.h>
#include <asm/patch.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -58,7 +59,7 @@
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/smp.h>
-#include <asm/system.h>
+#include <asm/tlbflush.h>
#include <asm/unistd.h>
#include <asm/hpsim.h>
@@ -71,9 +72,7 @@ unsigned long __per_cpu_offset[NR_CPUS];
EXPORT_SYMBOL(__per_cpu_offset);
#endif
-extern void ia64_setup_printk_clock(void);
-
-DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
+DEFINE_PER_CPU(struct cpuinfo_ia64, ia64_cpu_info);
DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
unsigned long ia64_cycles_per_usec;
struct ia64_boot_param *ia64_boot_param;
@@ -90,15 +89,13 @@ static struct resource code_resource = {
.name = "Kernel code",
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
-extern char _text[], _end[], _etext[];
-unsigned long ia64_max_cacheline_size;
+/*
+ * Resource entry for the kernel bss.  Only name and flags are set here;
+ * start/end are assigned in register_memory() from __bss_start and _end.
+ */
+static struct resource bss_resource = {
+ .name = "Kernel bss",
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
-int dma_get_cache_alignment(void)
-{
- return ia64_max_cacheline_size;
-}
-EXPORT_SYMBOL(dma_get_cache_alignment);
+unsigned long ia64_max_cacheline_size;
unsigned long ia64_iobase; /* virtual address for I/O accesses */
EXPORT_SYMBOL(ia64_iobase);
@@ -112,6 +109,13 @@ unsigned int num_io_spaces;
*/
#define I_CACHE_STRIDE_SHIFT 5 /* Safest way to go: 32 bytes by 32 bytes */
unsigned long ia64_i_cache_stride_shift = ~0;
+/*
+ * "clflush_cache_range()" needs to know what processor dependent stride size to
+ * use when it flushes cache lines including both d-cache and i-cache.
+ */
+/* Safest way to go: 32 bytes by 32 bytes */
+#define CACHE_STRIDE_SHIFT 5
+unsigned long ia64_cache_stride_shift = ~0;
/*
* The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). This
@@ -139,9 +143,9 @@ int num_rsvd_regions __initdata;
* This routine does not assume the incoming segments are sorted.
*/
int __init
-filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
+filter_rsvd_memory (u64 start, u64 end, void *arg)
{
- unsigned long range_start, range_end, prev_start;
+ u64 range_start, range_end, prev_start;
void (*func)(unsigned long, unsigned long, int);
int i;
@@ -174,6 +178,29 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
return 0;
}
+/*
+ * Similar to "filter_rsvd_memory()", but the reserved memory ranges
+ * are not filtered out.
+ *
+ * @start, @end: bounds of the range; @start is converted with __pa() and
+ * the length handed on is end - start.
+ * @arg: callback that is forwarded to call_pernode_memory().
+ *
+ * When IGNORE_PFN0 is set and the range begins at PAGE_OFFSET, the first
+ * page is dropped (with a warning) before the callback is invoked; an
+ * empty range results in no callback at all.  Always returns 0.
+ */
+int __init
+filter_memory(u64 start, u64 end, void *arg)
+{
+ void (*func)(unsigned long, unsigned long, int);
+
+#if IGNORE_PFN0
+ if (start == PAGE_OFFSET) {
+ printk(KERN_WARNING "warning: skipping physical page 0\n");
+ start += PAGE_SIZE;
+ if (start >= end)
+ return 0;
+ }
+#endif
+ func = arg;
+ if (start < end)
+ call_pernode_memory(__pa(start), end - start, func);
+ return 0;
+}
+}
+
static void __init
sort_regions (struct rsvd_region *rsvd_region, int max)
{
@@ -192,6 +219,23 @@ sort_regions (struct rsvd_region *rsvd_region, int max)
}
}
+/*
+ * Merge overlapping entries of a reserved-region table in place.
+ *
+ * @rsvd_region: table to compact; the comparison against the previous
+ *	entry expects it to be sorted already (callers run sort_regions()
+ *	first)
+ * @max: number of entries to examine
+ *
+ * An entry that starts before the end of its predecessor is folded into
+ * that predecessor and the rest of the table is shifted down by one slot.
+ * Returns the number of entries that remain.
+ */
+static int __init
+merge_regions (struct rsvd_region *rsvd_region, int max)
+{
+	int i;
+
+	for (i = 1; i < max; ++i) {
+		if (rsvd_region[i].start >= rsvd_region[i-1].end)
+			continue;
+		if (rsvd_region[i].end > rsvd_region[i-1].end)
+			rsvd_region[i-1].end = rsvd_region[i].end;
+		--max;
+		memmove(&rsvd_region[i], &rsvd_region[i+1],
+			(max - i) * sizeof(struct rsvd_region));
+		/*
+		 * Slot i now holds the entry that used to follow.  Step back
+		 * so the next iteration compares it with the (possibly grown)
+		 * predecessor instead of skipping it; otherwise three or more
+		 * mutually overlapping regions are left partly unmerged.
+		 */
+		--i;
+	}
+	return max;
+}
+
/*
* Request address space for all standard resources
*/
@@ -200,25 +244,101 @@ static int __init register_memory(void)
code_resource.start = ia64_tpa(_text);
code_resource.end = ia64_tpa(_etext) - 1;
data_resource.start = ia64_tpa(_etext);
- data_resource.end = ia64_tpa(_end) - 1;
- efi_initialize_iomem_resources(&code_resource, &data_resource);
+ data_resource.end = ia64_tpa(_edata) - 1;
+ bss_resource.start = ia64_tpa(__bss_start);
+ bss_resource.end = ia64_tpa(_end) - 1;
+ efi_initialize_iomem_resources(&code_resource, &data_resource,
+ &bss_resource);
return 0;
}
__initcall(register_memory);
+
+#ifdef CONFIG_KEXEC
+
+/*
+ * This function checks if the reserved crashkernel is allowed on the specific
+ * IA64 machine flavour. Machines without an IO TLB use swiotlb and require
+ * some memory below 4 GB (i.e. in 32 bit area), see the implementation of
+ * lib/swiotlb.c. The hpzx1 architecture has an IO TLB but cannot use that
+ * in kdump case. See the comment in sba_init() in sba_iommu.c.
+ *
+ * So, the only platforms accepted here for loading the kdump kernel
+ * over 4 GB are "sn2" and "uv"; on every other platform @pbase must lie
+ * below 4 GB.
+ *
+ * Returns nonzero if the placement is acceptable, 0 otherwise.  Only the
+ * base address is examined; @size is currently not used.
+ */
+static int __init check_crashkernel_memory(unsigned long pbase, size_t size)
+{
+ if (ia64_platform_is("sn2") || ia64_platform_is("uv"))
+ return 1;
+ else
+ return pbase < (1UL << 32);
+}
+
+/*
+ * Reserve memory for the crash kernel requested on the command line.
+ *
+ * @total: amount of system RAM, handed to parse_crashkernel() and printed
+ *	in the informational message
+ * @n: in/out count of used rsvd_region[] entries; incremented when a
+ *	crashkernel region is appended
+ *
+ * If no base address was given, one is looked up with
+ * kdump_find_rsvd_region() after sorting and merging the regions collected
+ * so far.  efi_memmap_res and boot_param_res are filled in on every path,
+ * including the one where the requested reservation is refused.
+ */
+static void __init setup_crashkernel(unsigned long total, int *n)
+{
+	unsigned long long base = 0, size = 0;
+	int ret;
+
+	ret = parse_crashkernel(boot_command_line, total,
+			&size, &base);
+	if (ret == 0 && size > 0) {
+		if (!base) {
+			sort_regions(rsvd_region, *n);
+			*n = merge_regions(rsvd_region, *n);
+			base = kdump_find_rsvd_region(size,
+					rsvd_region, *n);
+		}
+
+		/*
+		 * ~0UL is the value this code treats as "no usable base".
+		 * Test it before the 4 GB check so it is not reported as
+		 * memory at a huge GB offset.
+		 */
+		if (base != ~0UL) {
+			if (!check_crashkernel_memory(base, size)) {
+				/*
+				 * Do not return here: the resources below
+				 * must still be set up.
+				 */
+				pr_warning("crashkernel: There would be kdump memory "
+					"at %ld GB but this is unusable because it "
+					"must\nbe below 4 GB. Change the memory "
+					"configuration of the machine.\n",
+					(unsigned long)(base >> 30));
+			} else {
+				printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+					"for crashkernel (System RAM: %ldMB)\n",
+					(unsigned long)(size >> 20),
+					(unsigned long)(base >> 20),
+					(unsigned long)(total >> 20));
+				rsvd_region[*n].start =
+					(unsigned long)__va(base);
+				rsvd_region[*n].end =
+					(unsigned long)__va(base + size);
+				(*n)++;
+				crashk_res.start = base;
+				crashk_res.end = base + size - 1;
+			}
+		}
+	}
+	efi_memmap_res.start = ia64_boot_param->efi_memmap;
+	efi_memmap_res.end = efi_memmap_res.start +
+		ia64_boot_param->efi_memmap_size;
+	boot_param_res.start = __pa(ia64_boot_param);
+	boot_param_res.end = boot_param_res.start +
+		sizeof(*ia64_boot_param);
+}
+#else
+/* Stub for builds without CONFIG_KEXEC: does nothing. */
+static inline void __init setup_crashkernel(unsigned long total, int *n)
+{}
+#endif
+
/**
* reserve_memory - setup reserved memory areas
*
* Setup the reserved memory areas set aside for the boot parameters,
* initrd, etc. There are currently %IA64_MAX_RSVD_REGIONS defined,
- * see include/asm-ia64/meminit.h if you need to define more.
+ * see arch/ia64/include/asm/meminit.h if you need to define more.
*/
void __init
reserve_memory (void)
{
int n = 0;
+ unsigned long total_memory;
/*
* none of the entries in this table overlap
@@ -240,6 +360,8 @@ reserve_memory (void)
rsvd_region[n].end = (unsigned long) ia64_imva(_end);
n++;
+ n += paravirt_reserve_memory(&rsvd_region[n]);
+
#ifdef CONFIG_BLK_DEV_INITRD
if (ia64_boot_param->initrd_start) {
rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start);
@@ -248,56 +370,17 @@ reserve_memory (void)
}
#endif
-#ifdef CONFIG_PROC_VMCORE
+#ifdef CONFIG_CRASH_DUMP
if (reserve_elfcorehdr(&rsvd_region[n].start,
&rsvd_region[n].end) == 0)
n++;
#endif
- efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
+ total_memory = efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
n++;
-#ifdef CONFIG_KEXEC
- /* crashkernel=size@offset specifies the size to reserve for a crash
- * kernel. If offset is 0, then it is determined automatically.
- * By reserving this memory we guarantee that linux never set's it
- * up as a DMA target.Useful for holding code to do something
- * appropriate after a kernel panic.
- */
- {
- char *from = strstr(boot_command_line, "crashkernel=");
- unsigned long base, size;
- if (from) {
- size = memparse(from + 12, &from);
- if (*from == '@')
- base = memparse(from+1, &from);
- else
- base = 0;
- if (size) {
- if (!base) {
- sort_regions(rsvd_region, n);
- base = kdump_find_rsvd_region(size,
- rsvd_region, n);
- }
- if (base != ~0UL) {
- rsvd_region[n].start =
- (unsigned long)__va(base);
- rsvd_region[n].end =
- (unsigned long)__va(base + size);
- n++;
- crashk_res.start = base;
- crashk_res.end = base + size - 1;
- }
- }
- }
- efi_memmap_res.start = ia64_boot_param->efi_memmap;
- efi_memmap_res.end = efi_memmap_res.start +
- ia64_boot_param->efi_memmap_size;
- boot_param_res.start = __pa(ia64_boot_param);
- boot_param_res.end = boot_param_res.start +
- sizeof(*ia64_boot_param);
- }
-#endif
+ setup_crashkernel(total_memory, &n);
+
/* end of memory marker */
rsvd_region[n].start = ~0UL;
rsvd_region[n].end = ~0UL;
@@ -307,6 +390,7 @@ reserve_memory (void)
BUG_ON(IA64_MAX_RSVD_REGIONS + 1 < n);
sort_regions(rsvd_region, num_rsvd_regions);
+ num_rsvd_regions = merge_regions(rsvd_region, num_rsvd_regions);
}
@@ -324,7 +408,7 @@ find_initrd (void)
initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start);
initrd_end = initrd_start+ia64_boot_param->initrd_size;
- printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n",
+ printk(KERN_INFO "Initial ramdisk at: 0x%lx (%llu bytes)\n",
initrd_start, ia64_boot_param->initrd_size);
}
#endif
@@ -401,38 +485,10 @@ mark_bsp_online (void)
{
#ifdef CONFIG_SMP
/* If we register an early console, allow CPU 0 to printk */
- cpu_set(smp_processor_id(), cpu_online_map);
+ set_cpu_online(smp_processor_id(), true);
#endif
}
-#ifdef CONFIG_SMP
-static void __init
-check_for_logical_procs (void)
-{
- pal_logical_to_physical_t info;
- s64 status;
-
- status = ia64_pal_logical_to_phys(0, &info);
- if (status == -1) {
- printk(KERN_INFO "No logical to physical processor mapping "
- "available\n");
- return;
- }
- if (status) {
- printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n",
- status);
- return;
- }
- /*
- * Total number of siblings that BSP has. Though not all of them
- * may have booted successfully. The correct number of siblings
- * booted is in info.overview_num_log.
- */
- smp_num_siblings = info.overview_tpc;
- smp_num_cpucores = info.overview_cpp;
-}
-#endif
-
static __initdata int nomca;
static __init int setup_nomca(char *s)
{
@@ -441,23 +497,10 @@ static __init int setup_nomca(char *s)
}
early_param("nomca", setup_nomca);
-#ifdef CONFIG_PROC_VMCORE
-/* elfcorehdr= specifies the location of elf core header
- * stored by the crashed kernel.
- */
-static int __init parse_elfcorehdr(char *arg)
-{
- if (!arg)
- return -EINVAL;
-
- elfcorehdr_addr = memparse(arg, &arg);
- return 0;
-}
-early_param("elfcorehdr", parse_elfcorehdr);
-
-int __init reserve_elfcorehdr(unsigned long *start, unsigned long *end)
+#ifdef CONFIG_CRASH_DUMP
+int __init reserve_elfcorehdr(u64 *start, u64 *end)
{
- unsigned long length;
+ u64 length;
/* We get the address using the kernel command line,
* but the size is extracted from the EFI tables.
@@ -465,11 +508,11 @@ int __init reserve_elfcorehdr(unsigned long *start, unsigned long *end)
* to work properly.
*/
- if (elfcorehdr_addr >= ELFCORE_ADDR_MAX)
+ if (!is_vmcore_usable())
return -EINVAL;
if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) {
- elfcorehdr_addr = ELFCORE_ADDR_MAX;
+ vmcore_unusable();
return -EINVAL;
}
@@ -485,7 +528,10 @@ setup_arch (char **cmdline_p)
{
unw_init();
+ paravirt_arch_setup_early();
+
ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
+ paravirt_patch_apply();
*cmdline_p = __va(ia64_boot_param->command_line);
strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE);
@@ -510,47 +556,46 @@ setup_arch (char **cmdline_p)
#ifdef CONFIG_ACPI
/* Initialize the ACPI boot-time table parser */
acpi_table_init();
+ early_acpi_boot_init();
# ifdef CONFIG_ACPI_NUMA
acpi_numa_init();
-# endif
-#else
-# ifdef CONFIG_SMP
- smp_build_cpu_map(); /* happens, e.g., with the Ski simulator */
+# ifdef CONFIG_ACPI_HOTPLUG_CPU
+ prefill_possible_map();
+# endif
+ per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ?
+ 32 : cpus_weight(early_cpu_possible_map)),
+ additional_cpus > 0 ? additional_cpus : 0);
# endif
#endif /* CONFIG_APCI_BOOT */
+#ifdef CONFIG_SMP
+ smp_build_cpu_map();
+#endif
find_memory();
/* process SAL system table: */
ia64_sal_init(__va(efi.sal_systab));
- ia64_setup_printk_clock();
+#ifdef CONFIG_ITANIUM
+ ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist);
+#else
+ {
+ unsigned long num_phys_stacked;
+
+ if (ia64_pal_rse_info(&num_phys_stacked, 0) == 0 && num_phys_stacked > 96)
+ ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist);
+ }
+#endif
#ifdef CONFIG_SMP
cpu_physical_id(0) = hard_smp_processor_id();
-
- cpu_set(0, cpu_sibling_map[0]);
- cpu_set(0, cpu_core_map[0]);
-
- check_for_logical_procs();
- if (smp_num_cpucores > 1)
- printk(KERN_INFO
- "cpu package is Multi-Core capable: number of cores=%d\n",
- smp_num_cpucores);
- if (smp_num_siblings > 1)
- printk(KERN_INFO
- "cpu package is Multi-Threading capable: number of siblings=%d\n",
- smp_num_siblings);
#endif
cpu_init(); /* initialize the bootstrap CPU */
mmu_context_init(); /* initialize context_id bitmap */
- check_sal_cache_flush();
-
-#ifdef CONFIG_ACPI
- acpi_boot_init();
-#endif
+ paravirt_banner();
+ paravirt_arch_setup_console(cmdline_p);
#ifdef CONFIG_VT
if (!conswitchp) {
@@ -571,10 +616,15 @@ setup_arch (char **cmdline_p)
#endif
/* enable IA-64 Machine Check Abort Handling unless disabled */
+ if (paravirt_arch_setup_nomca())
+ nomca = 1;
if (!nomca)
ia64_mca_init();
platform_setup(cmdline_p);
+#ifndef CONFIG_IA64_HP_SIM
+ check_sal_cache_flush();
+#endif
paging_init();
}
@@ -653,12 +703,13 @@ show_cpuinfo (struct seq_file *m, void *v)
lpj*HZ/500000, (lpj*HZ/5000) % 100);
#ifdef CONFIG_SMP
seq_printf(m, "siblings : %u\n", cpus_weight(cpu_core_map[cpunum]));
+ if (c->socket_id != -1)
+ seq_printf(m, "physical id: %u\n", c->socket_id);
if (c->threads_per_core > 1 || c->cores_per_socket > 1)
seq_printf(m,
- "physical id: %u\n"
- "core id : %u\n"
- "thread id : %u\n",
- c->socket_id, c->core_id, c->thread_id);
+ "core id : %u\n"
+ "thread id : %u\n",
+ c->core_id, c->thread_id);
#endif
seq_printf(m,"\n");
@@ -669,10 +720,10 @@ static void *
c_start (struct seq_file *m, loff_t *pos)
{
#ifdef CONFIG_SMP
- while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map))
+ while (*pos < nr_cpu_ids && !cpu_online(*pos))
++*pos;
#endif
- return *pos < NR_CPUS ? cpu_data(*pos) : NULL;
+ return *pos < nr_cpu_ids ? cpu_data(*pos) : NULL;
}
static void *
@@ -687,7 +738,7 @@ c_stop (struct seq_file *m, void *v)
{
}
-struct seq_operations cpuinfo_op = {
+const struct seq_operations cpuinfo_op = {
.start = c_start,
.next = c_next,
.stop = c_stop,
@@ -697,7 +748,7 @@ struct seq_operations cpuinfo_op = {
#define MAX_BRANDS 8
static char brandname[MAX_BRANDS][128];
-static char * __cpuinit
+static char *
get_model_name(__u8 family, __u8 model)
{
static int overflow;
@@ -723,11 +774,11 @@ get_model_name(__u8 family, __u8 model)
if (overflow++ == 0)
printk(KERN_ERR
"%s: Table overflow. Some processor model information will be missing\n",
- __FUNCTION__);
+ __func__);
return "Unknown";
}
-static void __cpuinit
+static void
identify_cpu (struct cpuinfo_ia64 *c)
{
union {
@@ -770,6 +821,9 @@ identify_cpu (struct cpuinfo_ia64 *c)
c->socket_id = -1;
identify_siblings(c);
+
+ if (c->threads_per_core > smp_num_siblings)
+ smp_num_siblings = c->threads_per_core;
#endif
c->ppn = cpuid.field.ppn;
c->number = cpuid.field.number;
@@ -789,63 +843,63 @@ identify_cpu (struct cpuinfo_ia64 *c)
c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1));
}
-void __init
-setup_per_cpu_areas (void)
-{
- /* start_kernel() requires this... */
-#ifdef CONFIG_ACPI_HOTPLUG_CPU
- prefill_possible_map();
-#endif
-}
-
/*
- * Calculate the max. cache line size.
+ * Do the following calculations:
*
- * In addition, the minimum of the i-cache stride sizes is calculated for
- * "flush_icache_range()".
+ * 1. the max. cache line size.
+ * 2. the minimum of the i-cache stride sizes for "flush_icache_range()".
+ * 3. the minimum of the cache stride sizes for "clflush_cache_range()".
*/
-static void __cpuinit
-get_max_cacheline_size (void)
+static void
+get_cache_info(void)
{
unsigned long line_size, max = 1;
- u64 l, levels, unique_caches;
- pal_cache_config_info_t cci;
- s64 status;
+ unsigned long l, levels, unique_caches;
+ pal_cache_config_info_t cci;
+ long status;
status = ia64_pal_cache_summary(&levels, &unique_caches);
if (status != 0) {
printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
- __FUNCTION__, status);
+ __func__, status);
max = SMP_CACHE_BYTES;
/* Safest setup for "flush_icache_range()" */
ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT;
+ /* Safest setup for "clflush_cache_range()" */
+ ia64_cache_stride_shift = CACHE_STRIDE_SHIFT;
goto out;
}
for (l = 0; l < levels; ++l) {
- status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2,
- &cci);
+ /* cache_type (data_or_unified)=2 */
+ status = ia64_pal_cache_config_info(l, 2, &cci);
if (status != 0) {
- printk(KERN_ERR
- "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n",
- __FUNCTION__, l, status);
+ printk(KERN_ERR "%s: ia64_pal_cache_config_info"
+ "(l=%lu, 2) failed (status=%ld)\n",
+ __func__, l, status);
max = SMP_CACHE_BYTES;
/* The safest setup for "flush_icache_range()" */
cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
+ /* The safest setup for "clflush_cache_range()" */
+ ia64_cache_stride_shift = CACHE_STRIDE_SHIFT;
cci.pcci_unified = 1;
+ } else {
+ if (cci.pcci_stride < ia64_cache_stride_shift)
+ ia64_cache_stride_shift = cci.pcci_stride;
+
+ line_size = 1 << cci.pcci_line_size;
+ if (line_size > max)
+ max = line_size;
}
- line_size = 1 << cci.pcci_line_size;
- if (line_size > max)
- max = line_size;
+
if (!cci.pcci_unified) {
- status = ia64_pal_cache_config_info(l,
- /* cache_type (instruction)= */ 1,
- &cci);
+ /* cache_type (instruction)=1*/
+ status = ia64_pal_cache_config_info(l, 1, &cci);
if (status != 0) {
- printk(KERN_ERR
- "%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n",
- __FUNCTION__, l, status);
- /* The safest setup for "flush_icache_range()" */
+ printk(KERN_ERR "%s: ia64_pal_cache_config_info"
+ "(l=%lu, 1) failed (status=%ld)\n",
+ __func__, l, status);
+ /* The safest setup for flush_icache_range() */
cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
}
}
@@ -861,10 +915,10 @@ get_max_cacheline_size (void)
* cpu_init() initializes state that is per-CPU. This function acts
* as a 'CPU state barrier', nothing should get across.
*/
-void __cpuinit
+void
cpu_init (void)
{
- extern void __cpuinit ia64_mmu_init (void *);
+ extern void ia64_mmu_init(void *);
static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG;
unsigned long num_phys_stacked;
pal_vm_info_2_u_t vmi;
@@ -873,16 +927,28 @@ cpu_init (void)
void *cpu_data;
cpu_data = per_cpu_init();
-
+#ifdef CONFIG_SMP
/*
- * We set ar.k3 so that assembly code in MCA handler can compute
- * physical addresses of per cpu variables with a simple:
- * phys = ar.k3 + &per_cpu_var
+ * insert boot cpu into sibling and core maps
+ * (must be done after per_cpu area is setup)
*/
- ia64_set_kr(IA64_KR_PER_CPU_DATA,
- ia64_tpa(cpu_data) - (long) __per_cpu_start);
+ if (smp_processor_id() == 0) {
+ cpu_set(0, per_cpu(cpu_sibling_map, 0));
+ cpu_set(0, cpu_core_map[0]);
+ } else {
+ /*
+ * Set ar.k3 so that assembly code in MCA handler can compute
+ * physical addresses of per cpu variables with a simple:
+ * phys = ar.k3 + &per_cpu_var
+ * and the alt-dtlb-miss handler can set per-cpu mapping into
+ * the TLB when needed. head.S already did this for cpu0.
+ */
+ ia64_set_kr(IA64_KR_PER_CPU_DATA,
+ ia64_tpa(cpu_data) - (long) __per_cpu_start);
+ }
+#endif
- get_max_cacheline_size();
+ get_cache_info();
/*
* We can't pass "local_cpu_data" to identify_cpu() because we haven't called
@@ -890,7 +956,7 @@ cpu_init (void)
* depends on the data returned by identify_cpu(). We break the dependency by
* accessing cpu_data() through the canonical per-CPU address.
*/
- cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start);
+ cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(ia64_cpu_info) - __per_cpu_start);
identify_cpu(cpu_info);
#ifdef CONFIG_MCKINLEY
@@ -934,16 +1000,11 @@ cpu_init (void)
| IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
atomic_inc(&init_mm.mm_count);
current->active_mm = &init_mm;
- if (current->mm)
- BUG();
+ BUG_ON(current->mm);
ia64_mmu_init(ia64_imva(cpu_data));
ia64_mca_cpu_init(ia64_imva(cpu_data));
-#ifdef CONFIG_IA32_SUPPORT
- ia32_cpu_init();
-#endif
-
/* Clear ITC to eliminate sched_clock() overflows in human time. */
ia64_set_itc(0);
@@ -966,9 +1027,10 @@ cpu_init (void)
#endif
/* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
- if (ia64_pal_vm_summary(NULL, &vmi) == 0)
+ if (ia64_pal_vm_summary(NULL, &vmi) == 0) {
max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
- else {
+ setup_ptcg_sem(vmi.pal_vm_info_2_s.max_purges, NPTCG_FROM_PAL);
+ } else {
printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
max_ctx = (1U << 15) - 1; /* use architected minimum */
}
@@ -989,7 +1051,6 @@ cpu_init (void)
max_num_phys_stacked = num_phys_stacked;
}
platform_cpu_init();
- pm_idle = default_idle;
}
void __init
@@ -1002,6 +1063,8 @@ check_bugs (void)
static int __init run_dmi_scan(void)
{
dmi_scan_machine();
+ dmi_memdev_walk();
+ dmi_set_dump_stack_arch_desc();
return 0;
}
core_initcall(run_dmi_scan);
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index aeec8184e86..33cab9a8adf 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/ptrace.h>
+#include <linux/tracehook.h>
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/smp.h>
@@ -20,7 +21,6 @@
#include <linux/unistd.h>
#include <linux/wait.h>
-#include <asm/ia32.h>
#include <asm/intrinsics.h>
#include <asm/uaccess.h>
#include <asm/rse.h>
@@ -30,7 +30,6 @@
#define DEBUG_SIG 0
#define STACK_ALIGN 16 /* minimal alignment for stack pointer */
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
#if _NSIG_WORDS > 1
# define PUT_SIGSET(k,u) __copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t))
@@ -40,14 +39,6 @@
# define GET_SIGSET(k,u) __get_user((k)->sig[0], &(u)->sig[0])
#endif
-asmlinkage long
-sys_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, long arg2,
- long arg3, long arg4, long arg5, long arg6, long arg7,
- struct pt_regs regs)
-{
- return do_sigaltstack(uss, uoss, regs.r12);
-}
-
static long
restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
{
@@ -98,7 +89,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
if ((flags & IA64_SC_FLAG_FPH_VALID) != 0) {
struct ia64_psr *psr = ia64_psr(&scr->pt);
- __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16);
+ err |= __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16);
psr->mfh = 0; /* drop signal handler's fph contents... */
preempt_disable();
if (psr->dfh)
@@ -114,7 +105,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
}
int
-copy_siginfo_to_user (siginfo_t __user *to, siginfo_t *from)
+copy_siginfo_to_user (siginfo_t __user *to, const siginfo_t *from)
{
if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t)))
return -EFAULT;
@@ -200,14 +191,7 @@ ia64_rt_sigreturn (struct sigscratch *scr)
if (GET_SIGSET(&set, &sc->sc_mask))
goto give_sigsegv;
- sigdelsetmask(&set, ~_BLOCKABLE);
-
- spin_lock_irq(&current->sighand->siglock);
- {
- current->blocked = set;
- recalc_sigpending();
- }
- spin_unlock_irq(&current->sighand->siglock);
+ set_current_blocked(&set);
if (restore_sigcontext(sc, scr))
goto give_sigsegv;
@@ -216,19 +200,16 @@ ia64_rt_sigreturn (struct sigscratch *scr)
printk("SIG return (%s:%d): sp=%lx ip=%lx\n",
current->comm, current->pid, scr->pt.r12, scr->pt.cr_iip);
#endif
- /*
- * It is more difficult to avoid calling this function than to
- * call it and ignore errors.
- */
- do_sigaltstack(&sc->sc_stack, NULL, scr->pt.r12);
+ if (restore_altstack(&sc->sc_stack))
+ goto give_sigsegv;
return retval;
give_sigsegv:
si.si_signo = SIGSEGV;
si.si_errno = 0;
si.si_code = SI_KERNEL;
- si.si_pid = current->pid;
- si.si_uid = current->uid;
+ si.si_pid = task_pid_vnr(current);
+ si.si_uid = from_kuid_munged(current_user_ns(), current_uid());
si.si_addr = sc;
force_sig_info(SIGSEGV, &si, current);
return retval;
@@ -244,7 +225,7 @@ static long
setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratch *scr)
{
unsigned long flags = 0, ifs, cfm, nat;
- long err;
+ long err = 0;
ifs = scr->pt.cr_ifs;
@@ -257,12 +238,12 @@ setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratc
ia64_flush_fph(current);
if ((current->thread.flags & IA64_THREAD_FPH_VALID)) {
flags |= IA64_SC_FLAG_FPH_VALID;
- __copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16);
+ err = __copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16);
}
nat = ia64_get_scratch_nat_bits(&scr->pt, scr->scratch_unat);
- err = __put_user(flags, &sc->sc_flags);
+ err |= __put_user(flags, &sc->sc_flags);
err |= __put_user(nat, &sc->sc_nat);
err |= PUT_SIGSET(mask, &sc->sc_mask);
err |= __put_user(cfm, &sc->sc_cfm);
@@ -280,15 +261,7 @@ setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratc
err |= __copy_to_user(&sc->sc_gr[15], &scr->pt.r15, 8); /* r15 */
err |= __put_user(scr->pt.cr_iip + ia64_psr(&scr->pt)->ri, &sc->sc_ip);
- if (flags & IA64_SC_FLAG_IN_SYSCALL) {
- /* Clear scratch registers if the signal interrupted a system call. */
- err |= __put_user(0, &sc->sc_ar_ccv); /* ar.ccv */
- err |= __put_user(0, &sc->sc_br[7]); /* b7 */
- err |= __put_user(0, &sc->sc_gr[14]); /* r14 */
- err |= __clear_user(&sc->sc_ar25, 2*8); /* ar.csd & ar.ssd */
- err |= __clear_user(&sc->sc_gr[2], 2*8); /* r2-r3 */
- err |= __clear_user(&sc->sc_gr[16], 16*8); /* r16-r31 */
- } else {
+ if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) {
/* Copy scratch regs to sigcontext if the signal didn't interrupt a syscall. */
err |= __put_user(scr->pt.ar_ccv, &sc->sc_ar_ccv); /* ar.ccv */
err |= __put_user(scr->pt.b7, &sc->sc_br[7]); /* b7 */
@@ -332,8 +305,8 @@ force_sigsegv_info (int sig, void __user *addr)
si.si_signo = SIGSEGV;
si.si_errno = 0;
si.si_code = SI_KERNEL;
- si.si_pid = current->pid;
- si.si_uid = current->uid;
+ si.si_pid = task_pid_vnr(current);
+ si.si_uid = from_kuid_munged(current_user_ns(), current_uid());
si.si_addr = addr;
force_sig_info(SIGSEGV, &si, current);
return 0;
@@ -350,15 +323,33 @@ setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set,
new_sp = scr->pt.r12;
tramp_addr = (unsigned long) __kernel_sigtramp;
- if ((ka->sa.sa_flags & SA_ONSTACK) && sas_ss_flags(new_sp) == 0) {
- new_sp = current->sas_ss_sp + current->sas_ss_size;
- /*
- * We need to check for the register stack being on the signal stack
- * separately, because it's switched separately (memory stack is switched
- * in the kernel, register stack is switched in the signal trampoline).
- */
- if (!rbs_on_sig_stack(scr->pt.ar_bspstore))
- new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1);
+ if (ka->sa.sa_flags & SA_ONSTACK) {
+ int onstack = sas_ss_flags(new_sp);
+
+ if (onstack == 0) {
+ new_sp = current->sas_ss_sp + current->sas_ss_size;
+ /*
+ * We need to check for the register stack being on the
+ * signal stack separately, because it's switched
+ * separately (memory stack is switched in the kernel,
+ * register stack is switched in the signal trampoline).
+ */
+ if (!rbs_on_sig_stack(scr->pt.ar_bspstore))
+ new_rbs = ALIGN(current->sas_ss_sp,
+ sizeof(long));
+ } else if (onstack == SS_ONSTACK) {
+ unsigned long check_sp;
+
+ /*
+ * If we are on the alternate signal stack and would
+ * overflow it, don't. Return an always-bogus address
+ * instead so we will die with SIGSEGV.
+ */
+ check_sp = (new_sp - sizeof(*frame)) & -STACK_ALIGN;
+ if (!likely(on_sig_stack(check_sp)))
+ return force_sigsegv_info(sig, (void __user *)
+ check_sp);
+ }
}
frame = (void __user *) ((new_sp - sizeof(*frame)) & -STACK_ALIGN);
@@ -374,9 +365,7 @@ setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set,
err |= copy_siginfo_to_user(&frame->info, info);
- err |= __put_user(current->sas_ss_sp, &frame->sc.sc_stack.ss_sp);
- err |= __put_user(current->sas_ss_size, &frame->sc.sc_stack.ss_size);
- err |= __put_user(sas_ss_flags(scr->pt.r12), &frame->sc.sc_stack.ss_flags);
+ err |= __save_altstack(&frame->sc.sc_stack, scr->pt.r12);
err |= setup_sigcontext(&frame->sc, set, scr);
if (unlikely(err))
@@ -411,24 +400,15 @@ setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set,
}
static long
-handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset,
+handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
struct sigscratch *scr)
{
- if (IS_IA32_PROCESS(&scr->pt)) {
- /* send signal to IA-32 process */
- if (!ia32_setup_frame1(sig, ka, info, oldset, &scr->pt))
- return 0;
- } else
- /* send signal to IA-64 process */
- if (!setup_frame(sig, ka, info, oldset, scr))
- return 0;
-
- spin_lock_irq(&current->sighand->siglock);
- sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
- if (!(ka->sa.sa_flags & SA_NODEFER))
- sigaddset(&current->blocked, sig);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
+ if (!setup_frame(sig, ka, info, sigmask_to_save(), scr))
+ return 0;
+
+ signal_delivered(sig, info, ka, &scr->pt,
+ test_thread_flag(TIF_SINGLESTEP));
+
return 1;
}
@@ -440,24 +420,9 @@ void
ia64_do_signal (struct sigscratch *scr, long in_syscall)
{
struct k_sigaction ka;
- sigset_t *oldset;
siginfo_t info;
long restart = in_syscall;
long errno = scr->pt.r8;
-# define ERR_CODE(c) (IS_IA32_PROCESS(&scr->pt) ? -(c) : (c))
-
- /*
- * In the ia64_leave_kernel code path, we want the common case to go fast, which
- * is why we may in certain cases get here from kernel mode. Just return without
- * doing anything if so.
- */
- if (!user_mode(&scr->pt))
- return;
-
- if (test_thread_flag(TIF_RESTORE_SIGMASK))
- oldset = &current->saved_sigmask;
- else
- oldset = &current->blocked;
/*
* This only loops in the rare cases of handle_signal() failing, in which case we
@@ -472,14 +437,7 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall)
* inferior call), thus it's important to check for restarting _after_
* get_signal_to_deliver().
*/
- if (IS_IA32_PROCESS(&scr->pt)) {
- if (in_syscall) {
- if (errno >= 0)
- restart = 0;
- else
- errno = -errno;
- }
- } else if ((long) scr->pt.r10 != -1)
+ if ((long) scr->pt.r10 != -1)
/*
* A system call has to be restarted only if one of the error codes
* ERESTARTNOHAND, ERESTARTSYS, or ERESTARTNOINTR is returned. If r10
@@ -495,22 +453,18 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall)
switch (errno) {
case ERESTART_RESTARTBLOCK:
case ERESTARTNOHAND:
- scr->pt.r8 = ERR_CODE(EINTR);
+ scr->pt.r8 = EINTR;
/* note: scr->pt.r10 is already -1 */
break;
case ERESTARTSYS:
if ((ka.sa.sa_flags & SA_RESTART) == 0) {
- scr->pt.r8 = ERR_CODE(EINTR);
+ scr->pt.r8 = EINTR;
/* note: scr->pt.r10 is already -1 */
break;
}
case ERESTARTNOINTR:
- if (IS_IA32_PROCESS(&scr->pt)) {
- scr->pt.r8 = scr->pt.r1;
- scr->pt.cr_iip -= 2;
- } else
- ia64_decrement_ip(&scr->pt);
+ ia64_decrement_ip(&scr->pt);
restart = 0; /* don't restart twice if handle_signal() fails... */
}
}
@@ -519,15 +473,8 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall)
* Whee! Actually deliver the signal. If the delivery failed, we need to
* continue to iterate in this loop so we can deliver the SIGSEGV...
*/
- if (handle_signal(signr, &ka, &info, oldset, scr)) {
- /* a signal was successfully delivered; the saved
- * sigmask will have been stored in the signal frame,
- * and will be restored by sigreturn, so we can simply
- * clear the TIF_RESTORE_SIGMASK flag */
- if (test_thread_flag(TIF_RESTORE_SIGMASK))
- clear_thread_flag(TIF_RESTORE_SIGMASK);
+ if (handle_signal(signr, &ka, &info, scr))
return;
- }
}
/* Did we come from a system call? */
@@ -536,28 +483,18 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall)
if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR
|| errno == ERESTART_RESTARTBLOCK)
{
- if (IS_IA32_PROCESS(&scr->pt)) {
- scr->pt.r8 = scr->pt.r1;
- scr->pt.cr_iip -= 2;
- if (errno == ERESTART_RESTARTBLOCK)
- scr->pt.r8 = 0; /* x86 version of __NR_restart_syscall */
- } else {
- /*
- * Note: the syscall number is in r15 which is saved in
- * pt_regs so all we need to do here is adjust ip so that
- * the "break" instruction gets re-executed.
- */
- ia64_decrement_ip(&scr->pt);
- if (errno == ERESTART_RESTARTBLOCK)
- scr->pt.r15 = __NR_restart_syscall;
- }
+ /*
+ * Note: the syscall number is in r15 which is saved in
+ * pt_regs so all we need to do here is adjust ip so that
+ * the "break" instruction gets re-executed.
+ */
+ ia64_decrement_ip(&scr->pt);
+ if (errno == ERESTART_RESTARTBLOCK)
+ scr->pt.r15 = __NR_restart_syscall;
}
}
/* if there's no signal to deliver, we just put the saved sigmask
* back */
- if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
- clear_thread_flag(TIF_RESTORE_SIGMASK);
- sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
- }
+ restore_saved_sigmask();
}
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 4e446aa5f4a..9fcd4e63048 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -32,7 +32,7 @@
#include <linux/bitops.h>
#include <linux/kexec.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <asm/current.h>
#include <asm/delay.h>
#include <asm/machvec.h>
@@ -44,7 +44,6 @@
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/sal.h>
-#include <asm/system.h>
#include <asm/tlbflush.h>
#include <asm/unistd.h>
#include <asm/mca.h>
@@ -58,53 +57,26 @@ static struct local_tlb_flush_counts {
unsigned int count;
} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS];
-static DEFINE_PER_CPU(unsigned int, shadow_flush_counts[NR_CPUS]) ____cacheline_aligned;
-
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise static memory
- * requirements. It also looks cleaner.
- */
-static __cacheline_aligned DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- long wait;
- atomic_t started;
- atomic_t finished;
-};
-
-static volatile struct call_data_struct *call_data;
+static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned short [NR_CPUS],
+ shadow_flush_counts);
#define IPI_CALL_FUNC 0
#define IPI_CPU_STOP 1
+#define IPI_CALL_FUNC_SINGLE 2
#define IPI_KDUMP_CPU_STOP 3
/* This needs to be cacheline aligned because it is written to by *other* CPUs. */
-static DEFINE_PER_CPU_SHARED_ALIGNED(u64, ipi_operation);
+static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, ipi_operation);
extern void cpu_halt (void);
-void
-lock_ipi_calllock(void)
-{
- spin_lock_irq(&call_lock);
-}
-
-void
-unlock_ipi_calllock(void)
-{
- spin_unlock_irq(&call_lock);
-}
-
static void
-stop_this_cpu (void)
+stop_this_cpu(void)
{
/*
* Remove this CPU:
*/
- cpu_clear(smp_processor_id(), cpu_online_map);
+ set_cpu_online(smp_processor_id(), false);
max_xtp();
local_irq_disable();
cpu_halt();
@@ -138,44 +110,23 @@ handle_IPI (int irq, void *dev_id)
ops &= ~(1 << which);
switch (which) {
- case IPI_CALL_FUNC:
- {
- struct call_data_struct *data;
- void (*func)(void *info);
- void *info;
- int wait;
-
- /* release the 'pointer lock' */
- data = (struct call_data_struct *) call_data;
- func = data->func;
- info = data->info;
- wait = data->wait;
-
- mb();
- atomic_inc(&data->started);
- /*
- * At this point the structure may be gone unless
- * wait is true.
- */
- (*func)(info);
-
- /* Notify the sending CPU that the task is done. */
- mb();
- if (wait)
- atomic_inc(&data->finished);
- }
- break;
-
- case IPI_CPU_STOP:
+ case IPI_CPU_STOP:
stop_this_cpu();
break;
+ case IPI_CALL_FUNC:
+ generic_smp_call_function_interrupt();
+ break;
+ case IPI_CALL_FUNC_SINGLE:
+ generic_smp_call_function_single_interrupt();
+ break;
#ifdef CONFIG_KEXEC
- case IPI_KDUMP_CPU_STOP:
+ case IPI_KDUMP_CPU_STOP:
unw_init_running(kdump_cpu_freeze, NULL);
break;
#endif
- default:
- printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
+ default:
+ printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n",
+ this_cpu, which);
break;
}
} while (ops);
@@ -185,6 +136,8 @@ handle_IPI (int irq, void *dev_id)
return IRQ_HANDLED;
}
+
+
/*
* Called with preemption disabled.
*/
@@ -213,6 +166,19 @@ send_IPI_allbutself (int op)
* Called with preemption disabled.
*/
static inline void
+send_IPI_mask(const struct cpumask *mask, int op)
+{
+ unsigned int cpu;
+
+ for_each_cpu(cpu, mask) { /* deliver IPI operation 'op' to every CPU in 'mask', one CPU at a time */
+ send_IPI_single(cpu, op);
+ }
+}
+
+/*
+ * Called with preemption disabled.
+ */
+static inline void
send_IPI_all (int op)
{
int i;
@@ -259,6 +225,7 @@ smp_send_reschedule (int cpu)
{
platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
}
+EXPORT_SYMBOL_GPL(smp_send_reschedule);
/*
* Called with preemption disabled.
@@ -288,7 +255,7 @@ smp_local_flush_tlb(void)
void
smp_flush_tlb_cpumask(cpumask_t xcpumask)
{
- unsigned int *counts = __ia64_per_cpu_var(shadow_flush_counts);
+ unsigned short *counts = __ia64_per_cpu_var(shadow_flush_counts);
cpumask_t cpumask = xcpumask;
int mycpu, cpu, flush_mycpu = 0;
@@ -296,7 +263,7 @@ smp_flush_tlb_cpumask(cpumask_t xcpumask)
mycpu = smp_processor_id();
for_each_cpu_mask(cpu, cpumask)
- counts[cpu] = local_tlb_flush_counts[cpu].count;
+ counts[cpu] = local_tlb_flush_counts[cpu].count & 0xffff;
mb();
for_each_cpu_mask(cpu, cpumask) {
@@ -310,7 +277,7 @@ smp_flush_tlb_cpumask(cpumask_t xcpumask)
smp_local_flush_tlb();
for_each_cpu_mask(cpu, cpumask)
- while(counts[cpu] == local_tlb_flush_counts[cpu].count)
+ while(counts[cpu] == (local_tlb_flush_counts[cpu].count & 0xffff))
udelay(FLUSH_DELAY);
preempt_enable();
@@ -319,12 +286,13 @@ smp_flush_tlb_cpumask(cpumask_t xcpumask)
void
smp_flush_tlb_all (void)
{
- on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
+ on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1);
}
void
smp_flush_tlb_mm (struct mm_struct *mm)
{
+ cpumask_var_t cpus;
preempt_disable();
/* this happens for the common case of a single-threaded fork(): */
if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1))
@@ -333,133 +301,30 @@ smp_flush_tlb_mm (struct mm_struct *mm)
preempt_enable();
return;
}
-
+ if (!alloc_cpumask_var(&cpus, GFP_ATOMIC)) {
+ smp_call_function((void (*)(void *))local_finish_flush_tlb_mm,
+ mm, 1);
+ } else {
+ cpumask_copy(cpus, mm_cpumask(mm));
+ smp_call_function_many(cpus,
+ (void (*)(void *))local_finish_flush_tlb_mm, mm, 1);
+ free_cpumask_var(cpus);
+ }
+ local_irq_disable();
+ local_finish_flush_tlb_mm(mm);
+ local_irq_enable();
preempt_enable();
- /*
- * We could optimize this further by using mm->cpu_vm_mask to track which CPUs
- * have been running in the address space. It's not clear that this is worth the
- * trouble though: to avoid races, we have to raise the IPI on the target CPU
- * anyhow, and once a CPU is interrupted, the cost of local_flush_tlb_all() is
- * rather trivial.
- */
- on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1, 1);
}
-/*
- * Run a function on a specific CPU
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> Currently unused.
- * <wait> If true, wait until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code.
- *
- * Does not return until the remote CPU is nearly ready to execute <func>
- * or is or has executed.
- */
-
-int
-smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int nonatomic,
- int wait)
+void arch_send_call_function_single_ipi(int cpu)
{
- struct call_data_struct data;
- int cpus = 1;
- int me = get_cpu(); /* prevent preemption and reschedule on another processor */
-
- if (cpuid == me) {
- local_irq_disable();
- func(info);
- local_irq_enable();
- put_cpu();
- return 0;
- }
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- spin_lock_bh(&call_lock);
-
- call_data = &data;
- mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC */
- send_IPI_single(cpuid, IPI_CALL_FUNC);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
- call_data = NULL;
-
- spin_unlock_bh(&call_lock);
- put_cpu();
- return 0;
+ send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE);
}
-EXPORT_SYMBOL(smp_call_function_single);
-/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-
-/*
- * [SUMMARY] Run a function on all other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> currently unused.
- * <wait> If true, wait (atomically) until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code.
- *
- * Does not return until remote CPUs are nearly ready to execute <func> or are or have
- * executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int
-smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait)
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
- struct call_data_struct data;
- int cpus;
-
- spin_lock(&call_lock);
- cpus = num_online_cpus() - 1;
- if (!cpus) {
- spin_unlock(&call_lock);
- return 0;
- }
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC */
- send_IPI_allbutself(IPI_CALL_FUNC);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
- call_data = NULL;
-
- spin_unlock(&call_lock);
- return 0;
+ send_IPI_mask(mask, IPI_CALL_FUNC);
}
-EXPORT_SYMBOL(smp_call_function);
/*
* this function calls the 'stop' function on all other CPUs in the system.
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 308772f7cdd..547a48d78bd 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -40,22 +40,21 @@
#include <linux/percpu.h>
#include <linux/bitops.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <asm/cache.h>
#include <asm/current.h>
#include <asm/delay.h>
-#include <asm/ia32.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/machvec.h>
#include <asm/mca.h>
#include <asm/page.h>
+#include <asm/paravirt.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/sal.h>
-#include <asm/system.h>
#include <asm/tlbflush.h>
#include <asm/unistd.h>
#include <asm/sn/arch.h>
@@ -76,13 +75,6 @@
#endif
/*
- * Store all idle threads, this can be reused instead of creating
- * a new thread. Also avoids complicated thread destroy functionality
- * for idle threads.
- */
-struct task_struct *idle_thread_array[NR_CPUS];
-
-/*
* Global array allocated for NR_CPUS at boot time
*/
struct sal_to_os_boot sal_boot_rendez_state[NR_CPUS];
@@ -95,13 +87,7 @@ struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0];
#define set_brendez_area(x) (sal_state_for_booting_cpu = &sal_boot_rendez_state[(x)]);
-#define get_idle_for_cpu(x) (idle_thread_array[(x)])
-#define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p))
-
#else
-
-#define get_idle_for_cpu(x) (NULL)
-#define set_idle_for_cpu(x,p)
#define set_brendez_area(x)
#endif
@@ -120,7 +106,6 @@ static volatile unsigned long go[SLAVE + 1];
#define DEBUG_ITC_SYNC 0
-extern void __devinit calibrate_delay (void);
extern void start_ap (void);
extern unsigned long ia64_iobase;
@@ -131,16 +116,12 @@ struct task_struct *task_for_booting_cpu;
*/
DEFINE_PER_CPU(int, cpu_state);
-/* Bitmasks of currently online, and possible CPUs */
-cpumask_t cpu_online_map;
-EXPORT_SYMBOL(cpu_online_map);
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(cpu_possible_map);
-
cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
-cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_core_map);
+DEFINE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+
int smp_num_siblings = 1;
-int smp_num_cpucores = 1;
/* which logical CPU number maps to which CPU (physical APIC ID) */
volatile int ia64_cpu_to_sapicid[NR_CPUS];
@@ -317,7 +298,7 @@ ia64_sync_itc (unsigned int master)
go[MASTER] = 1;
- if (smp_call_function_single(master, sync_master, NULL, 1, 0) < 0) {
+ if (smp_call_function_single(master, sync_master, NULL, 0) < 0) {
printk(KERN_ERR "sync_itc: failed to get attention of CPU %u!\n", master);
return;
}
@@ -366,12 +347,11 @@ ia64_sync_itc (unsigned int master)
/*
* Ideally sets up per-cpu profiling hooks. Doesn't do much now...
*/
-static inline void __devinit
-smp_setup_percpu_timer (void)
+static inline void smp_setup_percpu_timer(void)
{
}
-static void __cpuinit
+static void
smp_callin (void)
{
int cpuid, phys_id, itc_master;
@@ -395,12 +375,17 @@ smp_callin (void)
fix_b0_for_bsp();
- lock_ipi_calllock();
+ /*
+ * numa_node_id() works after this.
+ */
+ set_numa_node(cpu_to_node_map[cpuid]);
+ set_numa_mem(local_memory_node(cpu_to_node_map[cpuid]));
+
spin_lock(&vector_lock);
/* Setup the per cpu irq handling data structures */
__setup_vector_irq(cpuid);
- cpu_set(cpuid, cpu_online_map);
- unlock_ipi_calllock();
+ notify_cpu_starting(cpuid);
+ set_cpu_online(cpuid, true);
per_cpu(cpu_state, cpuid) = CPU_ONLINE;
spin_unlock(&vector_lock);
@@ -446,10 +431,6 @@ smp_callin (void)
calibrate_delay();
local_cpu_data->loops_per_jiffy = loops_per_jiffy;
-#ifdef CONFIG_IA32_SUPPORT
- ia32_gdt_init();
-#endif
-
/*
* Allow the master to continue.
*/
@@ -461,77 +442,29 @@ smp_callin (void)
/*
* Activate a secondary processor. head.S calls this.
*/
-int __cpuinit
+int
start_secondary (void *unused)
{
/* Early console may use I/O ports */
ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase));
+#ifndef CONFIG_PRINTK_TIME
Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id());
+#endif
efi_map_pal_code();
cpu_init();
preempt_disable();
smp_callin();
- cpu_idle();
+ cpu_startup_entry(CPUHP_ONLINE);
return 0;
}
-struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
-{
- return NULL;
-}
-
-struct create_idle {
- struct work_struct work;
- struct task_struct *idle;
- struct completion done;
- int cpu;
-};
-
-void __cpuinit
-do_fork_idle(struct work_struct *work)
-{
- struct create_idle *c_idle =
- container_of(work, struct create_idle, work);
-
- c_idle->idle = fork_idle(c_idle->cpu);
- complete(&c_idle->done);
-}
-
-static int __cpuinit
-do_boot_cpu (int sapicid, int cpu)
+static int
+do_boot_cpu (int sapicid, int cpu, struct task_struct *idle)
{
int timeout;
- struct create_idle c_idle = {
- .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle),
- .cpu = cpu,
- .done = COMPLETION_INITIALIZER(c_idle.done),
- };
-
- c_idle.idle = get_idle_for_cpu(cpu);
- if (c_idle.idle) {
- init_idle(c_idle.idle, cpu);
- goto do_rest;
- }
-
- /*
- * We can't use kernel_thread since we must avoid to reschedule the child.
- */
- if (!keventd_up() || current_is_keventd())
- c_idle.work.func(&c_idle.work);
- else {
- schedule_work(&c_idle.work);
- wait_for_completion(&c_idle.done);
- }
-
- if (IS_ERR(c_idle.idle))
- panic("failed fork for CPU %d", cpu);
-
- set_idle_for_cpu(cpu, c_idle.idle);
-
-do_rest:
- task_for_booting_cpu = c_idle.idle;
+ task_for_booting_cpu = idle;
Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid);
set_brendez_area(cpu);
@@ -551,7 +484,7 @@ do_rest:
if (!cpu_isset(cpu, cpu_callin_map)) {
printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid);
ia64_cpu_to_sapicid[cpu] = -1;
- cpu_clear(cpu, cpu_online_map); /* was set in smp_callin() */
+ set_cpu_online(cpu, false); /* was set in smp_callin() */
return -EINVAL;
}
return 0;
@@ -581,15 +514,14 @@ smp_build_cpu_map (void)
}
ia64_cpu_to_sapicid[0] = boot_cpu_id;
- cpus_clear(cpu_present_map);
- cpu_set(0, cpu_present_map);
- cpu_set(0, cpu_possible_map);
+ init_cpu_present(cpumask_of(0));
+ set_cpu_possible(0, true);
for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) {
sapicid = smp_boot_data.cpu_phys_id[i];
if (sapicid == boot_cpu_id)
continue;
- cpu_set(cpu, cpu_present_map);
- cpu_set(cpu, cpu_possible_map);
+ set_cpu_present(cpu, true);
+ set_cpu_possible(cpu, true);
ia64_cpu_to_sapicid[cpu] = sapicid;
cpu++;
}
@@ -609,10 +541,6 @@ smp_prepare_cpus (unsigned int max_cpus)
smp_setup_percpu_timer();
- /*
- * We have the boot CPU online for sure.
- */
- cpu_set(0, cpu_online_map);
cpu_set(0, cpu_callin_map);
local_cpu_data->loops_per_jiffy = loops_per_jiffy;
@@ -627,21 +555,20 @@ smp_prepare_cpus (unsigned int max_cpus)
*/
if (!max_cpus) {
printk(KERN_INFO "SMP mode deactivated.\n");
- cpus_clear(cpu_online_map);
- cpus_clear(cpu_present_map);
- cpus_clear(cpu_possible_map);
- cpu_set(0, cpu_online_map);
- cpu_set(0, cpu_present_map);
- cpu_set(0, cpu_possible_map);
+ init_cpu_online(cpumask_of(0));
+ init_cpu_present(cpumask_of(0));
+ init_cpu_possible(cpumask_of(0));
return;
}
}
-void __devinit smp_prepare_boot_cpu(void)
+void smp_prepare_boot_cpu(void)
{
- cpu_set(smp_processor_id(), cpu_online_map);
+ set_cpu_online(smp_processor_id(), true);
cpu_set(smp_processor_id(), cpu_callin_map);
+ set_numa_node(cpu_to_node_map[smp_processor_id()]);
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
+ paravirt_post_smp_prepare_boot_cpu();
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -650,12 +577,12 @@ clear_cpu_sibling_map(int cpu)
{
int i;
- for_each_cpu_mask(i, cpu_sibling_map[cpu])
- cpu_clear(cpu, cpu_sibling_map[i]);
+ for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
+ cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
for_each_cpu_mask(i, cpu_core_map[cpu])
cpu_clear(cpu, cpu_core_map[i]);
- cpu_sibling_map[cpu] = cpu_core_map[cpu] = CPU_MASK_NONE;
+ per_cpu(cpu_sibling_map, cpu) = cpu_core_map[cpu] = CPU_MASK_NONE;
}
static void
@@ -666,7 +593,7 @@ remove_siblinginfo(int cpu)
if (cpu_data(cpu)->threads_per_core == 1 &&
cpu_data(cpu)->cores_per_socket == 1) {
cpu_clear(cpu, cpu_core_map[cpu]);
- cpu_clear(cpu, cpu_sibling_map[cpu]);
+ cpu_clear(cpu, per_cpu(cpu_sibling_map, cpu));
return;
}
@@ -681,8 +608,8 @@ extern void fixup_irqs(void);
int migrate_platform_irqs(unsigned int cpu)
{
int new_cpei_cpu;
- irq_desc_t *desc = NULL;
- cpumask_t mask;
+ struct irq_data *data = NULL;
+ const struct cpumask *mask;
int retval = 0;
/*
@@ -694,23 +621,23 @@ int migrate_platform_irqs(unsigned int cpu)
/*
* Now re-target the CPEI to a different processor
*/
- new_cpei_cpu = any_online_cpu(cpu_online_map);
- mask = cpumask_of_cpu(new_cpei_cpu);
+ new_cpei_cpu = cpumask_any(cpu_online_mask);
+ mask = cpumask_of(new_cpei_cpu);
set_cpei_target_cpu(new_cpei_cpu);
- desc = irq_desc + ia64_cpe_irq;
+ data = irq_get_irq_data(ia64_cpe_irq);
/*
* Switch for now, immediately, we need to do fake intr
* as other interrupts, but need to study CPEI behaviour with
* polling before making changes.
*/
- if (desc) {
- desc->chip->disable(ia64_cpe_irq);
- desc->chip->set_affinity(ia64_cpe_irq, mask);
- desc->chip->enable(ia64_cpe_irq);
- printk ("Re-targetting CPEI to cpu %d\n", new_cpei_cpu);
+ if (data && data->chip) {
+ data->chip->irq_disable(data);
+ data->chip->irq_set_affinity(data, mask, false);
+ data->chip->irq_enable(data);
+ printk ("Re-targeting CPEI to cpu %d\n", new_cpei_cpu);
}
}
- if (!desc) {
+ if (!data) {
printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu);
retval = -EBUSY;
}
@@ -736,15 +663,14 @@ int __cpu_disable(void)
return -EBUSY;
}
- cpu_clear(cpu, cpu_online_map);
+ set_cpu_online(cpu, false);
if (migrate_platform_irqs(cpu)) {
- cpu_set(cpu, cpu_online_map);
- return (-EBUSY);
+ set_cpu_online(cpu, true);
+ return -EBUSY;
}
remove_siblinginfo(cpu);
- cpu_clear(cpu, cpu_online_map);
fixup_irqs();
local_flush_tlb_all();
cpu_clear(cpu, cpu_callin_map);
@@ -766,17 +692,6 @@ void __cpu_die(unsigned int cpu)
}
printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
-#else /* !CONFIG_HOTPLUG_CPU */
-int __cpu_disable(void)
-{
- return -ENOSYS;
-}
-
-void __cpu_die(unsigned int cpu)
-{
- /* We said "no" in __cpu_disable */
- BUG();
-}
#endif /* CONFIG_HOTPLUG_CPU */
void
@@ -797,8 +712,7 @@ smp_cpus_done (unsigned int dummy)
(int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100);
}
-static inline void __devinit
-set_cpu_sibling_map(int cpu)
+static inline void set_cpu_sibling_map(int cpu)
{
int i;
@@ -807,15 +721,15 @@ set_cpu_sibling_map(int cpu)
cpu_set(i, cpu_core_map[cpu]);
cpu_set(cpu, cpu_core_map[i]);
if (cpu_data(cpu)->core_id == cpu_data(i)->core_id) {
- cpu_set(i, cpu_sibling_map[cpu]);
- cpu_set(cpu, cpu_sibling_map[i]);
+ cpu_set(i, per_cpu(cpu_sibling_map, cpu));
+ cpu_set(cpu, per_cpu(cpu_sibling_map, i));
}
}
}
}
-int __cpuinit
-__cpu_up (unsigned int cpu)
+int
+__cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int ret;
int sapicid;
@@ -833,13 +747,13 @@ __cpu_up (unsigned int cpu)
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
/* Processor goes to start_secondary(), sets online flag */
- ret = do_boot_cpu(sapicid, cpu);
+ ret = do_boot_cpu(sapicid, cpu, tidle);
if (ret < 0)
return ret;
if (cpu_data(cpu)->threads_per_core == 1 &&
cpu_data(cpu)->cores_per_socket == 1) {
- cpu_set(cpu, cpu_sibling_map[cpu]);
+ cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
cpu_set(cpu, cpu_core_map[cpu]);
return 0;
}
@@ -877,27 +791,40 @@ init_smp_config(void)
* identify_siblings(cpu) gets called from identify_cpu. This populates the
* information related to logical execution units in per_cpu_data structure.
*/
-void __devinit
-identify_siblings(struct cpuinfo_ia64 *c)
+void identify_siblings(struct cpuinfo_ia64 *c)
{
- s64 status;
+ long status;
u16 pltid;
pal_logical_to_physical_t info;
- if (smp_num_cpucores == 1 && smp_num_siblings == 1)
- return;
+ status = ia64_pal_logical_to_phys(-1, &info);
+ if (status != PAL_STATUS_SUCCESS) {
+ if (status != PAL_STATUS_UNIMPLEMENTED) {
+ printk(KERN_ERR
+ "ia64_pal_logical_to_phys failed with %ld\n",
+ status);
+ return;
+ }
- if ((status = ia64_pal_logical_to_phys(-1, &info)) != PAL_STATUS_SUCCESS) {
- printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n",
- status);
- return;
+ info.overview_ppid = 0;
+ info.overview_cpp = 1;
+ info.overview_tpc = 1;
}
- if ((status = ia64_sal_physical_id_info(&pltid)) != PAL_STATUS_SUCCESS) {
- printk(KERN_ERR "ia64_sal_pltid failed with %ld\n", status);
+
+ status = ia64_sal_physical_id_info(&pltid);
+ if (status != PAL_STATUS_SUCCESS) {
+ if (status != PAL_STATUS_UNIMPLEMENTED)
+ printk(KERN_ERR
+ "ia64_sal_pltid failed with %ld\n",
+ status);
return;
}
c->socket_id = (pltid << 8) | info.overview_ppid;
+
+ if (info.overview_cpp == 1 && info.overview_tpc == 1)
+ return;
+
c->cores_per_socket = info.overview_cpp;
c->threads_per_core = info.overview_tpc;
c->num_log = info.overview_num_log;
diff --git a/arch/ia64/kernel/stacktrace.c b/arch/ia64/kernel/stacktrace.c
new file mode 100644
index 00000000000..5af2783a87f
--- /dev/null
+++ b/arch/ia64/kernel/stacktrace.c
@@ -0,0 +1,39 @@
+/*
+ * arch/ia64/kernel/stacktrace.c
+ *
+ * Stack trace management functions
+ *
+ */
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+#include <linux/module.h>
+
+static void
+ia64_do_save_stack(struct unw_frame_info *info, void *arg)
+{
+ struct stack_trace *trace = arg; /* destination buffer, passed in as the opaque callback argument */
+ unsigned long ip;
+ int skip = trace->skip; /* number of leading frames still to be dropped */
+
+ trace->nr_entries = 0; /* start from an empty trace */
+ do {
+ unw_get_ip(info, &ip);
+ if (ip == 0) /* a zero ip ends the walk */
+ break;
+ if (skip == 0) {
+ trace->entries[trace->nr_entries++] = ip;
+ if (trace->nr_entries == trace->max_entries) /* buffer full: stop recording */
+ break;
+ } else
+ skip--;
+ } while (unw_unwind(info) >= 0); /* continue while the unwind step returns a non-negative status */
+}
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer by handing ia64_do_save_stack() and 'trace' to unw_init_running().
+ */
+void save_stack_trace(struct stack_trace *trace)
+{
+ unw_init_running(ia64_do_save_stack, trace);
+}
+EXPORT_SYMBOL(save_stack_trace);
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index 1eda194b955..41e33f84c18 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -25,9 +25,9 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len
unsigned long pgoff, unsigned long flags)
{
long map_shared = (flags & MAP_SHARED);
- unsigned long start_addr, align_mask = PAGE_SIZE - 1;
+ unsigned long align_mask = 0;
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
+ struct vm_unmapped_area_info info;
if (len > RGN_MAP_LIMIT)
return -ENOMEM;
@@ -44,7 +44,7 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len
addr = 0;
#endif
if (!addr)
- addr = mm->free_area_cache;
+ addr = TASK_UNMAPPED_BASE;
if (map_shared && (TASK_SIZE > 0xfffffffful))
/*
@@ -53,28 +53,15 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len
* tasks, we prefer to avoid exhausting the address space too quickly by
* limiting alignment to a single page.
*/
- align_mask = SHMLBA - 1;
-
- full_search:
- start_addr = addr = (addr + align_mask) & ~align_mask;
-
- for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
- /* At this point: (!vma || addr < vma->vm_end). */
- if (TASK_SIZE - len < addr || RGN_MAP_LIMIT - len < REGION_OFFSET(addr)) {
- if (start_addr != TASK_UNMAPPED_BASE) {
- /* Start a new search --- just in case we missed some holes. */
- addr = TASK_UNMAPPED_BASE;
- goto full_search;
- }
- return -ENOMEM;
- }
- if (!vma || addr + len <= vma->vm_start) {
- /* Remember the address where we stopped this search: */
- mm->free_area_cache = addr + len;
- return addr;
- }
- addr = (vma->vm_end + align_mask) & ~align_mask;
- }
+ align_mask = PAGE_MASK & (SHMLBA - 1);
+
+ info.flags = 0;
+ info.length = len;
+ info.low_limit = addr;
+ info.high_limit = TASK_SIZE;
+ info.align_mask = align_mask;
+ info.align_offset = 0;
+ return vm_unmapped_area(&info);
}
asmlinkage long
@@ -100,51 +87,7 @@ sys_getpagesize (void)
asmlinkage unsigned long
ia64_brk (unsigned long brk)
{
- unsigned long rlim, retval, newbrk, oldbrk;
- struct mm_struct *mm = current->mm;
-
- /*
- * Most of this replicates the code in sys_brk() except for an additional safety
- * check and the clearing of r8. However, we can't call sys_brk() because we need
- * to acquire the mmap_sem before we can do the test...
- */
- down_write(&mm->mmap_sem);
-
- if (brk < mm->end_code)
- goto out;
- newbrk = PAGE_ALIGN(brk);
- oldbrk = PAGE_ALIGN(mm->brk);
- if (oldbrk == newbrk)
- goto set_brk;
-
- /* Always allow shrinking brk. */
- if (brk <= mm->brk) {
- if (!do_munmap(mm, newbrk, oldbrk-newbrk))
- goto set_brk;
- goto out;
- }
-
- /* Check against unimplemented/unmapped addresses: */
- if ((newbrk - oldbrk) > RGN_MAP_LIMIT || REGION_OFFSET(newbrk) > RGN_MAP_LIMIT)
- goto out;
-
- /* Check against rlimit.. */
- rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
- if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
- goto out;
-
- /* Check against existing mmap mappings. */
- if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
- goto out;
-
- /* Ok, looks good - let it rip. */
- if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
- goto out;
-set_brk:
- mm->brk = brk;
-out:
- retval = mm->brk;
- up_write(&mm->mmap_sem);
+ unsigned long retval = sys_brk(brk);
force_successful_syscall_return();
return retval;
}
@@ -154,13 +97,13 @@ out:
* and r9) as this is faster than doing a copy_to_user().
*/
asmlinkage long
-sys_pipe (void)
+sys_ia64_pipe (void)
{
struct pt_regs *regs = task_pt_regs(current);
int fd[2];
int retval;
- retval = do_pipe(fd);
+ retval = do_pipe_flags(fd, 0);
if (retval)
goto out;
retval = fd[0];
@@ -185,39 +128,6 @@ int ia64_mmap_check(unsigned long addr, unsigned long len,
return 0;
}
-static inline unsigned long
-do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff)
-{
- struct file *file = NULL;
-
- flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
- if (!(flags & MAP_ANONYMOUS)) {
- file = fget(fd);
- if (!file)
- return -EBADF;
-
- if (!file->f_op || !file->f_op->mmap) {
- addr = -ENODEV;
- goto out;
- }
- }
-
- /* Careful about overflows.. */
- len = PAGE_ALIGN(len);
- if (!len || len > TASK_SIZE) {
- addr = -EINVAL;
- goto out;
- }
-
- down_write(&current->mm->mmap_sem);
- addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
- up_write(&current->mm->mmap_sem);
-
-out: if (file)
- fput(file);
- return addr;
-}
-
/*
* mmap2() is like mmap() except that the offset is expressed in units
* of PAGE_SIZE (instead of bytes). This allows to mmap2() (pieces
@@ -226,7 +136,7 @@ out: if (file)
asmlinkage unsigned long
sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff)
{
- addr = do_mmap2(addr, len, prot, flags, fd, pgoff);
+ addr = sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
if (!IS_ERR((void *) addr))
force_successful_syscall_return();
return addr;
@@ -238,7 +148,7 @@ sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, lo
if (offset_in_page(off) != 0)
return -EINVAL;
- addr = do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+ addr = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
if (!IS_ERR((void *) addr))
force_successful_syscall_return();
return addr;
@@ -248,22 +158,9 @@ asmlinkage unsigned long
ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags,
unsigned long new_addr)
{
- extern unsigned long do_mremap (unsigned long addr,
- unsigned long old_len,
- unsigned long new_len,
- unsigned long flags,
- unsigned long new_addr);
-
- down_write(&current->mm->mmap_sem);
- {
- addr = do_mremap(addr, old_len, new_len, flags, new_addr);
- }
- up_write(&current->mm->mmap_sem);
-
- if (IS_ERR((void *) addr))
- return addr;
-
- force_successful_syscall_return();
+ addr = sys_mremap(addr, old_len, new_len, flags, new_addr);
+ if (!IS_ERR((void *) addr))
+ force_successful_syscall_return();
return addr;
}
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 98cfc90cab1..71c52bc7c28 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -19,23 +19,22 @@
#include <linux/interrupt.h>
#include <linux/efi.h>
#include <linux/timex.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/platform_device.h>
#include <asm/machvec.h>
#include <asm/delay.h>
#include <asm/hw_irq.h>
+#include <asm/paravirt.h>
#include <asm/ptrace.h>
#include <asm/sal.h>
#include <asm/sections.h>
-#include <asm/system.h>
#include "fsyscall_gtod_data.h"
-static cycle_t itc_get_cycles(void);
+static cycle_t itc_get_cycles(struct clocksource *cs);
-struct fsyscall_gtod_data_t fsyscall_gtod_data = {
- .lock = SEQLOCK_UNLOCKED,
-};
+struct fsyscall_gtod_data_t fsyscall_gtod_data;
struct itc_jitter_data_t itc_jitter_data;
@@ -48,23 +47,110 @@ EXPORT_SYMBOL(last_cli_ip);
#endif
+#ifdef CONFIG_PARAVIRT
+/* We need to define a real function for sched_clock, to override the
+ weak default version */
+unsigned long long sched_clock(void)
+{
+ return paravirt_sched_clock();
+}
+#endif
+
+#ifdef CONFIG_PARAVIRT
+static void
+paravirt_clocksource_resume(struct clocksource *cs)
+{
+ if (pv_time_ops.clocksource_resume)
+ pv_time_ops.clocksource_resume();
+}
+#endif
+
static struct clocksource clocksource_itc = {
- .name = "itc",
- .rating = 350,
- .read = itc_get_cycles,
- .mask = CLOCKSOURCE_MASK(64),
- .mult = 0, /*to be caluclated*/
- .shift = 16,
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .name = "itc",
+ .rating = 350,
+ .read = itc_get_cycles,
+ .mask = CLOCKSOURCE_MASK(64),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+#ifdef CONFIG_PARAVIRT
+ .resume = paravirt_clocksource_resume,
+#endif
};
static struct clocksource *itc_clocksource;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+
+#include <linux/kernel_stat.h>
+
+extern cputime_t cycle_to_cputime(u64 cyc);
+
+void vtime_account_user(struct task_struct *tsk)
+{
+ cputime_t delta_utime;
+ struct thread_info *ti = task_thread_info(tsk);
+
+ if (ti->ac_utime) {
+ delta_utime = cycle_to_cputime(ti->ac_utime);
+ account_user_time(tsk, delta_utime, delta_utime);
+ ti->ac_utime = 0;
+ }
+}
+
+/*
+ * Called from the context switch with interrupts disabled, to charge all
+ * accumulated times to the current process, and to prepare accounting on
+ * the next process.
+ */
+void arch_vtime_task_switch(struct task_struct *prev)
+{
+ struct thread_info *pi = task_thread_info(prev);
+ struct thread_info *ni = task_thread_info(current);
+
+ pi->ac_stamp = ni->ac_stamp;
+ ni->ac_stime = ni->ac_utime = 0;
+}
+
+/*
+ * Account time for a transition between system, hard irq or soft irq state.
+ * Note that this function must be called with interrupts disabled.
+ */
+static cputime_t vtime_delta(struct task_struct *tsk)
+{
+ struct thread_info *ti = task_thread_info(tsk);
+ cputime_t delta_stime;
+ __u64 now;
+
+ WARN_ON_ONCE(!irqs_disabled());
+
+ now = ia64_get_itc();
+
+ delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
+ ti->ac_stime = 0;
+ ti->ac_stamp = now;
+
+ return delta_stime;
+}
+
+void vtime_account_system(struct task_struct *tsk)
+{
+ cputime_t delta = vtime_delta(tsk);
+
+ account_system_time(tsk, 0, delta, delta);
+}
+EXPORT_SYMBOL_GPL(vtime_account_system);
+
+void vtime_account_idle(struct task_struct *tsk)
+{
+ account_idle_time(vtime_delta(tsk));
+}
+
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+
static irqreturn_t
timer_interrupt (int irq, void *dev_id)
{
unsigned long new_itm;
- if (unlikely(cpu_is_offline(smp_processor_id()))) {
+ if (cpu_is_offline(smp_processor_id())) {
return IRQ_HANDLED;
}
@@ -78,24 +164,18 @@ timer_interrupt (int irq, void *dev_id)
profile_tick(CPU_PROFILING);
+ if (paravirt_do_steal_accounting(&new_itm))
+ goto skip_process_time_accounting;
+
while (1) {
update_process_times(user_mode(get_irq_regs()));
new_itm += local_cpu_data->itm_delta;
- if (smp_processor_id() == time_keeper_id) {
- /*
- * Here we are in the timer irq handler. We have irqs locally
- * disabled, but we don't know if the timer_bh is running on
- * another CPU. We need to avoid to SMP race by acquiring the
- * xtime_lock.
- */
- write_seqlock(&xtime_lock);
- do_timer(1);
- local_cpu_data->itm_next = new_itm;
- write_sequnlock(&xtime_lock);
- } else
- local_cpu_data->itm_next = new_itm;
+ if (smp_processor_id() == time_keeper_id)
+ xtime_update(1);
+
+ local_cpu_data->itm_next = new_itm;
if (time_after(new_itm, ia64_get_itc()))
break;
@@ -107,13 +187,15 @@ timer_interrupt (int irq, void *dev_id)
local_irq_disable();
}
+skip_process_time_accounting:
+
do {
/*
* If we're too close to the next clock tick for
* comfort, we increase the safety margin by
* intentionally dropping the next tick(s). We do NOT
* update itm.next because that would force us to call
- * do_timer() which in turn would let our clock run
+ * xtime_update() which in turn would let our clock run
* too fast (with the potentially devastating effect
* of losing monotony of time).
*/
@@ -162,8 +244,7 @@ static int __init nojitter_setup(char *str)
__setup("nojitter", nojitter_setup);
-void __devinit
-ia64_init_itm (void)
+void ia64_init_itm(void)
{
unsigned long platform_base_freq, itc_freq;
struct pal_freq_ratio itc_ratio, proc_ratio;
@@ -256,22 +337,24 @@ ia64_init_itm (void)
*/
clocksource_itc.rating = 50;
+ paravirt_init_missing_ticks_accounting(smp_processor_id());
+
+ /* avoid softlockup message when cpu is unplugged and plugged again. */
+ touch_softlockup_watchdog();
+
/* Setup the CPU local timer tick */
ia64_cpu_local_tick();
if (!itc_clocksource) {
- /* Sort out mult/shift values: */
- clocksource_itc.mult =
- clocksource_hz2mult(local_cpu_data->itc_freq,
- clocksource_itc.shift);
- clocksource_register(&clocksource_itc);
+ clocksource_register_hz(&clocksource_itc,
+ local_cpu_data->itc_freq);
itc_clocksource = &clocksource_itc;
}
}
-static cycle_t itc_get_cycles(void)
+static cycle_t itc_get_cycles(struct clocksource *cs)
{
- u64 lcycle, now, ret;
+ unsigned long lcycle, now, ret;
if (!itc_jitter_data.itc_jitter)
return get_cycles();
@@ -297,27 +380,35 @@ static cycle_t itc_get_cycles(void)
static struct irqaction timer_irqaction = {
.handler = timer_interrupt,
- .flags = IRQF_DISABLED | IRQF_IRQPOLL,
+ .flags = IRQF_IRQPOLL,
.name = "timer"
};
-void __devinit ia64_disable_timer(void)
+static struct platform_device rtc_efi_dev = {
+ .name = "rtc-efi",
+ .id = -1,
+};
+
+static int __init rtc_init(void)
{
- ia64_set_itv(1 << 16);
+ if (platform_device_register(&rtc_efi_dev) < 0)
+ printk(KERN_ERR "unable to register rtc device...\n");
+
+ /* not necessarily an error */
+ return 0;
+}
+module_init(rtc_init);
+
+void read_persistent_clock(struct timespec *ts)
+{
+ efi_gettimeofday(ts);
}
void __init
time_init (void)
{
register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction);
- efi_gettimeofday(&xtime);
ia64_init_itm();
-
- /*
- * Initialize wall_to_monotonic such that adding it to xtime will yield zero, the
- * tv_nsec field must be normalized (i.e., 0 <= nsec < NSEC_PER_SEC).
- */
- set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec);
}
/*
@@ -344,52 +435,29 @@ udelay (unsigned long usecs)
}
EXPORT_SYMBOL(udelay);
-static unsigned long long ia64_itc_printk_clock(void)
+/* IA64 doesn't cache the timezone */
+void update_vsyscall_tz(void)
{
- if (ia64_get_kr(IA64_KR_PER_CPU_DATA))
- return sched_clock();
- return 0;
}
-static unsigned long long ia64_default_printk_clock(void)
-{
- return (unsigned long long)(jiffies_64 - INITIAL_JIFFIES) *
- (1000000000/HZ);
-}
-
-unsigned long long (*ia64_printk_clock)(void) = &ia64_default_printk_clock;
-
-unsigned long long printk_clock(void)
+void update_vsyscall_old(struct timespec *wall, struct timespec *wtm,
+ struct clocksource *c, u32 mult)
{
- return ia64_printk_clock();
-}
-
-void __init
-ia64_setup_printk_clock(void)
-{
- if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT))
- ia64_printk_clock = ia64_itc_printk_clock;
-}
-
-void update_vsyscall(struct timespec *wall, struct clocksource *c)
-{
- unsigned long flags;
-
- write_seqlock_irqsave(&fsyscall_gtod_data.lock, flags);
+ write_seqcount_begin(&fsyscall_gtod_data.seq);
/* copy fsyscall clock data */
fsyscall_gtod_data.clk_mask = c->mask;
- fsyscall_gtod_data.clk_mult = c->mult;
+ fsyscall_gtod_data.clk_mult = mult;
fsyscall_gtod_data.clk_shift = c->shift;
- fsyscall_gtod_data.clk_fsys_mmio = c->fsys_mmio;
+ fsyscall_gtod_data.clk_fsys_mmio = c->archdata.fsys_mmio;
fsyscall_gtod_data.clk_cycle_last = c->cycle_last;
/* copy kernel time structures */
fsyscall_gtod_data.wall_time.tv_sec = wall->tv_sec;
fsyscall_gtod_data.wall_time.tv_nsec = wall->tv_nsec;
- fsyscall_gtod_data.monotonic_time.tv_sec = wall_to_monotonic.tv_sec
+ fsyscall_gtod_data.monotonic_time.tv_sec = wtm->tv_sec
+ wall->tv_sec;
- fsyscall_gtod_data.monotonic_time.tv_nsec = wall_to_monotonic.tv_nsec
+ fsyscall_gtod_data.monotonic_time.tv_nsec = wtm->tv_nsec
+ wall->tv_nsec;
/* normalize */
@@ -398,6 +466,6 @@ void update_vsyscall(struct timespec *wall, struct clocksource *c)
fsyscall_gtod_data.monotonic_time.tv_sec++;
}
- write_sequnlock_irqrestore(&fsyscall_gtod_data.lock, flags);
+ write_seqcount_end(&fsyscall_gtod_data.seq);
}
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 94ae3c87d82..f295f9abba4 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -17,40 +17,57 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/node.h>
+#include <linux/slab.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/nodemask.h>
#include <linux/notifier.h>
+#include <linux/export.h>
#include <asm/mmzone.h>
#include <asm/numa.h>
#include <asm/cpu.h>
static struct ia64_cpu *sysfs_cpus;
-int arch_register_cpu(int num)
+void arch_fix_phys_package_id(int num, u32 slot)
{
-#if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU)
+#ifdef CONFIG_SMP
+ if (cpu_data(num)->socket_id == -1)
+ cpu_data(num)->socket_id = slot;
+#endif
+}
+EXPORT_SYMBOL_GPL(arch_fix_phys_package_id);
+
+
+#ifdef CONFIG_HOTPLUG_CPU
+int __ref arch_register_cpu(int num)
+{
+#ifdef CONFIG_ACPI
/*
- * If CPEI can be re-targetted or if this is not
+ * If CPEI can be re-targeted or if this is not
* CPEI target, then it is hotpluggable
*/
if (can_cpei_retarget() || !is_cpu_cpei_target(num))
sysfs_cpus[num].cpu.hotpluggable = 1;
map_cpu_to_node(num, node_cpuid[num].nid);
#endif
-
return register_cpu(&sysfs_cpus[num].cpu, num);
}
+EXPORT_SYMBOL(arch_register_cpu);
-#ifdef CONFIG_HOTPLUG_CPU
-
-void arch_unregister_cpu(int num)
+void __ref arch_unregister_cpu(int num)
{
unregister_cpu(&sysfs_cpus[num].cpu);
+#ifdef CONFIG_ACPI
unmap_cpu_from_node(num, cpu_to_node(num));
+#endif
}
-EXPORT_SYMBOL(arch_register_cpu);
EXPORT_SYMBOL(arch_unregister_cpu);
+#else
+static int __init arch_register_cpu(int num)
+{
+ return register_cpu(&sysfs_cpus[num].cpu, num);
+}
#endif /*CONFIG_HOTPLUG_CPU*/
@@ -122,7 +139,7 @@ static struct cpu_cache_info all_cpu_cache_info[NR_CPUS];
#define LEAF_KOBJECT_PTR(x,y) (&all_cpu_cache_info[x].cache_leaves[y])
#ifdef CONFIG_SMP
-static void cache_shared_cpu_map_setup( unsigned int cpu,
+static void cache_shared_cpu_map_setup(unsigned int cpu,
struct cache_info * this_leaf)
{
pal_cache_shared_info_t csi;
@@ -203,8 +220,9 @@ static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf)
ssize_t len;
cpumask_t shared_cpu_map;
- cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map);
- len = cpumask_scnprintf(buf, NR_CPUS+1, shared_cpu_map);
+ cpumask_and(&shared_cpu_map,
+ &this_leaf->shared_cpu_map, cpu_online_mask);
+ len = cpumask_scnprintf(buf, NR_CPUS+1, &shared_cpu_map);
len += sprintf(buf+len, "\n");
return len;
}
@@ -257,7 +275,7 @@ static struct attribute * cache_default_attrs[] = {
#define to_object(k) container_of(k, struct cache_info, kobj)
#define to_attr(a) container_of(a, struct cache_attr, attr)
-static ssize_t cache_show(struct kobject * kobj, struct attribute * attr, char * buf)
+static ssize_t ia64_cache_show(struct kobject * kobj, struct attribute * attr, char * buf)
{
struct cache_attr *fattr = to_attr(attr);
struct cache_info *this_leaf = to_object(kobj);
@@ -267,8 +285,8 @@ static ssize_t cache_show(struct kobject * kobj, struct attribute * attr, char *
return ret;
}
-static struct sysfs_ops cache_sysfs_ops = {
- .show = cache_show
+static const struct sysfs_ops cache_sysfs_ops = {
+ .show = ia64_cache_show
};
static struct kobj_type cache_ktype = {
@@ -280,7 +298,7 @@ static struct kobj_type cache_ktype_percpu_entry = {
.sysfs_ops = &cache_sysfs_ops,
};
-static void __cpuinit cpu_cache_sysfs_exit(unsigned int cpu)
+static void cpu_cache_sysfs_exit(unsigned int cpu)
{
kfree(all_cpu_cache_info[cpu].cache_leaves);
all_cpu_cache_info[cpu].cache_leaves = NULL;
@@ -289,12 +307,12 @@ static void __cpuinit cpu_cache_sysfs_exit(unsigned int cpu)
return;
}
-static int __cpuinit cpu_cache_sysfs_init(unsigned int cpu)
+static int cpu_cache_sysfs_init(unsigned int cpu)
{
- u64 i, levels, unique_caches;
+ unsigned long i, levels, unique_caches;
pal_cache_config_info_t cci;
int j;
- s64 status;
+ long status;
struct cache_info *this_cache;
int num_cache_leaves = 0;
@@ -333,7 +351,7 @@ static int __cpuinit cpu_cache_sysfs_init(unsigned int cpu)
}
/* Add cache interface for CPU device */
-static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
+static int cache_add_dev(struct device *sys_dev)
{
unsigned int cpu = sys_dev->id;
unsigned long i, j;
@@ -345,50 +363,54 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
return 0;
oldmask = current->cpus_allowed;
- retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
+ retval = set_cpus_allowed_ptr(current, cpumask_of(cpu));
if (unlikely(retval))
return retval;
retval = cpu_cache_sysfs_init(cpu);
- set_cpus_allowed(current, oldmask);
+ set_cpus_allowed_ptr(current, &oldmask);
if (unlikely(retval < 0))
return retval;
- all_cpu_cache_info[cpu].kobj.parent = &sys_dev->kobj;
- kobject_set_name(&all_cpu_cache_info[cpu].kobj, "%s", "cache");
- all_cpu_cache_info[cpu].kobj.ktype = &cache_ktype_percpu_entry;
- retval = kobject_register(&all_cpu_cache_info[cpu].kobj);
+ retval = kobject_init_and_add(&all_cpu_cache_info[cpu].kobj,
+ &cache_ktype_percpu_entry, &sys_dev->kobj,
+ "%s", "cache");
+ if (unlikely(retval < 0)) {
+ cpu_cache_sysfs_exit(cpu);
+ return retval;
+ }
for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) {
this_object = LEAF_KOBJECT_PTR(cpu,i);
- this_object->kobj.parent = &all_cpu_cache_info[cpu].kobj;
- kobject_set_name(&(this_object->kobj), "index%1lu", i);
- this_object->kobj.ktype = &cache_ktype;
- retval = kobject_register(&(this_object->kobj));
+ retval = kobject_init_and_add(&(this_object->kobj),
+ &cache_ktype,
+ &all_cpu_cache_info[cpu].kobj,
+ "index%1lu", i);
if (unlikely(retval)) {
for (j = 0; j < i; j++) {
- kobject_unregister(
- &(LEAF_KOBJECT_PTR(cpu,j)->kobj));
+ kobject_put(&(LEAF_KOBJECT_PTR(cpu,j)->kobj));
}
- kobject_unregister(&all_cpu_cache_info[cpu].kobj);
+ kobject_put(&all_cpu_cache_info[cpu].kobj);
cpu_cache_sysfs_exit(cpu);
- break;
+ return retval;
}
+ kobject_uevent(&(this_object->kobj), KOBJ_ADD);
}
+ kobject_uevent(&all_cpu_cache_info[cpu].kobj, KOBJ_ADD);
return retval;
}
/* Remove cache interface for CPU device */
-static int __cpuinit cache_remove_dev(struct sys_device * sys_dev)
+static int cache_remove_dev(struct device *sys_dev)
{
unsigned int cpu = sys_dev->id;
unsigned long i;
for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++)
- kobject_unregister(&(LEAF_KOBJECT_PTR(cpu,i)->kobj));
+ kobject_put(&(LEAF_KOBJECT_PTR(cpu,i)->kobj));
if (all_cpu_cache_info[cpu].kobj.parent) {
- kobject_unregister(&all_cpu_cache_info[cpu].kobj);
+ kobject_put(&all_cpu_cache_info[cpu].kobj);
memset(&all_cpu_cache_info[cpu].kobj,
0,
sizeof(struct kobject));
@@ -403,13 +425,13 @@ static int __cpuinit cache_remove_dev(struct sys_device * sys_dev)
* When a cpu is hot-plugged, do a check and initiate
* cache kobject if necessary
*/
-static int __cpuinit cache_cpu_callback(struct notifier_block *nfb,
+static int cache_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
- struct sys_device *sys_dev;
+ struct device *sys_dev;
- sys_dev = get_cpu_sysdev(cpu);
+ sys_dev = get_cpu_device(cpu);
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
@@ -423,21 +445,25 @@ static int __cpuinit cache_cpu_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
-static struct notifier_block __cpuinitdata cache_cpu_notifier =
+static struct notifier_block cache_cpu_notifier =
{
.notifier_call = cache_cpu_callback
};
-static int __cpuinit cache_sysfs_init(void)
+static int __init cache_sysfs_init(void)
{
int i;
+ cpu_notifier_register_begin();
+
for_each_online_cpu(i) {
- cache_cpu_callback(&cache_cpu_notifier, CPU_ONLINE,
- (void *)(long)i);
+ struct device *sys_dev = get_cpu_device((unsigned int)i);
+ cache_add_dev(sys_dev);
}
- register_hotcpu_notifier(&cache_cpu_notifier);
+ __register_hotcpu_notifier(&cache_cpu_notifier);
+
+ cpu_notifier_register_done();
return 0;
}
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index 3aeaf15e468..d3636e67a98 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -19,10 +19,10 @@
#include <linux/kdebug.h>
#include <asm/fpswa.h>
-#include <asm/ia32.h>
#include <asm/intrinsics.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
+#include <asm/setup.h>
fpswa_interface_t *fpswa_interface;
EXPORT_SYMBOL(fpswa_interface);
@@ -35,7 +35,7 @@ trap_init (void)
fpswa_interface = __va(ia64_boot_param->fpswa);
}
-void
+int
die (const char *str, struct pt_regs *regs, long err)
{
static struct {
@@ -61,28 +61,36 @@ die (const char *str, struct pt_regs *regs, long err)
if (++die.lock_owner_depth < 3) {
printk("%s[%d]: %s %ld [%d]\n",
- current->comm, current->pid, str, err, ++die_counter);
- (void) notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
- show_regs(regs);
+ current->comm, task_pid_nr(current), str, err, ++die_counter);
+ if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV)
+ != NOTIFY_STOP)
+ show_regs(regs);
+ else
+ regs = NULL;
} else
printk(KERN_ERR "Recursive die() failure, output suppressed\n");
bust_spinlocks(0);
die.lock_owner = -1;
- add_taint(TAINT_DIE);
+ add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
spin_unlock_irq(&die.lock);
+ if (!regs)
+ return 1;
+
if (panic_on_oops)
panic("Fatal exception");
do_exit(SIGSEGV);
+ return 0;
}
-void
+int
die_if_kernel (char *str, struct pt_regs *regs, long err)
{
if (!user_mode(regs))
- die(str, regs, err);
+ return die(str, regs, err);
+ return 0;
}
void
@@ -102,7 +110,8 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
if (notify_die(DIE_BREAK, "break 0", regs, break_num, TRAP_BRKPT, SIGTRAP)
== NOTIFY_STOP)
return;
- die_if_kernel("bugcheck!", regs, break_num);
+ if (die_if_kernel("bugcheck!", regs, break_num))
+ return;
sig = SIGILL; code = ILL_ILLOPC;
break;
@@ -155,8 +164,9 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
break;
default:
- if (break_num < 0x40000 || break_num > 0x100000)
- die_if_kernel("Bad break", regs, break_num);
+ if ((break_num < 0x40000 || break_num > 0x100000)
+ && die_if_kernel("Bad break", regs, break_num))
+ return;
if (break_num < 0x80000) {
sig = SIGILL; code = __ILL_BREAK;
@@ -315,7 +325,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
last.time = current_jiffies + 5 * HZ;
printk(KERN_WARNING
"%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
- current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr);
+ current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr);
}
}
}
@@ -402,14 +412,15 @@ ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3,
#endif
sprintf(buf, "IA-64 Illegal operation fault");
- die_if_kernel(buf, &regs, 0);
+ rv.fkt = 0;
+ if (die_if_kernel(buf, &regs, 0))
+ return rv;
memset(&si, 0, sizeof(si));
si.si_signo = SIGILL;
si.si_code = ILL_ILLOPC;
si.si_addr = (void __user *) (regs.cr_iip + ia64_psr(&regs)->ri);
force_sig_info(SIGILL, &si, current);
- rv.fkt = 0;
return rv;
}
@@ -453,7 +464,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
if (code == 8) {
# ifdef CONFIG_IA64_PRINT_HAZARDS
printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n",
- current->comm, current->pid,
+ current->comm, task_pid_nr(current),
regs.cr_iip + ia64_psr(&regs)->ri, regs.pr);
# endif
return;
@@ -615,21 +626,13 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
break;
case 45:
-#ifdef CONFIG_IA32_SUPPORT
- if (ia32_exception(&regs, isr) == 0)
- return;
-#endif
printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n");
printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n",
iip, ifa, isr);
force_sig(SIGSEGV, current);
- break;
+ return;
case 46:
-#ifdef CONFIG_IA32_SUPPORT
- if (ia32_intercept(&regs, isr) == 0)
- return;
-#endif
printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n");
printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n",
iip, ifa, isr, iim);
@@ -644,6 +647,6 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
sprintf(buf, "Fault %lu", vector);
break;
}
- die_if_kernel(buf, &regs, error);
- force_sig(SIGILL, current);
+ if (!die_if_kernel(buf, &regs, error))
+ force_sig(SIGILL, current);
}
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index fe6aa5a9f8f..622772b7fb6 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -13,9 +13,11 @@
* 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes.
* 2001/01/17 Add support emulation of unaligned kernel accesses.
*/
+#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/tty.h>
+#include <linux/ratelimit.h>
#include <asm/intrinsics.h>
#include <asm/processor.h>
@@ -23,12 +25,12 @@
#include <asm/uaccess.h>
#include <asm/unaligned.h>
-extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
+extern int die_if_kernel(char *str, struct pt_regs *regs, long err);
#undef DEBUG_UNALIGNED_TRAP
#ifdef DEBUG_UNALIGNED_TRAP
-# define DPRINT(a...) do { printk("%s %u: ", __FUNCTION__, __LINE__); printk (a); } while (0)
+# define DPRINT(a...) do { printk("%s %u: ", __func__, __LINE__); printk (a); } while (0)
# define DDUMP(str,vp,len) dump(str, vp, len)
static void
@@ -58,7 +60,7 @@ dump (const char *str, void *vp, size_t len)
* (i.e. don't allow attacker to fill up logs with unaligned accesses).
*/
int no_unaligned_warning;
-static int noprint_warning;
+int unaligned_dump_stack;
/*
* For M-unit:
@@ -674,9 +676,10 @@ emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsi
* just in case.
*/
if (ld.x6_op == 1 || ld.x6_op == 3) {
- printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__);
- die_if_kernel("unaligned reference on speculative load with register update\n",
- regs, 30);
+ printk(KERN_ERR "%s: register update on speculative load, error\n", __func__);
+ if (die_if_kernel("unaligned reference on speculative load with register update\n",
+ regs, 30))
+ return;
}
@@ -1103,7 +1106,7 @@ emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs
*/
if (ld.x6_op == 1 || ld.x6_op == 3)
printk(KERN_ERR "%s: register update on speculative load pair, error\n",
- __FUNCTION__);
+ __func__);
setreg(ld.r3, ifa, 0, regs);
}
@@ -1281,24 +1284,9 @@ emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
/*
* Make sure we log the unaligned access, so that user/sysadmin can notice it and
* eventually fix the program. However, we don't want to do that for every access so we
- * pace it with jiffies. This isn't really MP-safe, but it doesn't really have to be
- * either...
+ * pace it with jiffies.
*/
-static int
-within_logging_rate_limit (void)
-{
- static unsigned long count, last_time;
-
- if (jiffies - last_time > 5*HZ)
- count = 0;
- if (count < 5) {
- last_time = jiffies;
- count++;
- return 1;
- }
- return 0;
-
-}
+static DEFINE_RATELIMIT_STATE(logging_rate_limit, 5 * HZ, 5);
void
ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
@@ -1317,7 +1305,8 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
if (ia64_psr(regs)->be) {
/* we don't support big-endian accesses */
- die_if_kernel("big-endian unaligned accesses are not supported", regs, 0);
+ if (die_if_kernel("big-endian unaligned accesses are not supported", regs, 0))
+ return;
goto force_sigbus;
}
@@ -1334,13 +1323,14 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
if (!no_unaligned_warning &&
!(current->thread.flags & IA64_THREAD_UAC_NOPRINT) &&
- within_logging_rate_limit())
+ __ratelimit(&logging_rate_limit))
{
char buf[200]; /* comm[] is at most 16 bytes... */
size_t len;
len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
- "ip=0x%016lx\n\r", current->comm, current->pid,
+ "ip=0x%016lx\n\r", current->comm,
+ task_pid_nr(current),
ifa, regs->cr_iip + ipsr->ri);
/*
* Don't call tty_write_message() if we're in the kernel; we might
@@ -1352,9 +1342,8 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
/* watch for command names containing %s */
printk(KERN_WARNING "%s", buf);
} else {
- if (no_unaligned_warning && !noprint_warning) {
- noprint_warning = 1;
- printk(KERN_WARNING "%s(%d) encountered an "
+ if (no_unaligned_warning) {
+ printk_once(KERN_WARNING "%s(%d) encountered an "
"unaligned exception which required\n"
"kernel assistance, which degrades "
"the performance of the application.\n"
@@ -1363,13 +1352,16 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
"administrator\n"
"echo 0 > /proc/sys/kernel/ignore-"
"unaligned-usertrap to re-enable\n",
- current->comm, current->pid);
+ current->comm, task_pid_nr(current));
}
}
} else {
- if (within_logging_rate_limit())
+ if (__ratelimit(&logging_rate_limit)) {
printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
ifa, regs->cr_iip + ipsr->ri);
+ if (unaligned_dump_stack)
+ dump_stack();
+ }
set_fs(KERNEL_DS);
}
@@ -1487,16 +1479,19 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
case LDFA_OP:
case LDFCCLR_OP:
case LDFCNC_OP:
- case LDF_IMM_OP:
- case LDFA_IMM_OP:
- case LDFCCLR_IMM_OP:
- case LDFCNC_IMM_OP:
if (u.insn.x)
ret = emulate_load_floatpair(ifa, u.insn, regs);
else
ret = emulate_load_float(ifa, u.insn, regs);
break;
+ case LDF_IMM_OP:
+ case LDFA_IMM_OP:
+ case LDFCCLR_IMM_OP:
+ case LDFCNC_IMM_OP:
+ ret = emulate_load_float(ifa, u.insn, regs);
+ break;
+
case STF_OP:
case STF_IMM_OP:
ret = emulate_store_float(ifa, u.insn, regs);
@@ -1530,7 +1525,8 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
ia64_handle_exception(regs, eh);
goto done;
}
- die_if_kernel("error during unaligned kernel access\n", regs, ret);
+ if (die_if_kernel("error during unaligned kernel access\n", regs, ret))
+ return;
/* NOT_REACHED */
}
force_sigbus:
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index c58e933694d..20e8a9b21d7 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2001-2006 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2001-2008 Silicon Graphics, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU General Public License
@@ -18,14 +18,13 @@
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/string.h>
-#include <linux/slab.h>
#include <linux/efi.h>
#include <linux/genalloc.h>
+#include <linux/gfp.h>
#include <asm/page.h>
#include <asm/pal.h>
-#include <asm/system.h>
#include <asm/pgtable.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <asm/tlbflush.h>
#include <asm/sn/arch.h>
@@ -98,7 +97,8 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
/* attempt to allocate a granule's worth of cached memory pages */
- page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
+ page = alloc_pages_exact_node(nid,
+ GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
IA64_GRANULE_SHIFT-PAGE_SHIFT);
if (!page) {
mutex_unlock(&uc_pool->add_chunk_mutex);
@@ -118,13 +118,12 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++)
SetPageUncached(&page[i]);
- flush_tlb_kernel_range(uc_addr, uc_adddr + IA64_GRANULE_SIZE);
+ flush_tlb_kernel_range(uc_addr, uc_addr + IA64_GRANULE_SIZE);
status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) {
atomic_set(&uc_pool->status, 0);
- status = smp_call_function(uncached_ipi_visibility, uc_pool,
- 0, 1);
+ status = smp_call_function(uncached_ipi_visibility, uc_pool, 1);
if (status || atomic_read(&uc_pool->status))
goto failed;
} else if (status != PAL_VISIBILITY_OK)
@@ -146,7 +145,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
if (status != PAL_STATUS_SUCCESS)
goto failed;
atomic_set(&uc_pool->status, 0);
- status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 0, 1);
+ status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 1);
if (status || atomic_read(&uc_pool->status))
goto failed;
@@ -177,12 +176,13 @@ failed:
* uncached_alloc_page
*
* @starting_nid: node id of node to start with, or -1
+ * @n_pages: number of contiguous pages to allocate
*
- * Allocate 1 uncached page. Allocates on the requested node. If no
- * uncached pages are available on the requested node, roundrobin starting
- * with the next higher node.
+ * Allocate the specified number of contiguous uncached pages on the
+ * requested node. If not enough contiguous uncached pages are available
+ * on the requested node, roundrobin starting with the next higher node.
*/
-unsigned long uncached_alloc_page(int starting_nid)
+unsigned long uncached_alloc_page(int starting_nid, int n_pages)
{
unsigned long uc_addr;
struct uncached_pool *uc_pool;
@@ -196,13 +196,14 @@ unsigned long uncached_alloc_page(int starting_nid)
nid = starting_nid;
do {
- if (!node_online(nid))
+ if (!node_state(nid, N_HIGH_MEMORY))
continue;
uc_pool = &uncached_pools[nid];
if (uc_pool->pool == NULL)
continue;
do {
- uc_addr = gen_pool_alloc(uc_pool->pool, PAGE_SIZE);
+ uc_addr = gen_pool_alloc(uc_pool->pool,
+ n_pages * PAGE_SIZE);
if (uc_addr != 0)
return uc_addr;
} while (uncached_add_chunk(uc_pool, nid) == 0);
@@ -217,11 +218,12 @@ EXPORT_SYMBOL(uncached_alloc_page);
/*
* uncached_free_page
*
- * @uc_addr: uncached address of page to free
+ * @uc_addr: uncached address of first page to free
+ * @n_pages: number of contiguous pages to free
*
- * Free a single uncached page.
+ * Free the specified number of uncached pages.
*/
-void uncached_free_page(unsigned long uc_addr)
+void uncached_free_page(unsigned long uc_addr, int n_pages)
{
int nid = paddr_to_nid(uc_addr - __IA64_UNCACHED_OFFSET);
struct gen_pool *pool = uncached_pools[nid].pool;
@@ -232,7 +234,7 @@ void uncached_free_page(unsigned long uc_addr)
if ((uc_addr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET)
panic("uncached_free_page invalid address %lx\n", uc_addr);
- gen_pool_free(pool, uc_addr, PAGE_SIZE);
+ gen_pool_free(pool, uc_addr, n_pages * PAGE_SIZE);
}
EXPORT_SYMBOL(uncached_free_page);
@@ -247,8 +249,7 @@ EXPORT_SYMBOL(uncached_free_page);
* Called at boot time to build a map of pages that can be used for
* memory special operations.
*/
-static int __init uncached_build_memmap(unsigned long uc_start,
- unsigned long uc_end, void *arg)
+static int __init uncached_build_memmap(u64 uc_start, u64 uc_end, void *arg)
{
int nid = paddr_to_nid(uc_start - __IA64_UNCACHED_OFFSET);
struct gen_pool *pool = uncached_pools[nid].pool;
@@ -268,7 +269,7 @@ static int __init uncached_init(void)
{
int nid;
- for_each_online_node(nid) {
+ for_each_node_state(nid, N_ONLINE) {
uncached_pools[nid].pool = gen_pool_create(PAGE_SHIFT, nid);
mutex_init(&uncached_pools[nid].add_chunk_mutex);
}
diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
index c1bdb513181..8f66195999e 100644
--- a/arch/ia64/kernel/unwind.c
+++ b/arch/ia64/kernel/unwind.c
@@ -41,7 +41,6 @@
#include <asm/ptrace_offsets.h>
#include <asm/rse.h>
#include <asm/sections.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include "entry.h"
@@ -257,7 +256,7 @@ pt_regs_off (unsigned long reg)
off = unw.pt_regs_offsets[reg];
if (off < 0) {
- UNW_DPRINT(0, "unwind.%s: bad scratch reg r%lu\n", __FUNCTION__, reg);
+ UNW_DPRINT(0, "unwind.%s: bad scratch reg r%lu\n", __func__, reg);
off = 0;
}
return (unsigned long) off;
@@ -268,13 +267,13 @@ get_scratch_regs (struct unw_frame_info *info)
{
if (!info->pt) {
/* This should not happen with valid unwind info. */
- UNW_DPRINT(0, "unwind.%s: bad unwind info: resetting info->pt\n", __FUNCTION__);
+ UNW_DPRINT(0, "unwind.%s: bad unwind info: resetting info->pt\n", __func__);
if (info->flags & UNW_FLAG_INTERRUPT_FRAME)
info->pt = (unsigned long) ((struct pt_regs *) info->psp - 1);
else
info->pt = info->sp - 16;
}
- UNW_DPRINT(3, "unwind.%s: sp 0x%lx pt 0x%lx\n", __FUNCTION__, info->sp, info->pt);
+ UNW_DPRINT(3, "unwind.%s: sp 0x%lx pt 0x%lx\n", __func__, info->sp, info->pt);
return (struct pt_regs *) info->pt;
}
@@ -294,7 +293,7 @@ unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char
return 0;
}
UNW_DPRINT(0, "unwind.%s: trying to access non-existent r%u\n",
- __FUNCTION__, regnum);
+ __func__, regnum);
return -1;
}
@@ -341,7 +340,7 @@ unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char
{
UNW_DPRINT(0, "unwind.%s: %p outside of regstk "
"[0x%lx-0x%lx)\n",
- __FUNCTION__, (void *) addr,
+ __func__, (void *) addr,
info->regstk.limit,
info->regstk.top);
return -1;
@@ -374,7 +373,7 @@ unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char
|| (unsigned long) addr >= info->regstk.top)
{
UNW_DPRINT(0, "unwind.%s: ignoring attempt to access register outside "
- "of rbs\n", __FUNCTION__);
+ "of rbs\n", __func__);
return -1;
}
if ((unsigned long) nat_addr >= info->regstk.top)
@@ -385,7 +384,7 @@ unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char
if (write) {
if (read_only(addr)) {
UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
- __FUNCTION__);
+ __func__);
} else {
*addr = *val;
if (*nat)
@@ -427,13 +426,13 @@ unw_access_br (struct unw_frame_info *info, int regnum, unsigned long *val, int
default:
UNW_DPRINT(0, "unwind.%s: trying to access non-existent b%u\n",
- __FUNCTION__, regnum);
+ __func__, regnum);
return -1;
}
if (write)
if (read_only(addr)) {
UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
- __FUNCTION__);
+ __func__);
} else
*addr = *val;
else
@@ -450,7 +449,7 @@ unw_access_fr (struct unw_frame_info *info, int regnum, struct ia64_fpreg *val,
if ((unsigned) (regnum - 2) >= 126) {
UNW_DPRINT(0, "unwind.%s: trying to access non-existent f%u\n",
- __FUNCTION__, regnum);
+ __func__, regnum);
return -1;
}
@@ -482,7 +481,7 @@ unw_access_fr (struct unw_frame_info *info, int regnum, struct ia64_fpreg *val,
if (write)
if (read_only(addr)) {
UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
- __FUNCTION__);
+ __func__);
} else
*addr = *val;
else
@@ -572,14 +571,14 @@ unw_access_ar (struct unw_frame_info *info, int regnum, unsigned long *val, int
default:
UNW_DPRINT(0, "unwind.%s: trying to access non-existent ar%u\n",
- __FUNCTION__, regnum);
+ __func__, regnum);
return -1;
}
if (write) {
if (read_only(addr)) {
UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
- __FUNCTION__);
+ __func__);
} else
*addr = *val;
} else
@@ -600,7 +599,7 @@ unw_access_pr (struct unw_frame_info *info, unsigned long *val, int write)
if (write) {
if (read_only(addr)) {
UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
- __FUNCTION__);
+ __func__);
} else
*addr = *val;
} else
@@ -699,7 +698,7 @@ decode_abreg (unsigned char abreg, int memory)
default:
break;
}
- UNW_DPRINT(0, "unwind.%s: bad abreg=0x%x\n", __FUNCTION__, abreg);
+ UNW_DPRINT(0, "unwind.%s: bad abreg=0x%x\n", __func__, abreg);
return UNW_REG_LC;
}
@@ -739,7 +738,7 @@ spill_next_when (struct unw_reg_info **regp, struct unw_reg_info *lim, unw_word
return;
}
}
- UNW_DPRINT(0, "unwind.%s: excess spill!\n", __FUNCTION__);
+ UNW_DPRINT(0, "unwind.%s: excess spill!\n", __func__);
}
static inline void
@@ -855,11 +854,11 @@ desc_abi (unsigned char abi, unsigned char context, struct unw_state_record *sr)
{
if (abi == 3 && context == 'i') {
sr->flags |= UNW_FLAG_INTERRUPT_FRAME;
- UNW_DPRINT(3, "unwind.%s: interrupt frame\n", __FUNCTION__);
+ UNW_DPRINT(3, "unwind.%s: interrupt frame\n", __func__);
}
else
UNW_DPRINT(0, "unwind%s: ignoring unwabi(abi=0x%x,context=0x%x)\n",
- __FUNCTION__, abi, context);
+ __func__, abi, context);
}
static inline void
@@ -1204,10 +1203,10 @@ desc_spill_sprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word
static inline unw_hash_index_t
hash (unsigned long ip)
{
-# define hashmagic 0x9e3779b97f4a7c16UL /* based on (sqrt(5)/2-1)*2^64 */
+ /* magic number = ((sqrt(5)-1)/2)*2^64 */
+ static const unsigned long hashmagic = 0x9e3779b97f4a7c16UL;
- return (ip >> 4)*hashmagic >> (64 - UNW_LOG_HASH_SIZE);
-#undef hashmagic
+ return (ip >> 4) * hashmagic >> (64 - UNW_LOG_HASH_SIZE);
}
static inline long
@@ -1347,7 +1346,7 @@ script_emit (struct unw_script *script, struct unw_insn insn)
{
if (script->count >= UNW_MAX_SCRIPT_LEN) {
UNW_DPRINT(0, "unwind.%s: script exceeds maximum size of %u instructions!\n",
- __FUNCTION__, UNW_MAX_SCRIPT_LEN);
+ __func__, UNW_MAX_SCRIPT_LEN);
return;
}
script->insn[script->count++] = insn;
@@ -1389,7 +1388,7 @@ emit_nat_info (struct unw_state_record *sr, int i, struct unw_script *script)
default:
UNW_DPRINT(0, "unwind.%s: don't know how to emit nat info for where = %u\n",
- __FUNCTION__, r->where);
+ __func__, r->where);
return;
}
insn.opc = opc;
@@ -1446,7 +1445,7 @@ compile_reg (struct unw_state_record *sr, int i, struct unw_script *script)
val = offsetof(struct pt_regs, f6) + 16*(rval - 6);
else
UNW_DPRINT(0, "unwind.%s: kernel may not touch f%lu\n",
- __FUNCTION__, rval);
+ __func__, rval);
}
break;
@@ -1474,7 +1473,7 @@ compile_reg (struct unw_state_record *sr, int i, struct unw_script *script)
default:
UNW_DPRINT(0, "unwind%s: register %u has unexpected `where' value of %u\n",
- __FUNCTION__, i, r->where);
+ __func__, i, r->where);
break;
}
insn.opc = opc;
@@ -1531,7 +1530,7 @@ build_script (struct unw_frame_info *info)
struct unw_labeled_state *ls, *next;
unsigned long ip = info->ip;
struct unw_state_record sr;
- struct unw_table *table;
+ struct unw_table *table, *prev;
struct unw_reg_info *r;
struct unw_insn insn;
u8 *dp, *desc_end;
@@ -1547,10 +1546,10 @@ build_script (struct unw_frame_info *info)
r->when = UNW_WHEN_NEVER;
sr.pr_val = info->pr;
- UNW_DPRINT(3, "unwind.%s: ip 0x%lx\n", __FUNCTION__, ip);
+ UNW_DPRINT(3, "unwind.%s: ip 0x%lx\n", __func__, ip);
script = script_new(ip);
if (!script) {
- UNW_DPRINT(0, "unwind.%s: failed to create unwind script\n", __FUNCTION__);
+ UNW_DPRINT(0, "unwind.%s: failed to create unwind script\n", __func__);
STAT(unw.stat.script.build_time += ia64_get_itc() - start);
return NULL;
}
@@ -1560,16 +1559,31 @@ build_script (struct unw_frame_info *info)
STAT(parse_start = ia64_get_itc());
+ prev = NULL;
for (table = unw.tables; table; table = table->next) {
if (ip >= table->start && ip < table->end) {
+ /*
+ * Leave the kernel unwind table at the very front,
+ * lest moving it breaks some assumption elsewhere.
+ * Otherwise, move the matching table to the second
+ * position in the list so that traversals can benefit
+ * from commonality in backtrace paths.
+ */
+ if (prev && prev != unw.tables) {
+ /* unw is safe - we're already spinlocked */
+ prev->next = table->next;
+ table->next = unw.tables->next;
+ unw.tables->next = table;
+ }
e = lookup(table, ip - table->segment_base);
break;
}
+ prev = table;
}
if (!e) {
/* no info, return default unwinder (leaf proc, no mem stack, no saved regs) */
UNW_DPRINT(1, "unwind.%s: no unwind info for ip=0x%lx (prev ip=0x%lx)\n",
- __FUNCTION__, ip, unw.cache[info->prev_script].ip);
+ __func__, ip, unw.cache[info->prev_script].ip);
sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR;
sr.curr.reg[UNW_REG_RP].when = -1;
sr.curr.reg[UNW_REG_RP].val = 0;
@@ -1618,13 +1632,13 @@ build_script (struct unw_frame_info *info)
sr.curr.reg[UNW_REG_RP].when = -1;
sr.curr.reg[UNW_REG_RP].val = sr.return_link_reg;
UNW_DPRINT(1, "unwind.%s: using default for rp at ip=0x%lx where=%d val=0x%lx\n",
- __FUNCTION__, ip, sr.curr.reg[UNW_REG_RP].where,
+ __func__, ip, sr.curr.reg[UNW_REG_RP].where,
sr.curr.reg[UNW_REG_RP].val);
}
#ifdef UNW_DEBUG
UNW_DPRINT(1, "unwind.%s: state record for func 0x%lx, t=%u:\n",
- __FUNCTION__, table->segment_base + e->start_offset, sr.when_target);
+ __func__, table->segment_base + e->start_offset, sr.when_target);
for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) {
if (r->where != UNW_WHERE_NONE || r->when != UNW_WHEN_NEVER) {
UNW_DPRINT(1, " %s <- ", unw.preg_name[r - sr.curr.reg]);
@@ -1746,7 +1760,7 @@ run_script (struct unw_script *script, struct unw_frame_info *state)
} else {
s[dst] = 0;
UNW_DPRINT(0, "unwind.%s: no state->pt, dst=%ld, val=%ld\n",
- __FUNCTION__, dst, val);
+ __func__, dst, val);
}
break;
@@ -1756,7 +1770,7 @@ run_script (struct unw_script *script, struct unw_frame_info *state)
else {
s[dst] = 0;
UNW_DPRINT(0, "unwind.%s: UNW_INSN_MOVE_CONST bad val=%ld\n",
- __FUNCTION__, val);
+ __func__, val);
}
break;
@@ -1791,7 +1805,7 @@ run_script (struct unw_script *script, struct unw_frame_info *state)
|| s[val] < TASK_SIZE)
{
UNW_DPRINT(0, "unwind.%s: rejecting bad psp=0x%lx\n",
- __FUNCTION__, s[val]);
+ __func__, s[val]);
break;
}
#endif
@@ -1825,7 +1839,7 @@ find_save_locs (struct unw_frame_info *info)
if ((info->ip & (local_cpu_data->unimpl_va_mask | 0xf)) || info->ip < TASK_SIZE) {
/* don't let obviously bad addresses pollute the cache */
/* FIXME: should really be level 0 but it occurs too often. KAO */
- UNW_DPRINT(1, "unwind.%s: rejecting bad ip=0x%lx\n", __FUNCTION__, info->ip);
+ UNW_DPRINT(1, "unwind.%s: rejecting bad ip=0x%lx\n", __func__, info->ip);
info->rp_loc = NULL;
return -1;
}
@@ -1838,7 +1852,7 @@ find_save_locs (struct unw_frame_info *info)
spin_unlock_irqrestore(&unw.lock, flags);
UNW_DPRINT(0,
"unwind.%s: failed to locate/build unwind script for ip %lx\n",
- __FUNCTION__, info->ip);
+ __func__, info->ip);
return -1;
}
have_write_lock = 1;
@@ -1882,21 +1896,21 @@ unw_unwind (struct unw_frame_info *info)
if (!unw_valid(info, info->rp_loc)) {
/* FIXME: should really be level 0 but it occurs too often. KAO */
UNW_DPRINT(1, "unwind.%s: failed to locate return link (ip=0x%lx)!\n",
- __FUNCTION__, info->ip);
+ __func__, info->ip);
STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
return -1;
}
/* restore the ip */
ip = info->ip = *info->rp_loc;
if (ip < GATE_ADDR) {
- UNW_DPRINT(2, "unwind.%s: reached user-space (ip=0x%lx)\n", __FUNCTION__, ip);
+ UNW_DPRINT(2, "unwind.%s: reached user-space (ip=0x%lx)\n", __func__, ip);
STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
return -1;
}
/* validate the previous stack frame pointer */
if (!unw_valid(info, info->pfs_loc)) {
- UNW_DPRINT(0, "unwind.%s: failed to locate ar.pfs!\n", __FUNCTION__);
+ UNW_DPRINT(0, "unwind.%s: failed to locate ar.pfs!\n", __func__);
STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
return -1;
}
@@ -1912,13 +1926,13 @@ unw_unwind (struct unw_frame_info *info)
num_regs = *info->cfm_loc & 0x7f; /* size of frame */
info->pfs_loc =
(unsigned long *) (info->pt + offsetof(struct pt_regs, ar_pfs));
- UNW_DPRINT(3, "unwind.%s: interrupt_frame pt 0x%lx\n", __FUNCTION__, info->pt);
+ UNW_DPRINT(3, "unwind.%s: interrupt_frame pt 0x%lx\n", __func__, info->pt);
} else
num_regs = (*info->cfm_loc >> 7) & 0x7f; /* size of locals */
info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->bsp, -num_regs);
if (info->bsp < info->regstk.limit || info->bsp > info->regstk.top) {
UNW_DPRINT(0, "unwind.%s: bsp (0x%lx) out of range [0x%lx-0x%lx]\n",
- __FUNCTION__, info->bsp, info->regstk.limit, info->regstk.top);
+ __func__, info->bsp, info->regstk.limit, info->regstk.top);
STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
return -1;
}
@@ -1927,14 +1941,14 @@ unw_unwind (struct unw_frame_info *info)
info->sp = info->psp;
if (info->sp < info->memstk.top || info->sp > info->memstk.limit) {
UNW_DPRINT(0, "unwind.%s: sp (0x%lx) out of range [0x%lx-0x%lx]\n",
- __FUNCTION__, info->sp, info->memstk.top, info->memstk.limit);
+ __func__, info->sp, info->memstk.top, info->memstk.limit);
STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
return -1;
}
if (info->ip == prev_ip && info->sp == prev_sp && info->bsp == prev_bsp) {
UNW_DPRINT(0, "unwind.%s: ip, sp, bsp unchanged; stopping here (ip=0x%lx)\n",
- __FUNCTION__, ip);
+ __func__, ip);
STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
return -1;
}
@@ -1961,7 +1975,7 @@ unw_unwind_to_user (struct unw_frame_info *info)
if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp)
< IA64_PT_REGS_SIZE) {
UNW_DPRINT(0, "unwind.%s: ran off the top of the kernel stack\n",
- __FUNCTION__);
+ __func__);
break;
}
if (unw_is_intr_frame(info) &&
@@ -1971,13 +1985,13 @@ unw_unwind_to_user (struct unw_frame_info *info)
unw_get_rp(info, &ip);
UNW_DPRINT(0, "unwind.%s: failed to read "
"predicate register (ip=0x%lx)\n",
- __FUNCTION__, ip);
+ __func__, ip);
return -1;
}
} while (unw_unwind(info) >= 0);
unw_get_ip(info, &ip);
UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n",
- __FUNCTION__, ip);
+ __func__, ip);
return -1;
}
EXPORT_SYMBOL(unw_unwind_to_user);
@@ -2028,7 +2042,7 @@ init_frame_info (struct unw_frame_info *info, struct task_struct *t,
" pr 0x%lx\n"
" sw 0x%lx\n"
" sp 0x%lx\n",
- __FUNCTION__, (unsigned long) t, rbslimit, rbstop, stktop, stklimit,
+ __func__, (unsigned long) t, rbslimit, rbstop, stktop, stklimit,
info->pr, (unsigned long) info->sw, info->sp);
STAT(unw.stat.api.init_time += ia64_get_itc() - start; local_irq_restore(flags));
}
@@ -2047,7 +2061,7 @@ unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t, struct
" bsp 0x%lx\n"
" sol 0x%lx\n"
" ip 0x%lx\n",
- __FUNCTION__, info->bsp, sol, info->ip);
+ __func__, info->bsp, sol, info->ip);
find_save_locs(info);
}
@@ -2058,7 +2072,7 @@ unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t)
{
struct switch_stack *sw = (struct switch_stack *) (t->thread.ksp + 16);
- UNW_DPRINT(1, "unwind.%s\n", __FUNCTION__);
+ UNW_DPRINT(1, "unwind.%s\n", __func__);
unw_init_frame_info(info, t, sw);
}
EXPORT_SYMBOL(unw_init_from_blocked_task);
@@ -2088,7 +2102,7 @@ unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned lon
if (end - start <= 0) {
UNW_DPRINT(0, "unwind.%s: ignoring attempt to insert empty unwind table\n",
- __FUNCTION__);
+ __func__);
return NULL;
}
@@ -2119,14 +2133,14 @@ unw_remove_unwind_table (void *handle)
if (!handle) {
UNW_DPRINT(0, "unwind.%s: ignoring attempt to remove non-existent unwind table\n",
- __FUNCTION__);
+ __func__);
return;
}
table = handle;
if (table == &unw.kernel_table) {
UNW_DPRINT(0, "unwind.%s: sorry, freeing the kernel's unwind table is a "
- "no-can-do!\n", __FUNCTION__);
+ "no-can-do!\n", __func__);
return;
}
@@ -2139,7 +2153,7 @@ unw_remove_unwind_table (void *handle)
break;
if (!prev) {
UNW_DPRINT(0, "unwind.%s: failed to find unwind table %p\n",
- __FUNCTION__, (void *) table);
+ __func__, (void *) table);
spin_unlock_irqrestore(&unw.lock, flags);
return;
}
@@ -2149,7 +2163,7 @@ unw_remove_unwind_table (void *handle)
/* next, remove hash table entries for this table */
- for (index = 0; index <= UNW_HASH_SIZE; ++index) {
+ for (index = 0; index < UNW_HASH_SIZE; ++index) {
tmp = unw.cache + unw.hash[index];
if (unw.hash[index] >= UNW_CACHE_SIZE
|| tmp->ip < table->start || tmp->ip >= table->end)
@@ -2185,7 +2199,7 @@ create_gate_table (void)
}
if (!punw) {
- printk("%s: failed to find gate DSO's unwind table!\n", __FUNCTION__);
+ printk("%s: failed to find gate DSO's unwind table!\n", __func__);
return 0;
}
@@ -2202,7 +2216,7 @@ create_gate_table (void)
unw.gate_table = kmalloc(size, GFP_KERNEL);
if (!unw.gate_table) {
unw.gate_table_size = 0;
- printk(KERN_ERR "%s: unable to create unwind data for gate page!\n", __FUNCTION__);
+ printk(KERN_ERR "%s: unable to create unwind data for gate page!\n", __func__);
return 0;
}
unw.gate_table_size = size;
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 00232b4357b..84f8a52ac5a 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -1,287 +1,248 @@
#include <asm/cache.h>
#include <asm/ptrace.h>
-#include <asm/system.h>
#include <asm/pgtable.h>
-#define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE)
#include <asm-generic/vmlinux.lds.h>
-#define IVT_TEXT \
- VMLINUX_SYMBOL(__start_ivt_text) = .; \
- *(.text.ivt) \
- VMLINUX_SYMBOL(__end_ivt_text) = .;
-
OUTPUT_FORMAT("elf64-ia64-little")
OUTPUT_ARCH(ia64)
ENTRY(phys_start)
jiffies = jiffies_64;
+
PHDRS {
- code PT_LOAD;
- percpu PT_LOAD;
- data PT_LOAD;
- note PT_NOTE;
- unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */
+ code PT_LOAD;
+ percpu PT_LOAD;
+ data PT_LOAD;
+ note PT_NOTE;
+ unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */
}
-SECTIONS
-{
- /* Sections to be discarded */
- /DISCARD/ : {
- *(.exit.text)
- *(.exit.data)
- *(.exitcall.exit)
- *(.IA_64.unwind.exit.text)
- *(.IA_64.unwind_info.exit.text)
- }
- v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */
- phys_start = _start - LOAD_OFFSET;
-
- code : { } :code
- . = KERNEL_START;
-
- _text = .;
- _stext = .;
-
- .text : AT(ADDR(.text) - LOAD_OFFSET)
- {
- IVT_TEXT
- TEXT_TEXT
- SCHED_TEXT
- LOCK_TEXT
- KPROBES_TEXT
- *(.gnu.linkonce.t*)
- }
- .text.head : AT(ADDR(.text.head) - LOAD_OFFSET)
- { *(.text.head) }
- .text2 : AT(ADDR(.text2) - LOAD_OFFSET)
- { *(.text2) }
-#ifdef CONFIG_SMP
- .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET)
- { *(.text.lock) }
-#endif
- _etext = .;
-
- /* Read-only data */
-
- NOTES :code :note /* put .notes in text and mark in PT_NOTE */
- code_continues : {} :code /* switch back to regular program... */
+SECTIONS {
+ /*
+ * unwind exit sections must be discarded before
+ * the rest of the sections get included.
+ */
+ /DISCARD/ : {
+ *(.IA_64.unwind.exit.text)
+ *(.IA_64.unwind_info.exit.text)
+ *(.comment)
+ *(.note)
+ }
- /* Exception table */
- . = ALIGN(16);
- __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET)
- {
- __start___ex_table = .;
- *(__ex_table)
- __stop___ex_table = .;
+ v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */
+ phys_start = _start - LOAD_OFFSET;
+
+ code : {
+ } :code
+ . = KERNEL_START;
+
+ _text = .;
+ _stext = .;
+
+ .text : AT(ADDR(.text) - LOAD_OFFSET) {
+ __start_ivt_text = .;
+ *(.text..ivt)
+ __end_ivt_text = .;
+ TEXT_TEXT
+ SCHED_TEXT
+ LOCK_TEXT
+ KPROBES_TEXT
+ *(.gnu.linkonce.t*)
}
- /* MCA table */
- . = ALIGN(16);
- __mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET)
- {
- __start___mca_table = .;
- *(__mca_table)
- __stop___mca_table = .;
+ .text2 : AT(ADDR(.text2) - LOAD_OFFSET) {
+ *(.text2)
}
- .data.patch.phys_stack_reg : AT(ADDR(.data.patch.phys_stack_reg) - LOAD_OFFSET)
- {
- __start___phys_stack_reg_patchlist = .;
- *(.data.patch.phys_stack_reg)
- __end___phys_stack_reg_patchlist = .;
+#ifdef CONFIG_SMP
+ .text..lock : AT(ADDR(.text..lock) - LOAD_OFFSET) {
+ *(.text..lock)
+ }
+#endif
+ _etext = .;
+
+ /*
+ * Read-only data
+ */
+ NOTES :code :note /* put .notes in text and mark in PT_NOTE */
+ code_continues : {
+ } : code /* switch back to regular program... */
+
+ EXCEPTION_TABLE(16)
+
+ /* MCA table */
+ . = ALIGN(16);
+ __mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET) {
+ __start___mca_table = .;
+ *(__mca_table)
+ __stop___mca_table = .;
}
- /* Global data */
- _data = .;
-
- /* Unwind info & table: */
- . = ALIGN(8);
- .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET)
- { *(.IA_64.unwind_info*) }
- .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET)
- {
- __start_unwind = .;
- *(.IA_64.unwind*)
- __end_unwind = .;
- } :code :unwind
- code_continues2 : {} : code
+ .data..patch.phys_stack_reg : AT(ADDR(.data..patch.phys_stack_reg) - LOAD_OFFSET) {
+ __start___phys_stack_reg_patchlist = .;
+ *(.data..patch.phys_stack_reg)
+ __end___phys_stack_reg_patchlist = .;
+ }
- RODATA
+ /*
+ * Global data
+ */
+ _data = .;
- .opd : AT(ADDR(.opd) - LOAD_OFFSET)
- { *(.opd) }
+ /* Unwind info & table: */
+ . = ALIGN(8);
+ .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET) {
+ *(.IA_64.unwind_info*)
+ }
+ .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET) {
+ __start_unwind = .;
+ *(.IA_64.unwind*)
+ __end_unwind = .;
+ } :code :unwind
+ code_continues2 : {
+ } : code
- /* Initialization code and data: */
+ RODATA
- . = ALIGN(PAGE_SIZE);
- __init_begin = .;
- .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
- {
- _sinittext = .;
- *(.init.text)
- _einittext = .;
+ .opd : AT(ADDR(.opd) - LOAD_OFFSET) {
+ *(.opd)
}
- .init.data : AT(ADDR(.init.data) - LOAD_OFFSET)
- { *(.init.data) }
+ /*
+ * Initialization code and data:
+ */
+ . = ALIGN(PAGE_SIZE);
+ __init_begin = .;
-#ifdef CONFIG_BLK_DEV_INITRD
- .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET)
- {
- __initramfs_start = .;
- *(.init.ramfs)
- __initramfs_end = .;
- }
-#endif
+ INIT_TEXT_SECTION(PAGE_SIZE)
+ INIT_DATA_SECTION(16)
- . = ALIGN(16);
- .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET)
- {
- __setup_start = .;
- *(.init.setup)
- __setup_end = .;
+ .data..patch.vtop : AT(ADDR(.data..patch.vtop) - LOAD_OFFSET) {
+ __start___vtop_patchlist = .;
+ *(.data..patch.vtop)
+ __end___vtop_patchlist = .;
}
- .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET)
- {
- __initcall_start = .;
- INITCALLS
- __initcall_end = .;
+
+ .data..patch.rse : AT(ADDR(.data..patch.rse) - LOAD_OFFSET) {
+ __start___rse_patchlist = .;
+ *(.data..patch.rse)
+ __end___rse_patchlist = .;
}
- .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET)
- {
- __start___vtop_patchlist = .;
- *(.data.patch.vtop)
- __end___vtop_patchlist = .;
+ .data..patch.mckinley_e9 : AT(ADDR(.data..patch.mckinley_e9) - LOAD_OFFSET) {
+ __start___mckinley_e9_bundles = .;
+ *(.data..patch.mckinley_e9)
+ __end___mckinley_e9_bundles = .;
}
- .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET)
- {
- __start___mckinley_e9_bundles = .;
- *(.data.patch.mckinley_e9)
- __end___mckinley_e9_bundles = .;
+#if defined(CONFIG_PARAVIRT)
+ . = ALIGN(16);
+ .paravirt_bundles : AT(ADDR(.paravirt_bundles) - LOAD_OFFSET) {
+ __start_paravirt_bundles = .;
+ *(.paravirt_bundles)
+ __stop_paravirt_bundles = .;
+ }
+ . = ALIGN(16);
+ .paravirt_insts : AT(ADDR(.paravirt_insts) - LOAD_OFFSET) {
+ __start_paravirt_insts = .;
+ *(.paravirt_insts)
+ __stop_paravirt_insts = .;
}
+ . = ALIGN(16);
+ .paravirt_branches : AT(ADDR(.paravirt_branches) - LOAD_OFFSET) {
+ __start_paravirt_branches = .;
+ *(.paravirt_branches)
+ __stop_paravirt_branches = .;
+ }
+#endif
#if defined(CONFIG_IA64_GENERIC)
- /* Machine Vector */
- . = ALIGN(16);
- .machvec : AT(ADDR(.machvec) - LOAD_OFFSET)
- {
- machvec_start = .;
- *(.machvec)
- machvec_end = .;
+ /* Machine Vector */
+ . = ALIGN(16);
+ .machvec : AT(ADDR(.machvec) - LOAD_OFFSET) {
+ machvec_start = .;
+ *(.machvec)
+ machvec_end = .;
}
#endif
- . = ALIGN(8);
- __con_initcall_start = .;
- .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET)
- { *(.con_initcall.init) }
- __con_initcall_end = .;
- __security_initcall_start = .;
- .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET)
- { *(.security_initcall.init) }
- __security_initcall_end = .;
- . = ALIGN(PAGE_SIZE);
- __init_end = .;
-
- /* The initial task and kernel stack */
- .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET)
- { *(.data.init_task) }
-
- .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET)
- { *(__special_page_section)
- __start_gate_section = .;
- *(.data.gate)
- __stop_gate_section = .;
- }
- . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose
- * kernel data
- */
-
- .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET)
- { *(.data.read_mostly) }
-
- .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET)
- { *(.data.cacheline_aligned) }
-
- /* Per-cpu data: */
- percpu : { } :percpu
- . = ALIGN(PERCPU_PAGE_SIZE);
- __phys_per_cpu_start = .;
- .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
- {
- __per_cpu_start = .;
- *(.data.percpu)
- *(.data.percpu.shared_aligned)
- __per_cpu_end = .;
- }
- . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits
- * into percpu page size
- */
+#ifdef CONFIG_SMP
+ . = ALIGN(PERCPU_PAGE_SIZE);
+ __cpu0_per_cpu = .;
+ . = . + PERCPU_PAGE_SIZE; /* cpu0 per-cpu space */
+#endif
+
+ . = ALIGN(PAGE_SIZE);
+ __init_end = .;
- data : { } :data
- .data : AT(ADDR(.data) - LOAD_OFFSET)
- {
+ .data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) {
+ PAGE_ALIGNED_DATA(PAGE_SIZE)
+ . = ALIGN(PAGE_SIZE);
+ __start_gate_section = .;
+ *(.data..gate)
+ __stop_gate_section = .;
+ }
+ /*
+ * make sure the gate page doesn't expose
+ * kernel data
+ */
+ . = ALIGN(PAGE_SIZE);
+
+ /* Per-cpu data: */
+ . = ALIGN(PERCPU_PAGE_SIZE);
+ PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu)
+ __phys_per_cpu_start = __per_cpu_load;
+ /*
+ * ensure percpu data fits
+ * into percpu page size
+ */
+ . = __phys_per_cpu_start + PERCPU_PAGE_SIZE;
+
+ data : {
+ } :data
+ .data : AT(ADDR(.data) - LOAD_OFFSET) {
+ _sdata = .;
+ INIT_TASK_DATA(PAGE_SIZE)
+ CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES)
+ READ_MOSTLY_DATA(SMP_CACHE_BYTES)
DATA_DATA
*(.data1)
*(.gnu.linkonce.d*)
CONSTRUCTORS
}
- . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */
- .got : AT(ADDR(.got) - LOAD_OFFSET)
- { *(.got.plt) *(.got) }
- __gp = ADDR(.got) + 0x200000;
- /* We want the small data sections together, so single-instruction offsets
- can access them all, and initialized data all before uninitialized, so
- we can shorten the on-disk segment size. */
- .sdata : AT(ADDR(.sdata) - LOAD_OFFSET)
- { *(.sdata) *(.sdata1) *(.srdata) }
- _edata = .;
- _bss = .;
- .sbss : AT(ADDR(.sbss) - LOAD_OFFSET)
- { *(.sbss) *(.scommon) }
- .bss : AT(ADDR(.bss) - LOAD_OFFSET)
- { *(.bss) *(COMMON) }
-
- _end = .;
-
- code : { } :code
- /* Stabs debugging sections. */
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
- /* DWARF debug sections.
- Symbols in the DWARF debugging sections are relative to the beginning
- of the section so we begin them at 0. */
- /* DWARF 1 */
- .debug 0 : { *(.debug) }
- .line 0 : { *(.line) }
- /* GNU DWARF 1 extensions */
- .debug_srcinfo 0 : { *(.debug_srcinfo) }
- .debug_sfnames 0 : { *(.debug_sfnames) }
- /* DWARF 1.1 and DWARF 2 */
- .debug_aranges 0 : { *(.debug_aranges) }
- .debug_pubnames 0 : { *(.debug_pubnames) }
- /* DWARF 2 */
- .debug_info 0 : { *(.debug_info) }
- .debug_abbrev 0 : { *(.debug_abbrev) }
- .debug_line 0 : { *(.debug_line) }
- .debug_frame 0 : { *(.debug_frame) }
- .debug_str 0 : { *(.debug_str) }
- .debug_loc 0 : { *(.debug_loc) }
- .debug_macinfo 0 : { *(.debug_macinfo) }
- /* SGI/MIPS DWARF 2 extensions */
- .debug_weaknames 0 : { *(.debug_weaknames) }
- .debug_funcnames 0 : { *(.debug_funcnames) }
- .debug_typenames 0 : { *(.debug_typenames) }
- .debug_varnames 0 : { *(.debug_varnames) }
- /* These must appear regardless of . */
- /DISCARD/ : { *(.comment) }
- /DISCARD/ : { *(.note) }
+ . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */
+ .got : AT(ADDR(.got) - LOAD_OFFSET) {
+ *(.got.plt)
+ *(.got)
+ }
+ __gp = ADDR(.got) + 0x200000;
+
+ /*
+ * We want the small data sections together,
+ * so single-instruction offsets can access
+ * them all, and initialized data all before
+ * uninitialized, so we can shorten the
+ * on-disk segment size.
+ */
+ .sdata : AT(ADDR(.sdata) - LOAD_OFFSET) {
+ *(.sdata)
+ *(.sdata1)
+ *(.srdata)
+ }
+ _edata = .;
+
+ BSS_SECTION(0, 0, 0)
+
+ _end = .;
+
+ code : {
+ } :code
+
+ STABS_DEBUG
+ DWARF_DEBUG
+
+ /* Default discards */
+ DISCARDS
}