Diffstat (limited to 'arch/ia64/kernel')
73 files changed, 3836 insertions, 3902 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index c381ea95489..20678a9ed11 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -2,21 +2,19 @@
 # Makefile for the linux kernel.
 #
 
+ifdef CONFIG_DYNAMIC_FTRACE
+CFLAGS_REMOVE_ftrace.o = -pg
+endif
+
 extra-y	:= head.o init_task.o vmlinux.lds
 
-obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
-	 irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o		\
+obj-y := entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o	\
+	 irq_lsapic.o ivt.o machvec.o pal.o paravirt_patchlist.o patch.o process.o perfmon.o ptrace.o sal.o \
 	 salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
-	 unwind.o mca.o mca_asm.o topology.o
+	 unwind.o mca.o mca_asm.o topology.o dma-mapping.o
 
+obj-$(CONFIG_ACPI)		+= acpi.o acpi-ext.o
 obj-$(CONFIG_IA64_BRL_EMU)	+= brl_emu.o
-obj-$(CONFIG_IA64_GENERIC)	+= acpi-ext.o
-obj-$(CONFIG_IA64_HP_ZX1)	+= acpi-ext.o
-obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o
-
-ifneq ($(CONFIG_ACPI_PROCESSOR),)
-obj-y				+= acpi-processor.o
-endif
 
 obj-$(CONFIG_IA64_PALINFO)	+= palinfo.o
 obj-$(CONFIG_IOSAPIC)		+= iosapic.o
@@ -25,9 +23,9 @@ obj-$(CONFIG_SMP)		+= smp.o smpboot.o
 obj-$(CONFIG_NUMA)		+= numa.o
 obj-$(CONFIG_PERFMON)		+= perfmon_default_smpl.o
 obj-$(CONFIG_IA64_CYCLONE)	+= cyclone.o
-obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o jprobes.o
+obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
@@ -35,47 +33,27 @@ obj-$(CONFIG_AUDIT)		+= audit.o
 obj-$(CONFIG_PCI_MSI)		+= msi_ia64.o
 mca_recovery-y			+= mca_drv.o mca_drv_asm.o
 obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o
+obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 
-obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirtentry.o
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirtentry.o \
+				   paravirt_patch.o
 
 obj-$(CONFIG_IA64_ESI)		+= esi.o
 ifneq ($(CONFIG_IA64_ESI),)
 obj-y				+= esi_stub.o	# must be in kernel proper
 endif
-obj-$(CONFIG_DMAR)		+= pci-dma.o
-ifeq ($(CONFIG_DMAR), y)
+obj-$(CONFIG_INTEL_IOMMU)	+= pci-dma.o
 obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb.o
-endif
-
-# The gate DSO image is built using a special linker script.
-targets += gate.so gate-syms.o
-extra-y += gate.so gate-syms.o gate.lds gate.o
+obj-$(CONFIG_BINFMT_ELF)	+= elfcore.o
 
 # fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
 CFLAGS_traps.o	+= -mfixed-range=f2-f5,f16-f31
 
-CPPFLAGS_gate.lds := -P -C -U$(ARCH)
-
-quiet_cmd_gate = GATE $@
-      cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
-
-GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
-		     $(call ld-option, -Wl$(comma)--hash-style=sysv)
-$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
-	$(call if_changed,gate)
-
-$(obj)/built-in.o: $(obj)/gate-syms.o
-$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
-
-GATECFLAGS_gate-syms.o = -r
-$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
-	$(call if_changed,gate)
-
-# gate-data.o contains the gate DSO image as data in section .data.gate.
-# We must build gate.so before we can assemble it.
-# Note: kbuild does not track this dependency due to usage of .incbin
-$(obj)/gate-data.o: $(obj)/gate.so
+# The gate DSO image is built using a special linker script.
+include $(srctree)/arch/ia64/kernel/Makefile.gate
+# tell compiled for native
+CPPFLAGS_gate.lds += -D__IA64_GATE_PARAVIRTUALIZED_NATIVE
 
 # Calculate NR_IRQ = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, ...) based on config
 define sed-y
@@ -99,21 +77,18 @@ define cmd_nr_irqs
 endef
 
 # We use internal kbuild rules to avoid the "is up to date" message from make
-arch/$(SRCARCH)/kernel/nr-irqs.s: $(srctree)/arch/$(SRCARCH)/kernel/nr-irqs.c \
-		$(wildcard $(srctree)/include/asm-ia64/*/irq.h)
+arch/$(SRCARCH)/kernel/nr-irqs.s: arch/$(SRCARCH)/kernel/nr-irqs.c
	$(Q)mkdir -p $(dir $@)
	$(call if_changed_dep,cc_s_c)
 
-include/asm-ia64/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s
+include/generated/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s
	$(Q)mkdir -p $(dir $@)
	$(call cmd,nr_irqs)
 
-clean-files += $(objtree)/include/asm-ia64/nr-irqs.h
-
 #
-# native ivt.S and entry.S
+# native ivt.S, entry.S and fsys.S
 #
-ASM_PARAVIRT_OBJS = ivt.o entry.o
+ASM_PARAVIRT_OBJS = ivt.o entry.o fsys.o
 define paravirtualized_native
 AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_NATIVE
 AFLAGS_pvchk-sed-$(1) += -D__IA64_ASM_PARAVIRTUALIZED_PVCHECK
diff --git a/arch/ia64/kernel/Makefile.gate b/arch/ia64/kernel/Makefile.gate
new file mode 100644
index 00000000000..ceeffc50976
--- /dev/null
+++ b/arch/ia64/kernel/Makefile.gate
@@ -0,0 +1,27 @@
+# The gate DSO image is built using a special linker script.
+
+targets += gate.so gate-syms.o
+
+extra-y += gate.so gate-syms.o gate.lds gate.o
+
+CPPFLAGS_gate.lds := -P -C -U$(ARCH)
+
+quiet_cmd_gate = GATE $@
+      cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
+
+GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
+		     $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
+	$(call if_changed,gate)
+
+$(obj)/built-in.o: $(obj)/gate-syms.o
+$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
+
+GATECFLAGS_gate-syms.o = -r
+$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
+	$(call if_changed,gate)
+
+# gate-data.o contains the gate DSO image as data in section .data..gate.
+# We must build gate.so before we can assemble it.
+# Note: kbuild does not track this dependency due to usage of .incbin
+$(obj)/gate-data.o: $(obj)/gate.so
diff --git a/arch/ia64/kernel/acpi-ext.c b/arch/ia64/kernel/acpi-ext.c
index b7515bc808a..8b9318d311a 100644
--- a/arch/ia64/kernel/acpi-ext.c
+++ b/arch/ia64/kernel/acpi-ext.c
@@ -10,6 +10,7 @@
 
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/slab.h>
 #include <linux/acpi.h>
 
 #include <asm/acpi-ext.h>
diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c
deleted file mode 100644
index cbe6cee5a55..00000000000
--- a/arch/ia64/kernel/acpi-processor.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * arch/ia64/kernel/acpi-processor.c
- *
- * Copyright (C) 2005 Intel Corporation
- *	Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
- *	- Added _PDC for platforms with Intel CPUs
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/acpi.h>
-
-#include <acpi/processor.h>
-#include <asm/acpi.h>
-
-static void init_intel_pdc(struct acpi_processor *pr)
-{
-	struct acpi_object_list *obj_list;
-	union acpi_object *obj;
-	u32 *buf;
-
-	/* allocate and initialize pdc. It will be used later. */
-	obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL);
-	if (!obj_list) {
-		printk(KERN_ERR "Memory allocation error\n");
-		return;
-	}
-
-	obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL);
-	if (!obj) {
-		printk(KERN_ERR "Memory allocation error\n");
-		kfree(obj_list);
-		return;
-	}
-
-	buf = kmalloc(12, GFP_KERNEL);
-	if (!buf) {
-		printk(KERN_ERR "Memory allocation error\n");
-		kfree(obj);
-		kfree(obj_list);
-		return;
-	}
-
-	buf[0] = ACPI_PDC_REVISION_ID;
-	buf[1] = 1;
-	buf[2] = ACPI_PDC_EST_CAPABILITY_SMP;
-	/*
-	 * The default of PDC_SMP_T_SWCOORD bit is set for IA64 cpu so
-	 * that OSPM is capable of native ACPI throttling software
-	 * coordination using BIOS supplied _TSD info.
-	 */
-	buf[2] |= ACPI_PDC_SMP_T_SWCOORD;
-
-	obj->type = ACPI_TYPE_BUFFER;
-	obj->buffer.length = 12;
-	obj->buffer.pointer = (u8 *) buf;
-	obj_list->count = 1;
-	obj_list->pointer = obj;
-	pr->pdc = obj_list;
-
-	return;
-}
-
-/* Initialize _PDC data based on the CPU vendor */
-void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
-{
-	pr->pdc = NULL;
-	init_intel_pdc(pr);
-	return;
-}
-
-EXPORT_SYMBOL(arch_acpi_processor_init_pdc);
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index d541671caf4..615ef81def4 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -44,28 +44,19 @@
 #include <linux/efi.h>
 #include <linux/mmzone.h>
 #include <linux/nodemask.h>
+#include <linux/slab.h>
+#include <acpi/processor.h>
 #include <asm/io.h>
 #include <asm/iosapic.h>
 #include <asm/machvec.h>
 #include <asm/page.h>
-#include <asm/system.h>
 #include <asm/numa.h>
 #include <asm/sal.h>
 #include <asm/cyclone.h>
-#include <asm/xen/hypervisor.h>
-
-#define BAD_MADT_ENTRY(entry, end) (					\
-		(!entry) || (unsigned long)entry + sizeof(*entry) > end || \
-		((struct acpi_subtable_header *)entry)->length < sizeof(*entry))
 
 #define PREFIX			"ACPI: "
 
-void (*pm_idle) (void);
-EXPORT_SYMBOL(pm_idle);
-void (*pm_power_off) (void);
-EXPORT_SYMBOL(pm_power_off);
-
-u32 acpi_rsdt_forced;
+int acpi_lapic;
 unsigned int acpi_cpei_override;
 unsigned int acpi_cpei_phys_cpuid;
 
@@ -83,17 +74,15 @@ static unsigned long __init acpi_find_rsdp(void)
		       "v1.0/r0.71 tables no longer supported\n");
	return rsdp_phys;
 }
-#endif
 
 const char __init *
 acpi_get_sysname(void)
 {
-#ifdef CONFIG_IA64_GENERIC
	unsigned long rsdp_phys;
	struct acpi_table_rsdp *rsdp;
	struct acpi_table_xsdt *xsdt;
	struct acpi_table_header *hdr;
-#ifdef CONFIG_DMAR
+#ifdef CONFIG_INTEL_IOMMU
	u64 i, nentries;
 #endif
 
@@ -126,11 +115,9 @@ acpi_get_sysname(void)
			return "uv";
		else
			return "sn2";
-	} else if (xen_pv_domain() && !strcmp(hdr->oem_id, "XEN")) {
-		return "xen";
	}
 
-#ifdef CONFIG_DMAR
+#ifdef CONFIG_INTEL_IOMMU
	/* Look for Intel IOMMU */
	nentries = (hdr->length - sizeof(*hdr)) /
		sizeof(xsdt->table_offset_entry[0]);
@@ -143,30 +130,8 @@ acpi_get_sysname(void)
 #endif
 
	return "dig";
-#else
-# if defined (CONFIG_IA64_HP_SIM)
-	return "hpsim";
-# elif defined (CONFIG_IA64_HP_ZX1)
-	return "hpzx1";
-# elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB)
-	return "hpzx1_swiotlb";
-# elif defined (CONFIG_IA64_SGI_SN2)
-	return "sn2";
-# elif defined (CONFIG_IA64_SGI_UV)
-	return "uv";
-# elif defined (CONFIG_IA64_DIG)
-	return "dig";
-# elif defined (CONFIG_IA64_XEN_GUEST)
-	return "xen";
-# elif defined(CONFIG_IA64_DIG_VTD)
-	return "dig_vtd";
-# else
-#	error Unknown platform.  Fix acpi.c.
-# endif
-#endif
 }
-
-#ifdef CONFIG_ACPI
+#endif /* CONFIG_IA64_GENERIC */
 
 #define ACPI_MAX_PLATFORM_INTERRUPTS	256
 
@@ -199,6 +164,10 @@ char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
	return __va(phys_addr);
 }
 
+void __init __acpi_unmap_table(char *map, unsigned long size)
+{
+}
+
 /* --------------------------------------------------------------------------
                             Boot-time Table Parsing
    -------------------------------------------------------------------------- */
@@ -372,11 +341,11 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
	iosapic_override_isa_irq(p->source_irq, p->global_irq,
				 ((p->inti_flags & ACPI_MADT_POLARITY_MASK) ==
-				  ACPI_MADT_POLARITY_ACTIVE_HIGH) ?
-				 IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
+				  ACPI_MADT_POLARITY_ACTIVE_LOW) ?
+				 IOSAPIC_POL_LOW : IOSAPIC_POL_HIGH,
				 ((p->inti_flags & ACPI_MADT_TRIGGER_MASK) ==
-				  ACPI_MADT_TRIGGER_EDGE) ?
-				 IOSAPIC_EDGE : IOSAPIC_LEVEL);
+				  ACPI_MADT_TRIGGER_LEVEL) ?
+				 IOSAPIC_LEVEL : IOSAPIC_EDGE);
	return 0;
 }
 
@@ -446,28 +415,30 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
 #define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32)
 
 static int __initdata srat_num_cpus;	/* number of cpus */
-static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
+static u32 pxm_flag[PXM_FLAG_LEN];
 #define pxm_bit_set(bit)	(set_bit(bit,(void *)pxm_flag))
 #define pxm_bit_test(bit)	(test_bit(bit,(void *)pxm_flag))
 static struct acpi_table_slit __initdata *slit_table;
 cpumask_t early_cpu_possible_map = CPU_MASK_NONE;
 
-static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
+static int __init
+get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
 {
	int pxm;
 
	pxm = pa->proximity_domain_lo;
-	if (ia64_platform_is("sn2"))
+	if (ia64_platform_is("sn2") || acpi_srat_revision >= 2)
		pxm += pa->proximity_domain_hi[0] << 8;
	return pxm;
 }
 
-static int get_memory_proximity_domain(struct acpi_srat_mem_affinity *ma)
+static int __init
+get_memory_proximity_domain(struct acpi_srat_mem_affinity *ma)
 {
	int pxm;
 
	pxm = ma->proximity_domain;
-	if (!ia64_platform_is("sn2"))
+	if (!ia64_platform_is("sn2") && acpi_srat_revision <= 1)
		pxm &= 0xff;
 
	return pxm;
@@ -500,6 +471,12 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
	if (!(pa->flags & ACPI_SRAT_CPU_ENABLED))
		return;
 
+	if (srat_num_cpus >= ARRAY_SIZE(node_cpuid)) {
+		printk_once(KERN_WARNING
+			    "node_cpuid[%ld] is too small, may not be able to use all cpus\n",
+			    ARRAY_SIZE(node_cpuid));
+		return;
+	}
	pxm = get_processor_proximity_domain(pa);
 
	/* record this node in proximity bitmap */
@@ -513,7 +490,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
	srat_num_cpus++;
 }
 
-void __init
+int __init
 acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 {
	unsigned long paddr, size;
@@ -528,7 +505,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 
	/* Ignore disabled entries */
	if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
-		return;
+		return -1;
 
	/* record this node in proximity bitmap */
	pxm_bit_set(pxm);
@@ -547,6 +524,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
	p->size = size;
	p->nid = pxm;
	num_node_memblks++;
+	return 0;
 }
 
 void __init acpi_numa_arch_fixup(void)
@@ -632,7 +610,7 @@ void __init acpi_numa_arch_fixup(void)
 *  success: return IRQ number (>=0)
 *  failure: return < 0
 */
-int acpi_register_gsi(u32 gsi, int triggering, int polarity)
+int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
 {
	if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM)
		return gsi;
@@ -648,6 +626,7 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity)
				  ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE :
				 IOSAPIC_LEVEL);
 }
+EXPORT_SYMBOL_GPL(acpi_register_gsi);
 
 void acpi_unregister_gsi(u32 gsi)
 {
@@ -659,6 +638,7 @@ void acpi_unregister_gsi(u32 gsi)
 
	iosapic_unregister_intr(gsi);
 }
+EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
 
 static int __init acpi_parse_fadt(struct acpi_table_header *table)
 {
@@ -674,7 +654,8 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
 
	fadt = (struct acpi_table_fadt *)fadt_header;
 
-	acpi_register_gsi(fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW);
+	acpi_register_gsi(NULL, fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE,
+			  ACPI_ACTIVE_LOW);
	return 0;
 }
 
@@ -696,12 +677,26 @@ int __init early_acpi_boot_init(void)
	if (ret < 1)
		printk(KERN_ERR PREFIX
		       "Error parsing MADT - no LAPIC entries\n");
+	else
+		acpi_lapic = 1;
+
+#ifdef CONFIG_SMP
+	if (available_cpus == 0) {
+		printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
+		printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
+		smp_boot_data.cpu_phys_id[available_cpus] =
+		    hard_smp_processor_id();
+		available_cpus = 1;	/* We've got at least one of these, no? */
+	}
+	smp_boot_data.cpu_count = available_cpus;
+#endif
+	/* Make boot-up look pretty */
+	printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus,
+	       total_cpus);
 
	return 0;
 }
 
-
-
 int __init acpi_boot_init(void)
 {
 
@@ -764,18 +759,8 @@ int __init acpi_boot_init(void)
	if (acpi_table_parse(ACPI_SIG_FADT, acpi_parse_fadt))
		printk(KERN_ERR PREFIX "Can't find FADT\n");
 
+#ifdef CONFIG_ACPI_NUMA
 #ifdef CONFIG_SMP
-	if (available_cpus == 0) {
-		printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
-		printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
-		smp_boot_data.cpu_phys_id[available_cpus] =
-		    hard_smp_processor_id();
-		available_cpus = 1;	/* We've got at least one of these, no? */
-	}
-	smp_boot_data.cpu_count = available_cpus;
-
-	smp_build_cpu_map();
-# ifdef CONFIG_ACPI_NUMA
	if (srat_num_cpus == 0) {
		int cpu, i = 1;
		for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++)
@@ -784,14 +769,9 @@ int __init acpi_boot_init(void)
				node_cpuid[i++].phys_id =
				    smp_boot_data.cpu_phys_id[cpu];
	}
-# endif
 #endif
-#ifdef CONFIG_ACPI_NUMA
	build_cpu_to_node_map();
 #endif
-	/* Make boot-up look pretty */
-	printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus,
-	       total_cpus);
-
	return 0;
 }
 
@@ -810,18 +790,21 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
	return 0;
 }
 
+int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
+{
+	if (isa_irq >= 16)
+		return -1;
+	*gsi = isa_irq;
+	return 0;
+}
+
 /*
 *  ACPI based hotplug CPU support
 */
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
-static
-int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 {
 #ifdef CONFIG_ACPI_NUMA
-	int pxm_id;
-	int nid;
-
-	pxm_id = acpi_get_pxm(handle);
	/*
	 * We don't have cpu-only-node hotadd. But if the system equips
	 * SRAT table, pxm is already found and node is ready.
@@ -829,11 +812,10 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
	 * This code here is for the system which doesn't have full SRAT
	 * table for possible cpus.
	 */
-	nid = acpi_map_pxm_to_node(pxm_id);
	node_cpuid[cpu].phys_id = physid;
-	node_cpuid[cpu].nid = nid;
+	node_cpuid[cpu].nid = acpi_get_node(handle);
 #endif
-	return (0);
+	return 0;
 }
 
 int additional_cpus __initdata = -1;
@@ -849,11 +831,11 @@ static __init int setup_additional_cpus(char *s)
 early_param("additional_cpus", setup_additional_cpus);
 
 /*
- * cpu_possible_map should be static, it cannot change as CPUs
+ * cpu_possible_mask should be static, it cannot change as CPUs
 * are onlined, or offlined. The reason is per-cpu data-structures
 * are allocated by some modules at init time, and dont expect to
 * do this dynamically on cpu arrival/departure.
- * cpu_present_map on the other hand can change dynamically.
+ * cpu_present_mask on the other hand can change dynamically.
 * In case when cpu_hotplug is not compiled, then we resort to current
 * behaviour, which is cpu_possible == cpu_present.
 * - Ashok Raj
@@ -879,71 +861,48 @@ __init void prefill_possible_map(void)
 
	possible = available_cpus + additional_cpus;
 
-	if (possible > NR_CPUS)
-		possible = NR_CPUS;
+	if (possible > nr_cpu_ids)
+		possible = nr_cpu_ids;
 
	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
		possible, max((possible - available_cpus), 0));
 
	for (i = 0; i < possible; i++)
-		cpu_set(i, cpu_possible_map);
+		set_cpu_possible(i, true);
 }
 
-int acpi_map_lsapic(acpi_handle handle, int *pcpu)
+static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
 {
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-	union acpi_object *obj;
-	struct acpi_madt_local_sapic *lsapic;
	cpumask_t tmp_map;
-	int cpu, physid;
-
-	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
-		return -EINVAL;
-
-	if (!buffer.length || !buffer.pointer)
-		return -EINVAL;
-
-	obj = buffer.pointer;
-	if (obj->type != ACPI_TYPE_BUFFER)
-	{
-		kfree(buffer.pointer);
-		return -EINVAL;
-	}
+	int cpu;
 
-	lsapic = (struct acpi_madt_local_sapic *)obj->buffer.pointer;
-
-	if ((lsapic->header.type != ACPI_MADT_TYPE_LOCAL_SAPIC) ||
-	    (!(lsapic->lapic_flags & ACPI_MADT_ENABLED))) {
-		kfree(buffer.pointer);
-		return -EINVAL;
-	}
-
-	physid = ((lsapic->id << 8) | (lsapic->eid));
-
-	kfree(buffer.pointer);
-	buffer.length = ACPI_ALLOCATE_BUFFER;
-	buffer.pointer = NULL;
-
-	cpus_complement(tmp_map, cpu_present_map);
-	cpu = first_cpu(tmp_map);
-	if (cpu >= NR_CPUS)
+	cpumask_complement(&tmp_map, cpu_present_mask);
+	cpu = cpumask_first(&tmp_map);
+	if (cpu >= nr_cpu_ids)
		return -EINVAL;
 
	acpi_map_cpu2node(handle, cpu, physid);
 
-	cpu_set(cpu, cpu_present_map);
+	set_cpu_present(cpu, true);
	ia64_cpu_to_sapicid[cpu] = physid;
 
+	acpi_processor_set_pdc(handle);
+
	*pcpu = cpu;
	return (0);
 }
 
+/* wrapper to silence section mismatch warning */
+int __ref acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
+{
+	return _acpi_map_lsapic(handle, physid, pcpu);
+}
 EXPORT_SYMBOL(acpi_map_lsapic);
 
 int acpi_unmap_lsapic(int cpu)
 {
	ia64_cpu_to_sapicid[cpu] = -1;
-	cpu_clear(cpu, cpu_present_map);
+	set_cpu_present(cpu, false);
 
 #ifdef CONFIG_ACPI_NUMA
	/* NUMA specific cleanup's */
@@ -956,14 +915,14 @@ EXPORT_SYMBOL(acpi_unmap_lsapic);
 #endif				/* CONFIG_ACPI_HOTPLUG_CPU */
 
 #ifdef CONFIG_ACPI_NUMA
-static acpi_status __devinit
-acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
+static acpi_status acpi_map_iosapic(acpi_handle handle, u32 depth,
+				    void *context, void **ret)
 {
	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
	union acpi_object *obj;
	struct acpi_madt_io_sapic *iosapic;
	unsigned int gsi_base;
-	int pxm, node;
+	int node;
 
	/* Only care about objects w/ a method that returns the MADT */
	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
@@ -990,17 +949,9 @@ acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
 
	kfree(buffer.pointer);
 
-	/*
-	 * OK, it's an IOSAPIC MADT entry, look for a _PXM value to tell
-	 * us which node to associate this with.
-	 */
-	pxm = acpi_get_pxm(handle);
-	if (pxm < 0)
-		return AE_OK;
-
-	node = pxm_to_node(pxm);
-
-	if (node >= MAX_NUMNODES || !node_online(node) ||
+	/* OK, it's an IOSAPIC MADT entry; associate it with a node */
+	node = acpi_get_node(handle);
+	if (node == NUMA_NO_NODE || !node_online(node) ||
	    cpumask_empty(cpumask_of_node(node)))
		return AE_OK;
 
@@ -1043,20 +994,8 @@ int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
 EXPORT_SYMBOL(acpi_unregister_ioapic);
 
 /*
- * acpi_save_state_mem() - save kernel state
+ * acpi_suspend_lowlevel() - save kernel state and suspend.
 *
 * TBD when when IA64 starts to support suspend...
 */
-int acpi_save_state_mem(void) { return 0; }
-
-/*
- * acpi_restore_state()
- */
-void acpi_restore_state_mem(void) {}
-
-/*
- * do_suspend_lowlevel()
- */
-void do_suspend_lowlevel(void) {}
-
-#endif				/* CONFIG_ACPI */
+int acpi_suspend_lowlevel(void) { return 0; }
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index 742dbb1d5a4..60ef83e6db7 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -16,9 +16,6 @@
 #include <asm/sigcontext.h>
 #include <asm/mca.h>
 
-#include <asm/xen/interface.h>
-#include <asm/xen/hypervisor.h>
-
 #include "../kernel/sigframe.h"
 #include "../kernel/fsyscall_gtod_data.h"
 
@@ -41,7 +38,7 @@ void foo(void)
	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
	DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
	DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
@@ -269,8 +266,8 @@ void foo(void)
	BLANK();
 
	/* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
-	DEFINE(IA64_GTOD_LOCK_OFFSET,
-	       offsetof (struct fsyscall_gtod_data_t, lock));
+	DEFINE(IA64_GTOD_SEQ_OFFSET,
+	       offsetof (struct fsyscall_gtod_data_t, seq));
	DEFINE(IA64_GTOD_WALL_TIME_OFFSET,
	       offsetof (struct fsyscall_gtod_data_t, wall_time));
	DEFINE(IA64_GTOD_MONO_TIME_OFFSET,
@@ -290,31 +287,4 @@ void foo(void)
	DEFINE(IA64_ITC_LASTCYCLE_OFFSET,
	       offsetof (struct itc_jitter_data_t, itc_lastcycle));
 
-#ifdef CONFIG_XEN
-	BLANK();
-
-	DEFINE(XEN_NATIVE_ASM, XEN_NATIVE);
-	DEFINE(XEN_PV_DOMAIN_ASM, XEN_PV_DOMAIN);
-
-#define DEFINE_MAPPED_REG_OFS(sym, field) \
-	DEFINE(sym, (XMAPPEDREGS_OFS + offsetof(struct mapped_regs, field)))
-
-	DEFINE_MAPPED_REG_OFS(XSI_PSR_I_ADDR_OFS, interrupt_mask_addr);
-	DEFINE_MAPPED_REG_OFS(XSI_IPSR_OFS, ipsr);
-	DEFINE_MAPPED_REG_OFS(XSI_IIP_OFS, iip);
-	DEFINE_MAPPED_REG_OFS(XSI_IFS_OFS, ifs);
-	DEFINE_MAPPED_REG_OFS(XSI_PRECOVER_IFS_OFS, precover_ifs);
-	DEFINE_MAPPED_REG_OFS(XSI_ISR_OFS, isr);
-	DEFINE_MAPPED_REG_OFS(XSI_IFA_OFS, ifa);
-	DEFINE_MAPPED_REG_OFS(XSI_IIPA_OFS, iipa);
-	DEFINE_MAPPED_REG_OFS(XSI_IIM_OFS, iim);
-	DEFINE_MAPPED_REG_OFS(XSI_IHA_OFS, iha);
-	DEFINE_MAPPED_REG_OFS(XSI_ITIR_OFS, itir);
-	DEFINE_MAPPED_REG_OFS(XSI_PSR_IC_OFS, interrupt_collection_enabled);
-	DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum);
-	DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]);
-	DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]);
-	DEFINE_MAPPED_REG_OFS(XSI_B0NATS_OFS, vbnat);
-	DEFINE_MAPPED_REG_OFS(XSI_B1NATS_OFS, vnat);
-#endif /* CONFIG_XEN */
 }
diff --git a/arch/ia64/kernel/audit.c b/arch/ia64/kernel/audit.c
index f3802ae89b1..96a9d18ff4c 100644
--- a/arch/ia64/kernel/audit.c
+++ b/arch/ia64/kernel/audit.c
@@ -30,20 +30,11 @@ static unsigned signal_class[] = {
 
 int audit_classify_arch(int arch)
 {
-#ifdef CONFIG_IA32_SUPPORT
-	if (arch == AUDIT_ARCH_I386)
-		return 1;
-#endif
	return 0;
 }
 
 int audit_classify_syscall(int abi, unsigned syscall)
 {
-#ifdef CONFIG_IA32_SUPPORT
-	extern int ia32_classify_syscall(unsigned);
-	if (abi == AUDIT_ARCH_I386)
-		return ia32_classify_syscall(syscall);
-#endif
	switch(syscall) {
	case __NR_open:
		return 2;
@@ -58,18 +49,6 @@ int audit_classify_syscall(int abi, unsigned syscall)
 
 static int __init audit_classes_init(void)
 {
-#ifdef CONFIG_IA32_SUPPORT
-	extern __u32 ia32_dir_class[];
-	extern __u32 ia32_write_class[];
-	extern __u32 ia32_read_class[];
-	extern __u32 ia32_chattr_class[];
-	extern __u32 ia32_signal_class[];
-	audit_register_class(AUDIT_CLASS_WRITE_32, ia32_write_class);
-	audit_register_class(AUDIT_CLASS_READ_32, ia32_read_class);
-	audit_register_class(AUDIT_CLASS_DIR_WRITE_32, ia32_dir_class);
-	audit_register_class(AUDIT_CLASS_CHATTR_32, ia32_chattr_class);
-	audit_register_class(AUDIT_CLASS_SIGNAL_32, ia32_signal_class);
-#endif
	audit_register_class(AUDIT_CLASS_WRITE, write_class);
	audit_register_class(AUDIT_CLASS_READ, read_class);
	audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
diff --git a/arch/ia64/kernel/cpufreq/Kconfig b/arch/ia64/kernel/cpufreq/Kconfig
deleted file mode 100644
index 2d9d5279b98..00000000000
--- a/arch/ia64/kernel/cpufreq/Kconfig
+++ /dev/null
@@ -1,29 +0,0 @@
-
-#
-# CPU Frequency scaling
-#
-
-menu "CPU Frequency scaling"
-
-source "drivers/cpufreq/Kconfig"
-
-if CPU_FREQ
-
-comment "CPUFreq processor drivers"
-
-config IA64_ACPI_CPUFREQ
-	tristate "ACPI Processor P-States driver"
-	select CPU_FREQ_TABLE
-	depends on ACPI_PROCESSOR
-	help
-	This driver adds a CPUFreq driver which utilizes the ACPI
-	Processor Performance States.
-
-	For details, take a look at <file:Documentation/cpu-freq/>.
-
-	If in doubt, say N.
-
-endif	# CPU_FREQ
-
-endmenu
-
diff --git a/arch/ia64/kernel/cpufreq/Makefile b/arch/ia64/kernel/cpufreq/Makefile
deleted file mode 100644
index 4838f2a57c7..00000000000
--- a/arch/ia64/kernel/cpufreq/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-obj-$(CONFIG_IA64_ACPI_CPUFREQ) += acpi-cpufreq.o
-
diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
deleted file mode 100644
index 7b435451b3d..00000000000
--- a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c
+++ /dev/null
@@ -1,438 +0,0 @@
-/*
- * arch/ia64/kernel/cpufreq/acpi-cpufreq.c
- * This file provides the ACPI based P-state support. This
- * module works with generic cpufreq infrastructure. Most of
- * the code is based on i386 version
- * (arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c)
- *
- * Copyright (C) 2005 Intel Corp
- *      Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <asm/io.h>
-#include <asm/uaccess.h>
-#include <asm/pal.h>
-
-#include <linux/acpi.h>
-#include <acpi/processor.h>
-
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg)
-
-MODULE_AUTHOR("Venkatesh Pallipadi");
-MODULE_DESCRIPTION("ACPI Processor P-States Driver");
-MODULE_LICENSE("GPL");
-
-
-struct cpufreq_acpi_io {
-	struct acpi_processor_performance	acpi_data;
-	struct cpufreq_frequency_table		*freq_table;
-	unsigned int				resume;
-};
-
-static struct cpufreq_acpi_io	*acpi_io_data[NR_CPUS];
-
-static struct cpufreq_driver acpi_cpufreq_driver;
-
-
-static int
-processor_set_pstate (
-	u32	value)
-{
-	s64 retval;
-
-	dprintk("processor_set_pstate\n");
-
-	retval = ia64_pal_set_pstate((u64)value);
-
-	if (retval) {
-		dprintk("Failed to set freq to 0x%x, with error 0x%lx\n",
-			value, retval);
-		return -ENODEV;
-	}
-	return (int)retval;
-}
-
-
-static int
-processor_get_pstate (
-	u32	*value)
-{
-	u64	pstate_index = 0;
-	s64	retval;
-
-	dprintk("processor_get_pstate\n");
-
-	retval = ia64_pal_get_pstate(&pstate_index,
-				     PAL_GET_PSTATE_TYPE_INSTANT);
-	*value = (u32) pstate_index;
-
-	if (retval)
-		dprintk("Failed to get current freq with "
-			"error 0x%lx, idx 0x%x\n", retval, *value);
-
-	return (int)retval;
-}
-
-
-/* To be used only after data->acpi_data is initialized */
-static unsigned
-extract_clock (
-	struct cpufreq_acpi_io *data,
-	unsigned value,
-	unsigned int cpu)
-{
-	unsigned long i;
-
-	dprintk("extract_clock\n");
-
-	for (i = 0; i < data->acpi_data.state_count; i++) {
-		if (value == data->acpi_data.states[i].status)
-			return data->acpi_data.states[i].core_frequency;
-	}
-	return data->acpi_data.states[i-1].core_frequency;
-}
-
-
-static unsigned int
-processor_get_freq (
-	struct cpufreq_acpi_io	*data,
-	unsigned int		cpu)
-{
-	int			ret = 0;
-	u32			value = 0;
-	cpumask_t		saved_mask;
-	unsigned long		clock_freq;
-
-	dprintk("processor_get_freq\n");
-
-	saved_mask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(cpu));
-	if (smp_processor_id() != cpu)
-		goto migrate_end;
-
-	/* processor_get_pstate gets the instantaneous frequency */
-	ret = processor_get_pstate(&value);
-
-	if (ret) {
-		set_cpus_allowed(current, saved_mask);
-		printk(KERN_WARNING "get performance failed with error %d\n",
-		       ret);
-		ret = 0;
-		goto migrate_end;
-	}
-	clock_freq = extract_clock(data, value, cpu);
-	ret = (clock_freq*1000);
-
-migrate_end:
-	set_cpus_allowed(current, saved_mask);
-	return ret;
-}
-
-
-static int
-processor_set_freq (
-	struct cpufreq_acpi_io	*data,
-	unsigned int		cpu,
-	int			state)
-{
-	int			ret = 0;
-	u32			value = 0;
-	struct cpufreq_freqs	cpufreq_freqs;
-	cpumask_t		saved_mask;
-	int			retval;
-
-	dprintk("processor_set_freq\n");
-
-	saved_mask = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(cpu));
-	if (smp_processor_id() != cpu) {
-		retval = -EAGAIN;
-		goto migrate_end;
-	}
-
-	if (state == data->acpi_data.state) {
-		if (unlikely(data->resume)) {
-			dprintk("Called after resume, resetting to P%d\n", state);
-			data->resume = 0;
-		} else {
-			dprintk("Already at target state (P%d)\n", state);
-			retval = 0;
-			goto migrate_end;
-		}
-	}
-
-	dprintk("Transitioning from P%d to P%d\n",
-		data->acpi_data.state, state);
-
-	/* cpufreq frequency struct */
-	cpufreq_freqs.cpu = cpu;
-	cpufreq_freqs.old = data->freq_table[data->acpi_data.state].frequency;
-	cpufreq_freqs.new = data->freq_table[state].frequency;
-
-	/* notify cpufreq */
-	cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE);
-
-	/*
-	 * First we write the target state's 'control' value to the
-	 * control_register.
-	 */
-
-	value = (u32) data->acpi_data.states[state].control;
-
-	dprintk("Transitioning to state: 0x%08x\n", value);
-
-	ret = processor_set_pstate(value);
-	if (ret) {
-		unsigned int tmp = cpufreq_freqs.new;
-		cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
-		cpufreq_freqs.new = cpufreq_freqs.old;
-		cpufreq_freqs.old = tmp;
-		cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE);
-		cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
-		printk(KERN_WARNING "Transition failed with error %d\n", ret);
-		retval = -ENODEV;
-		goto migrate_end;
-	}
-
-	cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE);
-
-	data->acpi_data.state = state;
-
-	retval = 0;
-
-migrate_end:
-	set_cpus_allowed(current, saved_mask);
-	return (retval);
-}
-
-
-static unsigned int
-acpi_cpufreq_get (
-	unsigned int		cpu)
-{
-	struct cpufreq_acpi_io *data = acpi_io_data[cpu];
-
-	dprintk("acpi_cpufreq_get\n");
-
-	return processor_get_freq(data, cpu);
-}
-
-
-static int
-acpi_cpufreq_target (
-	struct cpufreq_policy   *policy,
-	unsigned int target_freq,
-	unsigned int relation)
-{
-	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
-	unsigned int next_state = 0;
-	unsigned int result = 0;
-
-	dprintk("acpi_cpufreq_setpolicy\n");
-
-	result = cpufreq_frequency_table_target(policy,
-			data->freq_table, target_freq, relation, &next_state);
-	if (result)
-		return (result);
-
-	result = processor_set_freq(data, policy->cpu, next_state);
-
-	return (result);
-}
-
-
-static int
-acpi_cpufreq_verify (
-	struct cpufreq_policy   *policy)
-{
-	unsigned int result = 0;
-	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
-
-	dprintk("acpi_cpufreq_verify\n");
-
-	result = cpufreq_frequency_table_verify(policy,
-			data->freq_table);
-
-	return (result);
-}
-
-
-static int
-acpi_cpufreq_cpu_init (
-	struct cpufreq_policy   *policy)
-{
-	unsigned int		i;
-	unsigned int		cpu = policy->cpu;
-	struct cpufreq_acpi_io	*data;
-	unsigned int		result = 0;
-
-	dprintk("acpi_cpufreq_cpu_init\n");
-
-	data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL);
-	if (!data)
-		return (-ENOMEM);
-
-	acpi_io_data[cpu] = data;
-
-	result = acpi_processor_register_performance(&data->acpi_data, cpu);
-
-	if (result)
-		goto err_free;
-
-	/* capability check */
-	if (data->acpi_data.state_count <= 1) {
-		dprintk("No P-States\n");
-		result = -ENODEV;
-		goto err_unreg;
-	}
-
-	if ((data->acpi_data.control_register.space_id !=
-					ACPI_ADR_SPACE_FIXED_HARDWARE) ||
-	    (data->acpi_data.status_register.space_id !=
-					ACPI_ADR_SPACE_FIXED_HARDWARE)) {
-		dprintk("Unsupported address space [%d, %d]\n",
-			(u32) (data->acpi_data.control_register.space_id),
-			(u32) (data->acpi_data.status_register.space_id));
-		result = -ENODEV;
-		goto err_unreg;
-	}
-
-	/* alloc freq_table */
-	data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) *
-				   (data->acpi_data.state_count + 1),
-				   GFP_KERNEL);
-	if (!data->freq_table) {
-		result = -ENOMEM;
-		goto err_unreg;
-	}
-
-	/* detect transition latency */
-	policy->cpuinfo.transition_latency = 0;
-	for (i=0; i<data->acpi_data.state_count; i++) {
-		if ((data->acpi_data.states[i].transition_latency * 1000) >
-		    policy->cpuinfo.transition_latency) {
-			policy->cpuinfo.transition_latency =
-			    data->acpi_data.states[i].transition_latency * 1000;
-		}
-	}
-	policy->cur = processor_get_freq(data, policy->cpu);
-
-	/* table init */
-	for (i = 0; i <= data->acpi_data.state_count; i++)
-	{
-		data->freq_table[i].index = i;
-		if (i < data->acpi_data.state_count) {
-			data->freq_table[i].frequency =
-			      data->acpi_data.states[i].core_frequency * 1000;
-		} else {
-			data->freq_table[i].frequency = CPUFREQ_TABLE_END;
-		}
-	}
-
-	result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
-	if (result) {
-		goto err_freqfree;
-	}
-
-	/* notify BIOS that we exist */
-	acpi_processor_notify_smm(THIS_MODULE);
-
-	printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management "
-	       "activated.\n", cpu);
-
-	for (i = 0; i < data->acpi_data.state_count; i++)
-		dprintk("     %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n",
-			(i == data->acpi_data.state?'*':' '), i,
-			(u32) data->acpi_data.states[i].core_frequency,
-			(u32) data->acpi_data.states[i].power,
-			(u32) data->acpi_data.states[i].transition_latency,
-			(u32) data->acpi_data.states[i].bus_master_latency,
-			(u32) data->acpi_data.states[i].status,
-			(u32) data->acpi_data.states[i].control);
-
-	cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu);
-
-	/* the first call to ->target() should result in us actually
-	 * writing something to the appropriate registers. */
-	data->resume = 1;
-
-	return (result);
-
- err_freqfree:
-	kfree(data->freq_table);
- err_unreg:
-	acpi_processor_unregister_performance(&data->acpi_data, cpu);
- err_free:
-	kfree(data);
-	acpi_io_data[cpu] = NULL;
-
-	return (result);
-}
-
-
-static int
-acpi_cpufreq_cpu_exit (
-	struct cpufreq_policy   *policy)
-{
-	struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu];
-
-	dprintk("acpi_cpufreq_cpu_exit\n");
-
-	if (data) {
-		cpufreq_frequency_table_put_attr(policy->cpu);
-		acpi_io_data[policy->cpu] = NULL;
-		acpi_processor_unregister_performance(&data->acpi_data,
-						      policy->cpu);
-		kfree(data);
-	}
-
-	return (0);
-}
-
-
-static struct freq_attr* acpi_cpufreq_attr[] = {
-	&cpufreq_freq_attr_scaling_available_freqs,
-	NULL,
-};
-
-
-static struct cpufreq_driver acpi_cpufreq_driver = {
-	.verify		= acpi_cpufreq_verify,
-	.target		= acpi_cpufreq_target,
-	.get		= acpi_cpufreq_get,
-	.init		= acpi_cpufreq_cpu_init,
-	.exit		= acpi_cpufreq_cpu_exit,
-	.name		= "acpi-cpufreq",
-	.owner		= THIS_MODULE,
-	.attr		= acpi_cpufreq_attr,
-};
-
-
-static int __init
-acpi_cpufreq_init (void)
-{
-	dprintk("acpi_cpufreq_init\n");
-
-	return cpufreq_register_driver(&acpi_cpufreq_driver);
-}
-
-
-static void __exit
-acpi_cpufreq_exit (void)
-{
-	dprintk("acpi_cpufreq_exit\n");
-
-	cpufreq_unregister_driver(&acpi_cpufreq_driver);
-	return;
-}
-
-
-late_initcall(acpi_cpufreq_init);
-module_exit(acpi_cpufreq_exit);
-
diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
index f065093f8e9..2955f359e2a 100644
--- a/arch/ia64/kernel/crash.c
+++ b/arch/ia64/kernel/crash.c
@@ -23,6 +23,7 @@
 int kdump_status[NR_CPUS];
 static atomic_t kdump_cpu_frozen;
 atomic_t kdump_in_progress;
+static int kdump_freeze_monarch;
 static int kdump_on_init = 1;
 static int kdump_on_fatal_mca = 1;
 
@@ -108,10 +109,38 @@ machine_crash_shutdown(struct pt_regs *pt)
	 */
	kexec_disable_iosapic();
 #ifdef CONFIG_SMP
+	/*
+	 * If kdump_on_init is set and an INIT is asserted here, kdump will
+	 * be started again via INIT monarch.
+	 */
+	local_irq_disable();
+	ia64_set_psr_mc();	/* mask MCA/INIT */
+	if (atomic_inc_return(&kdump_in_progress) != 1)
+		unw_init_running(kdump_cpu_freeze, NULL);
+
+	/*
+	 * Now this cpu is ready for kdump.
+	 * Stop all others by IPI or INIT.  They could receive INIT from
+	 * outside and might be INIT monarch, but only thing they have to
+	 * do is falling into kdump_cpu_freeze().
+	 *
+	 * If an INIT is asserted here:
+	 * - All receivers might be slaves, since some of cpus could already
+	 *   be frozen and INIT might be masked on monarch.  In this case,
+	 *   all slaves will be frozen soon since kdump_in_progress will let
+	 *   them into DIE_INIT_SLAVE_LEAVE.
+	 * - One might be a monarch, but INIT rendezvous will fail since
+	 *   at least this cpu already have INIT masked so it never join
+	 *   to the rendezvous.  In this case, all slaves and monarch will
+	 *   be frozen soon with no wait since the INIT rendezvous is skipped
+	 *   by kdump_in_progress.
+	 */
	kdump_smp_send_stop();
	/* not all cpu response to IPI, send INIT to freeze them */
-	if (kdump_wait_cpu_freeze() && kdump_on_init) {
+	if (kdump_wait_cpu_freeze()) {
		kdump_smp_send_init();
+		/* wait again, don't go ahead if possible */
+		kdump_wait_cpu_freeze();
	}
 #endif
 }
@@ -129,17 +158,17 @@ void
 kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
 {
	int cpuid;
 
+	local_irq_disable();
	cpuid = smp_processor_id();
	crash_save_this_cpu();
	current->thread.ksp = (__u64)info->sw - 16;
+
+	ia64_set_psr_mc();	/* mask MCA/INIT and stop reentrance */
+
	atomic_inc(&kdump_cpu_frozen);
	kdump_status[cpuid] = 1;
	mb();
-#ifdef CONFIG_HOTPLUG_CPU
-	if (cpuid != 0)
-		ia64_jump_to_sal(&sal_boot_rendez_state[cpuid]);
-#endif
	for (;;)
		cpu_relax();
 }
@@ -150,6 +179,20 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
	struct ia64_mca_notify_die *nd;
	struct die_args *args = data;
 
+	if (atomic_read(&kdump_in_progress)) {
+		switch (val) {
+		case DIE_INIT_MONARCH_LEAVE:
+			if (!kdump_freeze_monarch)
+				break;
+			/* fall through */
+		case DIE_INIT_SLAVE_LEAVE:
+		case DIE_INIT_MONARCH_ENTER:
+		case DIE_MCA_RENDZVOUS_LEAVE:
+			unw_init_running(kdump_cpu_freeze, NULL);
+			break;
+		}
+	}
+
	if (!kdump_on_init && !kdump_on_fatal_mca)
		return NOTIFY_DONE;
 
@@ -162,43 +205,31 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
	}
 
	if (val != DIE_INIT_MONARCH_LEAVE &&
-	    val != DIE_INIT_SLAVE_LEAVE &&
	    val != DIE_INIT_MONARCH_PROCESS &&
-	    val != DIE_MCA_RENDZVOUS_LEAVE &&
	    val != DIE_MCA_MONARCH_LEAVE)
		return NOTIFY_DONE;
 
	nd = (struct ia64_mca_notify_die *)args->err;
-	/* Reason code 1 means machine check rendezvous*/
-	if ((val == DIE_INIT_MONARCH_LEAVE || val == DIE_INIT_SLAVE_LEAVE
-	    || val == DIE_INIT_MONARCH_PROCESS) && nd->sos->rv_rc == 1)
-		return NOTIFY_DONE;
 
	switch (val) {
	case DIE_INIT_MONARCH_PROCESS:
-		if (kdump_on_init) {
-			atomic_set(&kdump_in_progress, 1);
-			*(nd->monarch_cpu) = -1;
+		/* Reason code 1 means machine check rendezvous*/
+		if (kdump_on_init && (nd->sos->rv_rc != 1)) {
+			if (atomic_inc_return(&kdump_in_progress) != 1)
+				kdump_freeze_monarch = 1;
		}
		break;
	case DIE_INIT_MONARCH_LEAVE:
-		if (kdump_on_init)
+		/* Reason code 1 means machine check rendezvous*/
+		if (kdump_on_init && (nd->sos->rv_rc != 1))
			machine_kdump_on_init();
		break;
-	case DIE_INIT_SLAVE_LEAVE:
-		if (atomic_read(&kdump_in_progress))
-			unw_init_running(kdump_cpu_freeze, NULL);
-		break;
-	case DIE_MCA_RENDZVOUS_LEAVE:
-		if (atomic_read(&kdump_in_progress))
-			unw_init_running(kdump_cpu_freeze, NULL);
-		break;
	case DIE_MCA_MONARCH_LEAVE:
		/* *(nd->data) indicate if MCA is recoverable */
		if (kdump_on_fatal_mca && !(*(nd->data))) {
-			atomic_set(&kdump_in_progress, 1);
-			*(nd->monarch_cpu) = -1;
-			machine_kdump_on_init();
+			if (atomic_inc_return(&kdump_in_progress) == 1)
+				machine_kdump_on_init();
+			/* We got fatal MCA while kdump!? No way!! */
		}
		break;
	}
@@ -206,34 +237,31 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
 }
 
 #ifdef CONFIG_SYSCTL
-static ctl_table kdump_ctl_table[] = {
+static struct ctl_table kdump_ctl_table[] = {
	{
-		.ctl_name = CTL_UNNUMBERED,
		.procname = "kdump_on_init",
		.data = &kdump_on_init,
		.maxlen = sizeof(int),
		.mode = 0644,
-		.proc_handler = &proc_dointvec,
+		.proc_handler = proc_dointvec,
	},
	{
-		.ctl_name = CTL_UNNUMBERED,
		.procname = "kdump_on_fatal_mca",
		.data = &kdump_on_fatal_mca,
		.maxlen = sizeof(int),
		.mode = 0644,
-		.proc_handler = &proc_dointvec,
+		.proc_handler = proc_dointvec,
	},
-	{ .ctl_name = 0 }
+	{ }
 };
 
-static ctl_table sys_table[] = {
+static struct ctl_table sys_table[] = {
	{
-	  .ctl_name = CTL_KERN,
	  .procname = "kernel",
	  .mode = 0555,
	  .child = kdump_ctl_table,
	},
-	{ .ctl_name = 0 }
+	{ }
 };
 #endif
diff --git a/arch/ia64/kernel/crash_dump.c b/arch/ia64/kernel/crash_dump.c
index 23e91290e41..c8c9298666f 100644
--- a/arch/ia64/kernel/crash_dump.c
+++ b/arch/ia64/kernel/crash_dump.c
@@ -13,9 +13,6 @@
 #include <asm/page.h>
 #include <asm/uaccess.h>
 
-/* Stores the physical address of elf header of crash image. */
-unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
-
 /**
 * copy_oldmem_page - copy one page from "oldmem"
 * @pfn: page frame number to be copied
diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c
index 790ef0d87e1..4826ff957a3 100644
--- a/arch/ia64/kernel/cyclone.c
+++ b/arch/ia64/kernel/cyclone.c
@@ -21,7 +21,7 @@ void __init cyclone_setup(void)
 
 static void __iomem *cyclone_mc;
 
-static cycle_t read_cyclone(void)
+static cycle_t read_cyclone(struct clocksource *cs)
 {
	return (cycle_t)readq((void __iomem *)cyclone_mc);
 }
@@ -31,8 +31,6 @@ static struct clocksource clocksource_cyclone = {
	.rating		= 300,
	.read		= read_cyclone,
	.mask		= (1LL << 40) - 1,
-	.mult		= 0, /*to be caluclated*/
-	.shift		= 16,
	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -59,13 +57,13 @@ int __init init_cyclone_clock(void)
		return -ENODEV;
	}
	base = readq(reg);
+	iounmap(reg);
	if(!base){
		printk(KERN_ERR "Summit chipset: Could not find valid CBAR"
				" value.\n");
		use_cyclone = 0;
		return -ENODEV;
	}
-	iounmap(reg);
 
	/* setup PMCC */
	offset = (base + CYCLONE_PMCC_OFFSET);
@@ -117,10 +115,8 @@ int __init init_cyclone_clock(void)
	}
	/* initialize last tick */
	cyclone_mc = cyclone_timer;
-	clocksource_cyclone.fsys_mmio = cyclone_timer;
-	clocksource_cyclone.mult = clocksource_hz2mult(CYCLONE_TIMER_FREQ,
-						clocksource_cyclone.shift);
-	clocksource_register(&clocksource_cyclone);
+	clocksource_cyclone.archdata.fsys_mmio = cyclone_timer;
+	clocksource_register_hz(&clocksource_cyclone, CYCLONE_TIMER_FREQ);
 
	return 0;
 }
diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c
new file mode 100644
index 00000000000..7f791623820
--- /dev/null
+++ b/arch/ia64/kernel/dma-mapping.c
@@ -0,0 +1,24 @@
+#include <linux/dma-mapping.h>
+#include <linux/export.h>
+
+/* Set this to 1 if there is a HW IOMMU in the system */
+int iommu_detected __read_mostly;
+
+struct dma_map_ops *dma_ops;
+EXPORT_SYMBOL(dma_ops);
+
+#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
+
+static int __init dma_init(void)
+{
+	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+
+	return 0;
+}
+fs_initcall(dma_init);
+
+struct dma_map_ops *dma_get_ops(struct device *dev)
+{
+	return dma_ops;
+}
+EXPORT_SYMBOL(dma_get_ops);
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index efaff15d8cf..741b99c1a0b 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -23,9 +23,11 @@
 */
 #include <linux/module.h>
 #include <linux/bootmem.h>
+#include <linux/crash_dump.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/types.h>
+#include <linux/slab.h>
 #include <linux/time.h>
 #include <linux/efi.h>
 #include <linux/kexec.h>
@@ -37,16 +39,22 @@
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/mca.h>
+#include <asm/setup.h>
 #include <asm/tlbflush.h>
 
 #define EFI_DEBUG	0
 
+static __initdata unsigned long palo_phys;
+
+static __initdata efi_config_table_type_t arch_tables[] = {
+	{PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID, "PALO", &palo_phys},
+	{NULL_GUID, NULL, 0},
+};
+
 extern efi_status_t efi_call_phys (void *, ...);
 
-struct efi efi;
-EXPORT_SYMBOL(efi);
 static efi_runtime_services_t *runtime;
-static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
+static u64 mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
 
 #define efi_call_virt(f, args...)	(*(f))(args)
 
@@ -154,7 +162,7 @@ prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, \
 #define STUB_SET_VARIABLE(prefix, adjust_arg)				       \
 static efi_status_t							       \
 prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor,		       \
-		       unsigned long attr, unsigned long data_size,	       \
+		       u32 attr, unsigned long data_size,		       \
		       void *data)					       \
 {									       \
	struct ia64_fpreg fr[6];					       \
@@ -356,7 +364,7 @@ efi_get_pal_addr (void)
		if (++pal_code_count > 1) {
			printk(KERN_ERR "Too many EFI Pal Code memory ranges, "
-			       "dropped @ %lx\n", md->phys_addr);
+			       "dropped @ %llx\n", md->phys_addr);
			continue;
		}
		/*
@@ -420,9 +428,9 @@ static u8 __init palo_checksum(u8 *buffer, u32 length)
 * Parse and handle PALO table which is published at:
 * http://www.dig64.org/home/DIG64_PALO_R1_0.pdf
 */
-static void __init handle_palo(unsigned long palo_phys)
+static void __init handle_palo(unsigned long phys_addr)
 {
-	struct palo_table *palo = __va(palo_phys);
+	struct palo_table *palo = __va(phys_addr);
	u8 checksum;
 
	if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) {
@@ -456,6 +464,7 @@ efi_map_pal_code (void)
		 GRANULEROUNDDOWN((unsigned long) pal_vaddr),
		 pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)),
		 IA64_GRANULE_SHIFT);
+	paravirt_dv_serialize_data();
	ia64_set_psr(psr);		/* restore psr */
 }
 
@@ -463,12 +472,13 @@ void __init
 efi_init (void)
 {
	void *efi_map_start, *efi_map_end;
-	efi_config_table_t *config_tables;
	efi_char16_t *c16;
	u64 efi_desc_size;
	char *cp, vendor[100] = "unknown";
	int i;
-	unsigned long palo_phys;
+
+	set_bit(EFI_BOOT, &efi.flags);
+	set_bit(EFI_64BIT, &efi.flags);
 
	/*
	 * It's too early to be able to use the standard kernel command line
@@ -489,10 +499,10 @@ efi_init (void)
		}
	}
	if (min_addr != 0UL)
-		printk(KERN_INFO "Ignoring memory below %luMB\n",
+		printk(KERN_INFO "Ignoring memory below %lluMB\n",
		       min_addr >> 20);
	if (max_addr != ~0UL)
-		printk(KERN_INFO "Ignoring memory above %luMB\n",
+		printk(KERN_INFO "Ignoring memory above %lluMB\n",
		       max_addr >> 20);
 
	efi.systab = __va(ia64_boot_param->efi_systab);
@@ -510,8 +520,6 @@ efi_init (void)
	       efi.systab->hdr.revision >> 16,
	       efi.systab->hdr.revision & 0xffff);
 
-	config_tables = __va(efi.systab->tables);
-
	/* Show what we know for posterity */
	c16 = __va(efi.systab->fw_vendor);
	if (c16) {
@@ -524,43 +532,12 @@ efi_init (void)
	       efi.systab->hdr.revision >> 16,
	       efi.systab->hdr.revision & 0xffff, vendor);
 
-	efi.mps        = EFI_INVALID_TABLE_ADDR;
-	efi.acpi       = EFI_INVALID_TABLE_ADDR;
-	efi.acpi20     = EFI_INVALID_TABLE_ADDR;
-	efi.smbios     = EFI_INVALID_TABLE_ADDR;
-	efi.sal_systab = EFI_INVALID_TABLE_ADDR;
-	efi.boot_info  = EFI_INVALID_TABLE_ADDR;
-	efi.hcdp       = EFI_INVALID_TABLE_ADDR;
-	efi.uga        = EFI_INVALID_TABLE_ADDR;
+	set_bit(EFI_SYSTEM_TABLES, &efi.flags);
 
	palo_phys      = EFI_INVALID_TABLE_ADDR;
 
-	for (i = 0; i < (int) efi.systab->nr_tables; i++) {
-		if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
-			efi.mps = config_tables[i].table;
-			printk(" MPS=0x%lx", config_tables[i].table);
-		} else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
-			efi.acpi20 = config_tables[i].table;
-			printk(" ACPI 2.0=0x%lx", config_tables[i].table);
-		} else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
-			efi.acpi = config_tables[i].table;
-			printk(" ACPI=0x%lx", config_tables[i].table);
-		} else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
-			efi.smbios = config_tables[i].table;
-			printk(" SMBIOS=0x%lx", config_tables[i].table);
-		} else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) {
-			efi.sal_systab = config_tables[i].table;
-			printk(" SALsystab=0x%lx", config_tables[i].table);
-		} else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
-			efi.hcdp = config_tables[i].table;
-			printk(" HCDP=0x%lx", config_tables[i].table);
-		} else if (efi_guidcmp(config_tables[i].guid,
-			 PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID) == 0) {
-			palo_phys = config_tables[i].table;
-			printk(" PALO=0x%lx", config_tables[i].table);
-		}
-	}
-	printk("\n");
+	if (efi_config_init(arch_tables) != 0)
+		return;
 
	if (palo_phys != EFI_INVALID_TABLE_ADDR)
		handle_palo(palo_phys);
@@ -685,6 +662,8 @@ efi_enter_virtual_mode (void)
		return;
	}
 
+	set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+
	/*
	 * Now that EFI is in virtual mode, we call the EFI functions more
	 * efficiently:
@@ -866,7 +845,7 @@ kern_mem_attribute (unsigned long phys_addr, unsigned long size)
 EXPORT_SYMBOL(kern_mem_attribute);
 
 int
-valid_phys_addr_range (unsigned long phys_addr, unsigned long size)
+valid_phys_addr_range (phys_addr_t phys_addr, unsigned long size)
 {
	u64 attr;
 
@@ -1065,7 +1044,7 @@ find_memmap_space (void)
 * parts exist, and are WB.
 */
 unsigned long
-efi_memmap_init(unsigned long *s, unsigned long *e)
+efi_memmap_init(u64 *s, u64 *e)
 {
	struct kern_memdesc *k, *prev = NULL;
	u64	contig_low=0, contig_high=0;
@@ -1112,11 +1091,6 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
		if (!is_memory_available(md))
			continue;
 
-#ifdef CONFIG_CRASH_DUMP
-		/* saved_max_pfn should ignore max_addr= command line arg */
-		if (saved_max_pfn < (efi_md_end(md) >> PAGE_SHIFT))
-			saved_max_pfn = (efi_md_end(md) >> PAGE_SHIFT);
-#endif
		/*
		 * Round ends inward to granule boundaries
		 * Give trimmings to uncached allocator
diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c
new file mode 100644
index 00000000000..04bc8fd5f89
--- /dev/null
+++ b/arch/ia64/kernel/elfcore.c
@@ -0,0 +1,76 @@
+#include <linux/elf.h>
+#include <linux/coredump.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#include <asm/elf.h>
+
+
+Elf64_Half elf_core_extra_phdrs(void)
+{
+	return GATE_EHDR->e_phnum;
+}
+
+int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
+{
+	const struct elf_phdr *const gate_phdrs =
+		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	int i;
+	Elf64_Off ofs = 0;
+
+	for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
+		struct elf_phdr phdr = gate_phdrs[i];
+
+		if (phdr.p_type == PT_LOAD) {
+			phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz);
+			phdr.p_filesz = phdr.p_memsz;
+			if (ofs == 0) {
+				ofs = phdr.p_offset = offset;
+				offset += phdr.p_filesz;
+			} else {
+				phdr.p_offset = ofs;
+			}
+		} else {
+			phdr.p_offset += ofs;
+		}
+		phdr.p_paddr = 0; /* match other core phdrs */
+		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
+			return 0;
+	}
+	return 1;
+}
+
+int elf_core_write_extra_data(struct coredump_params *cprm)
+{
+	const struct elf_phdr *const gate_phdrs =
+		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	int i;
+
+	for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
+		if (gate_phdrs[i].p_type == PT_LOAD) {
+			void *addr = (void *)gate_phdrs[i].p_vaddr;
+			size_t memsz = PAGE_ALIGN(gate_phdrs[i].p_memsz);
+
+			if (!dump_emit(cprm, addr, memsz))
+				return 0;
+			break;
+		}
+	}
+	return 1;
+}
+
+size_t elf_core_extra_data_size(void)
+{
+	const struct elf_phdr *const gate_phdrs =
+		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
+	int i;
+	size_t size = 0;
+
+	for (i = 0; i < GATE_EHDR->e_phnum; ++i) {
+		if (gate_phdrs[i].p_type == PT_LOAD) {
+			size += PAGE_ALIGN(gate_phdrs[i].p_memsz);
+			break;
+		}
+	}
+	return size;
+}
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index e5341e2c117..ba3d03503e8 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -47,6 +47,7 @@
 #include <asm/processor.h>
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
+#include <asm/ftrace.h>
 
 #include "minstate.h"
 
@@ -60,31 +61,20 @@ ENTRY(ia64_execve)
	 * Allocate 8 input registers since ptrace() may clobber them
	 */
	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
-	alloc loc1=ar.pfs,8,2,4,0
+	alloc loc1=ar.pfs,8,2,3,0
	mov loc0=rp
	.body
	mov out0=in0			// filename
	;;				// stop bit between alloc and call
	mov out1=in1			// argv
	mov out2=in2			// envp
-	add out3=16,sp			// regs
	br.call.sptk.many rp=sys_execve
.ret0:
-#ifdef CONFIG_IA32_SUPPORT
-	/*
-	 * Check if we're returning to ia32 mode. If so, we need to restore ia32 registers
-	 * from pt_regs.
-	 */
-	adds r16=PT(CR_IPSR)+16,sp
-	;;
-	ld8 r16=[r16]
-#endif
	cmp4.ge p6,p7=r8,r0
	mov ar.pfs=loc1			// restore ar.pfs
	sxt4 r8=r8			// return 64-bit result
	;;
	stf.spill [sp]=f0
-(p6)	cmp.ne pKStk,pUStk=r0,r0	// a successful execve() lands us in user-mode...
	mov rp=loc0
(p6)	mov ar.pfs=r0			// clear ar.pfs on success
(p7)	br.ret.sptk.many rp
@@ -107,12 +97,6 @@ ENTRY(ia64_execve)
	ldf.fill f23=[sp]; ldf.fill f24=[sp]; mov f25=f0
	ldf.fill f26=[sp]; ldf.fill f27=[sp]; mov f28=f0
	ldf.fill f29=[sp]; ldf.fill f30=[sp]; mov f31=f0
-#ifdef CONFIG_IA32_SUPPORT
-	tbit.nz p6,p0=r16, IA64_PSR_IS_BIT
-	movl loc0=ia64_ret_from_ia32_execve
-	;;
-(p6)	mov rp=loc0
-#endif
	br.ret.sptk.many rp
 END(ia64_execve)
 
@@ -132,13 +116,12 @@ GLOBAL_ENTRY(sys_clone2)
	mov loc1=r16				// save ar.pfs across do_fork
	.body
	mov out1=in1
-	mov out3=in2
+	mov out2=in2
	tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
-	mov out4=in3	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
+	mov out3=in3	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
	;;
(p6)	st8 [r2]=in5				// store TLS in r16 for copy_thread()
-	mov out5=in4	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
-	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
+	mov out4=in4	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
	mov out0=in0				// out0 = clone_flags
	br.call.sptk.many rp=do_fork
.ret1:	.restore sp
@@ -164,13 +147,12 @@ GLOBAL_ENTRY(sys_clone)
	mov loc1=r16				// save ar.pfs across do_fork
	.body
	mov out1=in1
-	mov out3=16	// stacksize (compensates for 16-byte scratch area)
+	mov out2=16	// stacksize (compensates for 16-byte scratch area)
	tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
-	mov out4=in2	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
+	mov out3=in2	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
	;;
(p6)	st8 [r2]=in4				// store TLS in r13 (tp)
-	mov out5=in3	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
-	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
+	mov out4=in3	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
	mov out0=in0				// out0 = clone_flags
	br.call.sptk.many rp=do_fork
.ret2:	.restore sp
@@ -498,19 +480,6 @@ GLOBAL_ENTRY(prefetch_stack)
	br.ret.sptk.many rp
 END(prefetch_stack)
 
-GLOBAL_ENTRY(kernel_execve)
-	rum psr.ac
-	mov r15=__NR_execve			// put syscall number in place
-	break __BREAK_SYSCALL
-	br.ret.sptk.many rp
-END(kernel_execve)
-
-GLOBAL_ENTRY(clone)
-	mov r15=__NR_clone			// put syscall number in place
-	break __BREAK_SYSCALL
-	br.ret.sptk.many rp
-END(clone)
-
 /*
 * Invoke a system call, but do some tracing before and after the call.
 * We MUST preserve the current register frame throughout this routine
@@ -614,6 +583,27 @@ GLOBAL_ENTRY(ia64_strace_leave_kernel)
.ret4:	br.cond.sptk ia64_leave_kernel
 END(ia64_strace_leave_kernel)
 
+ENTRY(call_payload)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(0)
+	/* call the kernel_thread payload; fn is in r4, arg - in r5 */
+	alloc loc1=ar.pfs,0,3,1,0
+	mov loc0=rp
+	mov loc2=gp
+	mov out0=r5		// arg
+	ld8 r14 = [r4], 8	// fn.address
+	;;
+	mov b6 = r14
+	ld8 gp = [r4]		// fn.gp
+	;;
+	br.call.sptk.many rp=b6	// fn(arg)
+.ret12:	mov gp=loc2
+	mov rp=loc0
+	mov ar.pfs=loc1
+	/* ... and if it has returned, we are going to userland */
+	cmp.ne pKStk,pUStk=r0,r0
+	br.ret.sptk.many rp
+END(call_payload)
+
 GLOBAL_ENTRY(ia64_ret_from_clone)
	PT_REGS_UNWIND_INFO(0)
{	/*
@@ -630,6 +620,7 @@ GLOBAL_ENTRY(ia64_ret_from_clone)
	br.call.sptk.many rp=ia64_invoke_schedule_tail
}
.ret8:
+(pKStk)	br.call.sptk.many rp=call_payload
	adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
	;;
	ld4 r2=[r2]
@@ -733,9 +724,9 @@ GLOBAL_ENTRY(__paravirt_leave_syscall)
 #endif
.global __paravirt_work_processed_syscall;
__paravirt_work_processed_syscall:
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	adds r2=PT(LOADRS)+16,r12
-(pUStk)	mov.m r22=ar.itc			// fetch time at leave
+	MOV_FROM_ITC(pUStk, p9, r22, r19)	// fetch time at leave
	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
	;;
(p6)	ld4 r31=[r18]				// load current_thread_info()->flags
@@ -771,7 +762,7 @@ __paravirt_work_processed_syscall:
	ld8 r29=[r2],16		// M0|1 load cr.ipsr
	ld8 r28=[r3],16		// M0|1 load cr.iip
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
(pUStk)	add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
	;;
	ld8 r30=[r2],16		// M0|1 load cr.ifs
@@ -802,7 +793,7 @@ __paravirt_work_processed_syscall:
	ld8.fill r1=[r3],16			// M0|1 load r1
(pUStk) mov r17=1				// A
	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
(pUStk) st1 [r15]=r17				// M2|3
 #else
(pUStk) st1 [r14]=r17				// M2|3
@@ -822,7 +813,7 @@ __paravirt_work_processed_syscall:
	shr.u r18=r19,16	// I0|1 get byte size of existing "dirty" partition
	COVER			// B add current frame into dirty partition & set cr.ifs
	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	mov r19=ar.bsp		// M2 get new backing store pointer
	st8 [r14]=r22		// M  save time at leave
	mov f10=f0		// F  clear f10
@@ -847,30 +838,6 @@ __paravirt_work_processed_syscall:
	br.cond.sptk.many rbs_switch	// B
 END(__paravirt_leave_syscall)
 
-#ifdef __IA64_ASM_PARAVIRTUALIZED_NATIVE
-#ifdef CONFIG_IA32_SUPPORT
-GLOBAL_ENTRY(ia64_ret_from_ia32_execve)
-	PT_REGS_UNWIND_INFO(0)
-	adds r2=PT(R8)+16,sp		// r2 = &pt_regs.r8
-	adds r3=PT(R10)+16,sp		// r3 = &pt_regs.r10
-	;;
-	.mem.offset 0,0
-	st8.spill [r2]=r8	// store return value in slot for r8 and set unat bit
-	.mem.offset 8,0
-	st8.spill [r3]=r0	// clear error indication in slot for r10 and set unat bit
-#ifdef CONFIG_PARAVIRT
-	;;
-	// don't fall through, ia64_leave_kernel may be #define'd
-	br.cond.sptk.few ia64_leave_kernel
-	;;
-#endif /* CONFIG_PARAVIRT */
-END(ia64_ret_from_ia32_execve)
-#ifndef CONFIG_PARAVIRT
-	// fall through
-#endif
-#endif /* CONFIG_IA32_SUPPORT */
-#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
-
 GLOBAL_ENTRY(__paravirt_leave_kernel)
	PT_REGS_UNWIND_INFO(0)
	/*
@@ -981,10 +948,10 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
	adds r16=PT(CR_IPSR)+16,r12
	adds r17=PT(CR_IIP)+16,r12
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	.pred.rel.mutex pUStk,pKStk
	MOV_FROM_PSR(pKStk, r22, r29)	// M2 read PSR now that interrupts are disabled
-(pUStk)	mov.m r22=ar.itc	// M  fetch time at leave
+	MOV_FROM_ITC(pUStk, p9, r22, r29)	// M  fetch time at leave
	nop.i 0
	;;
 #else
@@ -1014,7 +981,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
	;;
	ld8.fill r12=[r16],16
	ld8.fill r13=[r17],16
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
(pUStk)	adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
 #else
(pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
@@ -1022,7 +989,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel)
	;;
	ld8 r20=[r16],16	// ar.fpsr
	ld8.fill r15=[r17],16
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
(pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18	// deferred
 #endif
	;;
	ld8.fill r2=[r17]
(pUStk)	mov r17=1
	;;
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	// mmi_ :  ld8 st1 shr;;  mmi_ : st8 st1 shr;;
	// mib  :  mov add br    ->  mib  : ld8 add br
	// bbb_ :  br  nop cover;;   mbb_ : mov br  cover;;
@@ -1202,21 +1169,8 @@ skip_rbs_switch:
.work_pending:
	tbit.z p6,p0=r31,TIF_NEED_RESCHED	// is resched not needed?
(p6)	br.cond.sptk.few .notify
-#ifdef CONFIG_PREEMPT
-(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1
-	;;
-(pKStk) st4 [r20]=r21
-#endif
-	SSM_PSR_I(p0, p6, r2)	// enable interrupts
-	br.call.spnt.many rp=schedule
+	br.call.spnt.many rp=preempt_schedule_irq
.ret9:	cmp.eq p6,p0=r0,r0	// p6 <- 1 (re-check)
-	RSM_PSR_I(p0, r2, r20)	// disable interrupts
-	;;
-#ifdef CONFIG_PREEMPT
-(pKStk)	adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
-	;;
-(pKStk)	st4 [r20]=r0		// preempt_count() <- 0
-#endif
(pLvSys)br.cond.sptk.few  __paravirt_pending_syscall_end
	br.cond.sptk.many .work_processed_kernel
@@ -1404,6 +1358,105 @@ GLOBAL_ENTRY(unw_init_running)
	br.ret.sptk.many rp
 END(unw_init_running)
 
+#ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+GLOBAL_ENTRY(_mcount)
+	br ftrace_stub
+END(_mcount)
+
+.here:
+	br.ret.sptk.many b0
+
+GLOBAL_ENTRY(ftrace_caller)
+	alloc out0 = ar.pfs, 8, 0, 4, 0
+	mov out3 = r0
+	;;
+	mov out2 = b0
+	add r3 = 0x20, r3
+	mov out1 = r1;
+	br.call.sptk.many b0 = ftrace_patch_gp
+	//this might be called from module, so we must patch gp
+ftrace_patch_gp:
+	movl gp=__gp
+	mov b0 = r3
+	;;
+.global ftrace_call;
+ftrace_call:
+{
+	.mlx
+	nop.m 0x0
+	movl r3 = .here;;
+}
+	alloc loc0 = ar.pfs, 4, 4, 2, 0
+	;;
+	mov loc1 = b0
+	mov out0 = b0
+	mov loc2 = r8
+	mov loc3 = r15
+	;;
+	adds out0 = -MCOUNT_INSN_SIZE, out0
+	mov out1 = in2
+	mov b6 = r3
+
+	br.call.sptk.many b0 = b6
+	;;
+	mov ar.pfs = loc0
+	mov b0 = loc1
+	mov r8 = loc2
+	mov r15 = loc3
+	br ftrace_stub
+	;;
+END(ftrace_caller)
+
+#else
+GLOBAL_ENTRY(_mcount)
+	movl r2 = ftrace_stub
+	movl r3 = ftrace_trace_function;;
+	ld8 r3 = [r3];;
+	ld8 r3 = [r3];;
+	cmp.eq p7,p0 = r2, r3
+(p7)	br.sptk.many ftrace_stub
+	;;
+
+	alloc loc0 = ar.pfs, 4, 4, 2, 0
+	;;
+	mov loc1 = b0
+	mov out0 = b0
+	mov loc2 = r8
+	mov loc3 = r15
+	;;
+	adds out0 = -MCOUNT_INSN_SIZE, out0
+	mov out1 = in2
+	mov b6 = r3
+
+	br.call.sptk.many b0 = b6
+	;;
+	mov ar.pfs = loc0
+	mov b0 = loc1
+	mov r8 = loc2
+	mov r15 = loc3
+	br ftrace_stub
+	;;
+END(_mcount)
+#endif
+
+GLOBAL_ENTRY(ftrace_stub)
+	mov r3 = b0
+	movl r2 = _mcount_ret_helper
+	;;
+	mov b6 = r2
+	mov b7 = r3
+	br.ret.sptk.many b6
+
+_mcount_ret_helper:
+	mov b0 = r42
+	mov r1 = r41
+	mov ar.pfs = r40
+	br b7
+END(ftrace_stub)
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
	.rodata
	.align 8
	.globl sys_call_table
@@ -1553,7 +1606,7 @@ sys_call_table:
	data8 sys_sched_get_priority_min
	data8 sys_sched_rr_get_interval
	data8 sys_nanosleep
-	data8 sys_nfsservctl
+	data8 sys_ni_syscall			// old nfsservctl
	data8 sys_prctl				// 1170
	data8 sys_getpagesize
	data8 sys_mmap2
@@ -1703,6 +1756,26 @@ sys_call_table:
	data8 sys_dup3
	data8 sys_pipe2
	data8 sys_inotify_init1
+	data8 sys_preadv
+	data8 sys_pwritev			// 1320
+	data8 sys_rt_tgsigqueueinfo
+	data8 sys_recvmmsg
+	data8 sys_fanotify_init
+	data8 sys_fanotify_mark
+	data8 sys_prlimit64			// 1325
+	data8 sys_name_to_handle_at
+	data8 sys_open_by_handle_at
+	data8 sys_clock_adjtime
+	data8 sys_syncfs
+	data8
sys_setns // 1330 + data8 sys_sendmmsg + data8 sys_process_vm_readv + data8 sys_process_vm_writev + data8 sys_accept4 + data8 sys_finit_module // 1335 + data8 sys_sched_setattr + data8 sys_sched_getattr + data8 sys_renameat2 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c index c539c689493..0c161ed6d18 100644 --- a/arch/ia64/kernel/err_inject.c +++ b/arch/ia64/kernel/err_inject.c @@ -24,7 +24,7 @@ * Copyright (C) 2006, Intel Corp. All rights reserved. * */ -#include <linux/sysdev.h> +#include <linux/device.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/cpu.h> @@ -35,10 +35,10 @@ #define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte; #define define_one_ro(name) \ -static SYSDEV_ATTR(name, 0444, show_##name, NULL) +static DEVICE_ATTR(name, 0444, show_##name, NULL) #define define_one_rw(name) \ -static SYSDEV_ATTR(name, 0644, show_##name, store_##name) +static DEVICE_ATTR(name, 0644, show_##name, store_##name) static u64 call_start[NR_CPUS]; static u64 phys_addr[NR_CPUS]; @@ -55,7 +55,7 @@ static u64 resources[NR_CPUS]; #define show(name) \ static ssize_t \ -show_##name(struct sys_device *dev, struct sysdev_attribute *attr, \ +show_##name(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ u32 cpu=dev->id; \ @@ -64,7 +64,7 @@ show_##name(struct sys_device *dev, struct sysdev_attribute *attr, \ #define store(name) \ static ssize_t \ -store_##name(struct sys_device *dev, struct sysdev_attribute *attr, \ +store_##name(struct device *dev, struct device_attribute *attr, \ const char *buf, size_t size) \ { \ unsigned int cpu=dev->id; \ @@ -78,7 +78,7 @@ show(call_start) * processor. The cpu number in driver is only used for storing data. 
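On the err_inject.c conversion above: moving from the legacy sysdev layer to plain struct device changes both the attribute macro and the generated symbol names, which is why default_attrs[] switches from &attr_*.attr to &dev_attr_*.attr later in this file. A reduced sketch of the converted pattern (the attribute name and payload here are illustrative, not the driver's):

#include <linux/device.h>
#include <linux/cpu.h>
#include <linux/kernel.h>

static u64 foo_val[NR_CPUS];

static ssize_t show_foo(struct device *dev, struct device_attribute *attr,
			char *buf)
{
	return sprintf(buf, "%llx\n", (unsigned long long)foo_val[dev->id]);
}

static ssize_t store_foo(struct device *dev, struct device_attribute *attr,
			 const char *buf, size_t size)
{
	foo_val[dev->id] = simple_strtoull(buf, NULL, 16);
	return size;
}

/* generates dev_attr_foo, referenced as &dev_attr_foo.attr */
static DEVICE_ATTR(foo, 0644, show_foo, store_foo);
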
*/ static ssize_t -store_call_start(struct sys_device *dev, struct sysdev_attribute *attr, +store_call_start(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { unsigned int cpu=dev->id; @@ -127,7 +127,7 @@ show(err_type_info) store(err_type_info) static ssize_t -show_virtual_to_phys(struct sys_device *dev, struct sysdev_attribute *attr, +show_virtual_to_phys(struct device *dev, struct device_attribute *attr, char *buf) { unsigned int cpu=dev->id; @@ -135,7 +135,7 @@ show_virtual_to_phys(struct sys_device *dev, struct sysdev_attribute *attr, } static ssize_t -store_virtual_to_phys(struct sys_device *dev, struct sysdev_attribute *attr, +store_virtual_to_phys(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { unsigned int cpu=dev->id; @@ -159,8 +159,8 @@ show(err_struct_info) store(err_struct_info) static ssize_t -show_err_data_buffer(struct sys_device *dev, - struct sysdev_attribute *attr, char *buf) +show_err_data_buffer(struct device *dev, + struct device_attribute *attr, char *buf) { unsigned int cpu=dev->id; @@ -171,8 +171,8 @@ show_err_data_buffer(struct sys_device *dev, } static ssize_t -store_err_data_buffer(struct sys_device *dev, - struct sysdev_attribute *attr, +store_err_data_buffer(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) { unsigned int cpu=dev->id; @@ -209,14 +209,14 @@ define_one_ro(capabilities); define_one_ro(resources); static struct attribute *default_attrs[] = { - &attr_call_start.attr, - &attr_virtual_to_phys.attr, - &attr_err_type_info.attr, - &attr_err_struct_info.attr, - &attr_err_data_buffer.attr, - &attr_status.attr, - &attr_capabilities.attr, - &attr_resources.attr, + &dev_attr_call_start.attr, + &dev_attr_virtual_to_phys.attr, + &dev_attr_err_type_info.attr, + &dev_attr_err_struct_info.attr, + &dev_attr_err_data_buffer.attr, + &dev_attr_status.attr, + &dev_attr_capabilities.attr, + &dev_attr_resources.attr, NULL }; @@ -225,23 +225,23 @@ static struct attribute_group err_inject_attr_group = { .name = "err_inject" }; /* Add/Remove err_inject interface for CPU device */ -static int __cpuinit err_inject_add_dev(struct sys_device * sys_dev) +static int err_inject_add_dev(struct device *sys_dev) { return sysfs_create_group(&sys_dev->kobj, &err_inject_attr_group); } -static int __cpuinit err_inject_remove_dev(struct sys_device * sys_dev) +static int err_inject_remove_dev(struct device *sys_dev) { sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group); return 0; } -static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb, +static int err_inject_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; + struct device *sys_dev; - sys_dev = get_cpu_sysdev(cpu); + sys_dev = get_cpu_device(cpu); switch (action) { case CPU_ONLINE: case CPU_ONLINE_FROZEN: @@ -256,7 +256,7 @@ static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata err_inject_cpu_notifier = +static struct notifier_block err_inject_cpu_notifier = { .notifier_call = err_inject_cpu_callback, }; @@ -269,12 +269,17 @@ err_inject_init(void) #ifdef ERR_INJ_DEBUG printk(KERN_INFO "Enter error injection driver.\n"); #endif + + cpu_notifier_register_begin(); + for_each_online_cpu(i) { err_inject_cpu_callback(&err_inject_cpu_notifier, CPU_ONLINE, (void *)(long)i); } - register_hotcpu_notifier(&err_inject_cpu_notifier); + 
__register_hotcpu_notifier(&err_inject_cpu_notifier); + + cpu_notifier_register_done(); return 0; } @@ -283,16 +288,22 @@ static void __exit err_inject_exit(void) { int i; - struct sys_device *sys_dev; + struct device *sys_dev; #ifdef ERR_INJ_DEBUG printk(KERN_INFO "Exit error injection driver.\n"); #endif + + cpu_notifier_register_begin(); + for_each_online_cpu(i) { - sys_dev = get_cpu_sysdev(i); + sys_dev = get_cpu_device(i); sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group); } - unregister_hotcpu_notifier(&err_inject_cpu_notifier); + + __unregister_hotcpu_notifier(&err_inject_cpu_notifier); + + cpu_notifier_register_done(); } module_init(err_inject_init); diff --git a/arch/ia64/kernel/esi.c b/arch/ia64/kernel/esi.c index ebf4e988e78..b091111270c 100644 --- a/arch/ia64/kernel/esi.c +++ b/arch/ia64/kernel/esi.c @@ -65,7 +65,7 @@ static int __init esi_init (void) } if (!esi) - return -ENODEV;; + return -ENODEV; systab = __va(esi); @@ -84,7 +84,7 @@ static int __init esi_init (void) case ESI_DESC_ENTRY_POINT: break; default: - printk(KERN_WARNING "Unkown table type %d found in " + printk(KERN_WARNING "Unknown table type %d found in " "ESI table, ignoring rest of table\n", *p); return -ENODEV; } diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index c1625c7e177..abc6dee3799 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -21,10 +21,10 @@ #include <asm/thread_info.h> #include <asm/sal.h> #include <asm/signal.h> -#include <asm/system.h> #include <asm/unistd.h> #include "entry.h" +#include "paravirt_inst.h" /* * See Documentation/ia64/fsys.txt for details on fsyscalls. @@ -90,53 +90,6 @@ ENTRY(fsys_getpid) FSYS_RETURN END(fsys_getpid) -ENTRY(fsys_getppid) - .prologue - .altrp b6 - .body - add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16 - ;; - ld8 r17=[r17] // r17 = current->group_leader - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 - ;; - - ld4 r9=[r9] - add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = ¤t->group_leader->real_parent - ;; - and r9=TIF_ALLWORK_MASK,r9 - -1: ld8 r18=[r17] // r18 = current->group_leader->real_parent - ;; - cmp.ne p8,p0=0,r9 - add r8=IA64_TASK_TGID_OFFSET,r18 // r8 = ¤t->group_leader->real_parent->tgid - ;; - - /* - * The .acq is needed to ensure that the read of tgid has returned its data before - * we re-check "real_parent". - */ - ld4.acq r8=[r8] // r8 = current->group_leader->real_parent->tgid -#ifdef CONFIG_SMP - /* - * Re-read current->group_leader->real_parent. - */ - ld8 r19=[r17] // r19 = current->group_leader->real_parent -(p8) br.spnt.many fsys_fallback_syscall - ;; - cmp.ne p6,p0=r18,r19 // did real_parent change? - mov r19=0 // i must not leak kernel bits... -(p6) br.cond.spnt.few 1b // yes -> redo the read of tgid and the check - ;; - mov r17=0 // i must not leak kernel bits... - mov r18=0 // i must not leak kernel bits... -#else - mov r17=0 // i must not leak kernel bits... - mov r18=0 // i must not leak kernel bits... - mov r19=0 // i must not leak kernel bits... -#endif - FSYS_RETURN -END(fsys_getppid) - ENTRY(fsys_set_tid_address) .prologue .altrp b6 @@ -173,7 +126,7 @@ ENTRY(fsys_set_tid_address) FSYS_RETURN END(fsys_set_tid_address) -#if IA64_GTOD_LOCK_OFFSET !=0 +#if IA64_GTOD_SEQ_OFFSET !=0 #error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t #endif #if IA64_ITC_JITTER_OFFSET !=0 @@ -279,7 +232,7 @@ ENTRY(fsys_gettimeofday) (p9) cmp.eq p13,p0 = 0,r30 // if mmio_ptr, clear p13 jitter control ;; .pred.rel.mutex p8,p9 -(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!! 
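The err_inject init/exit rework just above is the standard fix for a CPU-hotplug registration race: the walk over online CPUs and the notifier registration must happen under one lock, or a CPU can come up in between and be missed. The same bracketing, reduced to a skeleton (names and the callback body are illustrative):

#include <linux/cpu.h>
#include <linux/notifier.h>

static int my_cpu_callback(struct notifier_block *nb,
			   unsigned long action, void *hcpu)
{
	/* set up or tear down per-CPU state here */
	return NOTIFY_OK;
}

static struct notifier_block my_cpu_nb = {
	.notifier_call = my_cpu_callback,
};

static int __init my_driver_init(void)
{
	int cpu;

	cpu_notifier_register_begin();	/* blocks hotplug... */
	for_each_online_cpu(cpu)	/* ...so this walk stays accurate */
		my_cpu_callback(&my_cpu_nb, CPU_ONLINE, (void *)(long)cpu);
	__register_hotcpu_notifier(&my_cpu_nb);	/* lock already held */
	cpu_notifier_register_done();
	return 0;
}
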
+ MOV_FROM_ITC(p8, p6, r2, r10) // CPU_TIMER. 36 clocks latency!!! (p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues.. (p13) ld8 r25 = [r19] // get itc_lastcycle value ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec @@ -372,163 +325,6 @@ ENTRY(fsys_clock_gettime) END(fsys_clock_gettime) /* - * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize). - */ -#if _NSIG_WORDS != 1 -# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1. -#endif -ENTRY(fsys_rt_sigprocmask) - .prologue - .altrp b6 - .body - - add r2=IA64_TASK_BLOCKED_OFFSET,r16 - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 - cmp4.ltu p6,p0=SIG_SETMASK,r32 - - cmp.ne p15,p0=r0,r34 // oset != NULL? - tnat.nz p8,p0=r34 - add r31=IA64_TASK_SIGHAND_OFFSET,r16 - ;; - ld8 r3=[r2] // read/prefetch current->blocked - ld4 r9=[r9] - tnat.nz.or p6,p0=r35 - - cmp.ne.or p6,p0=_NSIG_WORDS*8,r35 - tnat.nz.or p6,p0=r32 -(p6) br.spnt.few .fail_einval // fail with EINVAL - ;; -#ifdef CONFIG_SMP - ld8 r31=[r31] // r31 <- current->sighand -#endif - and r9=TIF_ALLWORK_MASK,r9 - tnat.nz.or p8,p0=r33 - ;; - cmp.ne p7,p0=0,r9 - cmp.eq p6,p0=r0,r33 // set == NULL? - add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock -(p8) br.spnt.few .fail_efault // fail with EFAULT -(p7) br.spnt.many fsys_fallback_syscall // got pending kernel work... -(p6) br.dpnt.many .store_mask // -> short-circuit to just reading the signal mask - - /* Argh, we actually have to do some work and _update_ the signal mask: */ - -EX(.fail_efault, probe.r.fault r33, 3) // verify user has read-access to *set -EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set - mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1)) - ;; - - rsm psr.i // mask interrupt delivery - mov ar.ccv=0 - andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP - -#ifdef CONFIG_SMP - mov r17=1 - ;; - cmpxchg4.acq r18=[r31],r17,ar.ccv // try to acquire the lock - mov r8=EINVAL // default to EINVAL - ;; - ld8 r3=[r2] // re-read current->blocked now that we hold the lock - cmp4.ne p6,p0=r18,r0 -(p6) br.cond.spnt.many .lock_contention - ;; -#else - ld8 r3=[r2] // re-read current->blocked now that we hold the lock - mov r8=EINVAL // default to EINVAL -#endif - add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16 - add r19=IA64_TASK_SIGNAL_OFFSET,r16 - cmp4.eq p6,p0=SIG_BLOCK,r32 - ;; - ld8 r19=[r19] // r19 <- current->signal - cmp4.eq p7,p0=SIG_UNBLOCK,r32 - cmp4.eq p8,p0=SIG_SETMASK,r32 - ;; - ld8 r18=[r18] // r18 <- current->pending.signal - .pred.rel.mutex p6,p7,p8 -(p6) or r14=r3,r14 // SIG_BLOCK -(p7) andcm r14=r3,r14 // SIG_UNBLOCK - -(p8) mov r14=r14 // SIG_SETMASK -(p6) mov r8=0 // clear error code - // recalc_sigpending() - add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19 - - add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19 - ;; - ld4 r17=[r17] // r17 <- current->signal->group_stop_count -(p7) mov r8=0 // clear error code - - ld8 r19=[r19] // r19 <- current->signal->shared_pending - ;; - cmp4.gt p6,p7=r17,r0 // p6/p7 <- (current->signal->group_stop_count > 0)? -(p8) mov r8=0 // clear error code - - or r18=r18,r19 // r18 <- current->pending | current->signal->shared_pending - ;; - // r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked: - andcm r18=r18,r14 - add r9=TI_FLAGS+IA64_TASK_SIZE,r16 - ;; - -(p7) cmp.ne.or.andcm p6,p7=r18,r0 // p6/p7 <- signal pending - mov r19=0 // i must not leak kernel bits... 
-(p6) br.cond.dpnt.many .sig_pending - ;; - -1: ld4 r17=[r9] // r17 <- current->thread_info->flags - ;; - mov ar.ccv=r17 - and r18=~_TIF_SIGPENDING,r17 // r18 <- r17 & ~(1 << TIF_SIGPENDING) - ;; - - st8 [r2]=r14 // update current->blocked with new mask - cmpxchg4.acq r8=[r9],r18,ar.ccv // current->thread_info->flags <- r18 - ;; - cmp.ne p6,p0=r17,r8 // update failed? -(p6) br.cond.spnt.few 1b // yes -> retry - -#ifdef CONFIG_SMP - st4.rel [r31]=r0 // release the lock -#endif - ssm psr.i - ;; - - srlz.d // ensure psr.i is set again - mov r18=0 // i must not leak kernel bits... - -.store_mask: -EX(.fail_efault, (p15) probe.w.fault r34, 3) // verify user has write-access to *oset -EX(.fail_efault, (p15) st8 [r34]=r3) - mov r2=0 // i must not leak kernel bits... - mov r3=0 // i must not leak kernel bits... - mov r8=0 // return 0 - mov r9=0 // i must not leak kernel bits... - mov r14=0 // i must not leak kernel bits... - mov r17=0 // i must not leak kernel bits... - mov r31=0 // i must not leak kernel bits... - FSYS_RETURN - -.sig_pending: -#ifdef CONFIG_SMP - st4.rel [r31]=r0 // release the lock -#endif - ssm psr.i - ;; - srlz.d - br.sptk.many fsys_fallback_syscall // with signal pending, do the heavy-weight syscall - -#ifdef CONFIG_SMP -.lock_contention: - /* Rather than spinning here, fall back on doing a heavy-weight syscall. */ - ssm psr.i - ;; - srlz.d - br.sptk.many fsys_fallback_syscall -#endif -END(fsys_rt_sigprocmask) - -/* * fsys_getcpu doesn't use the third parameter in this implementation. It reads * current_thread_info()->cpu and corresponding node in cpu_to_node_map. */ @@ -547,11 +343,15 @@ ENTRY(fsys_getcpu) ;; tnat.nz p7,p0 = r33 // I guard against NaT argument (p7) br.cond.spnt.few .fail_einval // B + ;; + cmp.ne p6,p0=r32,r0 + cmp.ne p7,p0=r33,r0 + ;; #ifdef CONFIG_NUMA movl r17=cpu_to_node_map ;; -EX(.fail_efault, probe.w.fault r32, 3) // M This takes 5 cycles -EX(.fail_efault, probe.w.fault r33, 3) // M This takes 5 cycles +EX(.fail_efault, (p6) probe.w.fault r32, 3) // M This takes 5 cycles +EX(.fail_efault, (p7) probe.w.fault r33, 3) // M This takes 5 cycles shladd r18=r3,1,r17 ;; ld2 r20=[r18] // r20 = cpu_to_node_map[cpu] @@ -561,20 +361,20 @@ EX(.fail_efault, probe.w.fault r33, 3) // M This takes 5 cycles (p8) br.spnt.many fsys_fallback_syscall ;; ;; -EX(.fail_efault, st4 [r32] = r3) -EX(.fail_efault, st2 [r33] = r20) +EX(.fail_efault, (p6) st4 [r32] = r3) +EX(.fail_efault, (p7) st2 [r33] = r20) mov r8=0 ;; #else -EX(.fail_efault, probe.w.fault r32, 3) // M This takes 5 cycles -EX(.fail_efault, probe.w.fault r33, 3) // M This takes 5 cycles +EX(.fail_efault, (p6) probe.w.fault r32, 3) // M This takes 5 cycles +EX(.fail_efault, (p7) probe.w.fault r33, 3) // M This takes 5 cycles and r2 = TIF_ALLWORK_MASK,r2 ;; cmp.ne p8,p0=0,r2 (p8) br.spnt.many fsys_fallback_syscall ;; -EX(.fail_efault, st4 [r32] = r3) -EX(.fail_efault, st2 [r33] = r0) +EX(.fail_efault, (p6) st4 [r32] = r3) +EX(.fail_efault, (p7) st2 [r33] = r0) mov r8=0 ;; #endif @@ -592,17 +392,17 @@ ENTRY(fsys_fallback_syscall) adds r17=-1024,r15 movl r14=sys_call_table ;; - rsm psr.i + RSM_PSR_I(p0, r26, r27) shladd r18=r17,3,r14 ;; ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point - mov r29=psr // read psr (12 cyc load latency) + MOV_FROM_PSR(p0, r29, r26) // read psr (12 cyc load latency) mov r27=ar.rsc mov r21=ar.fpsr mov r26=ar.pfs END(fsys_fallback_syscall) /* FALL THROUGH */ -GLOBAL_ENTRY(fsys_bubble_down) +GLOBAL_ENTRY(paravirt_fsys_bubble_down) .prologue .altrp b6 .body @@ -640,7 +440,7 @@ 
GLOBAL_ENTRY(fsys_bubble_down) * * PSR.BE : already is turned off in __kernel_syscall_via_epc() * PSR.AC : don't care (kernel normally turns PSR.AC on) - * PSR.I : already turned off by the time fsys_bubble_down gets + * PSR.I : already turned off by the time paravirt_fsys_bubble_down gets * invoked * PSR.DFL: always 0 (kernel never turns it on) * PSR.DFH: don't care --- kernel never touches f32-f127 on its own @@ -650,7 +450,7 @@ GLOBAL_ENTRY(fsys_bubble_down) * PSR.DB : don't care --- kernel never enables kernel-level * breakpoints * PSR.TB : must be 0 already; if it wasn't zero on entry to - * __kernel_syscall_via_epc, the branch to fsys_bubble_down + * __kernel_syscall_via_epc, the branch to paravirt_fsys_bubble_down * will trigger a taken branch; the taken-trap-handler then * converts the syscall into a break-based system-call. */ @@ -682,8 +482,8 @@ GLOBAL_ENTRY(fsys_bubble_down) nop.i 0 ;; mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - mov.m r30=ar.itc // M get cycle for accounting +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting #else nop.m 0 #endif @@ -708,7 +508,7 @@ GLOBAL_ENTRY(fsys_bubble_down) cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 br.call.sptk.many b7=ia64_syscall_setup // B ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE // mov.m r30=ar.itc is called in advance add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2 add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2 @@ -734,21 +534,21 @@ GLOBAL_ENTRY(fsys_bubble_down) mov rp=r14 // I0 set the real return addr and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A ;; - ssm psr.i // M2 we're on kernel stacks now, reenable irqs + SSM_PSR_I(p0, p6, r22) // M2 we're on kernel stacks now, reenable irqs cmp.eq p8,p0=r3,r0 // A (p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT nop.m 0 (p8) br.call.sptk.many b6=b6 // B (ignore return address) br.cond.spnt ia64_trace_syscall // B -END(fsys_bubble_down) +END(paravirt_fsys_bubble_down) .rodata .align 8 - .globl fsyscall_table + .globl paravirt_fsyscall_table - data8 fsys_bubble_down -fsyscall_table: + data8 paravirt_fsys_bubble_down +paravirt_fsyscall_table: data8 fsys_ni_syscall data8 0 // exit // 1025 data8 0 // read @@ -767,7 +567,7 @@ fsyscall_table: data8 0 // chown data8 0 // lseek // 1040 data8 fsys_getpid // getpid - data8 fsys_getppid // getppid + data8 0 // getppid data8 0 // mount data8 0 // umount data8 0 // setuid // 1045 @@ -904,7 +704,7 @@ fsyscall_table: data8 0 // sigaltstack data8 0 // rt_sigaction data8 0 // rt_sigpending - data8 fsys_rt_sigprocmask // rt_sigprocmask + data8 0 // rt_sigprocmask data8 0 // rt_sigqueueinfo // 1180 data8 0 // rt_sigreturn data8 0 // rt_sigsuspend @@ -1033,4 +833,4 @@ fsyscall_table: // fill in zeros for the remaining entries .zero: - .space fsyscall_table + 8*NR_syscalls - .zero, 0 + .space paravirt_fsyscall_table + 8*NR_syscalls - .zero, 0 diff --git a/arch/ia64/kernel/fsyscall_gtod_data.h b/arch/ia64/kernel/fsyscall_gtod_data.h index 57d2ee6c83e..146b15b5fec 100644 --- a/arch/ia64/kernel/fsyscall_gtod_data.h +++ b/arch/ia64/kernel/fsyscall_gtod_data.h @@ -6,7 +6,7 @@ */ struct fsyscall_gtod_data_t { - seqlock_t lock; + seqcount_t seq; struct timespec wall_time; struct timespec monotonic_time; cycle_t clk_mask; diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c new file mode 100644 index 00000000000..3b0c2aa0785 --- /dev/null +++ b/arch/ia64/kernel/ftrace.c @@ -0,0 
+1,204 @@ +/* + * Dynamic function tracing support. + * + * Copyright (C) 2008 Shaohua Li <shaohua.li@intel.com> + * + * For licencing details, see COPYING. + * + * Defines low-level handling of mcount calls when the kernel + * is compiled with the -pg flag. When using dynamic ftrace, the + * mcount call-sites get patched lazily with NOP till they are + * enabled. All code mutation routines here take effect atomically. + */ + +#include <linux/uaccess.h> +#include <linux/ftrace.h> + +#include <asm/cacheflush.h> +#include <asm/patch.h> + +/* In IA64, each function will be added below two bundles with -pg option */ +static unsigned char __attribute__((aligned(8))) +ftrace_orig_code[MCOUNT_INSN_SIZE] = { + 0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */ + 0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */ + 0x05, 0x00, 0xc4, 0x00, /* mov r42=b0 */ + 0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */ + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */ + 0x08, 0x00, 0x00, 0x50 /* br.call.sptk.many b0 = _mcount;; */ +}; + +struct ftrace_orig_insn { + u64 dummy1, dummy2, dummy3; + u64 dummy4:64-41+13; + u64 imm20:20; + u64 dummy5:3; + u64 sign:1; + u64 dummy6:4; +}; + +/* mcount stub will be converted below for nop */ +static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */ + 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */ + 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */ + 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */ + 0x00, 0x00, 0x04, 0x00 +}; + +static unsigned char *ftrace_nop_replace(void) +{ + return ftrace_nop_code; +} + +/* + * mcount stub will be converted below for call + * Note: Just the last instruction is changed against nop + * */ +static unsigned char __attribute__((aligned(8))) +ftrace_call_code[MCOUNT_INSN_SIZE] = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */ + 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */ + 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */ + 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */ + 0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/ + 0xf8, 0xff, 0xff, 0xc8 +}; + +struct ftrace_call_insn { + u64 dummy1, dummy2; + u64 dummy3:48; + u64 imm39_l:16; + u64 imm39_h:23; + u64 dummy4:13; + u64 imm20:20; + u64 dummy5:3; + u64 i:1; + u64 dummy6:4; +}; + +static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +{ + struct ftrace_call_insn *code = (void *)ftrace_call_code; + unsigned long offset = addr - (ip + 0x10); + + code->imm39_l = offset >> 24; + code->imm39_h = offset >> 40; + code->imm20 = offset >> 4; + code->i = offset >> 63; + return ftrace_call_code; +} + +static int +ftrace_modify_code(unsigned long ip, unsigned char *old_code, + unsigned char *new_code, int do_check) +{ + unsigned char replaced[MCOUNT_INSN_SIZE]; + + /* + * Note: Due to modules and __init, code can + * disappear and change, we need to protect against faulting + * as well as code changing. We do this by using the + * probe_kernel_* functions. + * + * No real locking needed, this code is run through + * kstop_machine, or before SMP starts. 
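The trickiest part of the new ftrace.c is ftrace_call_replace() above: a brl target is IP-relative in units of 16-byte bundles, and the usable offset bits are scattered across the MLX bundle as imm39 (split here into a 16-bit low and a 23-bit high field), imm20, and the sign bit i, while bits 0-3 are dropped because bundles are 16-byte aligned. A standalone illustration of the packing, with assumed addresses:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t ip     = 0xa000000100010000ULL;	/* call site (assumed) */
	uint64_t target = 0xa000000100200000ULL;	/* ftrace_caller (assumed) */
	uint64_t offset = target - (ip + 0x10);		/* relative to the brl bundle */

	uint64_t imm39_l = (offset >> 24) & 0xffffULL;	 /* 16 bits */
	uint64_t imm39_h = (offset >> 40) & 0x7fffffULL; /* 23 bits */
	uint64_t imm20   = (offset >>  4) & 0xfffffULL;	 /* 20 bits */
	uint64_t i       = (offset >> 63) & 1;		 /* sign */

	printf("imm39_h=%06llx imm39_l=%04llx imm20=%05llx i=%llu\n",
	       (unsigned long long)imm39_h, (unsigned long long)imm39_l,
	       (unsigned long long)imm20, (unsigned long long)i);
	return 0;
}
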
+ */ + + if (!do_check) + goto skip_check; + + /* read the text we want to modify */ + if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + return -EFAULT; + + /* Make sure it is what we expect it to be */ + if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) + return -EINVAL; + +skip_check: + /* replace the text with the new text */ + if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE)) + return -EPERM; + flush_icache_range(ip, ip + MCOUNT_INSN_SIZE); + + return 0; +} + +static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE]; + unsigned long ip = rec->ip; + + if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + return -EFAULT; + if (rec->flags & FTRACE_FL_CONVERTED) { + struct ftrace_call_insn *call_insn, *tmp_call; + + call_insn = (void *)ftrace_call_code; + tmp_call = (void *)replaced; + call_insn->imm39_l = tmp_call->imm39_l; + call_insn->imm39_h = tmp_call->imm39_h; + call_insn->imm20 = tmp_call->imm20; + call_insn->i = tmp_call->i; + if (memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0) + return -EINVAL; + return 0; + } else { + struct ftrace_orig_insn *call_insn, *tmp_call; + + call_insn = (void *)ftrace_orig_code; + tmp_call = (void *)replaced; + call_insn->sign = tmp_call->sign; + call_insn->imm20 = tmp_call->imm20; + if (memcmp(replaced, ftrace_orig_code, MCOUNT_INSN_SIZE) != 0) + return -EINVAL; + return 0; + } +} + +int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + int ret; + char *new; + + ret = ftrace_make_nop_check(rec, addr); + if (ret) + return ret; + new = ftrace_nop_replace(); + return ftrace_modify_code(rec->ip, NULL, new, 0); +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long ip = rec->ip; + unsigned char *old, *new; + + old= ftrace_nop_replace(); + new = ftrace_call_replace(ip, addr); + return ftrace_modify_code(ip, old, new, 1); +} + +/* in IA64, _mcount can't directly call ftrace_stub. Only jump is ok */ +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long ip; + unsigned long addr = ((struct fnptr *)ftrace_call)->ip; + + if (func == ftrace_stub) + return 0; + ip = ((struct fnptr *)func)->ip; + + ia64_patch_imm64(addr + 2, ip); + + flush_icache_range(addr, addr + 16); + return 0; +} + +/* run from kstop_machine */ +int __init ftrace_dyn_arch_init(void) +{ + return 0; +} diff --git a/arch/ia64/kernel/gate-data.S b/arch/ia64/kernel/gate-data.S index 258c0a3238f..b3ef1c72e13 100644 --- a/arch/ia64/kernel/gate-data.S +++ b/arch/ia64/kernel/gate-data.S @@ -1,3 +1,3 @@ - .section .data.gate, "aw" + .section .data..gate, "aw" .incbin "arch/ia64/kernel/gate.so" diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S index 74b1ccce4e8..b5f8bdd8618 100644 --- a/arch/ia64/kernel/gate.S +++ b/arch/ia64/kernel/gate.S @@ -11,8 +11,10 @@ #include <asm/errno.h> #include <asm/asm-offsets.h> #include <asm/sigcontext.h> -#include <asm/system.h> #include <asm/unistd.h> +#include <asm/kregs.h> +#include <asm/page.h> +#include "paravirt_inst.h" /* * We can't easily refer to symbols inside the kernel. To avoid full runtime relocation, @@ -20,18 +22,18 @@ * to targets outside the shared object) and to avoid multi-phase kernel builds, we * simply create minimalistic "patch lists" in special ELF sections. 
*/ - .section ".data.patch.fsyscall_table", "a" + .section ".data..patch.fsyscall_table", "a" .previous #define LOAD_FSYSCALL_TABLE(reg) \ [1:] movl reg=0; \ - .xdata4 ".data.patch.fsyscall_table", 1b-. + .xdata4 ".data..patch.fsyscall_table", 1b-. - .section ".data.patch.brl_fsys_bubble_down", "a" + .section ".data..patch.brl_fsys_bubble_down", "a" .previous #define BRL_COND_FSYS_BUBBLE_DOWN(pr) \ [1:](pr)brl.cond.sptk 0; \ ;; \ - .xdata4 ".data.patch.brl_fsys_bubble_down", 1b-. + .xdata4 ".data..patch.brl_fsys_bubble_down", 1b-. GLOBAL_ENTRY(__kernel_syscall_via_break) .prologue @@ -48,87 +50,6 @@ GLOBAL_ENTRY(__kernel_syscall_via_break) } END(__kernel_syscall_via_break) -/* - * On entry: - * r11 = saved ar.pfs - * r15 = system call # - * b0 = saved return address - * b6 = return address - * On exit: - * r11 = saved ar.pfs - * r15 = system call # - * b0 = saved return address - * all other "scratch" registers: undefined - * all "preserved" registers: same as on entry - */ - -GLOBAL_ENTRY(__kernel_syscall_via_epc) - .prologue - .altrp b6 - .body -{ - /* - * Note: the kernel cannot assume that the first two instructions in this - * bundle get executed. The remaining code must be safe even if - * they do not get executed. - */ - adds r17=-1024,r15 // A - mov r10=0 // A default to successful syscall execution - epc // B causes split-issue -} - ;; - rsm psr.be | psr.i // M2 (5 cyc to srlz.d) - LOAD_FSYSCALL_TABLE(r14) // X - ;; - mov r16=IA64_KR(CURRENT) // M2 (12 cyc) - shladd r18=r17,3,r14 // A - mov r19=NR_syscalls-1 // A - ;; - lfetch [r18] // M0|1 - mov r29=psr // M2 (12 cyc) - // If r17 is a NaT, p6 will be zero - cmp.geu p6,p7=r19,r17 // A (sysnr > 0 && sysnr < 1024+NR_syscalls)? - ;; - mov r21=ar.fpsr // M2 (12 cyc) - tnat.nz p10,p9=r15 // I0 - mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...) - ;; - srlz.d // M0 (forces split-issue) ensure PSR.BE==0 -(p6) ld8 r18=[r18] // M0|1 - nop.i 0 - ;; - nop.m 0 -(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!) - nop.i 0 - ;; -(p8) ssm psr.i -(p6) mov b7=r18 // I0 -(p8) br.dptk.many b7 // B - - mov r27=ar.rsc // M2 (12 cyc) -/* - * brl.cond doesn't work as intended because the linker would convert this branch - * into a branch to a PLT. Perhaps there will be a way to avoid this with some - * future version of the linker. In the meantime, we just use an indirect branch - * instead. - */ -#ifdef CONFIG_ITANIUM -(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry - ;; -(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down - ;; -(p6) mov b7=r14 -(p6) br.sptk.many b7 -#else - BRL_COND_FSYS_BUBBLE_DOWN(p6) -#endif - ssm psr.i - mov r10=-1 -(p10) mov r8=EINVAL -(p9) mov r8=ENOSYS - FSYS_RETURN -END(__kernel_syscall_via_epc) - # define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET) # define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET) # define ARG2_OFF (16 + IA64_SIGFRAME_ARG2_OFFSET) @@ -374,3 +295,92 @@ restore_rbs: // invala not necessary as that will happen when returning to user-mode br.cond.sptk back_from_restore_rbs END(__kernel_sigtramp) + +/* + * On entry: + * r11 = saved ar.pfs + * r15 = system call # + * b0 = saved return address + * b6 = return address + * On exit: + * r11 = saved ar.pfs + * r15 = system call # + * b0 = saved return address + * all other "scratch" registers: undefined + * all "preserved" registers: same as on entry + */ + +GLOBAL_ENTRY(__kernel_syscall_via_epc) + .prologue + .altrp b6 + .body +{ + /* + * Note: the kernel cannot assume that the first two instructions in this + * bundle get executed. 
The remaining code must be safe even if + * they do not get executed. + */ + adds r17=-1024,r15 // A + mov r10=0 // A default to successful syscall execution + epc // B causes split-issue +} + ;; + RSM_PSR_BE_I(r20, r22) // M2 (5 cyc to srlz.d) + LOAD_FSYSCALL_TABLE(r14) // X + ;; + mov r16=IA64_KR(CURRENT) // M2 (12 cyc) + shladd r18=r17,3,r14 // A + mov r19=NR_syscalls-1 // A + ;; + lfetch [r18] // M0|1 + MOV_FROM_PSR(p0, r29, r8) // M2 (12 cyc) + // If r17 is a NaT, p6 will be zero + cmp.geu p6,p7=r19,r17 // A (sysnr > 0 && sysnr < 1024+NR_syscalls)? + ;; + mov r21=ar.fpsr // M2 (12 cyc) + tnat.nz p10,p9=r15 // I0 + mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...) + ;; + srlz.d // M0 (forces split-issue) ensure PSR.BE==0 +(p6) ld8 r18=[r18] // M0|1 + nop.i 0 + ;; + nop.m 0 +(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!) + nop.i 0 + ;; + SSM_PSR_I(p8, p14, r25) +(p6) mov b7=r18 // I0 +(p8) br.dptk.many b7 // B + + mov r27=ar.rsc // M2 (12 cyc) +/* + * brl.cond doesn't work as intended because the linker would convert this branch + * into a branch to a PLT. Perhaps there will be a way to avoid this with some + * future version of the linker. In the meantime, we just use an indirect branch + * instead. + */ +#ifdef CONFIG_ITANIUM +(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry + ;; +(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down + ;; +(p6) mov b7=r14 +(p6) br.sptk.many b7 +#else + BRL_COND_FSYS_BUBBLE_DOWN(p6) +#endif + SSM_PSR_I(p0, p14, r10) + mov r10=-1 +(p10) mov r8=EINVAL +(p9) mov r8=ENOSYS + FSYS_RETURN + +#ifdef CONFIG_PARAVIRT + /* + * padd to make the size of this symbol constant + * independent of paravirtualization. + */ + .align PAGE_SIZE / 8 +#endif +END(__kernel_syscall_via_epc) diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S index 3cb1abc00e2..e518f7902af 100644 --- a/arch/ia64/kernel/gate.lds.S +++ b/arch/ia64/kernel/gate.lds.S @@ -5,8 +5,8 @@ * its layout. */ - -#include <asm/system.h> +#include <asm/page.h> +#include "paravirt_patchlist.h" SECTIONS { @@ -32,22 +32,22 @@ SECTIONS */ . 
= GATE_ADDR + 0x600; - .data.patch : { - __start_gate_mckinley_e9_patchlist = .; - *(.data.patch.mckinley_e9) - __end_gate_mckinley_e9_patchlist = .; + .data..patch : { + __paravirt_start_gate_mckinley_e9_patchlist = .; + *(.data..patch.mckinley_e9) + __paravirt_end_gate_mckinley_e9_patchlist = .; - __start_gate_vtop_patchlist = .; - *(.data.patch.vtop) - __end_gate_vtop_patchlist = .; + __paravirt_start_gate_vtop_patchlist = .; + *(.data..patch.vtop) + __paravirt_end_gate_vtop_patchlist = .; - __start_gate_fsyscall_patchlist = .; - *(.data.patch.fsyscall_table) - __end_gate_fsyscall_patchlist = .; + __paravirt_start_gate_fsyscall_patchlist = .; + *(.data..patch.fsyscall_table) + __paravirt_end_gate_fsyscall_patchlist = .; - __start_gate_brl_fsys_bubble_down_patchlist = .; - *(.data.patch.brl_fsys_bubble_down) - __end_gate_brl_fsys_bubble_down_patchlist = .; + __paravirt_start_gate_brl_fsys_bubble_down_patchlist = .; + *(.data..patch.brl_fsys_bubble_down) + __paravirt_end_gate_brl_fsys_bubble_down_patchlist = .; } :readable .IA_64.unwind_info : { *(.IA_64.unwind_info*) } diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index 59301c47280..a4acddad0c7 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -30,7 +30,6 @@ #include <asm/pgtable.h> #include <asm/processor.h> #include <asm/ptrace.h> -#include <asm/system.h> #include <asm/mca_asm.h> #include <linux/init.h> #include <linux/linkage.h> @@ -167,7 +166,7 @@ RestRR: \ mov _tmp2=((ia64_rid(IA64_REGION_ID_KERNEL, (num<<61)) << 8) | (pgsize << 2) | vhpt);; \ mov rr[_tmp1]=_tmp2 - .section __special_page_section,"ax" + __PAGE_ALIGNED_DATA .global empty_zero_page empty_zero_page: @@ -181,7 +180,7 @@ swapper_pg_dir: halt_msg: stringz "Halting kernel\n" - .section .text.head,"ax" + __REF .global start_ap @@ -260,7 +259,7 @@ start_ap: * Switch into virtual mode: */ movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \ - |IA64_PSR_DI|IA64_PSR_AC) + |IA64_PSR_DI) ;; mov cr.ipsr=r16 movl r17=1f @@ -417,8 +416,6 @@ start_ap: default_setup_hook = 0 // Currently nothing needs to be done. - .weak xen_setup_hook - .global hypervisor_type hypervisor_type: data8 PARAVIRT_HYPERVISOR_TYPE_DEFAULT @@ -427,7 +424,6 @@ hypervisor_type: hypervisor_setup_hooks: data8 default_setup_hook - data8 xen_setup_hook num_hypervisor_hooks = (. - hypervisor_setup_hooks) / 8 .previous @@ -1036,7 +1032,7 @@ END(ia64_delay_loop) * Return a CPU-local timestamp in nano-seconds. This timestamp is * NOT synchronized across CPUs its return value must never be * compared against the values returned on another CPU. The usage in - * kernel/sched.c ensures that. + * kernel/sched/core.c ensures that. * * The return-value of sched_clock() is NOT supposed to wrap-around. * If it did, it would cause some scheduling hiccups (at the worst). @@ -1050,8 +1046,8 @@ END(ia64_delay_loop) * except that the multiplication and the shift are done with 128-bit * intermediate precision so that we can produce a full 64-bit result. 
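The sched_clock comment ending here is easiest to see in C: the ar.itc cycle count is multiplied by a fixed-point nanoseconds-per-cycle factor with a 128-bit intermediate, then shifted back down so the full 64-bit result survives. A sketch using GCC's unsigned __int128; the shift value 30 matches the kernel's IA64_NSEC_PER_CYC_SHIFT but is quoted here as an assumption:

#include <stdint.h>

#define IA64_NSEC_PER_CYC_SHIFT 30	/* assumed to match the kernel constant */

static uint64_t cycles_to_ns(uint64_t itc, uint64_t nsec_per_cyc)
{
	/* nsec_per_cyc is ns-per-cycle scaled by 2^30, set up at boot */
	return (uint64_t)(((unsigned __int128)itc * nsec_per_cyc)
			  >> IA64_NSEC_PER_CYC_SHIFT);
}
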
*/ -GLOBAL_ENTRY(sched_clock) - addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 +GLOBAL_ENTRY(ia64_native_sched_clock) + addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 mov.m r9=ar.itc // fetch cycle-counter (35 cyc) ;; ldf8 f8=[r8] @@ -1066,12 +1062,18 @@ GLOBAL_ENTRY(sched_clock) ;; shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT br.ret.sptk.many rp -END(sched_clock) +END(ia64_native_sched_clock) +#ifndef CONFIG_PARAVIRT + //unsigned long long + //sched_clock(void) __attribute__((alias("ia64_native_sched_clock"))); + .global sched_clock +sched_clock = ia64_native_sched_clock +#endif -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE GLOBAL_ENTRY(cycle_to_cputime) alloc r16=ar.pfs,1,0,0,0 - addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 + addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 ;; ldf8 f8=[r8] ;; @@ -1086,20 +1088,7 @@ GLOBAL_ENTRY(cycle_to_cputime) shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT br.ret.sptk.many rp END(cycle_to_cputime) -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ - -GLOBAL_ENTRY(start_kernel_thread) - .prologue - .save rp, r0 // this is the end of the call-chain - .body - alloc r2 = ar.pfs, 0, 0, 2, 0 - mov out0 = r9 - mov out1 = r11;; - br.call.sptk.many rp = kernel_thread_helper;; - mov out0 = r8 - br.call.sptk.many rp = sys_exit;; -1: br.sptk.few 1b // not reached -END(start_kernel_thread) +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #ifdef CONFIG_IA64_BRL_EMU @@ -1124,95 +1113,6 @@ SET_REG(b5); #endif /* CONFIG_IA64_BRL_EMU */ #ifdef CONFIG_SMP - /* - * This routine handles spinlock contention. It uses a non-standard calling - * convention to avoid converting leaf routines into interior routines. Because - * of this special convention, there are several restrictions: - * - * - do not use gp relative variables, this code is called from the kernel - * and from modules, r1 is undefined. - * - do not use stacked registers, the caller owns them. - * - do not use the scratch stack space, the caller owns it. - * - do not use any registers other than the ones listed below - * - * Inputs: - * ar.pfs - saved CFM of caller - * ar.ccv - 0 (and available for use) - * r27 - flags from spin_lock_irqsave or 0. Must be preserved. - * r28 - available for use. - * r29 - available for use. - * r30 - available for use. - * r31 - address of lock, available for use. - * b6 - return address - * p14 - available for use. - * p15 - used to track flag status. - * - * If you patch this code to use more registers, do not forget to update - * the clobber lists for spin_lock() in arch/ia64/include/asm/spinlock.h. - */ - -#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) - -GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4) - .prologue - .save ar.pfs, r0 // this code effectively has a zero frame size - .save rp, r28 - .body - nop 0 - tbit.nz p15,p0=r27,IA64_PSR_I_BIT - .restore sp // pop existing prologue after next insn - mov b6 = r28 - .prologue - .save ar.pfs, r0 - .altrp b6 - .body - ;; -(p15) ssm psr.i // reenable interrupts if they were on - // DavidM says that srlz.d is slow and is not required in this case -.wait: - // exponential backoff, kdb, lockmeter etc. 
go in here - hint @pause - ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word - nop 0 - ;; - cmp4.ne p14,p0=r30,r0 -(p14) br.cond.sptk.few .wait -(p15) rsm psr.i // disable interrupts if we reenabled them - br.cond.sptk.few b6 // lock is now free, try to acquire - .global ia64_spinlock_contention_pre3_4_end // for kernprof -ia64_spinlock_contention_pre3_4_end: -END(ia64_spinlock_contention_pre3_4) - -#else - -GLOBAL_ENTRY(ia64_spinlock_contention) - .prologue - .altrp b6 - .body - tbit.nz p15,p0=r27,IA64_PSR_I_BIT - ;; -.wait: -(p15) ssm psr.i // reenable interrupts if they were on - // DavidM says that srlz.d is slow and is not required in this case -.wait2: - // exponential backoff, kdb, lockmeter etc. go in here - hint @pause - ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word - ;; - cmp4.ne p14,p0=r30,r0 - mov r30 = 1 -(p14) br.cond.sptk.few .wait2 -(p15) rsm psr.i // disable interrupts if we reenabled them - ;; - cmpxchg4.acq r30=[r31], r30, ar.ccv - ;; - cmp4.ne p14,p0=r0,r30 -(p14) br.cond.sptk.few .wait - - br.ret.sptk.many b6 // lock is now taken -END(ia64_spinlock_contention) - -#endif #ifdef CONFIG_HOTPLUG_CPU GLOBAL_ENTRY(ia64_jump_to_sal) @@ -1236,7 +1136,7 @@ GLOBAL_ENTRY(ia64_jump_to_sal) movl r16=SAL_PSR_BITS_TO_SET;; mov cr.ipsr=r16 mov cr.ifs=r0;; - rfi;; + rfi;; // note: this unmask MCA/INIT (psr.mc) 1: /* * Invalidate all TLB data/inst diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 6da1f20d737..5b7791dd396 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -12,7 +12,7 @@ EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(strlen); -#include<asm/pgtable.h> +#include <asm/pgtable.h> EXPORT_SYMBOL_GPL(empty_zero_page); #include <asm/checksum.h> @@ -21,6 +21,7 @@ EXPORT_SYMBOL(csum_ipv6_magic); #include <asm/page.h> EXPORT_SYMBOL(clear_page); +EXPORT_SYMBOL(copy_page); #ifdef CONFIG_VIRTUAL_MEM_MAP #include <linux/bootmem.h> @@ -29,9 +30,9 @@ EXPORT_SYMBOL(max_low_pfn); /* defined by bootmem.c, but not exported by generic #endif #include <asm/processor.h> -EXPORT_SYMBOL(per_cpu__cpu_info); +EXPORT_SYMBOL(ia64_cpu_info); #ifdef CONFIG_SMP -EXPORT_SYMBOL(per_cpu__local_per_cpu_offset); +EXPORT_SYMBOL(local_per_cpu_offset); #endif #include <asm/uaccess.h> @@ -60,9 +61,6 @@ EXPORT_SYMBOL(__udivdi3); EXPORT_SYMBOL(__moddi3); EXPORT_SYMBOL(__umoddi3); -#include <asm/page.h> -EXPORT_SYMBOL(copy_page); - #if defined(CONFIG_MD_RAID456) || defined(CONFIG_MD_RAID456_MODULE) extern void xor_ia64_2(void); extern void xor_ia64_3(void); @@ -86,29 +84,15 @@ EXPORT_SYMBOL(ia64_save_scratch_fpregs); #include <asm/unwind.h> EXPORT_SYMBOL(unw_init_running); -#ifdef ASM_SUPPORTED -# ifdef CONFIG_SMP -# if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) -/* - * This is not a normal routine and we don't want a function descriptor for it, so we use - * a fake declaration here. - */ -extern char ia64_spinlock_contention_pre3_4; -EXPORT_SYMBOL(ia64_spinlock_contention_pre3_4); -# else -/* - * This is not a normal routine and we don't want a function descriptor for it, so we use - * a fake declaration here. 
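On the spinlock-contention code being deleted above: the algorithm it implemented remains a useful reference. Spin with plain loads so the cache line stays shared among waiters, and attempt the atomic acquire only once the lock reads free. In portable C11 (a sketch of the algorithm, not of the ticket locks that replaced it):

#include <stdatomic.h>

static void spin_contended(atomic_uint *lock)
{
	unsigned int expected;

	for (;;) {
		/* read-only spin: keeps the line shared (was "hint @pause") */
		while (atomic_load_explicit(lock, memory_order_relaxed) != 0)
			;
		expected = 0;
		if (atomic_compare_exchange_weak_explicit(lock, &expected, 1u,
				memory_order_acquire, memory_order_relaxed))
			return;	/* acquired, like cmpxchg4.acq seeing 0 */
	}
}
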
- */ -extern char ia64_spinlock_contention; -EXPORT_SYMBOL(ia64_spinlock_contention); -# endif -# endif -#endif - #if defined(CONFIG_IA64_ESI) || defined(CONFIG_IA64_ESI_MODULE) extern void esi_call_phys (void); EXPORT_SYMBOL_GPL(esi_call_phys); #endif extern char ia64_ivt[]; EXPORT_SYMBOL(ia64_ivt); + +#include <asm/ftrace.h> +#ifdef CONFIG_FUNCTION_TRACER +/* mcount is defined in assembly */ +EXPORT_SYMBOL(_mcount); +#endif diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index 5b0e830c6f3..f9efe9739d3 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -19,15 +19,11 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -struct mm_struct init_mm = INIT_MM(init_mm); - -EXPORT_SYMBOL(init_mm); - /* * Initial task structure. * * We need to make sure that this is properly aligned due to the way process stacks are - * handled. This is done by having a special ".data.init_task" section... + * handled. This is done by having a special ".data..init_task" section... */ #define init_thread_info init_task_mem.s.thread_info @@ -37,7 +33,8 @@ union { struct thread_info thread_info; } s; unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)]; -} init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))) = {{ +} init_task_mem asm ("init_task") __init_task_data = + {{ .task = INIT_TASK(init_task_mem.s.task), .thread_info = INIT_THREAD_INFO(init_task_mem.s.task) }}; diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index e13125058be..cd44a57c73b 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -76,7 +76,7 @@ * PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ * * Note: The term "IRQ" is loosely used everywhere in Linux kernel to - * describeinterrupts. Now we use "IRQ" only for Linux IRQ's. ISA IRQ + * describe interrupts. Now we use "IRQ" only for Linux IRQ's. ISA IRQ * (isa_irq) is the only exception in this source code. */ @@ -86,6 +86,7 @@ #include <linux/kernel.h> #include <linux/list.h> #include <linux/pci.h> +#include <linux/slab.h> #include <linux/smp.h> #include <linux/string.h> #include <linux/bootmem.h> @@ -97,7 +98,6 @@ #include <asm/machvec.h> #include <asm/processor.h> #include <asm/ptrace.h> -#include <asm/system.h> #undef DEBUG_INTERRUPT_ROUTING @@ -107,10 +107,6 @@ #define DBG(fmt...) #endif -#define NR_PREALLOCATE_RTE_ENTRIES \ - (PAGE_SIZE / sizeof(struct iosapic_rte_info)) -#define RTE_PREALLOCATED (1) - static DEFINE_SPINLOCK(iosapic_lock); /* @@ -135,7 +131,6 @@ struct iosapic_rte_info { struct list_head rte_list; /* RTEs sharing the same vector */ char rte_index; /* IOSAPIC RTE index */ int refcnt; /* reference counter */ - unsigned int flags; /* flags */ struct iosapic *iosapic; } ____cacheline_aligned; @@ -152,10 +147,7 @@ static struct iosapic_intr_info { unsigned char trigger : 1; /* trigger mode (see iosapic.h) */ } iosapic_intr_info[NR_IRQS]; -static unsigned char pcat_compat __devinitdata; /* 8259 compatibility flag */ - -static int iosapic_kmalloc_ok; -static LIST_HEAD(free_rte_list); +static unsigned char pcat_compat; /* 8259 compatibility flag */ static inline void iosapic_write(struct iosapic *iosapic, unsigned int reg, u32 val) @@ -264,7 +256,7 @@ set_rte (unsigned int gsi, unsigned int irq, unsigned int dest, int mask) } static void -nop (unsigned int irq) +nop (struct irq_data *data) { /* do nothing... 
*/ } @@ -294,8 +286,9 @@ kexec_disable_iosapic(void) #endif static void -mask_irq (unsigned int irq) +mask_irq (struct irq_data *data) { + unsigned int irq = data->irq; u32 low32; int rte_index; struct iosapic_rte_info *rte; @@ -312,8 +305,9 @@ mask_irq (unsigned int irq) } static void -unmask_irq (unsigned int irq) +unmask_irq (struct irq_data *data) { + unsigned int irq = data->irq; u32 low32; int rte_index; struct iosapic_rte_info *rte; @@ -329,10 +323,12 @@ unmask_irq (unsigned int irq) } -static void -iosapic_set_affinity(unsigned int irq, const struct cpumask *mask) +static int +iosapic_set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) { #ifdef CONFIG_SMP + unsigned int irq = data->irq; u32 high32, low32; int cpu, dest, rte_index; int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0; @@ -343,15 +339,15 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask) cpu = cpumask_first_and(cpu_online_mask, mask); if (cpu >= nr_cpu_ids) - return; + return -1; if (irq_prepare_move(irq, cpu)) - return; + return -1; dest = cpu_physical_id(cpu); if (!iosapic_intr_info[irq].count) - return; /* not an IOSAPIC interrupt */ + return -1; /* not an IOSAPIC interrupt */ set_irq_affinity_info(irq, dest, redir); @@ -376,7 +372,9 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask) iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32); iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32); } + #endif + return 0; } /* @@ -384,31 +382,33 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask) */ static unsigned int -iosapic_startup_level_irq (unsigned int irq) +iosapic_startup_level_irq (struct irq_data *data) { - unmask_irq(irq); + unmask_irq(data); return 0; } static void -iosapic_end_level_irq (unsigned int irq) +iosapic_unmask_level_irq (struct irq_data *data) { + unsigned int irq = data->irq; ia64_vector vec = irq_to_vector(irq); struct iosapic_rte_info *rte; int do_unmask_irq = 0; irq_complete_move(irq); - if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { + if (unlikely(irqd_is_setaffinity_pending(data))) { do_unmask_irq = 1; - mask_irq(irq); - } + mask_irq(data); + } else + unmask_irq(data); list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) iosapic_eoi(rte->iosapic->addr, vec); if (unlikely(do_unmask_irq)) { - move_masked_irq(irq); - unmask_irq(irq); + irq_move_masked_irq(data); + unmask_irq(data); } } @@ -418,16 +418,15 @@ iosapic_end_level_irq (unsigned int irq) #define iosapic_ack_level_irq nop static struct irq_chip irq_type_iosapic_level = { - .name = "IO-SAPIC-level", - .startup = iosapic_startup_level_irq, - .shutdown = iosapic_shutdown_level_irq, - .enable = iosapic_enable_level_irq, - .disable = iosapic_disable_level_irq, - .ack = iosapic_ack_level_irq, - .end = iosapic_end_level_irq, - .mask = mask_irq, - .unmask = unmask_irq, - .set_affinity = iosapic_set_affinity + .name = "IO-SAPIC-level", + .irq_startup = iosapic_startup_level_irq, + .irq_shutdown = iosapic_shutdown_level_irq, + .irq_enable = iosapic_enable_level_irq, + .irq_disable = iosapic_disable_level_irq, + .irq_ack = iosapic_ack_level_irq, + .irq_mask = mask_irq, + .irq_unmask = iosapic_unmask_level_irq, + .irq_set_affinity = iosapic_set_affinity }; /* @@ -435,9 +434,9 @@ static struct irq_chip irq_type_iosapic_level = { */ static unsigned int -iosapic_startup_edge_irq (unsigned int irq) +iosapic_startup_edge_irq (struct irq_data *data) { - unmask_irq(irq); + unmask_irq(data); /* * IOSAPIC simply drops interrupts pended while the * 
corresponding pin was masked, so we can't know if an @@ -447,37 +446,25 @@ iosapic_startup_edge_irq (unsigned int irq) } static void -iosapic_ack_edge_irq (unsigned int irq) +iosapic_ack_edge_irq (struct irq_data *data) { - irq_desc_t *idesc = irq_desc + irq; - - irq_complete_move(irq); - move_native_irq(irq); - /* - * Once we have recorded IRQ_PENDING already, we can mask the - * interrupt for real. This prevents IRQ storms from unhandled - * devices. - */ - if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == - (IRQ_PENDING|IRQ_DISABLED)) - mask_irq(irq); + irq_complete_move(data->irq); + irq_move_irq(data); } #define iosapic_enable_edge_irq unmask_irq #define iosapic_disable_edge_irq nop -#define iosapic_end_edge_irq nop static struct irq_chip irq_type_iosapic_edge = { - .name = "IO-SAPIC-edge", - .startup = iosapic_startup_edge_irq, - .shutdown = iosapic_disable_edge_irq, - .enable = iosapic_enable_edge_irq, - .disable = iosapic_disable_edge_irq, - .ack = iosapic_ack_edge_irq, - .end = iosapic_end_edge_irq, - .mask = mask_irq, - .unmask = unmask_irq, - .set_affinity = iosapic_set_affinity + .name = "IO-SAPIC-edge", + .irq_startup = iosapic_startup_edge_irq, + .irq_shutdown = iosapic_disable_edge_irq, + .irq_enable = iosapic_enable_edge_irq, + .irq_disable = iosapic_disable_edge_irq, + .irq_ack = iosapic_ack_edge_irq, + .irq_mask = mask_irq, + .irq_unmask = unmask_irq, + .irq_set_affinity = iosapic_set_affinity }; static unsigned int @@ -549,37 +536,6 @@ iosapic_reassign_vector (int irq) } } -static struct iosapic_rte_info * __init_refok iosapic_alloc_rte (void) -{ - int i; - struct iosapic_rte_info *rte; - int preallocated = 0; - - if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) { - rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * - NR_PREALLOCATE_RTE_ENTRIES); - for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++) - list_add(&rte->rte_list, &free_rte_list); - } - - if (!list_empty(&free_rte_list)) { - rte = list_entry(free_rte_list.next, struct iosapic_rte_info, - rte_list); - list_del(&rte->rte_list); - preallocated++; - } else { - rte = kmalloc(sizeof(struct iosapic_rte_info), GFP_ATOMIC); - if (!rte) - return NULL; - } - - memset(rte, 0, sizeof(struct iosapic_rte_info)); - if (preallocated) - rte->flags |= RTE_PREALLOCATED; - - return rte; -} - static inline int irq_is_shared (int irq) { return (iosapic_intr_info[irq].count > 1); @@ -598,8 +554,7 @@ static int register_intr (unsigned int gsi, int irq, unsigned char delivery, unsigned long polarity, unsigned long trigger) { - irq_desc_t *idesc; - struct hw_interrupt_type *irq_type; + struct irq_chip *chip, *irq_type; int index; struct iosapic_rte_info *rte; @@ -612,7 +567,7 @@ register_intr (unsigned int gsi, int irq, unsigned char delivery, rte = find_rte(irq, gsi); if (!rte) { - rte = iosapic_alloc_rte(); + rte = kzalloc(sizeof (*rte), GFP_ATOMIC); if (!rte) { printk(KERN_WARNING "%s: cannot allocate memory\n", __func__); @@ -646,15 +601,18 @@ register_intr (unsigned int gsi, int irq, unsigned char delivery, irq_type = iosapic_get_irq_chip(trigger); - idesc = irq_desc + irq; - if (irq_type != NULL && idesc->chip != irq_type) { - if (idesc->chip != &no_irq_type) + chip = irq_get_chip(irq); + if (irq_type != NULL && chip != irq_type) { + if (chip != &no_irq_chip) printk(KERN_WARNING "%s: changing vector %d from %s to %s\n", __func__, irq_to_vector(irq), - idesc->chip->name, irq_type->name); - idesc->chip = irq_type; + chip->name, irq_type->name); + chip = irq_type; } + __irq_set_chip_handler_name_locked(irq, chip, 
trigger == IOSAPIC_EDGE ? + handle_edge_irq : handle_level_irq, + NULL); return 0; } @@ -764,6 +722,7 @@ iosapic_register_intr (unsigned int gsi, struct iosapic_rte_info *rte; u32 low32; unsigned char dmode; + struct irq_desc *desc; /* * If this GSI has already been registered (i.e., it's a @@ -776,7 +735,7 @@ iosapic_register_intr (unsigned int gsi, rte = find_rte(irq, gsi); if(iosapic_intr_info[irq].count == 0) { assign_irq_vector(irq); - dynamic_irq_init(irq); + irq_init_desc(irq); } else if (rte->refcnt != NO_REF_RTE) { rte->refcnt++; goto unlock_iosapic_lock; @@ -791,12 +750,13 @@ iosapic_register_intr (unsigned int gsi, goto unlock_iosapic_lock; } - spin_lock(&irq_desc[irq].lock); + desc = irq_to_desc(irq); + raw_spin_lock(&desc->lock); dest = get_target_cpu(gsi, irq); dmode = choose_dmode(); err = register_intr(gsi, irq, dmode, polarity, trigger); if (err < 0) { - spin_unlock(&irq_desc[irq].lock); + raw_spin_unlock(&desc->lock); irq = err; goto unlock_iosapic_lock; } @@ -815,7 +775,7 @@ iosapic_register_intr (unsigned int gsi, (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), cpu_logical_id(dest), dest, irq_to_vector(irq)); - spin_unlock(&irq_desc[irq].lock); + raw_spin_unlock(&desc->lock); unlock_iosapic_lock: spin_unlock_irqrestore(&iosapic_lock, flags); return irq; @@ -826,7 +786,6 @@ iosapic_unregister_intr (unsigned int gsi) { unsigned long flags; int irq, index; - irq_desc_t *idesc; u32 low32; unsigned long trigger, polarity; unsigned int dest; @@ -856,7 +815,6 @@ iosapic_unregister_intr (unsigned int gsi) if (--rte->refcnt > 0) goto out; - idesc = irq_desc + irq; rte->refcnt = NO_REF_RTE; /* Mask the interrupt */ @@ -880,7 +838,7 @@ iosapic_unregister_intr (unsigned int gsi) if (iosapic_intr_info[irq].count == 0) { #ifdef CONFIG_SMP /* Clear affinity */ - cpus_setall(idesc->affinity); + cpumask_setall(irq_get_irq_data(irq)->affinity); #endif /* Clear the interrupt information */ iosapic_intr_info[irq].dest = 0; @@ -956,10 +914,8 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, /* * ACPI calls this when it finds an entry for a legacy ISA IRQ override. 
*/ -void __devinit -iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, - unsigned long polarity, - unsigned long trigger) +void iosapic_override_isa_irq(unsigned int isa_irq, unsigned int gsi, + unsigned long polarity, unsigned long trigger) { int vector, irq; unsigned int dest = cpu_physical_id(smp_processor_id()); @@ -1054,8 +1010,27 @@ iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver) return 0; } -int __devinit -iosapic_init (unsigned long phys_addr, unsigned int gsi_base) +static int +iosapic_delete_rte(unsigned int irq, unsigned int gsi) +{ + struct iosapic_rte_info *rte, *temp; + + list_for_each_entry_safe(rte, temp, &iosapic_intr_info[irq].rtes, + rte_list) { + if (rte->iosapic->gsi_base + rte->rte_index == gsi) { + if (rte->refcnt) + return -EBUSY; + + list_del(&rte->rte_list); + kfree(rte); + return 0; + } + } + + return -EINVAL; +} + +int iosapic_init(unsigned long phys_addr, unsigned int gsi_base) { int num_rte, err, index; unsigned int isa_irq, ver; @@ -1070,6 +1045,10 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base) } addr = ioremap(phys_addr, 0); + if (addr == NULL) { + spin_unlock_irqrestore(&iosapic_lock, flags); + return -ENOMEM; + } ver = iosapic_version(addr); if ((err = iosapic_check_gsi_range(gsi_base, ver))) { iounmap(addr); @@ -1108,11 +1087,9 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base) return 0; } -#ifdef CONFIG_HOTPLUG -int -iosapic_remove (unsigned int gsi_base) +int iosapic_remove(unsigned int gsi_base) { - int index, err = 0; + int i, irq, index, err = 0; unsigned long flags; spin_lock_irqsave(&iosapic_lock, flags); @@ -1130,17 +1107,25 @@ iosapic_remove (unsigned int gsi_base) goto out; } + for (i = gsi_base; i < gsi_base + iosapic_lists[index].num_rte; i++) { + irq = __gsi_to_irq(i); + if (irq < 0) + continue; + + err = iosapic_delete_rte(irq, i); + if (err) + goto out; + } + iounmap(iosapic_lists[index].addr); iosapic_free(index); out: spin_unlock_irqrestore(&iosapic_lock, flags); return err; } -#endif /* CONFIG_HOTPLUG */ #ifdef CONFIG_NUMA -void __devinit -map_iosapic_to_node(unsigned int gsi_base, int node) +void map_iosapic_to_node(unsigned int gsi_base, int node) { int index; @@ -1154,10 +1139,3 @@ map_iosapic_to_node(unsigned int gsi_base, int node) return; } #endif - -static int __init iosapic_enable_kmalloc (void) -{ - iosapic_kmalloc_ok = 1; - return 0; -} -core_initcall (iosapic_enable_kmalloc); diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index a58f64ca9f0..f2c41828113 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -23,6 +23,8 @@ #include <linux/interrupt.h> #include <linux/kernel_stat.h> +#include <asm/mca.h> + /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves. 
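The irq.c hunks below, like the iosapic.c, irq_lsapic.c and msi_ia64.c changes elsewhere in this series, follow the generic irqchip conversion: chip callbacks now take a struct irq_data * instead of a bare irq number, the methods gain an irq_ prefix, and per-irq state is reached through accessors such as irq_get_irq_data() rather than indexing the irq_desc[] array directly. For reference, a minimal sketch of a chip in the new style (not part of this diff; the example_* names are hypothetical):

#include <linux/irq.h>

static void example_mask(struct irq_data *data)
{
	/* hardware-specific masking, keyed off data->irq, would go here */
}

static void example_unmask(struct irq_data *data)
{
	/* hardware-specific unmasking would go here */
}

static int example_set_affinity(struct irq_data *data,
				const struct cpumask *mask, bool force)
{
	/* route data->irq to a CPU in *mask; 0 on success, negative on error */
	return 0;
}

static struct irq_chip example_chip = {
	.name             = "EXAMPLE",
	.irq_mask         = example_mask,
	.irq_unmask       = example_unmask,
	.irq_set_affinity = example_set_affinity,
};

A chip like this is attached with irq_set_chip_and_handler(irq, &example_chip, handle_level_irq), the same call the MSI code switches to in this series.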
@@ -53,47 +55,9 @@ atomic_t irq_err_count; /* * /proc/interrupts printing: */ - -int show_interrupts(struct seq_file *p, void *v) +int arch_show_interrupts(struct seq_file *p, int prec) { - int i = *(loff_t *) v, j; - struct irqaction * action; - unsigned long flags; - - if (i == 0) { - char cpuname[16]; - seq_printf(p, " "); - for_each_online_cpu(j) { - snprintf(cpuname, 10, "CPU%d", j); - seq_printf(p, "%10s ", cpuname); - } - seq_putc(p, '\n'); - } - - if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); - action = irq_desc[i].action; - if (!action) - goto skip; - seq_printf(p, "%3d: ",i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(j) { - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); - } -#endif - seq_printf(p, " %14s", irq_desc[i].chip->name); - seq_printf(p, " %s", action->name); - - for (action=action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); - - seq_putc(p, '\n'); -skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); - } else if (i == NR_IRQS) - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); + seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); return 0; } @@ -103,7 +67,7 @@ static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 }; void set_irq_affinity_info (unsigned int irq, int hwid, int redir) { if (irq < NR_IRQS) { - cpumask_copy(&irq_desc[irq].affinity, + cpumask_copy(irq_get_irq_data(irq)->affinity, cpumask_of(cpu_logical_id(hwid))); irq_redir[irq] = (char) (redir & 0xff); } @@ -121,6 +85,12 @@ bool is_affinity_mask_valid(const struct cpumask *cpumask) #endif /* CONFIG_SMP */ +int __init arch_early_irq_init(void) +{ + ia64_mca_irq_init(); + return 0; +} + #ifdef CONFIG_HOTPLUG_CPU unsigned int vectors_in_migration[NR_IRQS]; @@ -130,13 +100,14 @@ unsigned int vectors_in_migration[NR_IRQS]; */ static void migrate_irqs(void) { - irq_desc_t *desc; int irq, new_cpu; for (irq=0; irq < NR_IRQS; irq++) { - desc = irq_desc + irq; + struct irq_desc *desc = irq_to_desc(irq); + struct irq_data *data = irq_desc_get_irq_data(desc); + struct irq_chip *chip = irq_data_get_irq_chip(data); - if (desc->status == IRQ_DISABLED) + if (irqd_irq_disabled(data)) continue; /* @@ -145,10 +116,10 @@ static void migrate_irqs(void) * tell CPU not to respond to these local intr sources. * such as ITV,CPEI,MCA etc. */ - if (desc->status == IRQ_PER_CPU) + if (irqd_is_per_cpu(data)) continue; - if (cpumask_any_and(&irq_desc[irq].affinity, cpu_online_mask) + if (cpumask_any_and(data->affinity, cpu_online_mask) >= nr_cpu_ids) { /* * Save it for phase 2 processing @@ -160,16 +131,16 @@ static void migrate_irqs(void) /* * All three are essential, currently WARN_ON.. maybe panic?
*/ - if (desc->chip && desc->chip->disable && - desc->chip->enable && desc->chip->set_affinity) { - desc->chip->disable(irq); - desc->chip->set_affinity(irq, - cpumask_of(new_cpu)); - desc->chip->enable(irq); + if (chip && chip->irq_disable && + chip->irq_enable && chip->irq_set_affinity) { + chip->irq_disable(data); + chip->irq_set_affinity(data, + cpumask_of(new_cpu), false); + chip->irq_enable(data); } else { - WARN_ON((!(desc->chip) || !(desc->chip->disable) || - !(desc->chip->enable) || - !(desc->chip->set_affinity))); + WARN_ON((!chip || !chip->irq_disable || + !chip->irq_enable || + !chip->irq_set_affinity)); } } } diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 28d3d483db9..03ea78ed64a 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -22,14 +22,15 @@ #include <linux/interrupt.h> #include <linux/ioport.h> #include <linux/kernel_stat.h> -#include <linux/slab.h> #include <linux/ptrace.h> -#include <linux/random.h> /* for rand_initialize_irq() */ #include <linux/signal.h> #include <linux/smp.h> #include <linux/threads.h> #include <linux/bitops.h> #include <linux/irq.h> +#include <linux/ratelimit.h> +#include <linux/acpi.h> +#include <linux/sched.h> #include <asm/delay.h> #include <asm/intrinsics.h> @@ -37,7 +38,6 @@ #include <asm/hw_irq.h> #include <asm/machvec.h> #include <asm/pgtable.h> -#include <asm/system.h> #include <asm/tlbflush.h> #ifdef CONFIG_PERFMON @@ -93,14 +93,6 @@ static int irq_status[NR_IRQS] = { [0 ... NR_IRQS -1] = IRQ_UNUSED }; -int check_irq_used(int irq) -{ - if (irq_status[irq] == IRQ_USED) - return 1; - - return -1; -} - static inline int find_unassigned_irq(void) { int irq; @@ -116,7 +108,7 @@ static inline int find_unassigned_vector(cpumask_t domain) cpumask_t mask; int pos, vector; - cpus_and(mask, domain, cpu_online_map); + cpumask_and(&mask, &domain, cpu_online_mask); if (cpus_empty(mask)) return -EINVAL; @@ -139,7 +131,7 @@ static int __bind_irq_vector(int irq, int vector, cpumask_t domain) BUG_ON((unsigned)irq >= NR_IRQS); BUG_ON((unsigned)vector >= IA64_NUM_VECTORS); - cpus_and(mask, domain, cpu_online_map); + cpumask_and(&mask, &domain, cpu_online_mask); if (cpus_empty(mask)) return -EINVAL; if ((cfg->vector == vector) && cpus_equal(cfg->domain, domain)) @@ -177,7 +169,7 @@ static void __clear_irq_vector(int irq) BUG_ON(cfg->vector == IRQ_VECTOR_UNASSIGNED); vector = cfg->vector; domain = cfg->domain; - cpus_and(mask, cfg->domain, cpu_online_map); + cpumask_and(&mask, &cfg->domain, cpu_online_mask); for_each_cpu_mask(cpu, mask) per_cpu(vector_irq, cpu)[vector] = -1; cfg->vector = IRQ_VECTOR_UNASSIGNED; @@ -260,7 +252,6 @@ void __setup_vector_irq(int cpu) } #if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG)) -#define IA64_IRQ_MOVE_VECTOR IA64_DEF_FIRST_DEVICE_VECTOR static enum vector_domain_type { VECTOR_DOMAIN_NONE, @@ -321,7 +312,7 @@ void irq_complete_move(unsigned irq) if (unlikely(cpu_isset(smp_processor_id(), cfg->old_domain))) return; - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cpumask_and(&cleanup_mask, &cfg->old_domain, cpu_online_mask); cfg->move_cleanup_count = cpus_weight(cleanup_mask); for_each_cpu_mask(i, cleanup_mask) platform_send_ipi(i, IA64_IRQ_MOVE_VECTOR, IA64_IPI_DM_INT, 0); @@ -343,9 +334,9 @@ static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id) if (irq < 0) continue; - desc = irq_desc + irq; + desc = irq_to_desc(irq); cfg = irq_cfg + irq; - spin_lock(&desc->lock); + raw_spin_lock(&desc->lock); if 
(!cfg->move_cleanup_count) goto unlock; @@ -358,14 +349,13 @@ static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&vector_lock, flags); cfg->move_cleanup_count--; unlock: - spin_unlock(&desc->lock); + raw_spin_unlock(&desc->lock); } return IRQ_HANDLED; } static struct irqaction irq_move_irqaction = { .handler = smp_irq_move_cleanup_interrupt, - .flags = IRQF_DISABLED, .name = "irq_move" }; @@ -392,8 +382,7 @@ void destroy_and_reserve_irq(unsigned int irq) { unsigned long flags; - dynamic_irq_cleanup(irq); - + irq_init_desc(irq); spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq); irq_status[irq] = IRQ_RSVD; @@ -426,13 +415,13 @@ int create_irq(void) out: spin_unlock_irqrestore(&vector_lock, flags); if (irq >= 0) - dynamic_irq_init(irq); + irq_init_desc(irq); return irq; } void destroy_irq(unsigned int irq) { - dynamic_irq_cleanup(irq); + irq_init_desc(irq); clear_irq_vector(irq); } @@ -469,13 +458,9 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) sp = ia64_getreg(_IA64_REG_SP); if ((sp - bsp) < 1024) { - static unsigned char count; - static long last_time; + static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5); - if (time_after(jiffies, last_time + 5 * HZ)) - count = 0; - if (++count < 5) { - last_time = jiffies; + if (__ratelimit(&ratelimit)) { printk("ia64_handle_irq: DANGER: less than " "1KB of free stack space!!\n" "(bsp=0x%lx, sp=%lx)\n", bsp, sp); @@ -493,14 +478,15 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); ia64_srlz_d(); while (vector != IA64_SPURIOUS_INT_VECTOR) { + int irq = local_vector_to_irq(vector); + if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { smp_local_flush_tlb(); - kstat_this_cpu.irqs[vector]++; - } else if (unlikely(IS_RESCHEDULE(vector))) - kstat_this_cpu.irqs[vector]++; - else { - int irq = local_vector_to_irq(vector); - + kstat_incr_irq_this_cpu(irq); + } else if (unlikely(IS_RESCHEDULE(vector))) { + scheduler_ipi(); + kstat_incr_irq_this_cpu(irq); + } else { ia64_setreg(_IA64_REG_CR_TPR, vector); ia64_srlz_d(); @@ -543,22 +529,23 @@ void ia64_process_pending_intr(void) vector = ia64_get_ivr(); - irq_enter(); - saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); - ia64_srlz_d(); + irq_enter(); + saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); + ia64_srlz_d(); /* * Perform normal interrupt style processing */ while (vector != IA64_SPURIOUS_INT_VECTOR) { + int irq = local_vector_to_irq(vector); + if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { smp_local_flush_tlb(); - kstat_this_cpu.irqs[vector]++; - } else if (unlikely(IS_RESCHEDULE(vector))) - kstat_this_cpu.irqs[vector]++; - else { + kstat_incr_irq_this_cpu(irq); + } else if (unlikely(IS_RESCHEDULE(vector))) { + kstat_incr_irq_this_cpu(irq); + } else { struct pt_regs *old_regs = set_irq_regs(NULL); - int irq = local_vector_to_irq(vector); ia64_setreg(_IA64_REG_CR_TPR, vector); ia64_srlz_d(); @@ -603,19 +590,19 @@ static irqreturn_t dummy_handler (int irq, void *dev_id) static struct irqaction ipi_irqaction = { .handler = handle_IPI, - .flags = IRQF_DISABLED, .name = "IPI" }; +/* + * KVM uses this interrupt to force a cpu out of guest mode + */ static struct irqaction resched_irqaction = { .handler = dummy_handler, - .flags = IRQF_DISABLED, .name = "resched" }; static struct irqaction tlb_irqaction = { .handler = dummy_handler, - .flags = IRQF_DISABLED, .name = "tlb_flush" }; @@ -624,16 +611,15 @@ static struct irqaction tlb_irqaction = { void ia64_native_register_percpu_irq (ia64_vector vec, struct 
irqaction *action) { - irq_desc_t *desc; unsigned int irq; irq = vec; BUG_ON(bind_irq_vector(irq, vec, CPU_MASK_ALL)); - desc = irq_desc + irq; - desc->status |= IRQ_PER_CPU; - desc->chip = &irq_type_ia64_lsapic; + irq_set_status_flags(irq, IRQ_PER_CPU); + irq_set_chip(irq, &irq_type_ia64_lsapic); if (action) setup_irq(irq, action); + irq_set_handler(irq, handle_percpu_irq); } void __init @@ -649,15 +635,15 @@ ia64_native_register_ipi(void) void __init init_IRQ (void) { +#ifdef CONFIG_ACPI + acpi_boot_init(); +#endif ia64_register_ipi(); register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL); #ifdef CONFIG_SMP #if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG) - if (vector_domain_type != VECTOR_DOMAIN_NONE) { - BUG_ON(IA64_FIRST_DEVICE_VECTOR != IA64_IRQ_MOVE_VECTOR); - IA64_FIRST_DEVICE_VECTOR++; + if (vector_domain_type != VECTOR_DOMAIN_NONE) register_percpu_irq(IA64_IRQ_MOVE_VECTOR, &irq_move_irqaction); - } #endif #endif #ifdef CONFIG_PERFMON diff --git a/arch/ia64/kernel/irq_lsapic.c b/arch/ia64/kernel/irq_lsapic.c index e56a7a36aca..1b3a776e516 100644 --- a/arch/ia64/kernel/irq_lsapic.c +++ b/arch/ia64/kernel/irq_lsapic.c @@ -15,31 +15,30 @@ #include <linux/irq.h> static unsigned int -lsapic_noop_startup (unsigned int irq) +lsapic_noop_startup (struct irq_data *data) { return 0; } static void -lsapic_noop (unsigned int irq) +lsapic_noop (struct irq_data *data) { /* nothing to do... */ } -static int lsapic_retrigger(unsigned int irq) +static int lsapic_retrigger(struct irq_data *data) { - ia64_resend_irq(irq); + ia64_resend_irq(data->irq); return 1; } -struct hw_interrupt_type irq_type_ia64_lsapic = { - .name = "LSAPIC", - .startup = lsapic_noop_startup, - .shutdown = lsapic_noop, - .enable = lsapic_noop, - .disable = lsapic_noop, - .ack = lsapic_noop, - .end = lsapic_noop, - .retrigger = lsapic_retrigger, +struct irq_chip irq_type_ia64_lsapic = { + .name = "LSAPIC", + .irq_startup = lsapic_noop_startup, + .irq_shutdown = lsapic_noop, + .irq_enable = lsapic_noop, + .irq_disable = lsapic_noop, + .irq_ack = lsapic_noop, + .irq_retrigger = lsapic_retrigger, }; diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index f675d8e3385..18e794a5724 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -49,18 +49,16 @@ #include <asm/asmmacro.h> #include <asm/break.h> -#include <asm/ia32.h> #include <asm/kregs.h> #include <asm/asm-offsets.h> #include <asm/pgtable.h> #include <asm/processor.h> #include <asm/ptrace.h> -#include <asm/system.h> #include <asm/thread_info.h> #include <asm/unistd.h> #include <asm/errno.h> -#if 1 +#if 0 # define PSR_DEFAULT_BITS psr.ac #else # define PSR_DEFAULT_BITS 0 @@ -83,7 +81,7 @@ mov r19=n;; /* prepare to save predicates */ \ br.sptk.many dispatch_to_fault_handler - .section .text.ivt,"ax" + .section .text..ivt,"ax" .align 32768 // align on 32KB boundary .global ia64_ivt @@ -786,7 +784,7 @@ ENTRY(break_fault) (p8) adds r28=16,r28 // A switch cr.iip to next bundle (p9) adds r8=1,r8 // A increment ei to next slot -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE ;; mov b6=r30 // I0 setup syscall handler branch reg early #else @@ -803,8 +801,8 @@ ENTRY(break_fault) // /////////////////////////////////////////////////////////////////////// st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - mov.m r30=ar.itc // M get cycle for accounting +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + MOV_FROM_ITC(p0, p14, r30, r18) // M get cycle for accounting #else mov b6=r30 // I0 setup 
syscall handler branch reg early #endif @@ -819,7 +817,7 @@ ENTRY(break_fault) cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited? br.call.sptk.many b7=ia64_syscall_setup // B 1: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE // mov.m r30=ar.itc is called in advance, and r13 is current add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A @@ -1045,7 +1043,7 @@ END(ia64_syscall_setup) DBG_FAULT(16) FAULT(16) -#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE) +#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE) /* * There is no particular reason for this code to be here, other than * that there happens to be space here that would go unused otherwise. @@ -1386,28 +1384,6 @@ END(ia32_exception) // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) ENTRY(ia32_intercept) DBG_FAULT(46) -#ifdef CONFIG_IA32_SUPPORT - mov r31=pr - MOV_FROM_ISR(r16) - ;; - extr.u r17=r16,16,8 // get ISR.code - mov r18=ar.eflag - MOV_FROM_IIM(r19) // old eflag value - ;; - cmp.ne p6,p0=2,r17 -(p6) br.cond.spnt 1f // not a system flag fault - xor r16=r18,r19 - ;; - extr.u r17=r16,18,1 // get the eflags.ac bit - ;; - cmp.eq p6,p0=0,r17 -(p6) br.cond.spnt 1f // eflags.ac bit didn't change - ;; - mov pr=r31,-1 // restore predicate registers - RFI - -1: -#endif // CONFIG_IA32_SUPPORT FAULT(46) END(ia32_intercept) @@ -1416,12 +1392,7 @@ END(ia32_intercept) // 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) ENTRY(ia32_interrupt) DBG_FAULT(47) -#ifdef CONFIG_IA32_SUPPORT - mov r31=pr - br.sptk.many dispatch_to_ia32_handler -#else FAULT(47) -#endif END(ia32_interrupt) .org ia64_ivt+0x6c00 @@ -1715,89 +1686,3 @@ ENTRY(dispatch_illegal_op_fault) (p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel br.sptk.many ia64_leave_kernel END(dispatch_illegal_op_fault) - -#ifdef CONFIG_IA32_SUPPORT - - /* - * There is no particular reason for this code to be here, other than that - * there happens to be space here that would go unused otherwise. If this - * fault ever gets "unreserved", simply moved the following code to a more - * suitable spot... 
- */ - - // IA32 interrupt entry point - -ENTRY(dispatch_to_ia32_handler) - SAVE_MIN - ;; - MOV_FROM_ISR(r14) - SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) - // guarantee that interruption collection is on - ;; - SSM_PSR_I(p15, p15, r3) - adds r3=8,r2 // Base pointer for SAVE_REST - ;; - SAVE_REST - ;; - mov r15=0x80 - shr r14=r14,16 // Get interrupt number - ;; - cmp.ne p6,p0=r14,r15 -(p6) br.call.dpnt.many b6=non_ia32_syscall - - adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions - adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp - ;; - cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 - ld8 r8=[r14] // get r8 - ;; - st8 [r15]=r8 // save original EAX in r1 (IA32 procs don't use the GP) - ;; - alloc r15=ar.pfs,0,0,6,0 // must first in an insn group - ;; - ld4 r8=[r14],8 // r8 == eax (syscall number) - mov r15=IA32_NR_syscalls - ;; - cmp.ltu.unc p6,p7=r8,r15 - ld4 out1=[r14],8 // r9 == ecx - ;; - ld4 out2=[r14],8 // r10 == edx - ;; - ld4 out0=[r14] // r11 == ebx - adds r14=(IA64_PT_REGS_R13_OFFSET) + 16,sp - ;; - ld4 out5=[r14],PT(R14)-PT(R13) // r13 == ebp - ;; - ld4 out3=[r14],PT(R15)-PT(R14) // r14 == esi - adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 - ;; - ld4 out4=[r14] // r15 == edi - movl r16=ia32_syscall_table - ;; -(p6) shladd r16=r8,3,r16 // force ni_syscall if not valid syscall number - ld4 r2=[r2] // r2 = current_thread_info()->flags - ;; - ld8 r16=[r16] - and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit - ;; - mov b6=r16 - movl r15=ia32_ret_from_syscall - cmp.eq p8,p0=r2,r0 - ;; - mov rp=r15 -(p8) br.call.sptk.many b6=b6 - br.cond.sptk ia32_trace_syscall - -non_ia32_syscall: - alloc r15=ar.pfs,0,0,2,0 - mov out0=r14 // interrupt # - add out1=16,sp // pointer to pt_regs - ;; // avoid WAW on CFM - br.call.sptk.many rp=ia32_bad_interrupt -.ret1: movl r15=ia64_leave_kernel - ;; - mov rp=r15 - br.ret.sptk.many rp -END(dispatch_to_ia32_handler) - -#endif /* CONFIG_IA32_SUPPORT */ diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 9adac441ac9..074fde49c9e 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -423,7 +423,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = ((struct fnptr *)kretprobe_trampoline)->ip; @@ -444,7 +444,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * real return address, and all the rest will point to * kretprobe_trampoline */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -461,7 +461,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) regs->cr_iip = orig_ret_address; - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -487,7 +487,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); preempt_enable_no_resched(); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } @@ -870,7 +870,7 @@ static int __kprobes pre_kprobes_handler(struct 
die_args *args) return 1; ss_probe: -#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER) +#if !defined(CONFIG_PREEMPT) if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) { /* Boost up -- we can execute copied instructions directly */ ia64_psr(regs)->ri = p->ainsn.slot; @@ -947,7 +947,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) case KPROBE_HIT_SSDONE: /* * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accouting + * we can also use npre/npostfault count for accounting * these specific fault cases. */ kprobes_inc_nmissed_count(cur); diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c index 0823de1f6eb..5151a649c96 100644 --- a/arch/ia64/kernel/machine_kexec.c +++ b/arch/ia64/kernel/machine_kexec.c @@ -24,12 +24,14 @@ #include <asm/delay.h> #include <asm/meminit.h> #include <asm/processor.h> +#include <asm/sal.h> +#include <asm/mca.h> -typedef NORET_TYPE void (*relocate_new_kernel_t)( +typedef void (*relocate_new_kernel_t)( unsigned long indirection_page, unsigned long start_address, struct ia64_boot_param *boot_param, - unsigned long pal_addr) ATTRIB_NORET; + unsigned long pal_addr) __noreturn; struct kimage *ia64_kimage; @@ -83,15 +85,29 @@ static void ia64_machine_kexec(struct unw_frame_info *info, void *arg) struct kimage *image = arg; relocate_new_kernel_t rnk; void *pal_addr = efi_get_pal_addr(); - unsigned long code_addr = (unsigned long)page_address(image->control_code_page); + unsigned long code_addr; int ii; + u64 fp, gp; + ia64_fptr_t *init_handler = (ia64_fptr_t *)ia64_os_init_on_kdump; BUG_ON(!image); + code_addr = (unsigned long)page_address(image->control_code_page); if (image->type == KEXEC_TYPE_CRASH) { crash_save_this_cpu(); current->thread.ksp = (__u64)info->sw - 16; + + /* Register noop init handler */ + fp = ia64_tpa(init_handler->fp); + gp = ia64_tpa(ia64_getreg(_IA64_REG_GP)); + ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, fp, gp, 0, fp, gp, 0); + } else { + /* Unregister init handlers of current kernel */ + ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, 0, 0, 0, 0, 0, 0); } + /* Unregister mca handler - No more recovery on current kernel */ + ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, 0, 0, 0, 0, 0, 0); + /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); @@ -142,7 +158,7 @@ void arch_crash_save_vmcoreinfo(void) #endif #ifdef CONFIG_PGTABLE_3 VMCOREINFO_CONFIG(PGTABLE_3); -#elif CONFIG_PGTABLE_4 +#elif defined(CONFIG_PGTABLE_4) VMCOREINFO_CONFIG(PGTABLE_4); #endif } diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c index 7ccb228ceed..f5a1e5246b3 100644 --- a/arch/ia64/kernel/machvec.c +++ b/arch/ia64/kernel/machvec.c @@ -1,7 +1,6 @@ #include <linux/module.h> - +#include <linux/dma-mapping.h> #include <asm/machvec.h> -#include <asm/system.h> #ifdef CONFIG_IA64_GENERIC @@ -75,14 +74,16 @@ machvec_timer_interrupt (int irq, void *dev_id) EXPORT_SYMBOL(machvec_timer_interrupt); void -machvec_dma_sync_single (struct device *hwdev, dma_addr_t dma_handle, size_t size, int dir) +machvec_dma_sync_single(struct device *hwdev, dma_addr_t dma_handle, size_t size, + enum dma_data_direction dir) { mb(); } EXPORT_SYMBOL(machvec_dma_sync_single); void -machvec_dma_sync_sg (struct device *hwdev, struct scatterlist *sg, int n, int dir) +machvec_dma_sync_sg(struct device *hwdev, struct scatterlist *sg, int n, + enum dma_data_direction dir) { mb(); } diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index bab1de2d2f6..db7b36bb068 
100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -85,13 +85,13 @@ #include <linux/cpumask.h> #include <linux/kdebug.h> #include <linux/cpu.h> +#include <linux/gfp.h> #include <asm/delay.h> #include <asm/machvec.h> #include <asm/meminit.h> #include <asm/page.h> #include <asm/ptrace.h> -#include <asm/system.h> #include <asm/sal.h> #include <asm/mca.h> #include <asm/kexec.h> @@ -217,7 +217,7 @@ void ia64_mca_printk(const char *fmt, ...) /* Copy the output into mlogbuf */ if (oops_in_progress) { /* mlogbuf was abandoned, use printk directly instead. */ - printk(temp_buf); + printk("%s", temp_buf); } else { spin_lock(&mlogbuf_wlock); for (p = temp_buf; *p; p++) { @@ -268,7 +268,7 @@ void ia64_mlogbuf_dump(void) } *p = '\0'; if (temp_buf[0]) - printk(temp_buf); + printk("%s", temp_buf); mlogbuf_start = index; mlogbuf_timestamp = 0; @@ -581,6 +581,8 @@ out: /* Get the CPE error record and log it */ ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE); + local_irq_disable(); + return IRQ_HANDLED; } @@ -629,7 +631,7 @@ ia64_mca_register_cpev (int cpev) * Outputs * None */ -void __cpuinit +void ia64_mca_cmc_vector_setup (void) { cmcv_reg_t cmcv; @@ -850,7 +852,7 @@ EXPORT_SYMBOL(ia64_unreg_MCA_extension); static inline void -copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat) +copy_reg(const u64 *fr, u64 fnat, unsigned long *tr, unsigned long *tnat) { u64 fslot, tslot, nat; *tr = *fr; @@ -887,6 +889,65 @@ ia64_mca_modify_comm(const struct task_struct *previous_current) memcpy(current->comm, comm, sizeof(current->comm)); } +static void +finish_pt_regs(struct pt_regs *regs, struct ia64_sal_os_state *sos, + unsigned long *nat) +{ + const pal_min_state_area_t *ms = sos->pal_min_state; + const u64 *bank; + + /* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use + * pmsa_{xip,xpsr,xfs} + */ + if (ia64_psr(regs)->ic) { + regs->cr_iip = ms->pmsa_iip; + regs->cr_ipsr = ms->pmsa_ipsr; + regs->cr_ifs = ms->pmsa_ifs; + } else { + regs->cr_iip = ms->pmsa_xip; + regs->cr_ipsr = ms->pmsa_xpsr; + regs->cr_ifs = ms->pmsa_xfs; + + sos->iip = ms->pmsa_iip; + sos->ipsr = ms->pmsa_ipsr; + sos->ifs = ms->pmsa_ifs; + } + regs->pr = ms->pmsa_pr; + regs->b0 = ms->pmsa_br0; + regs->ar_rsc = ms->pmsa_rsc; + copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, ®s->r1, nat); + copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, ®s->r2, nat); + copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, ®s->r3, nat); + copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, ®s->r8, nat); + copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, ®s->r9, nat); + copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, ®s->r10, nat); + copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, ®s->r11, nat); + copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, ®s->r12, nat); + copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, ®s->r13, nat); + copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, ®s->r14, nat); + copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, ®s->r15, nat); + if (ia64_psr(regs)->bn) + bank = ms->pmsa_bank1_gr; + else + bank = ms->pmsa_bank0_gr; + copy_reg(&bank[16-16], ms->pmsa_nat_bits, ®s->r16, nat); + copy_reg(&bank[17-16], ms->pmsa_nat_bits, ®s->r17, nat); + copy_reg(&bank[18-16], ms->pmsa_nat_bits, ®s->r18, nat); + copy_reg(&bank[19-16], ms->pmsa_nat_bits, ®s->r19, nat); + copy_reg(&bank[20-16], ms->pmsa_nat_bits, ®s->r20, nat); + copy_reg(&bank[21-16], ms->pmsa_nat_bits, ®s->r21, nat); + copy_reg(&bank[22-16], ms->pmsa_nat_bits, ®s->r22, nat); + copy_reg(&bank[23-16], ms->pmsa_nat_bits, ®s->r23, nat); + copy_reg(&bank[24-16], ms->pmsa_nat_bits, ®s->r24, nat); + 
copy_reg(&bank[25-16], ms->pmsa_nat_bits, ®s->r25, nat); + copy_reg(&bank[26-16], ms->pmsa_nat_bits, ®s->r26, nat); + copy_reg(&bank[27-16], ms->pmsa_nat_bits, ®s->r27, nat); + copy_reg(&bank[28-16], ms->pmsa_nat_bits, ®s->r28, nat); + copy_reg(&bank[29-16], ms->pmsa_nat_bits, ®s->r29, nat); + copy_reg(&bank[30-16], ms->pmsa_nat_bits, ®s->r30, nat); + copy_reg(&bank[31-16], ms->pmsa_nat_bits, ®s->r31, nat); +} + /* On entry to this routine, we are running on the per cpu stack, see * mca_asm.h. The original stack has not been touched by this event. Some of * the original stack's registers will be in the RBS on this stack. This stack @@ -914,14 +975,13 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, struct switch_stack *old_sw; unsigned size = sizeof(struct pt_regs) + sizeof(struct switch_stack) + 16; - u64 *old_bspstore, *old_bsp; - u64 *new_bspstore, *new_bsp; - u64 old_unat, old_rnat, new_rnat, nat; + unsigned long *old_bspstore, *old_bsp; + unsigned long *new_bspstore, *new_bsp; + unsigned long old_unat, old_rnat, new_rnat, nat; u64 slots, loadrs = regs->loadrs; u64 r12 = ms->pmsa_gr[12-1], r13 = ms->pmsa_gr[13-1]; u64 ar_bspstore = regs->ar_bspstore; u64 ar_bsp = regs->ar_bspstore + (loadrs >> 16); - const u64 *bank; const char *msg; int cpu = smp_processor_id(); @@ -968,10 +1028,10 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, * loadrs for the new stack and save it in the new pt_regs, where * ia64_old_stack() can get it. */ - old_bspstore = (u64 *)ar_bspstore; - old_bsp = (u64 *)ar_bsp; + old_bspstore = (unsigned long *)ar_bspstore; + old_bsp = (unsigned long *)ar_bsp; slots = ia64_rse_num_regs(old_bspstore, old_bsp); - new_bspstore = (u64 *)((u64)current + IA64_RBS_OFFSET); + new_bspstore = (unsigned long *)((u64)current + IA64_RBS_OFFSET); new_bsp = ia64_rse_skip_regs(new_bspstore, slots); regs->loadrs = (new_bsp - new_bspstore) * 8 << 16; @@ -1024,54 +1084,9 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, p = (char *)r12 - sizeof(*regs); old_regs = (struct pt_regs *)p; memcpy(old_regs, regs, sizeof(*regs)); - /* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use - * pmsa_{xip,xpsr,xfs} - */ - if (ia64_psr(regs)->ic) { - old_regs->cr_iip = ms->pmsa_iip; - old_regs->cr_ipsr = ms->pmsa_ipsr; - old_regs->cr_ifs = ms->pmsa_ifs; - } else { - old_regs->cr_iip = ms->pmsa_xip; - old_regs->cr_ipsr = ms->pmsa_xpsr; - old_regs->cr_ifs = ms->pmsa_xfs; - } - old_regs->pr = ms->pmsa_pr; - old_regs->b0 = ms->pmsa_br0; old_regs->loadrs = loadrs; - old_regs->ar_rsc = ms->pmsa_rsc; old_unat = old_regs->ar_unat; - copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, &old_regs->r1, &old_unat); - copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, &old_regs->r2, &old_unat); - copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, &old_regs->r3, &old_unat); - copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, &old_regs->r8, &old_unat); - copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, &old_regs->r9, &old_unat); - copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, &old_regs->r10, &old_unat); - copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, &old_regs->r11, &old_unat); - copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, &old_regs->r12, &old_unat); - copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, &old_regs->r13, &old_unat); - copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, &old_regs->r14, &old_unat); - copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, &old_regs->r15, &old_unat); - if (ia64_psr(old_regs)->bn) - bank = ms->pmsa_bank1_gr; - else - bank = ms->pmsa_bank0_gr; - copy_reg(&bank[16-16], ms->pmsa_nat_bits, 
&old_regs->r16, &old_unat); - copy_reg(&bank[17-16], ms->pmsa_nat_bits, &old_regs->r17, &old_unat); - copy_reg(&bank[18-16], ms->pmsa_nat_bits, &old_regs->r18, &old_unat); - copy_reg(&bank[19-16], ms->pmsa_nat_bits, &old_regs->r19, &old_unat); - copy_reg(&bank[20-16], ms->pmsa_nat_bits, &old_regs->r20, &old_unat); - copy_reg(&bank[21-16], ms->pmsa_nat_bits, &old_regs->r21, &old_unat); - copy_reg(&bank[22-16], ms->pmsa_nat_bits, &old_regs->r22, &old_unat); - copy_reg(&bank[23-16], ms->pmsa_nat_bits, &old_regs->r23, &old_unat); - copy_reg(&bank[24-16], ms->pmsa_nat_bits, &old_regs->r24, &old_unat); - copy_reg(&bank[25-16], ms->pmsa_nat_bits, &old_regs->r25, &old_unat); - copy_reg(&bank[26-16], ms->pmsa_nat_bits, &old_regs->r26, &old_unat); - copy_reg(&bank[27-16], ms->pmsa_nat_bits, &old_regs->r27, &old_unat); - copy_reg(&bank[28-16], ms->pmsa_nat_bits, &old_regs->r28, &old_unat); - copy_reg(&bank[29-16], ms->pmsa_nat_bits, &old_regs->r29, &old_unat); - copy_reg(&bank[30-16], ms->pmsa_nat_bits, &old_regs->r30, &old_unat); - copy_reg(&bank[31-16], ms->pmsa_nat_bits, &old_regs->r31, &old_unat); + finish_pt_regs(old_regs, sos, &old_unat); /* Next stack a struct switch_stack. mca_asm.S built a partial * switch_stack, copy it and fill in the blanks using pt_regs and @@ -1141,6 +1156,8 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, no_mod: mprintk(KERN_INFO "cpu %d, %s %s, original stack not modified\n", smp_processor_id(), type, msg); + old_unat = regs->ar_unat; + finish_pt_regs(regs, sos, &old_unat); return previous_current; } @@ -1210,9 +1227,12 @@ static void mca_insert_tr(u64 iord) unsigned long psr; int cpu = smp_processor_id(); + if (!ia64_idtrs[cpu]) + return; + psr = ia64_clear_ic(); for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) { - p = &__per_cpu_idtrs[cpu][iord-1][i]; + p = ia64_idtrs[cpu] + (iord - 1) * IA64_TR_ALLOC_MAX; if (p->pte & 0x1) { old_rr = ia64_get_rr(p->ifa); if (old_rr != p->rr) { @@ -1426,6 +1446,8 @@ out: /* Get the CMC error record and log it */ ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC); + local_irq_disable(); + return IRQ_HANDLED; } @@ -1456,9 +1478,9 @@ ia64_mca_cmc_int_caller(int cmc_irq, void *arg) ia64_mca_cmc_int_handler(cmc_irq, arg); - for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++); + cpuid = cpumask_next(cpuid+1, cpu_online_mask); - if (cpuid < NR_CPUS) { + if (cpuid < nr_cpu_ids) { platform_send_ipi(cpuid, IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0); } else { /* If no log record, switch out of polling mode */ @@ -1492,7 +1514,8 @@ static void ia64_mca_cmc_poll (unsigned long dummy) { /* Trigger a CMC interrupt cascade */ - platform_send_ipi(first_cpu(cpu_online_map), IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0); + platform_send_ipi(cpumask_first(cpu_online_mask), IA64_CMCP_VECTOR, + IA64_IPI_DM_INT, 0); } /* @@ -1525,7 +1548,7 @@ ia64_mca_cpe_int_caller(int cpe_irq, void *arg) ia64_mca_cpe_int_handler(cpe_irq, arg); - for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++); + cpuid = cpumask_next(cpuid+1, cpu_online_mask); if (cpuid < NR_CPUS) { platform_send_ipi(cpuid, IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0); @@ -1568,7 +1591,8 @@ static void ia64_mca_cpe_poll (unsigned long dummy) { /* Trigger a CPE interrupt cascade */ - platform_send_ipi(first_cpu(cpu_online_map), IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0); + platform_send_ipi(cpumask_first(cpu_online_mask), IA64_CPEP_VECTOR, + IA64_IPI_DM_INT, 0); } #endif /* CONFIG_ACPI */ @@ -1682,14 +1706,25 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, if 
(!sos->monarch) { ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT; + +#ifdef CONFIG_KEXEC + while (monarch_cpu == -1 && !atomic_read(&kdump_in_progress)) + udelay(1000); +#else while (monarch_cpu == -1) - cpu_relax(); /* spin until monarch enters */ + cpu_relax(); /* spin until monarch enters */ +#endif NOTIFY_INIT(DIE_INIT_SLAVE_ENTER, regs, (long)&nd, 1); NOTIFY_INIT(DIE_INIT_SLAVE_PROCESS, regs, (long)&nd, 1); +#ifdef CONFIG_KEXEC + while (monarch_cpu != -1 && !atomic_read(&kdump_in_progress)) + udelay(1000); +#else while (monarch_cpu != -1) - cpu_relax(); /* spin until monarch leaves */ + cpu_relax(); /* spin until monarch leaves */ +#endif NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1); @@ -1737,38 +1772,32 @@ __setup("disable_cpe_poll", ia64_mca_disable_cpe_polling); static struct irqaction cmci_irqaction = { .handler = ia64_mca_cmc_int_handler, - .flags = IRQF_DISABLED, .name = "cmc_hndlr" }; static struct irqaction cmcp_irqaction = { .handler = ia64_mca_cmc_int_caller, - .flags = IRQF_DISABLED, .name = "cmc_poll" }; static struct irqaction mca_rdzv_irqaction = { .handler = ia64_mca_rendez_int_handler, - .flags = IRQF_DISABLED, .name = "mca_rdzv" }; static struct irqaction mca_wkup_irqaction = { .handler = ia64_mca_wakeup_int_handler, - .flags = IRQF_DISABLED, .name = "mca_wkup" }; #ifdef CONFIG_ACPI static struct irqaction mca_cpe_irqaction = { .handler = ia64_mca_cpe_int_handler, - .flags = IRQF_DISABLED, .name = "cpe_hndlr" }; static struct irqaction mca_cpep_irqaction = { .handler = ia64_mca_cpe_int_caller, - .flags = IRQF_DISABLED, .name = "cpe_poll" }; #endif /* CONFIG_ACPI */ @@ -1779,7 +1808,7 @@ static struct irqaction mca_cpep_irqaction = { * format most of the fields. */ -static void __cpuinit +static void format_mca_init_stack(void *mca_data, unsigned long offset, const char *type, int cpu) { @@ -1809,7 +1838,7 @@ static void * __init_refok mca_bootmem(void) } /* Do per-CPU MCA-related initialization. 
*/ -void __cpuinit +void ia64_mca_cpu_init(void *cpu_data) { void *pal_vaddr; @@ -1829,8 +1858,8 @@ ia64_mca_cpu_init(void *cpu_data) data = mca_bootmem(); first_time = 0; } else - data = page_address(alloc_pages_node(numa_node_id(), - GFP_KERNEL, get_order(sz))); + data = (void *)__get_free_pages(GFP_KERNEL, + get_order(sz)); if (!data) panic("Could not allocate MCA memory for cpu %d\n", cpu); @@ -1861,7 +1890,7 @@ ia64_mca_cpu_init(void *cpu_data) PAGE_KERNEL)); } -static void __cpuinit ia64_mca_cmc_vector_adjust(void *dummy) +static void ia64_mca_cmc_vector_adjust(void *dummy) { unsigned long flags; @@ -1871,7 +1900,7 @@ static void __cpuinit ia64_mca_cmc_vector_adjust(void *dummy) local_irq_restore(flags); } -static int __cpuinit mca_cpu_callback(struct notifier_block *nfb, +static int mca_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -1887,7 +1916,7 @@ static int __cpuinit mca_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block mca_cpu_notifier __cpuinitdata = { +static struct notifier_block mca_cpu_notifier = { .notifier_call = mca_cpu_callback }; @@ -1918,9 +1947,9 @@ ia64_mca_init(void) ia64_fptr_t *init_hldlr_ptr_slave = (ia64_fptr_t *)ia64_os_init_dispatch_slave; ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch; int i; - s64 rc; + long rc; struct ia64_sal_retval isrv; - u64 timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */ + unsigned long timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */ static struct notifier_block default_init_monarch_nb = { .notifier_call = default_monarch_init_process, .priority = 0/* we need to be notified last */ @@ -2026,6 +2055,29 @@ ia64_mca_init(void) IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __func__); + /* Initialize the areas set aside by the OS to buffer the + * platform/processor error states for MCA/INIT/CMC + * handling. + */ + ia64_log_init(SAL_INFO_TYPE_MCA); + ia64_log_init(SAL_INFO_TYPE_INIT); + ia64_log_init(SAL_INFO_TYPE_CMC); + ia64_log_init(SAL_INFO_TYPE_CPE); + + mca_init = 1; + printk(KERN_INFO "MCA related initialization done\n"); +} + + +/* + * These pieces cannot be done in ia64_mca_init() because it is called before + * early_irq_init() which would wipe out our percpu irq registrations. But we + * cannot leave them until ia64_mca_late_init() because by then all the other + * processors have been brought online and have set their own CMC vectors to + * point at a non-existent action. Called from arch_early_irq_init(). + */ +void __init ia64_mca_irq_init(void) +{ /* * Configure the CMCI/P vector and handler. Interrupts for CMC are * per-processor, so AP CMC interrupts are set up in smp_callin() (smpboot.c). @@ -2044,18 +2096,6 @@ ia64_mca_init(void) /* Setup the CPEI/P handler */ register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction); #endif - - /* Initialize the areas set aside by the OS to buffer the - * platform/processor error states for MCA/INIT/CMC - * handling.
- */ - ia64_log_init(SAL_INFO_TYPE_MCA); - ia64_log_init(SAL_INFO_TYPE_INIT); - ia64_log_init(SAL_INFO_TYPE_CMC); - ia64_log_init(SAL_INFO_TYPE_CPE); - - mca_init = 1; - printk(KERN_INFO "MCA related initialization done\n"); } /* @@ -2093,7 +2133,6 @@ ia64_mca_late_init(void) cpe_poll_timer.function = ia64_mca_cpe_poll; { - irq_desc_t *desc; unsigned int irq; if (cpe_vector >= 0) { @@ -2101,8 +2140,7 @@ ia64_mca_late_init(void) irq = local_vector_to_irq(cpe_vector); if (irq > 0) { cpe_poll_enabled = 0; - desc = irq_desc + irq; - desc->status |= IRQ_PER_CPU; + irq_set_status_flags(irq, IRQ_PER_CPU); setup_irq(irq, &mca_cpe_irqaction); ia64_cpe_irq = irq; ia64_mca_register_cpev(cpe_vector); diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index a06d46548ff..d5bdf9de36b 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -40,6 +40,7 @@ .global ia64_do_tlb_purge .global ia64_os_mca_dispatch + .global ia64_os_init_on_kdump .global ia64_os_init_dispatch_monarch .global ia64_os_init_dispatch_slave @@ -58,7 +59,7 @@ ia64_do_tlb_purge: #define O(member) IA64_CPUINFO_##member##_OFFSET - GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2 + GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2 ;; addl r17=O(PTCE_STRIDE),r2 addl r2=O(PTCE_BASE),r2 @@ -299,6 +300,25 @@ END(ia64_os_mca_virtual_begin) //StartMain//////////////////////////////////////////////////////////////////// // +// NOP init handler for kdump. In a panic situation we may receive INIT +// while transitioning between kernels. Since registers are initialized on +// leaving the current kernel, the current kernel's monarch/slave handlers +// can no longer be called safely in virtual mode. +// We could unregister these init handlers from SAL, but then an INIT would +// result in a warm boot by SAL and we could not retrieve the crashdump. +// Therefore register this NOP function with SAL, to prevent entering virtual +// mode and the resulting warm boot by SAL. +// +ia64_os_init_on_kdump: + mov r8=r0 // IA64_INIT_RESUME + mov r9=r10 // SAL_GP + mov r22=r17 // *minstate + ;; + mov r10=r0 // return to same context + mov b0=r12 // SAL_CHECK return address + br b0 + +// // SAL to OS entry point for INIT on all processors. This has been defined for // registration purposes with SAL as a part of ia64_mca_init. Monarch and // slave INIT have identical processing, except for the value of the @@ -1073,3 +1093,30 @@ GLOBAL_ENTRY(ia64_get_rnat) mov ar.rsc=3 br.ret.sptk.many rp END(ia64_get_rnat) + + +// void ia64_set_psr_mc(void) +// +// Set psr.mc bit to mask MCA/INIT.
+GLOBAL_ENTRY(ia64_set_psr_mc) + rsm psr.i | psr.ic // disable interrupts + ;; + srlz.d + ;; + mov r14 = psr // get psr{36:35,31:0} + movl r15 = 1f + ;; + dep r14 = -1, r14, PSR_MC, 1 // set psr.mc + ;; + dep r14 = -1, r14, PSR_IC, 1 // set psr.ic + ;; + dep r14 = -1, r14, PSR_BN, 1 // keep bank1 in use + ;; + mov cr.ipsr = r14 + mov cr.ifs = r0 + mov cr.iip = r15 + ;; + rfi +1: + br.ret.sptk.many rp +END(ia64_set_psr_mc) diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index f94aaa86933..94f8bf777af 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -22,12 +22,12 @@ #include <linux/smp.h> #include <linux/workqueue.h> #include <linux/mm.h> +#include <linux/slab.h> #include <asm/delay.h> #include <asm/machvec.h> #include <asm/page.h> #include <asm/ptrace.h> -#include <asm/system.h> #include <asm/sal.h> #include <asm/mca.h> @@ -158,7 +158,8 @@ mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) ia64_mlogbuf_dump(); printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", - raw_smp_processor_id(), current->pid, current_uid(), + raw_smp_processor_id(), current->pid, + from_kuid(&init_user_ns, current_uid()), iip, ipsr, paddr, current->comm); spin_lock(&mca_bh_lock); @@ -348,7 +349,7 @@ init_record_index_pools(void) /* - 3 - */ slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1; - slidx_pool.buffer = (slidx_list_t *) + slidx_pool.buffer = kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL); return slidx_pool.buffer ? 0 : -ENOMEM; diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h index 292e214a3b8..cc82a7d744c 100644 --- a/arch/ia64/kernel/minstate.h +++ b/arch/ia64/kernel/minstate.h @@ -4,7 +4,7 @@ #include "entry.h" #include "paravirt_inst.h" -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* read ar.itc in advance, and use it before leaving bank 0 */ #define ACCOUNT_GET_STAMP \ (pUStk) mov.m r20=ar.itc; @@ -16,7 +16,7 @@ #define ACCOUNT_SYS_ENTER #endif -.section ".data.patch.rse", "a" +.section ".data..patch.rse", "a" .previous /* @@ -215,7 +215,7 @@ (pUStk) extr.u r17=r18,3,6; \ (pUStk) sub r16=r18,r22; \ [1:](pKStk) br.cond.sptk.many 1f; \ - .xdata4 ".data.patch.rse",1b-. \ + .xdata4 ".data..patch.rse",1b-. 
\ ;; \ cmp.ge p6,p7 = 33,r17; \ ;; \ diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index aaa7d901521..24603be24c1 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -171,7 +171,8 @@ apply_imm60 (struct module *mod, struct insn *insn, uint64_t val) return 0; } if (val + ((uint64_t) 1 << 59) >= (1UL << 60)) { - printk(KERN_ERR "%s: value %ld out of IMM60 range\n", mod->name, (int64_t) val); + printk(KERN_ERR "%s: value %ld out of IMM60 range\n", + mod->name, (long) val); return 0; } ia64_patch_imm60((u64) insn, val); @@ -182,7 +183,8 @@ static int apply_imm22 (struct module *mod, struct insn *insn, uint64_t val) { if (val + (1 << 21) >= (1 << 22)) { - printk(KERN_ERR "%s: value %li out of IMM22 range\n", mod->name, (int64_t)val); + printk(KERN_ERR "%s: value %li out of IMM22 range\n", + mod->name, (long)val); return 0; } ia64_patch((u64) insn, 0x01fffcfe000UL, ( ((val & 0x200000UL) << 15) /* bit 21 -> 36 */ @@ -196,7 +198,8 @@ static int apply_imm21b (struct module *mod, struct insn *insn, uint64_t val) { if (val + (1 << 20) >= (1 << 21)) { - printk(KERN_ERR "%s: value %li out of IMM21b range\n", mod->name, (int64_t)val); + printk(KERN_ERR "%s: value %li out of IMM21b range\n", + mod->name, (long)val); return 0; } ia64_patch((u64) insn, 0x11ffffe000UL, ( ((val & 0x100000UL) << 16) /* bit 20 -> 36 */ @@ -301,14 +304,6 @@ plt_target (struct plt_entry *plt) #endif /* !USE_BRL */ -void * -module_alloc (unsigned long size) -{ - if (!size) - return NULL; - return vmalloc(size); -} - void module_free (struct module *mod, void *module_region) { @@ -446,6 +441,14 @@ module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, mod->arch.opd = s; else if (strcmp(".IA_64.unwind", secstrings + s->sh_name) == 0) mod->arch.unwind = s; +#ifdef CONFIG_PARAVIRT + else if (strcmp(".paravirt_bundles", + secstrings + s->sh_name) == 0) + mod->arch.paravirt_bundles = s; + else if (strcmp(".paravirt_insts", + secstrings + s->sh_name) == 0) + mod->arch.paravirt_insts = s; +#endif if (!mod->arch.core_plt || !mod->arch.init_plt || !mod->arch.got || !mod->arch.opd) { printk(KERN_ERR "%s: sections missing\n", mod->name); @@ -525,8 +528,7 @@ get_ltoff (struct module *mod, uint64_t value, int *okp) goto found; /* Not enough GOT entries? 
*/ - if (e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size)) - BUG(); + BUG_ON(e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size)); e->val = value; ++mod->arch.next_got_entry; @@ -694,8 +696,9 @@ do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend, case RV_PCREL2: if (r_type == R_IA64_PCREL21BI) { if (!is_internal(mod, val)) { - printk(KERN_ERR "%s: %s reloc against non-local symbol (%lx)\n", - __func__, reloc_name[r_type], val); + printk(KERN_ERR "%s: %s reloc against " + "non-local symbol (%lx)\n", __func__, + reloc_name[r_type], (unsigned long)val); return -ENOEXEC; } format = RF_INSN21B; @@ -842,14 +845,6 @@ apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symind return 0; } -int -apply_relocate (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex, - unsigned int relsec, struct module *mod) -{ - printk(KERN_ERR "module %s: REL relocs in section %u unsupported\n", mod->name, relsec); - return -ENOEXEC; -} - /* * Modules contain a single unwind table which covers both the core and the init text * sections but since the two are not contiguous, we need to split this table up such that @@ -921,6 +916,30 @@ module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mo DEBUGP("%s: init: entry=%p\n", __func__, mod->init); if (mod->arch.unwind) register_unwind_table(mod); +#ifdef CONFIG_PARAVIRT + if (mod->arch.paravirt_bundles) { + struct paravirt_patch_site_bundle *start = + (struct paravirt_patch_site_bundle *) + mod->arch.paravirt_bundles->sh_addr; + struct paravirt_patch_site_bundle *end = + (struct paravirt_patch_site_bundle *) + (mod->arch.paravirt_bundles->sh_addr + + mod->arch.paravirt_bundles->sh_size); + + paravirt_patch_apply_bundle(start, end); + } + if (mod->arch.paravirt_insts) { + struct paravirt_patch_site_inst *start = + (struct paravirt_patch_site_inst *) + mod->arch.paravirt_insts->sh_addr; + struct paravirt_patch_site_inst *end = + (struct paravirt_patch_site_inst *) + (mod->arch.paravirt_insts->sh_addr + + mod->arch.paravirt_insts->sh_size); + + paravirt_patch_apply_inst(start, end); + } +#endif return 0; } diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 89033933903..c430f9198d1 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -7,66 +7,27 @@ #include <linux/msi.h> #include <linux/dmar.h> #include <asm/smp.h> - -/* - * Shifts for APIC-based data - */ - -#define MSI_DATA_VECTOR_SHIFT 0 -#define MSI_DATA_VECTOR(v) (((u8)v) << MSI_DATA_VECTOR_SHIFT) -#define MSI_DATA_VECTOR_MASK 0xffffff00 - -#define MSI_DATA_DELIVERY_SHIFT 8 -#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_SHIFT) -#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_SHIFT) - -#define MSI_DATA_LEVEL_SHIFT 14 -#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT) -#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT) - -#define MSI_DATA_TRIGGER_SHIFT 15 -#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT) -#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT) - -/* - * Shift/mask fields for APIC-based bus address - */ - -#define MSI_TARGET_CPU_SHIFT 4 -#define MSI_ADDR_HEADER 0xfee00000 - -#define MSI_ADDR_DESTID_MASK 0xfff0000f -#define MSI_ADDR_DESTID_CPU(cpu) ((cpu) << MSI_TARGET_CPU_SHIFT) - -#define MSI_ADDR_DESTMODE_SHIFT 2 -#define MSI_ADDR_DESTMODE_PHYS (0 << MSI_ADDR_DESTMODE_SHIFT) -#define MSI_ADDR_DESTMODE_LOGIC (1 << MSI_ADDR_DESTMODE_SHIFT) - -#define 
MSI_ADDR_REDIRECTION_SHIFT 3 -#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT) -#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT) +#include <asm/msidef.h> static struct irq_chip ia64_msi_chip; #ifdef CONFIG_SMP -static void ia64_set_msi_irq_affinity(unsigned int irq, - const cpumask_t *cpu_mask) +static int ia64_set_msi_irq_affinity(struct irq_data *idata, + const cpumask_t *cpu_mask, bool force) { struct msi_msg msg; u32 addr, data; - int cpu = first_cpu(*cpu_mask); - - if (!cpu_online(cpu)) - return; + int cpu = cpumask_first_and(cpu_mask, cpu_online_mask); + unsigned int irq = idata->irq; if (irq_prepare_move(irq, cpu)) - return; + return -1; - read_msi_msg(irq, &msg); + get_cached_msi_msg(irq, &msg); addr = msg.address_lo; - addr &= MSI_ADDR_DESTID_MASK; - addr |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu)); + addr &= MSI_ADDR_DEST_ID_MASK; + addr |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu)); msg.address_lo = addr; data = msg.data; @@ -75,7 +36,9 @@ static void ia64_set_msi_irq_affinity(unsigned int irq, msg.data = data; write_msi_msg(irq, &msg); - irq_desc[irq].affinity = cpumask_of_cpu(cpu); + cpumask_copy(idata->affinity, cpumask_of(cpu)); + + return 0; } #endif /* CONFIG_SMP */ @@ -90,17 +53,17 @@ int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) if (irq < 0) return irq; - set_irq_msi(irq, desc); - cpus_and(mask, irq_to_domain(irq), cpu_online_map); + irq_set_msi_desc(irq, desc); + cpumask_and(&mask, &(irq_to_domain(irq)), cpu_online_mask); dest_phys_id = cpu_physical_id(first_cpu(mask)); vector = irq_to_vector(irq); msg.address_hi = 0; msg.address_lo = MSI_ADDR_HEADER | - MSI_ADDR_DESTMODE_PHYS | + MSI_ADDR_DEST_MODE_PHYS | MSI_ADDR_REDIRECTION_CPU | - MSI_ADDR_DESTID_CPU(dest_phys_id); + MSI_ADDR_DEST_ID_CPU(dest_phys_id); msg.data = MSI_DATA_TRIGGER_EDGE | @@ -109,7 +72,7 @@ int ia64_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) MSI_DATA_VECTOR(vector); write_msi_msg(irq, &msg); - set_irq_chip_and_handler(irq, &ia64_msi_chip, handle_edge_irq); + irq_set_chip_and_handler(irq, &ia64_msi_chip, handle_edge_irq); return 0; } @@ -119,16 +82,16 @@ void ia64_teardown_msi_irq(unsigned int irq) destroy_irq(irq); } -static void ia64_ack_msi_irq(unsigned int irq) +static void ia64_ack_msi_irq(struct irq_data *data) { - irq_complete_move(irq); - move_native_irq(irq); + irq_complete_move(data->irq); + irq_move_irq(data); ia64_eoi(); } -static int ia64_msi_retrigger_irq(unsigned int irq) +static int ia64_msi_retrigger_irq(struct irq_data *data) { - unsigned int vector = irq_to_vector(irq); + unsigned int vector = irq_to_vector(data->irq); ia64_resend_irq(vector); return 1; @@ -138,14 +101,14 @@ static int ia64_msi_retrigger_irq(unsigned int irq) * Generic ops used on most IA64 platforms. 
*/ static struct irq_chip ia64_msi_chip = { - .name = "PCI-MSI", - .mask = mask_msi_irq, - .unmask = unmask_msi_irq, - .ack = ia64_ack_msi_irq, + .name = "PCI-MSI", + .irq_mask = mask_msi_irq, + .irq_unmask = unmask_msi_irq, + .irq_ack = ia64_ack_msi_irq, #ifdef CONFIG_SMP - .set_affinity = ia64_set_msi_irq_affinity, + .irq_set_affinity = ia64_set_msi_irq_affinity, #endif - .retrigger = ia64_msi_retrigger_irq, + .irq_retrigger = ia64_msi_retrigger_irq, }; @@ -165,41 +128,42 @@ void arch_teardown_msi_irq(unsigned int irq) return ia64_teardown_msi_irq(irq); } -#ifdef CONFIG_DMAR +#ifdef CONFIG_INTEL_IOMMU #ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static int dmar_msi_set_affinity(struct irq_data *data, + const struct cpumask *mask, bool force) { + unsigned int irq = data->irq; struct irq_cfg *cfg = irq_cfg + irq; struct msi_msg msg; - int cpu = cpumask_first(mask); - - if (!cpu_online(cpu)) - return; + int cpu = cpumask_first_and(mask, cpu_online_mask); if (irq_prepare_move(irq, cpu)) - return; + return -1; dmar_msi_read(irq, &msg); msg.data &= ~MSI_DATA_VECTOR_MASK; msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DESTID_MASK; - msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu)); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu)); dmar_msi_write(irq, &msg); - irq_desc[irq].affinity = *mask; + cpumask_copy(data->affinity, mask); + + return 0; } #endif /* CONFIG_SMP */ -struct irq_chip dmar_msi_type = { +static struct irq_chip dmar_msi_type = { .name = "DMAR_MSI", - .unmask = dmar_msi_unmask, - .mask = dmar_msi_mask, - .ack = ia64_ack_msi_irq, + .irq_unmask = dmar_msi_unmask, + .irq_mask = dmar_msi_mask, + .irq_ack = ia64_ack_msi_irq, #ifdef CONFIG_SMP - .set_affinity = dmar_msi_set_affinity, + .irq_set_affinity = dmar_msi_set_affinity, #endif - .retrigger = ia64_msi_retrigger_irq, + .irq_retrigger = ia64_msi_retrigger_irq, }; static int @@ -209,15 +173,15 @@ msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) unsigned dest; cpumask_t mask; - cpus_and(mask, irq_to_domain(irq), cpu_online_map); + cpumask_and(&mask, &(irq_to_domain(irq)), cpu_online_mask); dest = cpu_physical_id(first_cpu(mask)); msg->address_hi = 0; msg->address_lo = MSI_ADDR_HEADER | - MSI_ADDR_DESTMODE_PHYS | + MSI_ADDR_DEST_MODE_PHYS | MSI_ADDR_REDIRECTION_CPU | - MSI_ADDR_DESTID_CPU(dest); + MSI_ADDR_DEST_ID_CPU(dest); msg->data = MSI_DATA_TRIGGER_EDGE | @@ -236,9 +200,9 @@ int arch_setup_dmar_msi(unsigned int irq) if (ret < 0) return ret; dmar_msi_write(irq, &msg); - set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, - "edge"); + irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, + "edge"); return 0; } -#endif /* CONFIG_DMAR */ +#endif /* CONFIG_INTEL_IOMMU */ diff --git a/arch/ia64/kernel/nr-irqs.c b/arch/ia64/kernel/nr-irqs.c index ee564575148..f6769cd54bd 100644 --- a/arch/ia64/kernel/nr-irqs.c +++ b/arch/ia64/kernel/nr-irqs.c @@ -10,15 +10,11 @@ #include <linux/kbuild.h> #include <linux/threads.h> #include <asm/native/irq.h> -#include <asm/xen/irq.h> void foo(void) { union paravirt_nr_irqs_max { char ia64_native_nr_irqs[IA64_NATIVE_NR_IRQS]; -#ifdef CONFIG_XEN - char xen_nr_irqs[XEN_NR_IRQS]; -#endif }; DEFINE(NR_IRQS, sizeof (union paravirt_nr_irqs_max)); diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c index c93420c9740..d288cde9360 100644 --- a/arch/ia64/kernel/numa.c +++ b/arch/ia64/kernel/numa.c @@ 
-30,7 +30,7 @@ EXPORT_SYMBOL(cpu_to_node_map); cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; EXPORT_SYMBOL(node_to_cpu_mask); -void __cpuinit map_cpu_to_node(int cpu, int nid) +void map_cpu_to_node(int cpu, int nid) { int oldnid; if (nid < 0) { /* just initialize by zero */ @@ -51,7 +51,7 @@ void __cpuinit map_cpu_to_node(int cpu, int nid) return; } -void __cpuinit unmap_cpu_from_node(int cpu, int nid) +void unmap_cpu_from_node(int cpu, int nid) { WARN_ON(!cpu_isset(cpu, node_to_cpu_mask[nid])); WARN_ON(cpu_to_node_map[cpu] != nid); diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index e5c57f413ca..c39c3cd3ac3 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -22,6 +22,7 @@ #include <linux/errno.h> #include <linux/init.h> #include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/efi.h> @@ -41,7 +42,7 @@ MODULE_LICENSE("GPL"); #define PALINFO_VERSION "0.5" -typedef int (*palinfo_func_t)(char*); +typedef int (*palinfo_func_t)(struct seq_file *); typedef struct { const char *name; /* name of the proc entry */ @@ -54,7 +55,7 @@ typedef struct { * A bunch of string array to get pretty printing */ -static char *cache_types[] = { +static const char *cache_types[] = { "", /* not used */ "Instruction", "Data", @@ -122,19 +123,16 @@ static const char *mem_attrib[]={ * - a pointer to the end of the buffer * */ -static char * -bitvector_process(char *p, u64 vector) +static void bitvector_process(struct seq_file *m, u64 vector) { int i,j; - const char *units[]={ "", "K", "M", "G", "T" }; + static const char *units[]={ "", "K", "M", "G", "T" }; for (i=0, j=0; i < 64; i++ , j=i/10) { - if (vector & 0x1) { - p += sprintf(p, "%d%s ", 1 << (i-j*10), units[j]); - } + if (vector & 0x1) + seq_printf(m, "%d%s ", 1 << (i-j*10), units[j]); vector >>= 1; } - return p; } /* @@ -149,8 +147,7 @@ bitvector_process(char *p, u64 vector) * - a pointer to the end of the buffer * */ -static char * -bitregister_process(char *p, u64 *reg_info, int max) +static void bitregister_process(struct seq_file *m, u64 *reg_info, int max) { int i, begin, skip = 0; u64 value = reg_info[0]; @@ -163,9 +160,9 @@ bitregister_process(char *p, u64 *reg_info, int max) if ((value & 0x1) == 0 && skip == 0) { if (begin <= i - 2) - p += sprintf(p, "%d-%d ", begin, i-1); + seq_printf(m, "%d-%d ", begin, i-1); else - p += sprintf(p, "%d ", i-1); + seq_printf(m, "%d ", i-1); skip = 1; begin = -1; } else if ((value & 0x1) && skip == 1) { @@ -176,19 +173,15 @@ bitregister_process(char *p, u64 *reg_info, int max) } if (begin > -1) { if (begin < 127) - p += sprintf(p, "%d-127", begin); + seq_printf(m, "%d-127", begin); else - p += sprintf(p, "127"); + seq_puts(m, "127"); } - - return p; } -static int -power_info(char *page) +static int power_info(struct seq_file *m) { s64 status; - char *p = page; u64 halt_info_buffer[8]; pal_power_mgmt_info_u_t *halt_info =(pal_power_mgmt_info_u_t *)halt_info_buffer; int i; @@ -198,103 +191,103 @@ power_info(char *page) for (i=0; i < 8 ; i++ ) { if (halt_info[i].pal_power_mgmt_info_s.im == 1) { - p += sprintf(p, "Power level %d:\n" - "\tentry_latency : %d cycles\n" - "\texit_latency : %d cycles\n" - "\tpower consumption : %d mW\n" - "\tCache+TLB coherency : %s\n", i, - halt_info[i].pal_power_mgmt_info_s.entry_latency, - halt_info[i].pal_power_mgmt_info_s.exit_latency, - halt_info[i].pal_power_mgmt_info_s.power_consumption, - halt_info[i].pal_power_mgmt_info_s.co ? 
"Yes" : "No"); + seq_printf(m, + "Power level %d:\n" + "\tentry_latency : %d cycles\n" + "\texit_latency : %d cycles\n" + "\tpower consumption : %d mW\n" + "\tCache+TLB coherency : %s\n", i, + halt_info[i].pal_power_mgmt_info_s.entry_latency, + halt_info[i].pal_power_mgmt_info_s.exit_latency, + halt_info[i].pal_power_mgmt_info_s.power_consumption, + halt_info[i].pal_power_mgmt_info_s.co ? "Yes" : "No"); } else { - p += sprintf(p,"Power level %d: not implemented\n",i); + seq_printf(m,"Power level %d: not implemented\n", i); } } - return p - page; + return 0; } -static int -cache_info(char *page) +static int cache_info(struct seq_file *m) { - char *p = page; - u64 i, levels, unique_caches; + unsigned long i, levels, unique_caches; pal_cache_config_info_t cci; int j, k; - s64 status; + long status; if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) { printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status); return 0; } - p += sprintf(p, "Cache levels : %ld\nUnique caches : %ld\n\n", levels, unique_caches); + seq_printf(m, "Cache levels : %ld\nUnique caches : %ld\n\n", + levels, unique_caches); for (i=0; i < levels; i++) { - for (j=2; j >0 ; j--) { - /* even without unification some level may not be present */ - if ((status=ia64_pal_cache_config_info(i,j, &cci)) != 0) { + if ((status=ia64_pal_cache_config_info(i,j, &cci)) != 0) continue; - } - p += sprintf(p, - "%s Cache level %lu:\n" - "\tSize : %u bytes\n" - "\tAttributes : ", - cache_types[j+cci.pcci_unified], i+1, - cci.pcci_cache_size); - - if (cci.pcci_unified) p += sprintf(p, "Unified "); - - p += sprintf(p, "%s\n", cache_mattrib[cci.pcci_cache_attr]); - - p += sprintf(p, - "\tAssociativity : %d\n" - "\tLine size : %d bytes\n" - "\tStride : %d bytes\n", - cci.pcci_assoc, 1<<cci.pcci_line_size, 1<<cci.pcci_stride); + + seq_printf(m, + "%s Cache level %lu:\n" + "\tSize : %u bytes\n" + "\tAttributes : ", + cache_types[j+cci.pcci_unified], i+1, + cci.pcci_cache_size); + + if (cci.pcci_unified) + seq_puts(m, "Unified "); + + seq_printf(m, "%s\n", cache_mattrib[cci.pcci_cache_attr]); + + seq_printf(m, + "\tAssociativity : %d\n" + "\tLine size : %d bytes\n" + "\tStride : %d bytes\n", + cci.pcci_assoc, + 1<<cci.pcci_line_size, + 1<<cci.pcci_stride); if (j == 1) - p += sprintf(p, "\tStore latency : N/A\n"); + seq_puts(m, "\tStore latency : N/A\n"); else - p += sprintf(p, "\tStore latency : %d cycle(s)\n", - cci.pcci_st_latency); + seq_printf(m, "\tStore latency : %d cycle(s)\n", + cci.pcci_st_latency); - p += sprintf(p, - "\tLoad latency : %d cycle(s)\n" - "\tStore hints : ", cci.pcci_ld_latency); + seq_printf(m, + "\tLoad latency : %d cycle(s)\n" + "\tStore hints : ", cci.pcci_ld_latency); for(k=0; k < 8; k++ ) { if ( cci.pcci_st_hints & 0x1) - p += sprintf(p, "[%s]", cache_st_hints[k]); + seq_printf(m, "[%s]", cache_st_hints[k]); cci.pcci_st_hints >>=1; } - p += sprintf(p, "\n\tLoad hints : "); + seq_puts(m, "\n\tLoad hints : "); for(k=0; k < 8; k++ ) { if (cci.pcci_ld_hints & 0x1) - p += sprintf(p, "[%s]", cache_ld_hints[k]); + seq_printf(m, "[%s]", cache_ld_hints[k]); cci.pcci_ld_hints >>=1; } - p += sprintf(p, - "\n\tAlias boundary : %d byte(s)\n" - "\tTag LSB : %d\n" - "\tTag MSB : %d\n", - 1<<cci.pcci_alias_boundary, cci.pcci_tag_lsb, - cci.pcci_tag_msb); + seq_printf(m, + "\n\tAlias boundary : %d byte(s)\n" + "\tTag LSB : %d\n" + "\tTag MSB : %d\n", + 1<<cci.pcci_alias_boundary, cci.pcci_tag_lsb, + cci.pcci_tag_msb); /* when unified, data(j=2) is enough */ - if (cci.pcci_unified) break; + if (cci.pcci_unified) + break; } 
} - return p - page; + return 0; } -static int -vm_info(char *page) +static int vm_info(struct seq_file *m) { - char *p = page; u64 tr_pages =0, vw_pages=0, tc_pages; u64 attrib; pal_vm_info_1_u_t vm_info_1; @@ -303,13 +296,13 @@ vm_info(char *page) ia64_ptce_info_t ptce; const char *sep; int i, j; - s64 status; + long status; if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) { printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status); } else { - p += sprintf(p, + seq_printf(m, "Physical Address Space : %d bits\n" "Virtual Address Space : %d bits\n" "Protection Key Registers(PKR) : %d\n" @@ -324,49 +317,49 @@ vm_info(char *page) vm_info_1.pal_vm_info_1_s.hash_tag_id, vm_info_2.pal_vm_info_2_s.rid_size); if (vm_info_2.pal_vm_info_2_s.max_purges == PAL_MAX_PURGES) - p += sprintf(p, "unlimited\n"); + seq_puts(m, "unlimited\n"); else - p += sprintf(p, "%d\n", + seq_printf(m, "%d\n", vm_info_2.pal_vm_info_2_s.max_purges ? vm_info_2.pal_vm_info_2_s.max_purges : 1); } if (ia64_pal_mem_attrib(&attrib) == 0) { - p += sprintf(p, "Supported memory attributes : "); + seq_puts(m, "Supported memory attributes : "); sep = ""; for (i = 0; i < 8; i++) { if (attrib & (1 << i)) { - p += sprintf(p, "%s%s", sep, mem_attrib[i]); + seq_printf(m, "%s%s", sep, mem_attrib[i]); sep = ", "; } } - p += sprintf(p, "\n"); + seq_putc(m, '\n'); } if ((status = ia64_pal_vm_page_size(&tr_pages, &vw_pages)) !=0) { printk(KERN_ERR "ia64_pal_vm_page_size=%ld\n", status); } else { - p += sprintf(p, - "\nTLB walker : %simplemented\n" - "Number of DTR : %d\n" - "Number of ITR : %d\n" - "TLB insertable page sizes : ", - vm_info_1.pal_vm_info_1_s.vw ? "" : "not ", - vm_info_1.pal_vm_info_1_s.max_dtr_entry+1, - vm_info_1.pal_vm_info_1_s.max_itr_entry+1); + seq_printf(m, + "\nTLB walker : %simplemented\n" + "Number of DTR : %d\n" + "Number of ITR : %d\n" + "TLB insertable page sizes : ", + vm_info_1.pal_vm_info_1_s.vw ? 
"" : "not ", + vm_info_1.pal_vm_info_1_s.max_dtr_entry+1, + vm_info_1.pal_vm_info_1_s.max_itr_entry+1); + bitvector_process(m, tr_pages); - p = bitvector_process(p, tr_pages); + seq_puts(m, "\nTLB purgeable page sizes : "); - p += sprintf(p, "\nTLB purgeable page sizes : "); - - p = bitvector_process(p, vw_pages); + bitvector_process(m, vw_pages); } - if ((status=ia64_get_ptce(&ptce)) != 0) { + + if ((status = ia64_get_ptce(&ptce)) != 0) { printk(KERN_ERR "ia64_get_ptce=%ld\n", status); } else { - p += sprintf(p, + seq_printf(m, "\nPurge base address : 0x%016lx\n" "Purge outer loop count : %d\n" "Purge inner loop count : %d\n" @@ -375,7 +368,7 @@ vm_info(char *page) ptce.base, ptce.count[0], ptce.count[1], ptce.stride[0], ptce.stride[1]); - p += sprintf(p, + seq_printf(m, "TC Levels : %d\n" "Unique TC(s) : %d\n", vm_info_1.pal_vm_info_1_s.num_tc_levels, @@ -385,13 +378,11 @@ vm_info(char *page) for (j=2; j>0 ; j--) { tc_pages = 0; /* just in case */ - /* even without unification, some levels may not be present */ - if ((status=ia64_pal_vm_info(i,j, &tc_info, &tc_pages)) != 0) { + if ((status=ia64_pal_vm_info(i,j, &tc_info, &tc_pages)) != 0) continue; - } - p += sprintf(p, + seq_printf(m, "\n%s Translation Cache Level %d:\n" "\tHash sets : %d\n" "\tAssociativity : %d\n" @@ -403,15 +394,15 @@ vm_info(char *page) tc_info.tc_num_entries); if (tc_info.tc_pf) - p += sprintf(p, "PreferredPageSizeOptimized "); + seq_puts(m, "PreferredPageSizeOptimized "); if (tc_info.tc_unified) - p += sprintf(p, "Unified "); + seq_puts(m, "Unified "); if (tc_info.tc_reduce_tr) - p += sprintf(p, "TCReduction"); + seq_puts(m, "TCReduction"); - p += sprintf(p, "\n\tSupported page sizes: "); + seq_puts(m, "\n\tSupported page sizes: "); - p = bitvector_process(p, tc_pages); + bitvector_process(m, tc_pages); /* when unified date (j=2) is enough */ if (tc_info.tc_unified) @@ -419,22 +410,20 @@ vm_info(char *page) } } } - p += sprintf(p, "\n"); - return p - page; + seq_putc(m, '\n'); + return 0; } -static int -register_info(char *page) +static int register_info(struct seq_file *m) { - char *p = page; u64 reg_info[2]; u64 info; - u64 phys_stacked; + unsigned long phys_stacked; pal_hints_u_t hints; - u64 iregs, dregs; - char *info_type[]={ + unsigned long iregs, dregs; + static const char * const info_type[] = { "Implemented AR(s)", "AR(s) with read side-effects", "Implemented CR(s)", @@ -442,35 +431,31 @@ register_info(char *page) }; for(info=0; info < 4; info++) { - - if (ia64_pal_register_info(info, ®_info[0], ®_info[1]) != 0) return 0; - - p += sprintf(p, "%-32s : ", info_type[info]); - - p = bitregister_process(p, reg_info, 128); - - p += sprintf(p, "\n"); + if (ia64_pal_register_info(info, ®_info[0], ®_info[1]) != 0) + return 0; + seq_printf(m, "%-32s : ", info_type[info]); + bitregister_process(m, reg_info, 128); + seq_putc(m, '\n'); } - if (ia64_pal_rse_info(&phys_stacked, &hints) == 0) { + if (ia64_pal_rse_info(&phys_stacked, &hints) == 0) + seq_printf(m, + "RSE stacked physical registers : %ld\n" + "RSE load/store hints : %ld (%s)\n", + phys_stacked, hints.ph_data, + hints.ph_data < RSE_HINTS_COUNT ? rse_hints[hints.ph_data]: "(??)"); - p += sprintf(p, - "RSE stacked physical registers : %ld\n" - "RSE load/store hints : %ld (%s)\n", - phys_stacked, hints.ph_data, - hints.ph_data < RSE_HINTS_COUNT ? 
rse_hints[hints.ph_data]: "(??)"); - } if (ia64_pal_debug_info(&iregs, &dregs)) return 0; - p += sprintf(p, - "Instruction debug register pairs : %ld\n" - "Data debug register pairs : %ld\n", iregs, dregs); + seq_printf(m, + "Instruction debug register pairs : %ld\n" + "Data debug register pairs : %ld\n", iregs, dregs); - return p - page; + return 0; } -static char *proc_features_0[]={ /* Feature set 0 */ +static const char *const proc_features_0[]={ /* Feature set 0 */ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL, NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, @@ -502,7 +487,7 @@ static char *proc_features_0[]={ /* Feature set 0 */ "Enable BERR promotion" }; -static char *proc_features_16[]={ /* Feature set 16 */ +static const char *const proc_features_16[]={ /* Feature set 16 */ "Disable ETM", "Enable ETM", "Enable MCA on half-way timer", @@ -522,7 +507,7 @@ static char *proc_features_16[]={ /* Feature set 16 */ NULL, NULL, NULL, NULL, NULL }; -static char **proc_features[]={ +static const char *const *const proc_features[]={ proc_features_0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -530,11 +515,10 @@ static char **proc_features[]={ NULL, NULL, NULL, NULL, }; -static char * -feature_set_info(char *page, u64 avail, u64 status, u64 control, u64 set) +static void feature_set_info(struct seq_file *m, u64 avail, u64 status, u64 control, + unsigned long set) { - char *p = page; - char **vf, **v; + const char *const *vf, *const *v; int i; vf = v = proc_features[set]; @@ -547,13 +531,13 @@ feature_set_info(char *page, u64 avail, u64 status, u64 control, u64 set) if (vf) v = vf + i; if ( v && *v ) { - p += sprintf(p, "%-40s : %s %s\n", *v, + seq_printf(m, "%-40s : %s %s\n", *v, avail & 0x1 ? (status & 0x1 ? - "On " : "Off"): "", + "On " : "Off"): "", avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): ""); } else { - p += sprintf(p, "Feature set %2ld bit %2d\t\t\t" + seq_printf(m, "Feature set %2ld bit %2d\t\t\t" " : %s %s\n", set, i, avail & 0x1 ? (status & 0x1 ? 
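For readers counting qualifiers in the declarations above: "const char *const *const proc_features[]" is an immutable array of immutable pointers to arrays of immutable strings. A minimal illustration of the same layering:

        static const char *const set0[] = { "feature a", "feature b" };

        /* neither the table, nor its entries, nor the strings may be written */
        static const char *const *const tables[] = { set0, NULL };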
@@ -562,36 +546,32 @@ feature_set_info(char *page, u64 avail, u64 status, u64 control, u64 set) "Ctrl" : "NoCtrl"): ""); } } - return p; } -static int -processor_info(char *page) +static int processor_info(struct seq_file *m) { - char *p = page; u64 avail=1, status=1, control=1, feature_set=0; s64 ret; do { ret = ia64_pal_proc_get_features(&avail, &status, &control, feature_set); - if (ret < 0) { - return p - page; - } + if (ret < 0) + return 0; + if (ret == 1) { feature_set++; continue; } - p = feature_set_info(p, avail, status, control, feature_set); - + feature_set_info(m, avail, status, control, feature_set); feature_set++; } while(1); - return p - page; + return 0; } -static const char *bus_features[]={ +static const char *const bus_features[]={ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL, NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, @@ -617,162 +597,155 @@ static const char *bus_features[]={ }; -static int -bus_info(char *page) +static int bus_info(struct seq_file *m) { - char *p = page; - const char **v = bus_features; + const char *const *v = bus_features; pal_bus_features_u_t av, st, ct; u64 avail, status, control; int i; s64 ret; - if ((ret=ia64_pal_bus_get_features(&av, &st, &ct)) != 0) return 0; + if ((ret=ia64_pal_bus_get_features(&av, &st, &ct)) != 0) + return 0; avail = av.pal_bus_features_val; status = st.pal_bus_features_val; control = ct.pal_bus_features_val; for(i=0; i < 64; i++, v++, avail >>=1, status >>=1, control >>=1) { - if ( ! *v ) continue; - p += sprintf(p, "%-48s : %s%s %s\n", *v, - avail & 0x1 ? "" : "NotImpl", - avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "", - avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): ""); + if ( ! *v ) + continue; + seq_printf(m, "%-48s : %s%s %s\n", *v, + avail & 0x1 ? "" : "NotImpl", + avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "", + avail & 0x1 ? (control & 0x1 ? 
"Ctrl" : "NoCtrl"): ""); } - return p - page; + return 0; } -static int -version_info(char *page) +static int version_info(struct seq_file *m) { pal_version_u_t min_ver, cur_ver; - char *p = page; if (ia64_pal_version(&min_ver, &cur_ver) != 0) return 0; - p += sprintf(p, - "PAL_vendor : 0x%02x (min=0x%02x)\n" - "PAL_A : %02x.%02x (min=%02x.%02x)\n" - "PAL_B : %02x.%02x (min=%02x.%02x)\n", - cur_ver.pal_version_s.pv_pal_vendor, - min_ver.pal_version_s.pv_pal_vendor, - cur_ver.pal_version_s.pv_pal_a_model, - cur_ver.pal_version_s.pv_pal_a_rev, - min_ver.pal_version_s.pv_pal_a_model, - min_ver.pal_version_s.pv_pal_a_rev, - cur_ver.pal_version_s.pv_pal_b_model, - cur_ver.pal_version_s.pv_pal_b_rev, - min_ver.pal_version_s.pv_pal_b_model, - min_ver.pal_version_s.pv_pal_b_rev); - return p - page; + seq_printf(m, + "PAL_vendor : 0x%02x (min=0x%02x)\n" + "PAL_A : %02x.%02x (min=%02x.%02x)\n" + "PAL_B : %02x.%02x (min=%02x.%02x)\n", + cur_ver.pal_version_s.pv_pal_vendor, + min_ver.pal_version_s.pv_pal_vendor, + cur_ver.pal_version_s.pv_pal_a_model, + cur_ver.pal_version_s.pv_pal_a_rev, + min_ver.pal_version_s.pv_pal_a_model, + min_ver.pal_version_s.pv_pal_a_rev, + cur_ver.pal_version_s.pv_pal_b_model, + cur_ver.pal_version_s.pv_pal_b_rev, + min_ver.pal_version_s.pv_pal_b_model, + min_ver.pal_version_s.pv_pal_b_rev); + return 0; } -static int -perfmon_info(char *page) +static int perfmon_info(struct seq_file *m) { - char *p = page; u64 pm_buffer[16]; pal_perf_mon_info_u_t pm_info; - if (ia64_pal_perf_mon_info(pm_buffer, &pm_info) != 0) return 0; - - p += sprintf(p, - "PMC/PMD pairs : %d\n" - "Counter width : %d bits\n" - "Cycle event number : %d\n" - "Retired event number : %d\n" - "Implemented PMC : ", - pm_info.pal_perf_mon_info_s.generic, pm_info.pal_perf_mon_info_s.width, - pm_info.pal_perf_mon_info_s.cycles, pm_info.pal_perf_mon_info_s.retired); + if (ia64_pal_perf_mon_info(pm_buffer, &pm_info) != 0) + return 0; - p = bitregister_process(p, pm_buffer, 256); - p += sprintf(p, "\nImplemented PMD : "); - p = bitregister_process(p, pm_buffer+4, 256); - p += sprintf(p, "\nCycles count capable : "); - p = bitregister_process(p, pm_buffer+8, 256); - p += sprintf(p, "\nRetired bundles count capable : "); + seq_printf(m, + "PMC/PMD pairs : %d\n" + "Counter width : %d bits\n" + "Cycle event number : %d\n" + "Retired event number : %d\n" + "Implemented PMC : ", + pm_info.pal_perf_mon_info_s.generic, + pm_info.pal_perf_mon_info_s.width, + pm_info.pal_perf_mon_info_s.cycles, + pm_info.pal_perf_mon_info_s.retired); + + bitregister_process(m, pm_buffer, 256); + seq_puts(m, "\nImplemented PMD : "); + bitregister_process(m, pm_buffer+4, 256); + seq_puts(m, "\nCycles count capable : "); + bitregister_process(m, pm_buffer+8, 256); + seq_puts(m, "\nRetired bundles count capable : "); #ifdef CONFIG_ITANIUM /* * PAL_PERF_MON_INFO reports that only PMC4 can be used to count CPU_CYCLES * which is wrong, both PMC4 and PMD5 support it. 
*/ - if (pm_buffer[12] == 0x10) pm_buffer[12]=0x30; + if (pm_buffer[12] == 0x10) + pm_buffer[12]=0x30; #endif - p = bitregister_process(p, pm_buffer+12, 256); - - p += sprintf(p, "\n"); - - return p - page; + bitregister_process(m, pm_buffer+12, 256); + seq_putc(m, '\n'); + return 0; } -static int -frequency_info(char *page) +static int frequency_info(struct seq_file *m) { - char *p = page; struct pal_freq_ratio proc, itc, bus; - u64 base; + unsigned long base; if (ia64_pal_freq_base(&base) == -1) - p += sprintf(p, "Output clock : not implemented\n"); + seq_puts(m, "Output clock : not implemented\n"); else - p += sprintf(p, "Output clock : %ld ticks/s\n", base); + seq_printf(m, "Output clock : %ld ticks/s\n", base); if (ia64_pal_freq_ratios(&proc, &bus, &itc) != 0) return 0; - p += sprintf(p, + seq_printf(m, "Processor/Clock ratio : %d/%d\n" "Bus/Clock ratio : %d/%d\n" "ITC/Clock ratio : %d/%d\n", proc.num, proc.den, bus.num, bus.den, itc.num, itc.den); - - return p - page; + return 0; } -static int -tr_info(char *page) +static int tr_info(struct seq_file *m) { - char *p = page; - s64 status; + long status; pal_tr_valid_u_t tr_valid; u64 tr_buffer[4]; pal_vm_info_1_u_t vm_info_1; pal_vm_info_2_u_t vm_info_2; - u64 i, j; - u64 max[3], pgm; + unsigned long i, j; + unsigned long max[3], pgm; struct ifa_reg { - u64 valid:1; - u64 ig:11; - u64 vpn:52; + unsigned long valid:1; + unsigned long ig:11; + unsigned long vpn:52; } *ifa_reg; struct itir_reg { - u64 rv1:2; - u64 ps:6; - u64 key:24; - u64 rv2:32; + unsigned long rv1:2; + unsigned long ps:6; + unsigned long key:24; + unsigned long rv2:32; } *itir_reg; struct gr_reg { - u64 p:1; - u64 rv1:1; - u64 ma:3; - u64 a:1; - u64 d:1; - u64 pl:2; - u64 ar:3; - u64 ppn:38; - u64 rv2:2; - u64 ed:1; - u64 ig:11; + unsigned long p:1; + unsigned long rv1:1; + unsigned long ma:3; + unsigned long a:1; + unsigned long d:1; + unsigned long pl:2; + unsigned long ar:3; + unsigned long ppn:38; + unsigned long rv2:2; + unsigned long ed:1; + unsigned long ig:11; } *gr_reg; struct rid_reg { - u64 ig1:1; - u64 rv1:1; - u64 ig2:6; - u64 rid:24; - u64 rv2:32; + unsigned long ig1:1; + unsigned long rv1:1; + unsigned long ig2:6; + unsigned long rid:24; + unsigned long rv2:32; } *rid_reg; if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) { @@ -794,39 +767,40 @@ tr_info(char *page) ifa_reg = (struct ifa_reg *)&tr_buffer[2]; - if (ifa_reg->valid == 0) continue; + if (ifa_reg->valid == 0) + continue; gr_reg = (struct gr_reg *)tr_buffer; itir_reg = (struct itir_reg *)&tr_buffer[1]; rid_reg = (struct rid_reg *)&tr_buffer[3]; pgm = -1 << (itir_reg->ps - 12); - p += sprintf(p, - "%cTR%lu: av=%d pv=%d dv=%d mv=%d\n" - "\tppn : 0x%lx\n" - "\tvpn : 0x%lx\n" - "\tps : ", - "ID"[i], j, - tr_valid.pal_tr_valid_s.access_rights_valid, - tr_valid.pal_tr_valid_s.priv_level_valid, - tr_valid.pal_tr_valid_s.dirty_bit_valid, - tr_valid.pal_tr_valid_s.mem_attr_valid, - (gr_reg->ppn & pgm)<< 12, (ifa_reg->vpn & pgm)<< 12); - - p = bitvector_process(p, 1<< itir_reg->ps); - - p += sprintf(p, - "\n\tpl : %d\n" - "\tar : %d\n" - "\trid : %x\n" - "\tp : %d\n" - "\tma : %d\n" - "\td : %d\n", - gr_reg->pl, gr_reg->ar, rid_reg->rid, gr_reg->p, gr_reg->ma, - gr_reg->d); + seq_printf(m, + "%cTR%lu: av=%d pv=%d dv=%d mv=%d\n" + "\tppn : 0x%lx\n" + "\tvpn : 0x%lx\n" + "\tps : ", + "ID"[i], j, + tr_valid.pal_tr_valid_s.access_rights_valid, + tr_valid.pal_tr_valid_s.priv_level_valid, + tr_valid.pal_tr_valid_s.dirty_bit_valid, + tr_valid.pal_tr_valid_s.mem_attr_valid, + (gr_reg->ppn & pgm)<< 
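The ratios printed by frequency_info() above are all relative to the platform clock returned by ia64_pal_freq_base(). A worked example with made-up numbers: a 200 MHz base clock and an 11/2 processor ratio give a 1.1 GHz CPU.

        static unsigned long demo_cpu_hz(void)
        {
                unsigned long base_hz = 200000000UL;  /* hypothetical base clock */
                unsigned int num = 11, den = 2;       /* hypothetical proc ratio */

                return base_hz / den * num;           /* 1100000000, i.e. 1.1 GHz */
        }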
12, (ifa_reg->vpn & pgm)<< 12); + + bitvector_process(m, 1<< itir_reg->ps); + + seq_printf(m, + "\n\tpl : %d\n" + "\tar : %d\n" + "\trid : %x\n" + "\tp : %d\n" + "\tma : %d\n" + "\td : %d\n", + gr_reg->pl, gr_reg->ar, rid_reg->rid, gr_reg->p, gr_reg->ma, + gr_reg->d); } } - return p - page; + return 0; } @@ -834,7 +808,7 @@ tr_info(char *page) /* * List {name,function} pairs for every entry in /proc/palinfo/cpu* */ -static palinfo_entry_t palinfo_entries[]={ +static const palinfo_entry_t palinfo_entries[]={ { "version_info", version_info, }, { "vm_info", vm_info, }, { "cache_info", cache_info, }, @@ -849,17 +823,6 @@ static palinfo_entry_t palinfo_entries[]={ #define NR_PALINFO_ENTRIES (int) ARRAY_SIZE(palinfo_entries) -/* - * this array is used to keep track of the proc entries we create. This is - * required in the module mode when we need to remove all entries. The procfs code - * does not do recursion of deletion - * - * Notes: - * - +1 accounts for the cpuN directory entry in /proc/pal - */ -#define NR_PALINFO_PROC_ENTRIES (NR_CPUS*(NR_PALINFO_ENTRIES+1)) - -static struct proc_dir_entry *palinfo_proc_entries[NR_PALINFO_PROC_ENTRIES]; static struct proc_dir_entry *palinfo_dir; /* @@ -887,7 +850,7 @@ typedef union { */ typedef struct { palinfo_func_t func; /* pointer to function to call */ - char *page; /* buffer to store results */ + struct seq_file *m; /* buffer to store results */ int ret; /* return value from call */ } palinfo_smp_data_t; @@ -900,7 +863,7 @@ static void palinfo_smp_call(void *info) { palinfo_smp_data_t *data = (palinfo_smp_data_t *)info; - data->ret = (*data->func)(data->page); + data->ret = (*data->func)(data->m); } /* @@ -910,13 +873,13 @@ palinfo_smp_call(void *info) * otherwise how many bytes in the "page" buffer were written */ static -int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page) +int palinfo_handle_smp(struct seq_file *m, pal_func_cpu_u_t *f) { palinfo_smp_data_t ptr; int ret; ptr.func = palinfo_entries[f->func_id].proc_read; - ptr.page = page; + ptr.m = m; ptr.ret = 0; /* just in case */ @@ -930,7 +893,7 @@ int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page) } #else /* ! CONFIG_SMP */ static -int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page) +int palinfo_handle_smp(struct seq_file *m, pal_func_cpu_u_t *f) { printk(KERN_ERR "palinfo: should not be called with non SMP kernel\n"); return 0; @@ -940,96 +903,66 @@ int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page) /* * Entry point routine: all calls go through this function */ -static int -palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data) +static int proc_palinfo_show(struct seq_file *m, void *v) { - int len=0; - pal_func_cpu_u_t *f = (pal_func_cpu_u_t *)&data; + pal_func_cpu_u_t *f = (pal_func_cpu_u_t *)&m->private; /* * in SMP mode, we may need to call another CPU to get correct * information. 
PAL, by definition, is processor specific */ if (f->req_cpu == get_cpu()) - len = (*palinfo_entries[f->func_id].proc_read)(page); + (*palinfo_entries[f->func_id].proc_read)(m); else - len = palinfo_handle_smp(f, page); + palinfo_handle_smp(m, f); put_cpu(); + return 0; +} - if (len <= off+count) *eof = 1; - - *start = page + off; - len -= off; - - if (len>count) len = count; - if (len<0) len = 0; - - return len; +static int proc_palinfo_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_palinfo_show, PDE_DATA(inode)); } -static void __cpuinit +static const struct file_operations proc_palinfo_fops = { + .open = proc_palinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void create_palinfo_proc_entries(unsigned int cpu) { -# define CPUSTR "cpu%d" - pal_func_cpu_u_t f; - struct proc_dir_entry **pdir; struct proc_dir_entry *cpu_dir; int j; - char cpustr[sizeof(CPUSTR)]; - - - /* - * we keep track of created entries in a depth-first order for - * cleanup purposes. Each entry is stored into palinfo_proc_entries - */ - sprintf(cpustr,CPUSTR, cpu); + char cpustr[3+4+1]; /* cpu numbers are up to 4095 on itanic */ + sprintf(cpustr, "cpu%d", cpu); cpu_dir = proc_mkdir(cpustr, palinfo_dir); + if (!cpu_dir) + return; f.req_cpu = cpu; - /* - * Compute the location to store per cpu entries - * We dont store the top level entry in this list, but - * remove it finally after removing all cpu entries. - */ - pdir = &palinfo_proc_entries[cpu*(NR_PALINFO_ENTRIES+1)]; - *pdir++ = cpu_dir; for (j=0; j < NR_PALINFO_ENTRIES; j++) { f.func_id = j; - *pdir = create_proc_read_entry( - palinfo_entries[j].name, 0, cpu_dir, - palinfo_read_entry, (void *)f.value); - if (*pdir) - (*pdir)->owner = THIS_MODULE; - pdir++; + proc_create_data(palinfo_entries[j].name, 0, cpu_dir, + &proc_palinfo_fops, (void *)f.value); } } static void remove_palinfo_proc_entries(unsigned int hcpu) { - int j; - struct proc_dir_entry *cpu_dir, **pdir; - - pdir = &palinfo_proc_entries[hcpu*(NR_PALINFO_ENTRIES+1)]; - cpu_dir = *pdir; - *pdir++=NULL; - for (j=0; j < (NR_PALINFO_ENTRIES); j++) { - if ((*pdir)) { - remove_proc_entry ((*pdir)->name, cpu_dir); - *pdir ++= NULL; - } - } - - if (cpu_dir) { - remove_proc_entry(cpu_dir->name, palinfo_dir); - } + char cpustr[3+4+1]; /* cpu numbers are up to 4095 on itanic */ + sprintf(cpustr, "cpu%d", hcpu); + remove_proc_subtree(cpustr, palinfo_dir); } -static int __cpuinit palinfo_cpu_callback(struct notifier_block *nfb, +static int palinfo_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int hotcpu = (unsigned long)hcpu; @@ -1060,6 +993,10 @@ palinfo_init(void) printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION); palinfo_dir = proc_mkdir("pal", NULL); + if (!palinfo_dir) + return -ENOMEM; + + cpu_notifier_register_begin(); /* Create palinfo dirs in /proc for all online cpus */ for_each_online_cpu(i) { @@ -1067,7 +1004,9 @@ palinfo_init(void) } /* Register for future delivery via notify registration */ - register_hotcpu_notifier(&palinfo_cpu_notifier); + __register_hotcpu_notifier(&palinfo_cpu_notifier); + + cpu_notifier_register_done(); return 0; } @@ -1075,22 +1014,8 @@ palinfo_init(void) static void __exit palinfo_exit(void) { - int i = 0; - - /* remove all nodes: depth first pass. 
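The proc_create_data()/PDE_DATA() pairing above works because the cpu number and function index are packed directly into the entry's data pointer, which is why the old palinfo_proc_entries[] bookkeeping array could be dropped. A self-contained sketch of the round trip (the 16/16 field split is illustrative, not the exact pal_func_cpu_u_t layout):

        union demo_cookie {
                unsigned long value;
                struct {
                        unsigned int func_id : 16;
                        unsigned int req_cpu : 16;
                };
        };

        static void demo_roundtrip(void)
        {
                union demo_cookie c = { .value = 0 };
                void *data;

                c.func_id = 3;
                c.req_cpu = 1;
                data = (void *)c.value;   /* handed to proc_create_data() */

                /* later, in ->show(), m->private carries the same pointer */
                c.value = (unsigned long)data;
                /* c.req_cpu == 1 and c.func_id == 3 again */
        }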
Could optimize this */ - for_each_online_cpu(i) { - remove_palinfo_proc_entries(i); - } - - /* - * Remove the top level entry finally - */ - remove_proc_entry(palinfo_dir->name, NULL); - - /* - * Unregister from cpu notifier callbacks - */ unregister_hotcpu_notifier(&palinfo_cpu_notifier); + remove_proc_subtree("pal", NULL); } module_init(palinfo_init); diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index 9f14c16f636..1b22f6de293 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -46,13 +46,23 @@ struct pv_info pv_info = { * initialization hooks. */ -struct pv_init_ops pv_init_ops; +static void __init +ia64_native_patch_branch(unsigned long tag, unsigned long type); + +struct pv_init_ops pv_init_ops = +{ +#ifdef ASM_SUPPORTED + .patch_bundle = ia64_native_patch_bundle, +#endif + .patch_branch = ia64_native_patch_branch, +}; /*************************************************************************** * pv_cpu_ops * intrinsics hooks. */ +#ifndef ASM_SUPPORTED /* ia64_native_xxx are macros so that we have to make them real functions */ #define DEFINE_VOID_FUNC1(name) \ @@ -60,7 +70,14 @@ struct pv_init_ops pv_init_ops; ia64_native_ ## name ## _func(unsigned long arg) \ { \ ia64_native_ ## name(arg); \ - } \ + } + +#define DEFINE_VOID_FUNC1_VOID(name) \ + static void \ + ia64_native_ ## name ## _func(void *arg) \ + { \ + ia64_native_ ## name(arg); \ + } #define DEFINE_VOID_FUNC2(name) \ static void \ @@ -68,7 +85,7 @@ struct pv_init_ops pv_init_ops; unsigned long arg1) \ { \ ia64_native_ ## name(arg0, arg1); \ - } \ + } #define DEFINE_FUNC0(name) \ static unsigned long \ @@ -84,7 +101,7 @@ struct pv_init_ops pv_init_ops; return ia64_native_ ## name(arg); \ } \ -DEFINE_VOID_FUNC1(fc); +DEFINE_VOID_FUNC1_VOID(fc); DEFINE_VOID_FUNC1(intrin_local_irq_restore); DEFINE_VOID_FUNC2(ptcga); @@ -274,6 +291,266 @@ ia64_native_setreg_func(int regnum, unsigned long val) break; } } +#else + +#define __DEFINE_FUNC(name, code) \ + extern const char ia64_native_ ## name ## _direct_start[]; \ + extern const char ia64_native_ ## name ## _direct_end[]; \ + asm (".align 32\n" \ + ".proc ia64_native_" #name "_func\n" \ + "ia64_native_" #name "_func:\n" \ + "ia64_native_" #name "_direct_start:\n" \ + code \ + "ia64_native_" #name "_direct_end:\n" \ + "br.cond.sptk.many b6\n" \ + ".endp ia64_native_" #name "_func\n") + +#define DEFINE_VOID_FUNC0(name, code) \ + extern void \ + ia64_native_ ## name ## _func(void); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_VOID_FUNC1(name, code) \ + extern void \ + ia64_native_ ## name ## _func(unsigned long arg); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_VOID_FUNC1_VOID(name, code) \ + extern void \ + ia64_native_ ## name ## _func(void *arg); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_VOID_FUNC2(name, code) \ + extern void \ + ia64_native_ ## name ## _func(unsigned long arg0, \ + unsigned long arg1); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_FUNC0(name, code) \ + extern unsigned long \ + ia64_native_ ## name ## _func(void); \ + __DEFINE_FUNC(name, code) + +#define DEFINE_FUNC1(name, type, code) \ + extern unsigned long \ + ia64_native_ ## name ## _func(type arg); \ + __DEFINE_FUNC(name, code) + +DEFINE_VOID_FUNC1_VOID(fc, + "fc r8\n"); +DEFINE_VOID_FUNC1(intrin_local_irq_restore, + ";;\n" + " cmp.ne p6, p7 = r8, r0\n" + ";;\n" + "(p6) ssm psr.i\n" + "(p7) rsm psr.i\n" + ";;\n" + "(p6) srlz.d\n"); + +DEFINE_VOID_FUNC2(ptcga, + "ptc.ga r8, r9\n"); +DEFINE_VOID_FUNC2(set_rr, + "mov rr[r8] = r9\n"); + +/* 
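The DEFINE_VOID_FUNC1 family above exists because the ia64_native_*() intrinsics are macros, so real functions must be stamped out before their addresses can be stored in pv_cpu_ops. A stripped-down sketch of the ## token-pasting pattern (demo_* names are placeholders):

        #define demo_ping(x) ((void)(x))   /* stand-in for an intrinsic macro */

        #define DEFINE_VOID_FUNC1(name)                                 \
                static void demo_ ## name ## _func(unsigned long arg)   \
                {                                                       \
                        demo_ ## name(arg);                             \
                }

        DEFINE_VOID_FUNC1(ping)   /* expands to a real demo_ping_func() */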
ia64_native_getreg(_IA64_REG_PSR) & IA64_PSR_I */ +DEFINE_FUNC0(get_psr_i, + "mov r2 = " __stringify(1 << IA64_PSR_I_BIT) "\n" + "mov r8 = psr\n" + ";;\n" + "and r8 = r2, r8\n"); + +DEFINE_FUNC1(thash, unsigned long, + "thash r8 = r8\n"); +DEFINE_FUNC1(get_cpuid, int, + "mov r8 = cpuid[r8]\n"); +DEFINE_FUNC1(get_pmd, int, + "mov r8 = pmd[r8]\n"); +DEFINE_FUNC1(get_rr, unsigned long, + "mov r8 = rr[r8]\n"); + +DEFINE_VOID_FUNC0(ssm_i, + "ssm psr.i\n"); +DEFINE_VOID_FUNC0(rsm_i, + "rsm psr.i\n"); + +extern void +ia64_native_set_rr0_to_rr4_func(unsigned long val0, unsigned long val1, + unsigned long val2, unsigned long val3, + unsigned long val4); +__DEFINE_FUNC(set_rr0_to_rr4, + "mov rr[r0] = r8\n" + "movl r2 = 0x2000000000000000\n" + ";;\n" + "mov rr[r2] = r9\n" + "shl r3 = r2, 1\n" /* movl r3 = 0x4000000000000000 */ + ";;\n" + "add r2 = r2, r3\n" /* movl r2 = 0x6000000000000000 */ + "mov rr[r3] = r10\n" + ";;\n" + "mov rr[r2] = r11\n" + "shl r3 = r3, 1\n" /* movl r3 = 0x8000000000000000 */ + ";;\n" + "mov rr[r3] = r14\n"); + +extern unsigned long ia64_native_getreg_func(int regnum); +asm(".global ia64_native_getreg_func\n"); +#define __DEFINE_GET_REG(id, reg) \ + "mov r2 = " __stringify(_IA64_REG_ ## id) "\n" \ + ";;\n" \ + "cmp.eq p6, p0 = r2, r8\n" \ + ";;\n" \ + "(p6) mov r8 = " #reg "\n" \ + "(p6) br.cond.sptk.many b6\n" \ + ";;\n" +#define __DEFINE_GET_AR(id, reg) __DEFINE_GET_REG(AR_ ## id, ar.reg) +#define __DEFINE_GET_CR(id, reg) __DEFINE_GET_REG(CR_ ## id, cr.reg) + +__DEFINE_FUNC(getreg, + __DEFINE_GET_REG(GP, gp) + /*__DEFINE_GET_REG(IP, ip)*/ /* returned ip value shouldn't be constant */ + __DEFINE_GET_REG(PSR, psr) + __DEFINE_GET_REG(TP, tp) + __DEFINE_GET_REG(SP, sp) + + __DEFINE_GET_REG(AR_KR0, ar0) + __DEFINE_GET_REG(AR_KR1, ar1) + __DEFINE_GET_REG(AR_KR2, ar2) + __DEFINE_GET_REG(AR_KR3, ar3) + __DEFINE_GET_REG(AR_KR4, ar4) + __DEFINE_GET_REG(AR_KR5, ar5) + __DEFINE_GET_REG(AR_KR6, ar6) + __DEFINE_GET_REG(AR_KR7, ar7) + __DEFINE_GET_AR(RSC, rsc) + __DEFINE_GET_AR(BSP, bsp) + __DEFINE_GET_AR(BSPSTORE, bspstore) + __DEFINE_GET_AR(RNAT, rnat) + __DEFINE_GET_AR(FCR, fcr) + __DEFINE_GET_AR(EFLAG, eflag) + __DEFINE_GET_AR(CSD, csd) + __DEFINE_GET_AR(SSD, ssd) + __DEFINE_GET_REG(AR_CFLAG, ar27) + __DEFINE_GET_AR(FSR, fsr) + __DEFINE_GET_AR(FIR, fir) + __DEFINE_GET_AR(FDR, fdr) + __DEFINE_GET_AR(CCV, ccv) + __DEFINE_GET_AR(UNAT, unat) + __DEFINE_GET_AR(FPSR, fpsr) + __DEFINE_GET_AR(ITC, itc) + __DEFINE_GET_AR(PFS, pfs) + __DEFINE_GET_AR(LC, lc) + __DEFINE_GET_AR(EC, ec) + + __DEFINE_GET_CR(DCR, dcr) + __DEFINE_GET_CR(ITM, itm) + __DEFINE_GET_CR(IVA, iva) + __DEFINE_GET_CR(PTA, pta) + __DEFINE_GET_CR(IPSR, ipsr) + __DEFINE_GET_CR(ISR, isr) + __DEFINE_GET_CR(IIP, iip) + __DEFINE_GET_CR(IFA, ifa) + __DEFINE_GET_CR(ITIR, itir) + __DEFINE_GET_CR(IIPA, iipa) + __DEFINE_GET_CR(IFS, ifs) + __DEFINE_GET_CR(IIM, iim) + __DEFINE_GET_CR(IHA, iha) + __DEFINE_GET_CR(LID, lid) + __DEFINE_GET_CR(IVR, ivr) + __DEFINE_GET_CR(TPR, tpr) + __DEFINE_GET_CR(EOI, eoi) + __DEFINE_GET_CR(IRR0, irr0) + __DEFINE_GET_CR(IRR1, irr1) + __DEFINE_GET_CR(IRR2, irr2) + __DEFINE_GET_CR(IRR3, irr3) + __DEFINE_GET_CR(ITV, itv) + __DEFINE_GET_CR(PMV, pmv) + __DEFINE_GET_CR(CMCV, cmcv) + __DEFINE_GET_CR(LRR0, lrr0) + __DEFINE_GET_CR(LRR1, lrr1) + + "mov r8 = -1\n" /* unsupported case */ + ); + +extern void ia64_native_setreg_func(int regnum, unsigned long val); +asm(".global ia64_native_setreg_func\n"); +#define __DEFINE_SET_REG(id, reg) \ + "mov r2 = " __stringify(_IA64_REG_ ## id) "\n" \ + ";;\n" \ + "cmp.eq p6, p0 = 
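Each __DEFINE_FUNC body above is deliberately bracketed by _direct_start/_direct_end symbols so the boot-time patcher can treat the instruction sequence as copyable data. A sketch of how a bracketed body gets measured and copied over a call site, mirroring the logic of __paravirt_patch_apply_bundle() added further down in this diff (names here are illustrative):

        #include <string.h>

        extern const char demo_op_direct_start[];
        extern const char demo_op_direct_end[];

        static unsigned long demo_inline_patch(void *site, unsigned long room)
        {
                unsigned long need = demo_op_direct_end - demo_op_direct_start;

                if (need > room)
                        return 0;   /* no room: keep the indirect call */
                memcpy(site, demo_op_direct_start, need);
                return need;        /* caller pads the remainder with nops */
        }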
r2, r9\n" \ + ";;\n" \ + "(p6) mov " #reg " = r8\n" \ + "(p6) br.cond.sptk.many b6\n" \ + ";;\n" +#define __DEFINE_SET_AR(id, reg) __DEFINE_SET_REG(AR_ ## id, ar.reg) +#define __DEFINE_SET_CR(id, reg) __DEFINE_SET_REG(CR_ ## id, cr.reg) +__DEFINE_FUNC(setreg, + "mov r2 = " __stringify(_IA64_REG_PSR_L) "\n" + ";;\n" + "cmp.eq p6, p0 = r2, r9\n" + ";;\n" + "(p6) mov psr.l = r8\n" +#ifdef HAVE_SERIALIZE_DIRECTIVE + ".serialize.data\n" +#endif + "(p6) br.cond.sptk.many b6\n" + __DEFINE_SET_REG(GP, gp) + __DEFINE_SET_REG(SP, sp) + + __DEFINE_SET_REG(AR_KR0, ar0) + __DEFINE_SET_REG(AR_KR1, ar1) + __DEFINE_SET_REG(AR_KR2, ar2) + __DEFINE_SET_REG(AR_KR3, ar3) + __DEFINE_SET_REG(AR_KR4, ar4) + __DEFINE_SET_REG(AR_KR5, ar5) + __DEFINE_SET_REG(AR_KR6, ar6) + __DEFINE_SET_REG(AR_KR7, ar7) + __DEFINE_SET_AR(RSC, rsc) + __DEFINE_SET_AR(BSP, bsp) + __DEFINE_SET_AR(BSPSTORE, bspstore) + __DEFINE_SET_AR(RNAT, rnat) + __DEFINE_SET_AR(FCR, fcr) + __DEFINE_SET_AR(EFLAG, eflag) + __DEFINE_SET_AR(CSD, csd) + __DEFINE_SET_AR(SSD, ssd) + __DEFINE_SET_REG(AR_CFLAG, ar27) + __DEFINE_SET_AR(FSR, fsr) + __DEFINE_SET_AR(FIR, fir) + __DEFINE_SET_AR(FDR, fdr) + __DEFINE_SET_AR(CCV, ccv) + __DEFINE_SET_AR(UNAT, unat) + __DEFINE_SET_AR(FPSR, fpsr) + __DEFINE_SET_AR(ITC, itc) + __DEFINE_SET_AR(PFS, pfs) + __DEFINE_SET_AR(LC, lc) + __DEFINE_SET_AR(EC, ec) + + __DEFINE_SET_CR(DCR, dcr) + __DEFINE_SET_CR(ITM, itm) + __DEFINE_SET_CR(IVA, iva) + __DEFINE_SET_CR(PTA, pta) + __DEFINE_SET_CR(IPSR, ipsr) + __DEFINE_SET_CR(ISR, isr) + __DEFINE_SET_CR(IIP, iip) + __DEFINE_SET_CR(IFA, ifa) + __DEFINE_SET_CR(ITIR, itir) + __DEFINE_SET_CR(IIPA, iipa) + __DEFINE_SET_CR(IFS, ifs) + __DEFINE_SET_CR(IIM, iim) + __DEFINE_SET_CR(IHA, iha) + __DEFINE_SET_CR(LID, lid) + __DEFINE_SET_CR(IVR, ivr) + __DEFINE_SET_CR(TPR, tpr) + __DEFINE_SET_CR(EOI, eoi) + __DEFINE_SET_CR(IRR0, irr0) + __DEFINE_SET_CR(IRR1, irr1) + __DEFINE_SET_CR(IRR2, irr2) + __DEFINE_SET_CR(IRR3, irr3) + __DEFINE_SET_CR(ITV, itv) + __DEFINE_SET_CR(PMV, pmv) + __DEFINE_SET_CR(CMCV, cmcv) + __DEFINE_SET_CR(LRR0, lrr0) + __DEFINE_SET_CR(LRR1, lrr1) + ); +#endif struct pv_cpu_ops pv_cpu_ops = { .fc = ia64_native_fc_func, @@ -357,6 +634,8 @@ struct pv_irq_ops pv_irq_ops = { * pv_time_ops * time operations */ +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; static int ia64_native_do_steal_accounting(unsigned long *new_itm) @@ -366,4 +645,258 @@ ia64_native_do_steal_accounting(unsigned long *new_itm) struct pv_time_ops pv_time_ops = { .do_steal_accounting = ia64_native_do_steal_accounting, + .sched_clock = ia64_native_sched_clock, +}; + +/*************************************************************************** + * binary pacthing + * pv_init_ops.patch_bundle + */ + +#ifdef ASM_SUPPORTED +#define IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg) \ + __DEFINE_FUNC(get_ ## name, \ + ";;\n" \ + "mov r8 = " #reg "\n" \ + ";;\n") + +#define IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg) \ + __DEFINE_FUNC(set_ ## name, \ + ";;\n" \ + "mov " #reg " = r8\n" \ + ";;\n") + +#define IA64_NATIVE_PATCH_DEFINE_REG(name, reg) \ + IA64_NATIVE_PATCH_DEFINE_GET_REG(name, reg); \ + IA64_NATIVE_PATCH_DEFINE_SET_REG(name, reg) \ + +#define IA64_NATIVE_PATCH_DEFINE_AR(name, reg) \ + IA64_NATIVE_PATCH_DEFINE_REG(ar_ ## name, ar.reg) + +#define IA64_NATIVE_PATCH_DEFINE_CR(name, reg) \ + IA64_NATIVE_PATCH_DEFINE_REG(cr_ ## name, cr.reg) + + +IA64_NATIVE_PATCH_DEFINE_GET_REG(psr, psr); +IA64_NATIVE_PATCH_DEFINE_GET_REG(tp, tp); + +/* IA64_NATIVE_PATCH_DEFINE_SET_REG(psr_l, 
psr.l); */ +__DEFINE_FUNC(set_psr_l, + ";;\n" + "mov psr.l = r8\n" +#ifdef HAVE_SERIALIZE_DIRECTIVE + ".serialize.data\n" +#endif + ";;\n"); + +IA64_NATIVE_PATCH_DEFINE_REG(gp, gp); +IA64_NATIVE_PATCH_DEFINE_REG(sp, sp); + +IA64_NATIVE_PATCH_DEFINE_REG(kr0, ar0); +IA64_NATIVE_PATCH_DEFINE_REG(kr1, ar1); +IA64_NATIVE_PATCH_DEFINE_REG(kr2, ar2); +IA64_NATIVE_PATCH_DEFINE_REG(kr3, ar3); +IA64_NATIVE_PATCH_DEFINE_REG(kr4, ar4); +IA64_NATIVE_PATCH_DEFINE_REG(kr5, ar5); +IA64_NATIVE_PATCH_DEFINE_REG(kr6, ar6); +IA64_NATIVE_PATCH_DEFINE_REG(kr7, ar7); + +IA64_NATIVE_PATCH_DEFINE_AR(rsc, rsc); +IA64_NATIVE_PATCH_DEFINE_AR(bsp, bsp); +IA64_NATIVE_PATCH_DEFINE_AR(bspstore, bspstore); +IA64_NATIVE_PATCH_DEFINE_AR(rnat, rnat); +IA64_NATIVE_PATCH_DEFINE_AR(fcr, fcr); +IA64_NATIVE_PATCH_DEFINE_AR(eflag, eflag); +IA64_NATIVE_PATCH_DEFINE_AR(csd, csd); +IA64_NATIVE_PATCH_DEFINE_AR(ssd, ssd); +IA64_NATIVE_PATCH_DEFINE_REG(ar27, ar27); +IA64_NATIVE_PATCH_DEFINE_AR(fsr, fsr); +IA64_NATIVE_PATCH_DEFINE_AR(fir, fir); +IA64_NATIVE_PATCH_DEFINE_AR(fdr, fdr); +IA64_NATIVE_PATCH_DEFINE_AR(ccv, ccv); +IA64_NATIVE_PATCH_DEFINE_AR(unat, unat); +IA64_NATIVE_PATCH_DEFINE_AR(fpsr, fpsr); +IA64_NATIVE_PATCH_DEFINE_AR(itc, itc); +IA64_NATIVE_PATCH_DEFINE_AR(pfs, pfs); +IA64_NATIVE_PATCH_DEFINE_AR(lc, lc); +IA64_NATIVE_PATCH_DEFINE_AR(ec, ec); + +IA64_NATIVE_PATCH_DEFINE_CR(dcr, dcr); +IA64_NATIVE_PATCH_DEFINE_CR(itm, itm); +IA64_NATIVE_PATCH_DEFINE_CR(iva, iva); +IA64_NATIVE_PATCH_DEFINE_CR(pta, pta); +IA64_NATIVE_PATCH_DEFINE_CR(ipsr, ipsr); +IA64_NATIVE_PATCH_DEFINE_CR(isr, isr); +IA64_NATIVE_PATCH_DEFINE_CR(iip, iip); +IA64_NATIVE_PATCH_DEFINE_CR(ifa, ifa); +IA64_NATIVE_PATCH_DEFINE_CR(itir, itir); +IA64_NATIVE_PATCH_DEFINE_CR(iipa, iipa); +IA64_NATIVE_PATCH_DEFINE_CR(ifs, ifs); +IA64_NATIVE_PATCH_DEFINE_CR(iim, iim); +IA64_NATIVE_PATCH_DEFINE_CR(iha, iha); +IA64_NATIVE_PATCH_DEFINE_CR(lid, lid); +IA64_NATIVE_PATCH_DEFINE_CR(ivr, ivr); +IA64_NATIVE_PATCH_DEFINE_CR(tpr, tpr); +IA64_NATIVE_PATCH_DEFINE_CR(eoi, eoi); +IA64_NATIVE_PATCH_DEFINE_CR(irr0, irr0); +IA64_NATIVE_PATCH_DEFINE_CR(irr1, irr1); +IA64_NATIVE_PATCH_DEFINE_CR(irr2, irr2); +IA64_NATIVE_PATCH_DEFINE_CR(irr3, irr3); +IA64_NATIVE_PATCH_DEFINE_CR(itv, itv); +IA64_NATIVE_PATCH_DEFINE_CR(pmv, pmv); +IA64_NATIVE_PATCH_DEFINE_CR(cmcv, cmcv); +IA64_NATIVE_PATCH_DEFINE_CR(lrr0, lrr0); +IA64_NATIVE_PATCH_DEFINE_CR(lrr1, lrr1); + +static const struct paravirt_patch_bundle_elem ia64_native_patch_bundle_elems[] +__initdata_or_module = +{ +#define IA64_NATIVE_PATCH_BUNDLE_ELEM(name, type) \ + { \ + (void*)ia64_native_ ## name ## _direct_start, \ + (void*)ia64_native_ ## name ## _direct_end, \ + PARAVIRT_PATCH_TYPE_ ## type, \ + } + + IA64_NATIVE_PATCH_BUNDLE_ELEM(fc, FC), + IA64_NATIVE_PATCH_BUNDLE_ELEM(thash, THASH), + IA64_NATIVE_PATCH_BUNDLE_ELEM(get_cpuid, GET_CPUID), + IA64_NATIVE_PATCH_BUNDLE_ELEM(get_pmd, GET_PMD), + IA64_NATIVE_PATCH_BUNDLE_ELEM(ptcga, PTCGA), + IA64_NATIVE_PATCH_BUNDLE_ELEM(get_rr, GET_RR), + IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr, SET_RR), + IA64_NATIVE_PATCH_BUNDLE_ELEM(set_rr0_to_rr4, SET_RR0_TO_RR4), + IA64_NATIVE_PATCH_BUNDLE_ELEM(ssm_i, SSM_I), + IA64_NATIVE_PATCH_BUNDLE_ELEM(rsm_i, RSM_I), + IA64_NATIVE_PATCH_BUNDLE_ELEM(get_psr_i, GET_PSR_I), + IA64_NATIVE_PATCH_BUNDLE_ELEM(intrin_local_irq_restore, + INTRIN_LOCAL_IRQ_RESTORE), + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg) \ + { \ + (void*)ia64_native_get_ ## name ## _direct_start, \ + (void*)ia64_native_get_ ## name ## _direct_end, \ + PARAVIRT_PATCH_TYPE_GETREG + 
_IA64_REG_ ## reg, \ + } + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg) \ + { \ + (void*)ia64_native_set_ ## name ## _direct_start, \ + (void*)ia64_native_set_ ## name ## _direct_end, \ + PARAVIRT_PATCH_TYPE_SETREG + _IA64_REG_ ## reg, \ + } + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(name, reg) \ + IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(name, reg), \ + IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(name, reg) \ + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(name, reg) \ + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar_ ## name, AR_ ## reg) + +#define IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(name, reg) \ + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(cr_ ## name, CR_ ## reg) + + IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(psr, PSR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_GETREG(tp, TP), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_SETREG(psr_l, PSR_L), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(gp, GP), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(sp, SP), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr0, AR_KR0), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr1, AR_KR1), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr2, AR_KR2), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr3, AR_KR3), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr4, AR_KR4), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr5, AR_KR5), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr6, AR_KR6), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(kr7, AR_KR7), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rsc, RSC), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bsp, BSP), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(bspstore, BSPSTORE), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(rnat, RNAT), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fcr, FCR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(eflag, EFLAG), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(csd, CSD), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ssd, SSD), + IA64_NATIVE_PATCH_BUNDLE_ELEM_REG(ar27, AR_CFLAG), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fsr, FSR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fir, FIR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fdr, FDR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ccv, CCV), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(unat, UNAT), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(fpsr, FPSR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(itc, ITC), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(pfs, PFS), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(lc, LC), + IA64_NATIVE_PATCH_BUNDLE_ELEM_AR(ec, EC), + + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(dcr, DCR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itm, ITM), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iva, IVA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pta, PTA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ipsr, IPSR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(isr, ISR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iip, IIP), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifa, IFA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itir, ITIR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iipa, IIPA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ifs, IFS), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iim, IIM), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(iha, IHA), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lid, LID), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(ivr, IVR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(tpr, TPR), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(eoi, EOI), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr0, IRR0), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr1, IRR1), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr2, IRR2), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(irr3, IRR3), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(itv, ITV), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(pmv, PMV), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(cmcv, CMCV), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr0, LRR0), + IA64_NATIVE_PATCH_BUNDLE_ELEM_CR(lrr1, LRR1), }; + +unsigned long __init_or_module +ia64_native_patch_bundle(void *sbundle, void *ebundle, unsigned long type) +{ + const unsigned 
long nelems = sizeof(ia64_native_patch_bundle_elems) / + sizeof(ia64_native_patch_bundle_elems[0]); + + return __paravirt_patch_apply_bundle(sbundle, ebundle, type, + ia64_native_patch_bundle_elems, + nelems, NULL); +} +#endif /* ASM_SUPPORTED */ + +extern const char ia64_native_switch_to[]; +extern const char ia64_native_leave_syscall[]; +extern const char ia64_native_work_processed_syscall[]; +extern const char ia64_native_leave_kernel[]; + +const struct paravirt_patch_branch_target ia64_native_branch_target[] +__initconst = { +#define PARAVIRT_BR_TARGET(name, type) \ + { \ + ia64_native_ ## name, \ + PARAVIRT_PATCH_TYPE_BR_ ## type, \ + } + PARAVIRT_BR_TARGET(switch_to, SWITCH_TO), + PARAVIRT_BR_TARGET(leave_syscall, LEAVE_SYSCALL), + PARAVIRT_BR_TARGET(work_processed_syscall, WORK_PROCESSED_SYSCALL), + PARAVIRT_BR_TARGET(leave_kernel, LEAVE_KERNEL), +}; + +static void __init +ia64_native_patch_branch(unsigned long tag, unsigned long type) +{ + const unsigned long nelem = + sizeof(ia64_native_branch_target) / + sizeof(ia64_native_branch_target[0]); + __paravirt_patch_apply_branch(tag, type, + ia64_native_branch_target, nelem); +} diff --git a/arch/ia64/kernel/paravirt_inst.h b/arch/ia64/kernel/paravirt_inst.h index 64d6d810c64..1ad7512b5f6 100644 --- a/arch/ia64/kernel/paravirt_inst.h +++ b/arch/ia64/kernel/paravirt_inst.h @@ -22,9 +22,6 @@ #ifdef __IA64_ASM_PARAVIRTUALIZED_PVCHECK #include <asm/native/pvchk_inst.h> -#elif defined(__IA64_ASM_PARAVIRTUALIZED_XEN) -#include <asm/xen/inst.h> -#include <asm/xen/minstate.h> #else #include <asm/native/inst.h> #endif diff --git a/arch/ia64/kernel/paravirt_patch.c b/arch/ia64/kernel/paravirt_patch.c new file mode 100644 index 00000000000..bfdfef1b1ff --- /dev/null +++ b/arch/ia64/kernel/paravirt_patch.c @@ -0,0 +1,514 @@ +/****************************************************************************** + * linux/arch/ia64/kernel/paravirt_patch.c + * + * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/init.h> +#include <asm/intrinsics.h> +#include <asm/kprobes.h> +#include <asm/paravirt.h> +#include <asm/paravirt_patch.h> + +typedef union ia64_inst { + struct { + unsigned long long qp : 6; + unsigned long long : 31; + unsigned long long opcode : 4; + unsigned long long reserved : 23; + } generic; + unsigned long long l; +} ia64_inst_t; + +/* + * flush_icache_range() can't be used here. + * we are here before cpu_init() which initializes + * ia64_i_cache_stride_shift. flush_icache_range() uses it.
+ */ +void __init_or_module +paravirt_flush_i_cache_range(const void *instr, unsigned long size) +{ + extern void paravirt_fc_i(const void *addr); + unsigned long i; + + for (i = 0; i < size; i += sizeof(bundle_t)) + paravirt_fc_i(instr + i); +} + +bundle_t* __init_or_module +paravirt_get_bundle(unsigned long tag) +{ + return (bundle_t *)(tag & ~3UL); +} + +unsigned long __init_or_module +paravirt_get_slot(unsigned long tag) +{ + return tag & 3UL; +} + +unsigned long __init_or_module +paravirt_get_num_inst(unsigned long stag, unsigned long etag) +{ + bundle_t *sbundle = paravirt_get_bundle(stag); + unsigned long sslot = paravirt_get_slot(stag); + bundle_t *ebundle = paravirt_get_bundle(etag); + unsigned long eslot = paravirt_get_slot(etag); + + return (ebundle - sbundle) * 3 + eslot - sslot + 1; +} + +unsigned long __init_or_module +paravirt_get_next_tag(unsigned long tag) +{ + unsigned long slot = paravirt_get_slot(tag); + + switch (slot) { + case 0: + case 1: + return tag + 1; + case 2: { + bundle_t *bundle = paravirt_get_bundle(tag); + return (unsigned long)(bundle + 1); + } + default: + BUG(); + } + /* NOTREACHED */ +} + +ia64_inst_t __init_or_module +paravirt_read_slot0(const bundle_t *bundle) +{ + ia64_inst_t inst; + inst.l = bundle->quad0.slot0; + return inst; +} + +ia64_inst_t __init_or_module +paravirt_read_slot1(const bundle_t *bundle) +{ + ia64_inst_t inst; + inst.l = bundle->quad0.slot1_p0 | + ((unsigned long long)bundle->quad1.slot1_p1 << 18UL); + return inst; +} + +ia64_inst_t __init_or_module +paravirt_read_slot2(const bundle_t *bundle) +{ + ia64_inst_t inst; + inst.l = bundle->quad1.slot2; + return inst; +} + +ia64_inst_t __init_or_module +paravirt_read_inst(unsigned long tag) +{ + bundle_t *bundle = paravirt_get_bundle(tag); + unsigned long slot = paravirt_get_slot(tag); + + switch (slot) { + case 0: + return paravirt_read_slot0(bundle); + case 1: + return paravirt_read_slot1(bundle); + case 2: + return paravirt_read_slot2(bundle); + default: + BUG(); + } + /* NOTREACHED */ +} + +void __init_or_module +paravirt_write_slot0(bundle_t *bundle, ia64_inst_t inst) +{ + bundle->quad0.slot0 = inst.l; +} + +void __init_or_module +paravirt_write_slot1(bundle_t *bundle, ia64_inst_t inst) +{ + bundle->quad0.slot1_p0 = inst.l; + bundle->quad1.slot1_p1 = inst.l >> 18UL; +} + +void __init_or_module +paravirt_write_slot2(bundle_t *bundle, ia64_inst_t inst) +{ + bundle->quad1.slot2 = inst.l; +} + +void __init_or_module +paravirt_write_inst(unsigned long tag, ia64_inst_t inst) +{ + bundle_t *bundle = paravirt_get_bundle(tag); + unsigned long slot = paravirt_get_slot(tag); + + switch (slot) { + case 0: + paravirt_write_slot0(bundle, inst); + break; + case 1: + paravirt_write_slot1(bundle, inst); + break; + case 2: + paravirt_write_slot2(bundle, inst); + break; + default: + BUG(); + break; + } + paravirt_flush_i_cache_range(bundle, sizeof(*bundle)); +} + +/* for debug */ +void +paravirt_print_bundle(const bundle_t *bundle) +{ + const unsigned long *quad = (const unsigned long *)bundle; + ia64_inst_t slot0 = paravirt_read_slot0(bundle); + ia64_inst_t slot1 = paravirt_read_slot1(bundle); + ia64_inst_t slot2 = paravirt_read_slot2(bundle); + + printk(KERN_DEBUG + "bundle 0x%p 0x%016lx 0x%016lx\n", bundle, quad[0], quad[1]); + printk(KERN_DEBUG + "bundle template 0x%x\n", + bundle->quad0.template); + printk(KERN_DEBUG + "slot0 0x%lx slot1_p0 0x%lx slot1_p1 0x%lx slot2 0x%lx\n", + (unsigned long)bundle->quad0.slot0, + (unsigned long)bundle->quad0.slot1_p0, + (unsigned long)bundle->quad1.slot1_p1, + 
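A patch-site tag, as decoded by the helpers above, is just a 16-byte aligned bundle address (sizeof(bundle_t) == 16) with the slot number (0..2) in its low two bits. A small self-checking example of the arithmetic, with a made-up address:

        #include <assert.h>

        static void demo_tag_math(void)
        {
                unsigned long stag = 0x40010UL | 1;   /* bundle 0x40010, slot 1 */
                unsigned long etag = 0x40020UL | 0;   /* next bundle, slot 0 */

                assert((stag & ~3UL) == 0x40010UL);   /* paravirt_get_bundle() */
                assert((stag & 3UL) == 1);            /* paravirt_get_slot() */

                /* paravirt_get_num_inst(): 1 bundle * 3 + 0 - 1 + 1 == 3,
                 * i.e. slots 1 and 2 of the first bundle plus slot 0 of
                 * the next one */
                assert(((etag & ~3UL) - (stag & ~3UL)) / 16 * 3
                       + (etag & 3UL) - (stag & 3UL) + 1 == 3);
        }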
(unsigned long)bundle->quad1.slot2); + printk(KERN_DEBUG + "slot0 0x%016llx slot1 0x%016llx slot2 0x%016llx\n", + slot0.l, slot1.l, slot2.l); +} + +static int noreplace_paravirt __init_or_module = 0; + +static int __init setup_noreplace_paravirt(char *str) +{ + noreplace_paravirt = 1; + return 1; +} +__setup("noreplace-paravirt", setup_noreplace_paravirt); + +#ifdef ASM_SUPPORTED +static void __init_or_module +fill_nop_bundle(void *sbundle, void *ebundle) +{ + extern const char paravirt_nop_bundle[]; + extern const unsigned long paravirt_nop_bundle_size; + + void *bundle = sbundle; + + BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0); + BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0); + + while (bundle < ebundle) { + memcpy(bundle, paravirt_nop_bundle, paravirt_nop_bundle_size); + + bundle += paravirt_nop_bundle_size; + } +} + +/* helper function */ +unsigned long __init_or_module +__paravirt_patch_apply_bundle(void *sbundle, void *ebundle, unsigned long type, + const struct paravirt_patch_bundle_elem *elems, + unsigned long nelems, + const struct paravirt_patch_bundle_elem **found) +{ + unsigned long used = 0; + unsigned long i; + + BUG_ON((((unsigned long)sbundle) % sizeof(bundle_t)) != 0); + BUG_ON((((unsigned long)ebundle) % sizeof(bundle_t)) != 0); + + found = NULL; + for (i = 0; i < nelems; i++) { + const struct paravirt_patch_bundle_elem *p = &elems[i]; + if (p->type == type) { + unsigned long need = p->ebundle - p->sbundle; + unsigned long room = ebundle - sbundle; + + if (found != NULL) + *found = p; + + if (room < need) { + /* no room to replace. skip it */ + printk(KERN_DEBUG + "the space is too small to put " + "bundles. type %ld need %ld room %ld\n", + type, need, room); + break; + } + + used = need; + memcpy(sbundle, p->sbundle, used); + break; + } + } + + return used; +} + +void __init_or_module +paravirt_patch_apply_bundle(const struct paravirt_patch_site_bundle *start, + const struct paravirt_patch_site_bundle *end) +{ + const struct paravirt_patch_site_bundle *p; + + if (noreplace_paravirt) + return; + if (pv_init_ops.patch_bundle == NULL) + return; + + for (p = start; p < end; p++) { + unsigned long used; + + used = (*pv_init_ops.patch_bundle)(p->sbundle, p->ebundle, + p->type); + if (used == 0) + continue; + + fill_nop_bundle(p->sbundle + used, p->ebundle); + paravirt_flush_i_cache_range(p->sbundle, + p->ebundle - p->sbundle); + } + ia64_sync_i(); + ia64_srlz_i(); +} + +/* + * nop.i, nop.m and nop.f instructions share the same format, + * but nop.b has a different format. + * This doesn't support nop.b for now.
+ */ +static void __init_or_module +fill_nop_inst(unsigned long stag, unsigned long etag) +{ + extern const bundle_t paravirt_nop_mfi_inst_bundle[]; + unsigned long tag; + const ia64_inst_t nop_inst = + paravirt_read_slot0(paravirt_nop_mfi_inst_bundle); + + for (tag = stag; tag < etag; tag = paravirt_get_next_tag(tag)) + paravirt_write_inst(tag, nop_inst); +} + +void __init_or_module +paravirt_patch_apply_inst(const struct paravirt_patch_site_inst *start, + const struct paravirt_patch_site_inst *end) +{ + const struct paravirt_patch_site_inst *p; + + if (noreplace_paravirt) + return; + if (pv_init_ops.patch_inst == NULL) + return; + + for (p = start; p < end; p++) { + unsigned long tag; + bundle_t *sbundle; + bundle_t *ebundle; + + tag = (*pv_init_ops.patch_inst)(p->stag, p->etag, p->type); + if (tag == p->stag) + continue; + + fill_nop_inst(tag, p->etag); + sbundle = paravirt_get_bundle(p->stag); + ebundle = paravirt_get_bundle(p->etag) + 1; + paravirt_flush_i_cache_range(sbundle, (ebundle - sbundle) * + sizeof(bundle_t)); + } + ia64_sync_i(); + ia64_srlz_i(); +} +#endif /* ASM_SUPPORTED */ + +/* brl.cond.sptk.many <target64> X3 */ +typedef union inst_x3_op { + ia64_inst_t inst; + struct { + unsigned long qp: 6; + unsigned long btyp: 3; + unsigned long unused: 3; + unsigned long p: 1; + unsigned long imm20b: 20; + unsigned long wh: 2; + unsigned long d: 1; + unsigned long i: 1; + unsigned long opcode: 4; + }; + unsigned long l; +} inst_x3_op_t; + +typedef union inst_x3_imm { + ia64_inst_t inst; + struct { + unsigned long unused: 2; + unsigned long imm39: 39; + }; + unsigned long l; +} inst_x3_imm_t; + +void __init_or_module +paravirt_patch_reloc_brl(unsigned long tag, const void *target) +{ + unsigned long tag_op = paravirt_get_next_tag(tag); + unsigned long tag_imm = tag; + bundle_t *bundle = paravirt_get_bundle(tag); + + ia64_inst_t inst_op = paravirt_read_inst(tag_op); + ia64_inst_t inst_imm = paravirt_read_inst(tag_imm); + + inst_x3_op_t inst_x3_op = { .l = inst_op.l }; + inst_x3_imm_t inst_x3_imm = { .l = inst_imm.l }; + + unsigned long imm60 = + ((unsigned long)target - (unsigned long)bundle) >> 4; + + BUG_ON(paravirt_get_slot(tag) != 1); /* MLX */ + BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0); + + /* imm60[59] 1bit */ + inst_x3_op.i = (imm60 >> 59) & 1; + /* imm60[19:0] 20bit */ + inst_x3_op.imm20b = imm60 & ((1UL << 20) - 1); + /* imm60[58:20] 39bit */ + inst_x3_imm.imm39 = (imm60 >> 20) & ((1UL << 39) - 1); + + inst_op.l = inst_x3_op.l; + inst_imm.l = inst_x3_imm.l; + + paravirt_write_inst(tag_op, inst_op); + paravirt_write_inst(tag_imm, inst_imm); +} + +/* br.cond.sptk.many <target25> B1 */ +typedef union inst_b1 { + ia64_inst_t inst; + struct { + unsigned long qp: 6; + unsigned long btype: 3; + unsigned long unused: 3; + unsigned long p: 1; + unsigned long imm20b: 20; + unsigned long wh: 2; + unsigned long d: 1; + unsigned long s: 1; + unsigned long opcode: 4; + }; + unsigned long l; +} inst_b1_t; + +void __init +paravirt_patch_reloc_br(unsigned long tag, const void *target) +{ + bundle_t *bundle = paravirt_get_bundle(tag); + ia64_inst_t inst = paravirt_read_inst(tag); + unsigned long target25 = (unsigned long)target - (unsigned long)bundle; + inst_b1_t inst_b1; + + BUG_ON(((unsigned long)target & (sizeof(bundle_t) - 1)) != 0); + + inst_b1.l = inst.l; + if (target25 & (1UL << 63)) + inst_b1.s = 1; + else + inst_b1.s = 0; + + inst_b1.imm20b = target25 >> 4; + inst.l = inst_b1.l; + + paravirt_write_inst(tag, inst); +} + +void __init
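The bit scattering done by paravirt_patch_reloc_brl() above can be checked in isolation: the 60-bit bundle displacement imm60 = (target - bundle) >> 4 splits into i (bit 59), imm39 (bits 58:20) and imm20b (bits 19:0), and reassembly must give the original value back. A self-contained check:

        #include <assert.h>

        static void demo_split_imm60(unsigned long imm60)
        {
                unsigned long i      = (imm60 >> 59) & 1;
                unsigned long imm39  = (imm60 >> 20) & ((1UL << 39) - 1);
                unsigned long imm20b = imm60 & ((1UL << 20) - 1);

                /* round trip back to the original displacement */
                assert(((i << 59) | (imm39 << 20) | imm20b) == imm60);
        }

        int main(void)
        {
                demo_split_imm60(0x0123456789abcdeUL);  /* any 60-bit value */
                return 0;
        }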
+
+void __init
+__paravirt_patch_apply_branch(
+	unsigned long tag, unsigned long type,
+	const struct paravirt_patch_branch_target *entries,
+	unsigned int nr_entries)
+{
+	unsigned int i;
+	for (i = 0; i < nr_entries; i++) {
+		if (entries[i].type == type) {
+			paravirt_patch_reloc_br(tag, entries[i].entry);
+			break;
+		}
+	}
+}
+
+static void __init
+paravirt_patch_apply_branch(const struct paravirt_patch_site_branch *start,
+			    const struct paravirt_patch_site_branch *end)
+{
+	const struct paravirt_patch_site_branch *p;
+
+	if (noreplace_paravirt)
+		return;
+	if (pv_init_ops.patch_branch == NULL)
+		return;
+
+	for (p = start; p < end; p++)
+		(*pv_init_ops.patch_branch)(p->tag, p->type);
+
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+void __init
+paravirt_patch_apply(void)
+{
+	extern const char __start_paravirt_bundles[];
+	extern const char __stop_paravirt_bundles[];
+	extern const char __start_paravirt_insts[];
+	extern const char __stop_paravirt_insts[];
+	extern const char __start_paravirt_branches[];
+	extern const char __stop_paravirt_branches[];
+
+	paravirt_patch_apply_bundle((const struct paravirt_patch_site_bundle *)
+				    __start_paravirt_bundles,
+				    (const struct paravirt_patch_site_bundle *)
+				    __stop_paravirt_bundles);
+	paravirt_patch_apply_inst((const struct paravirt_patch_site_inst *)
+				  __start_paravirt_insts,
+				  (const struct paravirt_patch_site_inst *)
+				  __stop_paravirt_insts);
+	paravirt_patch_apply_branch((const struct paravirt_patch_site_branch *)
+				    __start_paravirt_branches,
+				    (const struct paravirt_patch_site_branch *)
+				    __stop_paravirt_branches);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "linux"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ */
diff --git a/arch/ia64/kernel/paravirt_patchlist.c b/arch/ia64/kernel/paravirt_patchlist.c
new file mode 100644
index 00000000000..0a70720662e
--- /dev/null
+++ b/arch/ia64/kernel/paravirt_patchlist.c
@@ -0,0 +1,81 @@
+/******************************************************************************
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/bug.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <asm/paravirt.h> + +#define DECLARE(name) \ + extern unsigned long \ + __ia64_native_start_gate_##name##_patchlist[]; \ + extern unsigned long \ + __ia64_native_end_gate_##name##_patchlist[] + +DECLARE(fsyscall); +DECLARE(brl_fsys_bubble_down); +DECLARE(vtop); +DECLARE(mckinley_e9); + +extern unsigned long __start_gate_section[]; + +#define ASSIGN(name) \ + .start_##name##_patchlist = \ + (unsigned long)__ia64_native_start_gate_##name##_patchlist, \ + .end_##name##_patchlist = \ + (unsigned long)__ia64_native_end_gate_##name##_patchlist + +struct pv_patchdata pv_patchdata __initdata = { + ASSIGN(fsyscall), + ASSIGN(brl_fsys_bubble_down), + ASSIGN(vtop), + ASSIGN(mckinley_e9), + + .gate_section = (void*)__start_gate_section, +}; + + +unsigned long __init +paravirt_get_gate_patchlist(enum pv_gate_patchlist type) +{ + +#define CASE(NAME, name) \ + case PV_GATE_START_##NAME: \ + return pv_patchdata.start_##name##_patchlist; \ + case PV_GATE_END_##NAME: \ + return pv_patchdata.end_##name##_patchlist; \ + + switch (type) { + CASE(FSYSCALL, fsyscall); + CASE(BRL_FSYS_BUBBLE_DOWN, brl_fsys_bubble_down); + CASE(VTOP, vtop); + CASE(MCKINLEY_E9, mckinley_e9); + default: + BUG(); + break; + } + return 0; +} + +void * __init +paravirt_get_gate_section(void) +{ + return pv_patchdata.gate_section; +} diff --git a/arch/ia64/kernel/paravirt_patchlist.h b/arch/ia64/kernel/paravirt_patchlist.h new file mode 100644 index 00000000000..67cffc3643a --- /dev/null +++ b/arch/ia64/kernel/paravirt_patchlist.h @@ -0,0 +1,24 @@ +/****************************************************************************** + * linux/arch/ia64/xen/paravirt_patchlist.h + * + * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <asm/native/patchlist.h> + diff --git a/arch/ia64/kernel/paravirtentry.S b/arch/ia64/kernel/paravirtentry.S index 2f42fcb9776..92d880c4d3d 100644 --- a/arch/ia64/kernel/paravirtentry.S +++ b/arch/ia64/kernel/paravirtentry.S @@ -20,41 +20,102 @@ * */ +#include <linux/init.h> #include <asm/asmmacro.h> #include <asm/asm-offsets.h> +#include <asm/paravirt_privop.h> +#include <asm/paravirt_patch.h> #include "entry.h" #define DATA8(sym, init_value) \ - .pushsection .data.read_mostly ; \ + .pushsection .data..read_mostly ; \ .align 8 ; \ .global sym ; \ sym: ; \ data8 init_value ; \ .popsection -#define BRANCH(targ, reg, breg) \ - movl reg=targ ; \ - ;; \ - ld8 reg=[reg] ; \ - ;; \ - mov breg=reg ; \ +#define BRANCH(targ, reg, breg, type) \ + PARAVIRT_PATCH_SITE_BR(PARAVIRT_PATCH_TYPE_BR_ ## type) ; \ + ;; \ + movl reg=targ ; \ + ;; \ + ld8 reg=[reg] ; \ + ;; \ + mov breg=reg ; \ br.cond.sptk.many breg -#define BRANCH_PROC(sym, reg, breg) \ - DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \ - GLOBAL_ENTRY(paravirt_ ## sym) ; \ - BRANCH(paravirt_ ## sym ## _targ, reg, breg) ; \ +#define BRANCH_PROC(sym, reg, breg, type) \ + DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \ + GLOBAL_ENTRY(paravirt_ ## sym) ; \ + BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ; \ END(paravirt_ ## sym) -#define BRANCH_PROC_UNWINFO(sym, reg, breg) \ - DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \ - GLOBAL_ENTRY(paravirt_ ## sym) ; \ - PT_REGS_UNWIND_INFO(0) ; \ - BRANCH(paravirt_ ## sym ## _targ, reg, breg) ; \ +#define BRANCH_PROC_UNWINFO(sym, reg, breg, type) \ + DATA8(paravirt_ ## sym ## _targ, ia64_native_ ## sym) ; \ + GLOBAL_ENTRY(paravirt_ ## sym) ; \ + PT_REGS_UNWIND_INFO(0) ; \ + BRANCH(paravirt_ ## sym ## _targ, reg, breg, type) ; \ END(paravirt_ ## sym) -BRANCH_PROC(switch_to, r22, b7) -BRANCH_PROC_UNWINFO(leave_syscall, r22, b7) -BRANCH_PROC(work_processed_syscall, r2, b7) -BRANCH_PROC_UNWINFO(leave_kernel, r22, b7) +BRANCH_PROC(switch_to, r22, b7, SWITCH_TO) +BRANCH_PROC_UNWINFO(leave_syscall, r22, b7, LEAVE_SYSCALL) +BRANCH_PROC(work_processed_syscall, r2, b7, WORK_PROCESSED_SYSCALL) +BRANCH_PROC_UNWINFO(leave_kernel, r22, b7, LEAVE_KERNEL) + + +#ifdef CONFIG_MODULES +#define __INIT_OR_MODULE .text +#define __INITDATA_OR_MODULE .data +#else +#define __INIT_OR_MODULE __INIT +#define __INITDATA_OR_MODULE __INITDATA +#endif /* CONFIG_MODULES */ + + __INIT_OR_MODULE + GLOBAL_ENTRY(paravirt_fc_i) + fc.i r32 + br.ret.sptk.many rp + END(paravirt_fc_i) + __FINIT + + __INIT_OR_MODULE + .align 32 + GLOBAL_ENTRY(paravirt_nop_b_inst_bundle) + { + nop.b 0 + nop.b 0 + nop.b 0 + } + END(paravirt_nop_b_inst_bundle) + __FINIT + + /* NOTE: nop.[mfi] has same format */ + __INIT_OR_MODULE + GLOBAL_ENTRY(paravirt_nop_mfi_inst_bundle) + { + nop.m 0 + nop.f 0 + nop.i 0 + } + END(paravirt_nop_mfi_inst_bundle) + __FINIT + + __INIT_OR_MODULE + GLOBAL_ENTRY(paravirt_nop_bundle) +paravirt_nop_bundle_start: + { + nop 0 + nop 0 + nop 0 + } +paravirt_nop_bundle_end: + END(paravirt_nop_bundle) + __FINIT + + __INITDATA_OR_MODULE + .align 8 + .global paravirt_nop_bundle_size +paravirt_nop_bundle_size: + data8 paravirt_nop_bundle_end - paravirt_nop_bundle_start diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c index b83b2c51600..1cf09179371 100644 --- 
a/arch/ia64/kernel/patch.c +++ b/arch/ia64/kernel/patch.c @@ -7,10 +7,10 @@ #include <linux/init.h> #include <linux/string.h> +#include <asm/paravirt.h> #include <asm/patch.h> #include <asm/processor.h> #include <asm/sections.h> -#include <asm/system.h> #include <asm/unistd.h> /* @@ -169,16 +169,35 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end) ia64_srlz_i(); } +extern unsigned long ia64_native_fsyscall_table[NR_syscalls]; +extern char ia64_native_fsys_bubble_down[]; +struct pv_fsys_data pv_fsys_data __initdata = { + .fsyscall_table = (unsigned long *)ia64_native_fsyscall_table, + .fsys_bubble_down = (void *)ia64_native_fsys_bubble_down, +}; + +unsigned long * __init +paravirt_get_fsyscall_table(void) +{ + return pv_fsys_data.fsyscall_table; +} + +char * __init +paravirt_get_fsys_bubble_down(void) +{ + return pv_fsys_data.fsys_bubble_down; +} + static void __init patch_fsyscall_table (unsigned long start, unsigned long end) { - extern unsigned long fsyscall_table[NR_syscalls]; + u64 fsyscall_table = (u64)paravirt_get_fsyscall_table(); s32 *offp = (s32 *) start; u64 ip; while (offp < (s32 *) end) { ip = (u64) ia64_imva((char *) offp + *offp); - ia64_patch_imm64(ip, (u64) fsyscall_table); + ia64_patch_imm64(ip, fsyscall_table); ia64_fc((void *) ip); ++offp; } @@ -189,7 +208,7 @@ patch_fsyscall_table (unsigned long start, unsigned long end) static void __init patch_brl_fsys_bubble_down (unsigned long start, unsigned long end) { - extern char fsys_bubble_down[]; + u64 fsys_bubble_down = (u64)paravirt_get_fsys_bubble_down(); s32 *offp = (s32 *) start; u64 ip; @@ -207,13 +226,13 @@ patch_brl_fsys_bubble_down (unsigned long start, unsigned long end) void __init ia64_patch_gate (void) { -# define START(name) ((unsigned long) __start_gate_##name##_patchlist) -# define END(name) ((unsigned long)__end_gate_##name##_patchlist) +# define START(name) paravirt_get_gate_patchlist(PV_GATE_START_##name) +# define END(name) paravirt_get_gate_patchlist(PV_GATE_END_##name) - patch_fsyscall_table(START(fsyscall), END(fsyscall)); - patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down)); - ia64_patch_vtop(START(vtop), END(vtop)); - ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9)); + patch_fsyscall_table(START(FSYSCALL), END(FSYSCALL)); + patch_brl_fsys_bubble_down(START(BRL_FSYS_BUBBLE_DOWN), END(BRL_FSYS_BUBBLE_DOWN)); + ia64_patch_vtop(START(VTOP), END(VTOP)); + ia64_patch_mckinley_e9(START(MCKINLEY_E9), END(MCKINLEY_E9)); } void ia64_patch_phys_stack_reg(unsigned long val) @@ -229,7 +248,7 @@ void ia64_patch_phys_stack_reg(unsigned long val) while (offp < end) { ip = (u64) offp + *offp; ia64_patch(ip, mask, imm); - ia64_fc(ip); + ia64_fc((void *)ip); ++offp; } ia64_sync_i(); diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c index d0ada067a4a..992c1098c52 100644 --- a/arch/ia64/kernel/pci-dma.c +++ b/arch/ia64/kernel/pci-dma.c @@ -12,9 +12,8 @@ #include <asm/machvec.h> #include <linux/dma-mapping.h> -#include <asm/system.h> -#ifdef CONFIG_DMAR +#ifdef CONFIG_INTEL_IOMMU #include <linux/kernel.h> @@ -32,30 +31,9 @@ int force_iommu __read_mostly = 1; int force_iommu __read_mostly; #endif -/* Set this to 1 if there is a HW IOMMU in the system */ -int iommu_detected __read_mostly; +int iommu_pass_through; -/* Dummy device used for NULL arguments (normally ISA). Better would - be probably a smaller DMA mask, but this is bug-to-bug compatible - to i386. 
*/ -struct device fallback_dev = { - .init_name = "fallback device", - .coherent_dma_mask = DMA_32BIT_MASK, - .dma_mask = &fallback_dev.coherent_dma_mask, -}; - -void __init pci_iommu_alloc(void) -{ - /* - * The order of these functions is important for - * fall-back/fail-over reasons - */ - detect_intel_iommu(); - -#ifdef CONFIG_SWIOTLB - pci_swiotlb_init(); -#endif -} +extern struct dma_map_ops intel_dma_ops; static int __init pci_iommu_init(void) { @@ -79,20 +57,12 @@ iommu_dma_init(void) return; } -struct dma_mapping_ops *dma_ops; -EXPORT_SYMBOL(dma_ops); - int iommu_dma_supported(struct device *dev, u64 mask) { - struct dma_mapping_ops *ops = get_dma_ops(dev); - - if (ops->dma_supported_op) - return ops->dma_supported_op(dev, mask); - /* Copied from i386. Doesn't make much sense, because it will only work for pci_alloc_coherent. The caller just has to use GFP_DMA in this case. */ - if (mask < DMA_24BIT_MASK) + if (mask < DMA_BIT_MASK(24)) return 0; /* Tell the device to use SAC when IOMMU force is on. This @@ -107,8 +77,8 @@ int iommu_dma_supported(struct device *dev, u64 mask) SAC for these. Assume all masks <= 40 bits are of this type. Normally this doesn't make any difference, but gives more gentle handling of IOMMU overflow. */ - if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { - dev_info(dev, "Force SAC with mask %lx\n", mask); + if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) { + dev_info(dev, "Force SAC with mask %llx\n", mask); return 0; } @@ -116,4 +86,25 @@ int iommu_dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(iommu_dma_supported); +void __init pci_iommu_alloc(void) +{ + dma_ops = &intel_dma_ops; + + dma_ops->sync_single_for_cpu = machvec_dma_sync_single; + dma_ops->sync_sg_for_cpu = machvec_dma_sync_sg; + dma_ops->sync_single_for_device = machvec_dma_sync_single; + dma_ops->sync_sg_for_device = machvec_dma_sync_sg; + dma_ops->dma_supported = iommu_dma_supported; + + /* + * The order of these functions is important for + * fall-back/fail-over reasons + */ + detect_intel_iommu(); + +#ifdef CONFIG_SWIOTLB + pci_swiotlb_init(); +#endif +} + #endif diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 16c50516dbc..939260aeac9 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -1,6 +1,7 @@ /* Glue code to lib/swiotlb.c */ #include <linux/pci.h> +#include <linux/gfp.h> #include <linux/cache.h> #include <linux/module.h> #include <linux/dma-mapping.h> @@ -13,23 +14,43 @@ int swiotlb __read_mostly; EXPORT_SYMBOL(swiotlb); -struct dma_mapping_ops swiotlb_dma_ops = { - .mapping_error = swiotlb_dma_mapping_error, - .alloc_coherent = swiotlb_alloc_coherent, - .free_coherent = swiotlb_free_coherent, - .map_single = swiotlb_map_single, - .unmap_single = swiotlb_unmap_single, +static void *ia64_swiotlb_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + struct dma_attrs *attrs) +{ + if (dev->coherent_dma_mask != DMA_BIT_MASK(64)) + gfp |= GFP_DMA; + return swiotlb_alloc_coherent(dev, size, dma_handle, gfp); +} + +static void ia64_swiotlb_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_addr, + struct dma_attrs *attrs) +{ + swiotlb_free_coherent(dev, size, vaddr, dma_addr); +} + +struct dma_map_ops swiotlb_dma_ops = { + .alloc = ia64_swiotlb_alloc_coherent, + .free = ia64_swiotlb_free_coherent, + .map_page = swiotlb_map_page, + .unmap_page = swiotlb_unmap_page, + .map_sg = swiotlb_map_sg_attrs, + .unmap_sg = swiotlb_unmap_sg_attrs, 
.sync_single_for_cpu = swiotlb_sync_single_for_cpu, .sync_single_for_device = swiotlb_sync_single_for_device, - .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, - .sync_single_range_for_device = swiotlb_sync_single_range_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, .sync_sg_for_device = swiotlb_sync_sg_for_device, - .map_sg = swiotlb_map_sg, - .unmap_sg = swiotlb_unmap_sg, - .dma_supported_op = swiotlb_dma_supported, + .dma_supported = swiotlb_dma_supported, + .mapping_error = swiotlb_dma_mapping_error, }; +void __init swiotlb_dma_init(void) +{ + dma_ops = &swiotlb_dma_ops; + swiotlb_init(1); +} + void __init pci_swiotlb_init(void) { if (!iommu_detected) { @@ -37,7 +58,7 @@ void __init pci_swiotlb_init(void) swiotlb = 1; printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n"); machvec_init("dig"); - swiotlb_init(); + swiotlb_init(1); dma_ops = &swiotlb_dma_ops; #else panic("Unable to find Intel IOMMU"); diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 0e499757309..5845ffea67c 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -41,6 +41,8 @@ #include <linux/rcupdate.h> #include <linux/completion.h> #include <linux/tracehook.h> +#include <linux/slab.h> +#include <linux/cpu.h> #include <asm/errno.h> #include <asm/intrinsics.h> @@ -48,7 +50,6 @@ #include <asm/perfmon.h> #include <asm/processor.h> #include <asm/signal.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/delay.h> @@ -312,7 +313,7 @@ typedef struct pfm_context { unsigned long th_pmcs[PFM_NUM_PMC_REGS]; /* PMC thread save state */ unsigned long th_pmds[PFM_NUM_PMD_REGS]; /* PMD thread save state */ - u64 ctx_saved_psr_up; /* only contains psr.up value */ + unsigned long ctx_saved_psr_up; /* only contains psr.up value */ unsigned long ctx_last_activation; /* context last activation number for last_cpu */ unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */ @@ -520,53 +521,47 @@ static pmu_config_t *pmu_conf; pfm_sysctl_t pfm_sysctl; EXPORT_SYMBOL(pfm_sysctl); -static ctl_table pfm_ctl_table[]={ +static struct ctl_table pfm_ctl_table[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "debug", .data = &pfm_sysctl.debug, .maxlen = sizeof(int), .mode = 0666, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "debug_ovfl", .data = &pfm_sysctl.debug_ovfl, .maxlen = sizeof(int), .mode = 0666, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "fastctxsw", .data = &pfm_sysctl.fastctxsw, .maxlen = sizeof(int), .mode = 0600, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, { - .ctl_name = CTL_UNNUMBERED, .procname = "expert_mode", .data = &pfm_sysctl.expert_mode, .maxlen = sizeof(int), .mode = 0600, - .proc_handler = &proc_dointvec, + .proc_handler = proc_dointvec, }, {} }; -static ctl_table pfm_sysctl_dir[] = { +static struct ctl_table pfm_sysctl_dir[] = { { - .ctl_name = CTL_UNNUMBERED, .procname = "perfmon", .mode = 0555, .child = pfm_ctl_table, }, {} }; -static ctl_table pfm_sysctl_root[] = { +static struct ctl_table pfm_sysctl_root[] = { { - .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, .child = pfm_sysctl_dir, @@ -610,31 +605,22 @@ pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f) spin_unlock(&(x)->ctx_lock); } -static inline unsigned int -pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct) -{ - return do_munmap(mm, addr, 
len); -} - -static inline unsigned long -pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec) -{ - return get_unmapped_area(file, addr, len, pgoff, flags); -} - +/* forward declaration */ +static const struct dentry_operations pfmfs_dentry_operations; -static int -pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - struct vfsmount *mnt) +static struct dentry * +pfmfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt); + return mount_pseudo(fs_type, "pfm:", NULL, &pfmfs_dentry_operations, + PFMFS_MAGIC); } static struct file_system_type pfm_fs_type = { .name = "pfmfs", - .get_sb = pfmfs_get_sb, + .mount = pfmfs_mount, .kill_sb = kill_anon_super, }; +MODULE_ALIAS_FS("pfmfs"); DEFINE_PER_CPU(unsigned long, pfm_syst_info); DEFINE_PER_CPU(struct task_struct *, pmu_owner); @@ -835,10 +821,9 @@ pfm_rvmalloc(unsigned long size) unsigned long addr; size = PAGE_ALIGN(size); - mem = vmalloc(size); + mem = vzalloc(size); if (mem) { //printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem); - memset(mem, 0, size); addr = (unsigned long)mem; while (size > 0) { pfm_reserve_page(addr); @@ -1338,8 +1323,6 @@ out: } EXPORT_SYMBOL(pfm_unregister_buffer_fmt); -extern void update_pal_halt_status(int); - static int pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) { @@ -1387,9 +1370,9 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) cpu)); /* - * disable default_idle() to go to PAL_HALT + * Force idle() into poll mode */ - update_pal_halt_status(0); + cpu_idle_poll_ctrl(true); UNLOCK_PFS(flags); @@ -1446,11 +1429,8 @@ pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu) is_syswide, cpu)); - /* - * if possible, enable default_idle() to go into PAL_HALT - */ - if (pfm_sessions.pfs_task_sessions == 0 && pfm_sessions.pfs_sys_sessions == 0) - update_pal_halt_status(1); + /* Undo forced polling. Last session reenables pal_halt */ + cpu_idle_poll_ctrl(false); UNLOCK_PFS(flags); @@ -1463,8 +1443,9 @@ pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu) * a PROTECT_CTX() section. */ static int -pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long size) +pfm_remove_smpl_mapping(void *vaddr, unsigned long size) { + struct task_struct *task = current; int r; /* sanity checks */ @@ -1478,13 +1459,8 @@ pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long siz /* * does the actual unmapping */ - down_write(&task->mm->mmap_sem); - - DPRINT(("down_write done smpl_vaddr=%p size=%lu\n", vaddr, size)); - - r = pfm_do_munmap(task->mm, (unsigned long)vaddr, size, 0); + r = vm_munmap((unsigned long)vaddr, size); - up_write(&task->mm->mmap_sem); if (r !=0) { printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task_pid_nr(task), vaddr, size); } @@ -1548,7 +1524,7 @@ pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt) * any operations on the root directory. However, we need a non-trivial * d_name - pfm: will go nicely and kill the special-casing in procfs. 
*/ -static struct vfsmount *pfmfs_mnt; +static struct vfsmount *pfmfs_mnt __read_mostly; static int __init init_pfm_fs(void) @@ -1578,7 +1554,7 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos) return -EINVAL; } - ctx = (pfm_context_t *)filp->private_data; + ctx = filp->private_data; if (ctx == NULL) { printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", task_pid_nr(current)); return -EINVAL; @@ -1678,7 +1654,7 @@ pfm_poll(struct file *filp, poll_table * wait) return 0; } - ctx = (pfm_context_t *)filp->private_data; + ctx = filp->private_data; if (ctx == NULL) { printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", task_pid_nr(current)); return 0; @@ -1701,8 +1677,8 @@ pfm_poll(struct file *filp, poll_table * wait) return mask; } -static int -pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) +static long +pfm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { DPRINT(("pfm_ioctl called\n")); return -EINVAL; @@ -1738,7 +1714,7 @@ pfm_fasync(int fd, struct file *filp, int on) return -EBADF; } - ctx = (pfm_context_t *)filp->private_data; + ctx = filp->private_data; if (ctx == NULL) { printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", task_pid_nr(current)); return -EBADF; @@ -1846,7 +1822,7 @@ pfm_flush(struct file *filp, fl_owner_t id) return -EBADF; } - ctx = (pfm_context_t *)filp->private_data; + ctx = filp->private_data; if (ctx == NULL) { printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", task_pid_nr(current)); return -EBADF; @@ -1950,7 +1926,7 @@ pfm_flush(struct file *filp, fl_owner_t id) * because some VM function reenables interrupts. * */ - if (smpl_buf_vaddr) pfm_remove_smpl_mapping(current, smpl_buf_vaddr, smpl_buf_size); + if (smpl_buf_vaddr) pfm_remove_smpl_mapping(smpl_buf_vaddr, smpl_buf_size); return 0; } @@ -1989,7 +1965,7 @@ pfm_close(struct inode *inode, struct file *filp) return -EBADF; } - ctx = (pfm_context_t *)filp->private_data; + ctx = filp->private_data; if (ctx == NULL) { printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", task_pid_nr(current)); return -EBADF; @@ -2179,25 +2155,26 @@ pfm_no_open(struct inode *irrelevant, struct file *dontcare) static const struct file_operations pfm_file_ops = { - .llseek = no_llseek, - .read = pfm_read, - .write = pfm_write, - .poll = pfm_poll, - .ioctl = pfm_ioctl, - .open = pfm_no_open, /* special open code to disallow open via /proc */ - .fasync = pfm_fasync, - .release = pfm_close, - .flush = pfm_flush + .llseek = no_llseek, + .read = pfm_read, + .write = pfm_write, + .poll = pfm_poll, + .unlocked_ioctl = pfm_ioctl, + .open = pfm_no_open, /* special open code to disallow open via /proc */ + .fasync = pfm_fasync, + .release = pfm_close, + .flush = pfm_flush }; -static int -pfmfs_delete_dentry(struct dentry *dentry) +static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen) { - return 1; + return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]", + dentry->d_inode->i_ino); } -static struct dentry_operations pfmfs_dentry_operations = { - .d_delete = pfmfs_delete_dentry, +static const struct dentry_operations pfmfs_dentry_operations = { + .d_delete = always_delete_dentry, + .d_dname = pfmfs_dname, }; @@ -2206,9 +2183,8 @@ pfm_alloc_file(pfm_context_t *ctx) { struct file *file; struct inode *inode; - struct dentry *dentry; - char name[32]; - struct qstr this; + struct path path; + struct qstr this = { .name = "" }; /* * allocate a new inode @@ -2223,27 +2199,22 @@ pfm_alloc_file(pfm_context_t *ctx) inode->i_uid = current_fsuid(); 
inode->i_gid = current_fsgid(); - sprintf(name, "[%lu]", inode->i_ino); - this.name = name; - this.len = strlen(name); - this.hash = inode->i_ino; - /* * allocate a new dcache entry */ - dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this); - if (!dentry) { + path.dentry = d_alloc(pfmfs_mnt->mnt_root, &this); + if (!path.dentry) { iput(inode); return ERR_PTR(-ENOMEM); } + path.mnt = mntget(pfmfs_mnt); - dentry->d_op = &pfmfs_dentry_operations; - d_add(dentry, inode); + d_add(path.dentry, inode); - file = alloc_file(pfmfs_mnt, dentry, FMODE_READ, &pfm_file_ops); - if (!file) { - dput(dentry); - return ERR_PTR(-ENFILE); + file = alloc_file(&path, FMODE_READ, &pfm_file_ops); + if (IS_ERR(file)) { + path_put(&path); + return file; } file->f_flags = O_RDONLY; @@ -2298,7 +2269,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur) * return -ENOMEM; */ - if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) + if (size > task_rlimit(task, RLIMIT_MEMLOCK)) return -ENOMEM; /* @@ -2320,13 +2291,14 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t DPRINT(("Cannot allocate vma\n")); goto error_kmem; } + INIT_LIST_HEAD(&vma->anon_vma_chain); /* * partially initialize the vma for the sampling buffer */ vma->vm_mm = mm; - vma->vm_file = filp; - vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED; + vma->vm_file = get_file(filp); + vma->vm_flags = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP; vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ /* @@ -2346,8 +2318,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t down_write(&task->mm->mmap_sem); /* find some free area in address space, must have mmap sem held */ - vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0); - if (vma->vm_start == 0UL) { + vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS); + if (IS_ERR_VALUE(vma->vm_start)) { DPRINT(("Cannot find unmapped area for size %ld\n", size)); up_write(&task->mm->mmap_sem); goto error; @@ -2364,15 +2336,12 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t goto error; } - get_file(filp); - /* * now insert the vma in the vm list for the process, must be * done with mmap lock held */ insert_vm_struct(mm, vma); - mm->total_vm += size >> PAGE_SHIFT; vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, vma_pages(vma)); up_write(&task->mm->mmap_sem); @@ -2400,8 +2369,8 @@ static int pfm_bad_permissions(struct task_struct *task) { const struct cred *tcred; - uid_t uid = current_uid(); - gid_t gid = current_gid(); + kuid_t uid = current_uid(); + kgid_t gid = current_gid(); int ret; rcu_read_lock(); @@ -2409,20 +2378,20 @@ pfm_bad_permissions(struct task_struct *task) /* inspired by ptrace_attach() */ DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n", - uid, - gid, - tcred->euid, - tcred->suid, - tcred->uid, - tcred->egid, - tcred->sgid)); - - ret = ((uid != tcred->euid) - || (uid != tcred->suid) - || (uid != tcred->uid) - || (gid != tcred->egid) - || (gid != tcred->sgid) - || (gid != tcred->gid)) && !capable(CAP_SYS_PTRACE); + from_kuid(&init_user_ns, uid), + from_kgid(&init_user_ns, gid), + from_kuid(&init_user_ns, tcred->euid), + from_kuid(&init_user_ns, tcred->suid), + from_kuid(&init_user_ns, tcred->uid), + from_kgid(&init_user_ns, tcred->egid), + from_kgid(&init_user_ns, tcred->sgid))); + + ret = 
((!uid_eq(uid, tcred->euid)) + || (!uid_eq(uid, tcred->suid)) + || (!uid_eq(uid, tcred->uid)) + || (!gid_eq(gid, tcred->egid)) + || (!gid_eq(gid, tcred->sgid)) + || (!gid_eq(gid, tcred->gid))) && !capable(CAP_SYS_PTRACE); rcu_read_unlock(); return ret; @@ -2718,7 +2687,7 @@ pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg goto buffer_error; } - DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d \n", + DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d\n", ctx, ctx_flags, ctx->ctx_fl_system, @@ -3523,7 +3492,7 @@ pfm_use_debug_registers(struct task_struct *task) * IA64_THREAD_DBG_VALID set. This indicates a task which was * able to use the debug registers for debugging purposes via * ptrace(). Therefore we know it was not using them for - * perfmormance monitoring, so we only decrement the number + * performance monitoring, so we only decrement the number * of "ptraced" debug register users to keep the count up to date */ int @@ -3682,7 +3651,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) * "self-monitoring". */ if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) { - DPRINT(("unblocking [%d] \n", task_pid_nr(task))); + DPRINT(("unblocking [%d]\n", task_pid_nr(task))); complete(&ctx->ctx_restart_done); } else { DPRINT(("[%d] armed exit trap\n", task_pid_nr(task))); @@ -4802,7 +4771,7 @@ recheck: asmlinkage long sys_perfmonctl (int fd, int cmd, void __user *arg, int count) { - struct file *file = NULL; + struct fd f = {NULL, 0}; pfm_context_t *ctx = NULL; unsigned long flags = 0UL; void *args_k = NULL; @@ -4899,17 +4868,17 @@ restart_args: ret = -EBADF; - file = fget(fd); - if (unlikely(file == NULL)) { + f = fdget(fd); + if (unlikely(f.file == NULL)) { DPRINT(("invalid fd %d\n", fd)); goto error_args; } - if (unlikely(PFM_IS_FILE(file) == 0)) { + if (unlikely(PFM_IS_FILE(f.file) == 0)) { DPRINT(("fd %d not related to perfmon\n", fd)); goto error_args; } - ctx = (pfm_context_t *)file->private_data; + ctx = f.file->private_data; if (unlikely(ctx == NULL)) { DPRINT(("no context for fd %d\n", fd)); goto error_args; @@ -4939,8 +4908,8 @@ abort_locked: if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT; error_args: - if (file) - fput(file); + if (f.file) + fdput(f); kfree(args_k); @@ -5213,8 +5182,8 @@ pfm_end_notify_user(pfm_context_t *ctx) * main overflow processing routine. 
* it can be called from the interrupt path or explicitly during the context switch code */ -static void -pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs) +static void pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, + unsigned long pmc0, struct pt_regs *regs) { pfm_ovfl_arg_t *ovfl_arg; unsigned long mask; @@ -5595,7 +5564,7 @@ pfm_interrupt_handler(int irq, void *arg) (*pfm_alt_intr_handler->handler)(irq, arg, regs); } - put_cpu_no_resched(); + put_cpu(); return IRQ_HANDLED; } @@ -5603,7 +5572,7 @@ pfm_interrupt_handler(int irq, void *arg) * /proc/perfmon interface, for debug only */ -#define PFM_PROC_SHOW_HEADER ((void *)NR_CPUS+1) +#define PFM_PROC_SHOW_HEADER ((void *)(long)nr_cpu_ids+1) static void * pfm_proc_start(struct seq_file *m, loff_t *pos) @@ -5612,7 +5581,7 @@ pfm_proc_start(struct seq_file *m, loff_t *pos) return PFM_PROC_SHOW_HEADER; } - while (*pos <= NR_CPUS) { + while (*pos <= nr_cpu_ids) { if (cpu_online(*pos - 1)) { return (void *)*pos; } @@ -5672,24 +5641,8 @@ pfm_proc_show_header(struct seq_file *m) list_for_each(pos, &pfm_buffer_fmt_list) { entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); - seq_printf(m, "format : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n", - entry->fmt_uuid[0], - entry->fmt_uuid[1], - entry->fmt_uuid[2], - entry->fmt_uuid[3], - entry->fmt_uuid[4], - entry->fmt_uuid[5], - entry->fmt_uuid[6], - entry->fmt_uuid[7], - entry->fmt_uuid[8], - entry->fmt_uuid[9], - entry->fmt_uuid[10], - entry->fmt_uuid[11], - entry->fmt_uuid[12], - entry->fmt_uuid[13], - entry->fmt_uuid[14], - entry->fmt_uuid[15], - entry->fmt_name); + seq_printf(m, "format : %16phD %s\n", + entry->fmt_uuid, entry->fmt_name); } spin_unlock(&pfm_buffer_fmt_lock); @@ -6434,7 +6387,6 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) static struct irqaction perfmon_irqaction = { .handler = pfm_interrupt_handler, - .flags = IRQF_DISABLED, .name = "perfmon" }; diff --git a/arch/ia64/kernel/perfmon_default_smpl.c b/arch/ia64/kernel/perfmon_default_smpl.c index 5f637bbfccc..30c644ea44c 100644 --- a/arch/ia64/kernel/perfmon_default_smpl.c +++ b/arch/ia64/kernel/perfmon_default_smpl.c @@ -150,7 +150,7 @@ default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct * current = task running at the time of the overflow. * * per-task mode: - * - this is ususally the task being monitored. + * - this is usually the task being monitored. 
* Under certain conditions, it might be a different task * * system-wide: diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index c5716270514..55d4ba47a90 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -15,11 +15,11 @@ #include <linux/kallsyms.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/slab.h> #include <linux/module.h> #include <linux/notifier.h> #include <linux/personality.h> #include <linux/sched.h> -#include <linux/slab.h> #include <linux/stddef.h> #include <linux/thread_info.h> #include <linux/unistd.h> @@ -29,16 +29,17 @@ #include <linux/kdebug.h> #include <linux/utsname.h> #include <linux/tracehook.h> +#include <linux/rcupdate.h> #include <asm/cpu.h> #include <asm/delay.h> #include <asm/elf.h> -#include <asm/ia32.h> #include <asm/irq.h> #include <asm/kexec.h> #include <asm/pgalloc.h> #include <asm/processor.h> #include <asm/sal.h> +#include <asm/switch_to.h> #include <asm/tlbflush.h> #include <asm/uaccess.h> #include <asm/unwind.h> @@ -54,12 +55,10 @@ void (*ia64_mark_idle)(int); -unsigned long boot_option_idle_override = 0; +unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; EXPORT_SYMBOL(boot_option_idle_override); -unsigned long idle_halt; -EXPORT_SYMBOL(idle_halt); -unsigned long idle_nomwait; -EXPORT_SYMBOL(idle_nomwait); +void (*pm_power_off) (void); +EXPORT_SYMBOL(pm_power_off); void ia64_do_show_stack (struct unw_frame_info *info, void *arg) @@ -97,21 +96,13 @@ show_stack (struct task_struct *task, unsigned long *sp) } void -dump_stack (void) -{ - show_stack(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - -void show_regs (struct pt_regs *regs) { unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; print_modules(); - printk("\nPid: %d, CPU %d, comm: %20s\n", task_pid_nr(current), - smp_processor_id(), current->comm); + printk("\n"); + show_regs_print_info(KERN_DEFAULT); printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s (%s)\n", regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(), init_utsname()->release); @@ -161,6 +152,13 @@ show_regs (struct pt_regs *regs) show_stack(NULL, NULL); } +/* local support for deprecated console_print */ +void +console_print(const char *s) +{ + printk(KERN_EMERG "%s", s); +} + void do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) { @@ -189,8 +187,8 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) ia64_do_signal(scr, in_syscall); } - if (test_thread_flag(TIF_NOTIFY_RESUME)) { - clear_thread_flag(TIF_NOTIFY_RESUME); + if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME)) { + local_irq_enable(); /* force interrupt enable */ tracehook_notify_resume(&scr->pt); } @@ -203,41 +201,13 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) local_irq_disable(); /* force interrupt disable */ } -static int pal_halt = 1; -static int can_do_pal_halt = 1; - static int __init nohalt_setup(char * str) { - pal_halt = can_do_pal_halt = 0; + cpu_idle_poll_ctrl(true); return 1; } __setup("nohalt", nohalt_setup); -void -update_pal_halt_status(int status) -{ - can_do_pal_halt = pal_halt && status; -} - -/* - * We use this if we don't have any better idle routine.. - */ -void -default_idle (void) -{ - local_irq_enable(); - while (!need_resched()) { - if (can_do_pal_halt) { - local_irq_disable(); - if (!need_resched()) { - safe_halt(); - } - local_irq_enable(); - } else - cpu_relax(); - } -} - #ifdef CONFIG_HOTPLUG_CPU /* We don't actually take CPU down, just spin without interrupts. 
*/ static inline void play_dead(void) @@ -264,71 +234,29 @@ static inline void play_dead(void) } #endif /* CONFIG_HOTPLUG_CPU */ -static void do_nothing(void *unused) -{ -} - -/* - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of - * pm_idle and update to new pm_idle value. Required while changing pm_idle - * handler on SMP systems. - * - * Caller must have changed pm_idle to the new value before the call. Old - * pm_idle value will not be used by any CPU after the return of this function. - */ -void cpu_idle_wait(void) +void arch_cpu_idle_dead(void) { - smp_mb(); - /* kick all the CPUs so that they exit out of pm_idle */ - smp_call_function(do_nothing, NULL, 1); + play_dead(); } -EXPORT_SYMBOL_GPL(cpu_idle_wait); -void __attribute__((noreturn)) -cpu_idle (void) +void arch_cpu_idle(void) { void (*mark_idle)(int) = ia64_mark_idle; - int cpu = smp_processor_id(); - - /* endless idle loop with no priority at all */ - while (1) { - if (can_do_pal_halt) { - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we - * test NEED_RESCHED: - */ - smp_mb(); - } else { - current_thread_info()->status |= TS_POLLING; - } - if (!need_resched()) { - void (*idle)(void); #ifdef CONFIG_SMP - min_xtp(); + min_xtp(); #endif - rmb(); - if (mark_idle) - (*mark_idle)(1); - - idle = pm_idle; - if (!idle) - idle = default_idle; - (*idle)(); - if (mark_idle) - (*mark_idle)(0); + rmb(); + if (mark_idle) + (*mark_idle)(1); + + safe_halt(); + + if (mark_idle) + (*mark_idle)(0); #ifdef CONFIG_SMP - normal_xtp(); + normal_xtp(); #endif - } - preempt_enable_no_resched(); - schedule(); - preempt_disable(); - check_pgt_cache(); - if (cpu_is_offline(cpu)) - play_dead(); - } } void @@ -349,11 +277,6 @@ ia64_save_extra (struct task_struct *task) if (info & PFM_CPUINFO_SYST_WIDE) pfm_syst_wide_update_task(task, info, 0); #endif - -#ifdef CONFIG_IA32_SUPPORT - if (IS_IA32_PROCESS(task_pt_regs(task))) - ia32_save_state(task); -#endif } void @@ -374,11 +297,6 @@ ia64_load_extra (struct task_struct *task) if (info & PFM_CPUINFO_SYST_WIDE) pfm_syst_wide_update_task(task, info, 1); #endif - -#ifdef CONFIG_IA32_SUPPORT - if (IS_IA32_PROCESS(task_pt_regs(task))) - ia32_load_state(task); -#endif } /* @@ -413,77 +331,26 @@ ia64_load_extra (struct task_struct *task) * so there is nothing to worry about. */ int -copy_thread (int nr, unsigned long clone_flags, +copy_thread(unsigned long clone_flags, unsigned long user_stack_base, unsigned long user_stack_size, - struct task_struct *p, struct pt_regs *regs) + struct task_struct *p) { - extern char ia64_ret_from_clone, ia32_ret_from_clone; + extern char ia64_ret_from_clone; struct switch_stack *child_stack, *stack; unsigned long rbs, child_rbs, rbs_size; struct pt_regs *child_ptregs; + struct pt_regs *regs = current_pt_regs(); int retval = 0; -#ifdef CONFIG_SMP - /* - * For SMP idle threads, fork_by_hand() calls do_fork with - * NULL regs. 
- */ - if (!regs) - return 0; -#endif - - stack = ((struct switch_stack *) regs) - 1; - child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1; child_stack = (struct switch_stack *) child_ptregs - 1; - /* copy parent's switch_stack & pt_regs to child: */ - memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack)); - rbs = (unsigned long) current + IA64_RBS_OFFSET; child_rbs = (unsigned long) p + IA64_RBS_OFFSET; - rbs_size = stack->ar_bspstore - rbs; - - /* copy the parent's register backing store to the child: */ - memcpy((void *) child_rbs, (void *) rbs, rbs_size); - - if (likely(user_mode(child_ptregs))) { - if ((clone_flags & CLONE_SETTLS) && !IS_IA32_PROCESS(regs)) - child_ptregs->r13 = regs->r16; /* see sys_clone2() in entry.S */ - if (user_stack_base) { - child_ptregs->r12 = user_stack_base + user_stack_size - 16; - child_ptregs->ar_bspstore = user_stack_base; - child_ptregs->ar_rnat = 0; - child_ptregs->loadrs = 0; - } - } else { - /* - * Note: we simply preserve the relative position of - * the stack pointer here. There is no need to - * allocate a scratch area here, since that will have - * been taken care of by the caller of sys_clone() - * already. - */ - child_ptregs->r12 = (unsigned long) child_ptregs - 16; /* kernel sp */ - child_ptregs->r13 = (unsigned long) p; /* set `current' pointer */ - } - child_stack->ar_bspstore = child_rbs + rbs_size; - if (IS_IA32_PROCESS(regs)) - child_stack->b0 = (unsigned long) &ia32_ret_from_clone; - else - child_stack->b0 = (unsigned long) &ia64_ret_from_clone; /* copy parts of thread_struct: */ p->thread.ksp = (unsigned long) child_stack - 16; - /* stop some PSR bits from being inherited. - * the psr.up/psr.pp bits must be cleared on fork but inherited on execve() - * therefore we must specify them explicitly here and not include them in - * IA64_PSR_BITS_TO_CLEAR. - */ - child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET) - & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP)); - /* * NOTE: The calling convention considers all floating point * registers in the high partition (fph) to be scratch. Since @@ -505,23 +372,65 @@ copy_thread (int nr, unsigned long clone_flags, # define THREAD_FLAGS_TO_SET 0 p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR) | THREAD_FLAGS_TO_SET); + ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */ -#ifdef CONFIG_IA32_SUPPORT - /* - * If we're cloning an IA32 task then save the IA32 extra - * state from the current task to the new task - */ - if (IS_IA32_PROCESS(task_pt_regs(current))) { - ia32_save_state(p); - if (clone_flags & CLONE_SETTLS) - retval = ia32_clone_tls(p, child_ptregs); - - /* Copy partially mapped page list */ - if (!retval) - retval = ia32_copy_ia64_partial_page_list(p, - clone_flags); + + if (unlikely(p->flags & PF_KTHREAD)) { + if (unlikely(!user_stack_base)) { + /* fork_idle() called us */ + return 0; + } + memset(child_stack, 0, sizeof(*child_ptregs) + sizeof(*child_stack)); + child_stack->r4 = user_stack_base; /* payload */ + child_stack->r5 = user_stack_size; /* argument */ + /* + * Preserve PSR bits, except for bits 32-34 and 37-45, + * which we can't read. 
+ */ + child_ptregs->cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN; + /* mark as valid, empty frame */ + child_ptregs->cr_ifs = 1UL << 63; + child_stack->ar_fpsr = child_ptregs->ar_fpsr + = ia64_getreg(_IA64_REG_AR_FPSR); + child_stack->pr = (1 << PRED_KERNEL_STACK); + child_stack->ar_bspstore = child_rbs; + child_stack->b0 = (unsigned long) &ia64_ret_from_clone; + + /* stop some PSR bits from being inherited. + * the psr.up/psr.pp bits must be cleared on fork but inherited on execve() + * therefore we must specify them explicitly here and not include them in + * IA64_PSR_BITS_TO_CLEAR. + */ + child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET) + & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP)); + + return 0; } -#endif + stack = ((struct switch_stack *) regs) - 1; + /* copy parent's switch_stack & pt_regs to child: */ + memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack)); + + /* copy the parent's register backing store to the child: */ + rbs_size = stack->ar_bspstore - rbs; + memcpy((void *) child_rbs, (void *) rbs, rbs_size); + if (clone_flags & CLONE_SETTLS) + child_ptregs->r13 = regs->r16; /* see sys_clone2() in entry.S */ + if (user_stack_base) { + child_ptregs->r12 = user_stack_base + user_stack_size - 16; + child_ptregs->ar_bspstore = user_stack_base; + child_ptregs->ar_rnat = 0; + child_ptregs->loadrs = 0; + } + child_stack->ar_bspstore = child_rbs + rbs_size; + child_stack->b0 = (unsigned long) &ia64_ret_from_clone; + + /* stop some PSR bits from being inherited. + * the psr.up/psr.pp bits must be cleared on fork but inherited on execve() + * therefore we must specify them explicitly here and not include them in + * IA64_PSR_BITS_TO_CLEAR. + */ + child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET) + & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP)); #ifdef CONFIG_PERFMON if (current->thread.pfm_context) @@ -649,64 +558,6 @@ dump_fpu (struct pt_regs *pt, elf_fpregset_t dst) return 1; /* f0-f31 are always valid so we always return 1 */ } -long -sys_execve (char __user *filename, char __user * __user *argv, char __user * __user *envp, - struct pt_regs *regs) -{ - char *fname; - int error; - - fname = getname(filename); - error = PTR_ERR(fname); - if (IS_ERR(fname)) - goto out; - error = do_execve(fname, argv, envp, regs); - putname(fname); -out: - return error; -} - -pid_t -kernel_thread (int (*fn)(void *), void *arg, unsigned long flags) -{ - extern void start_kernel_thread (void); - unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread; - struct { - struct switch_stack sw; - struct pt_regs pt; - } regs; - - memset(®s, 0, sizeof(regs)); - regs.pt.cr_iip = helper_fptr[0]; /* set entry point (IP) */ - regs.pt.r1 = helper_fptr[1]; /* set GP */ - regs.pt.r9 = (unsigned long) fn; /* 1st argument */ - regs.pt.r11 = (unsigned long) arg; /* 2nd argument */ - /* Preserve PSR bits, except for bits 32-34 and 37-45, which we can't read. */ - regs.pt.cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN; - regs.pt.cr_ifs = 1UL << 63; /* mark as valid, empty frame */ - regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR); - regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET; - regs.sw.pr = (1 << PRED_KERNEL_STACK); - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s.pt, 0, NULL, NULL); -} -EXPORT_SYMBOL(kernel_thread); - -/* This gets called from kernel_thread() via ia64_invoke_thread_helper(). 
*/ -int -kernel_thread_helper (int (*fn)(void *), void *arg) -{ -#ifdef CONFIG_IA32_SUPPORT - if (IS_IA32_PROCESS(task_pt_regs(current))) { - /* A kernel thread is always a 64-bit process. */ - current->thread.map_base = DEFAULT_MAP_BASE; - current->thread.task_size = DEFAULT_TASK_SIZE; - ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob); - ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1); - } -#endif - return (*fn)(arg); -} - /* * Flush thread state. This is called when a thread does an execve(). */ @@ -716,14 +567,6 @@ flush_thread (void) /* drop floating-point and debug-register state if it exists: */ current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID); ia64_drop_fpu(current); -#ifdef CONFIG_IA32_SUPPORT - if (IS_IA32_PROCESS(task_pt_regs(current))) { - ia32_drop_ia64_partial_page_list(current); - current->thread.task_size = IA32_PAGE_OFFSET; - set_fs(USER_DS); - memset(current->thread.tls_array, 0, sizeof(current->thread.tls_array)); - } -#endif } /* @@ -744,8 +587,6 @@ exit_thread (void) if (current->thread.flags & IA64_THREAD_DBG_VALID) pfm_release_debug_registers(current); #endif - if (IS_IA32_PROCESS(task_pt_regs(current))) - ia32_drop_ia64_partial_page_list(current); } unsigned long diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 92c9689b7d9..b7a5fffe092 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -11,11 +11,9 @@ */ #include <linux/kernel.h> #include <linux/sched.h> -#include <linux/slab.h> #include <linux/mm.h> #include <linux/errno.h> #include <linux/ptrace.h> -#include <linux/smp_lock.h> #include <linux/user.h> #include <linux/security.h> #include <linux/audit.h> @@ -28,7 +26,6 @@ #include <asm/processor.h> #include <asm/ptrace_offsets.h> #include <asm/rse.h> -#include <asm/system.h> #include <asm/uaccess.h> #include <asm/unwind.h> #ifdef CONFIG_PERFMON @@ -640,7 +637,7 @@ ptrace_attach_sync_user_rbs (struct task_struct *child) */ read_lock(&tasklist_lock); - if (child->signal) { + if (child->sighand) { spin_lock_irq(&child->sighand->siglock); if (child->state == TASK_STOPPED && !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) { @@ -664,7 +661,7 @@ ptrace_attach_sync_user_rbs (struct task_struct *child) * job control stop, so that SIGCONT can be used to wake it up. */ read_lock(&tasklist_lock); - if (child->signal) { + if (child->sighand) { spin_lock_irq(&child->sighand->siglock); if (child->state == TASK_TRACED && (child->signal->flags & SIGNAL_STOP_STOPPED)) { @@ -675,33 +672,6 @@ ptrace_attach_sync_user_rbs (struct task_struct *child) read_unlock(&tasklist_lock); } -static inline int -thread_matches (struct task_struct *thread, unsigned long addr) -{ - unsigned long thread_rbs_end; - struct pt_regs *thread_regs; - - if (ptrace_check_attach(thread, 0) < 0) - /* - * If the thread is not in an attachable state, we'll - * ignore it. The net effect is that if ADDR happens - * to overlap with the portion of the thread's - * register backing store that is currently residing - * on the thread's kernel stack, then ptrace() may end - * up accessing a stale value. But if the thread - * isn't stopped, that's a problem anyhow, so we're - * doing as well as we can... - */ - return 0; - - thread_regs = task_pt_regs(thread); - thread_rbs_end = ia64_get_user_rbs_end(thread, thread_regs, NULL); - if (!on_kernel_rbs(addr, thread_regs->ar_bspstore, thread_rbs_end)) - return 0; - - return 1; /* looks like we've got a winner */ -} - /* * Write f32-f127 back to task->thread.fph if it has been modified. 
 */
@@ -1179,7 +1149,8 @@ ptrace_disable (struct task_struct *child)
 }
 
 long
-arch_ptrace (struct task_struct *child, long request, long addr, long data)
+arch_ptrace (struct task_struct *child, long request,
+	     unsigned long addr, unsigned long data)
 {
 	switch (request) {
 	case PTRACE_PEEKTEXT:
@@ -1247,20 +1218,8 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
 	if (test_thread_flag(TIF_RESTORE_RSE))
 		ia64_sync_krbs();
 
-	if (unlikely(current->audit_context)) {
-		long syscall;
-		int arch;
-
-		if (IS_IA32_PROCESS(&regs)) {
-			syscall = regs.r1;
-			arch = AUDIT_ARCH_I386;
-		} else {
-			syscall = regs.r15;
-			arch = AUDIT_ARCH_IA64;
-		}
-		audit_syscall_entry(arch, syscall, arg0, arg1, arg2, arg3);
-	}
+	audit_syscall_entry(AUDIT_ARCH_IA64, regs.r15, arg0, arg1, arg2, arg3);
 
 	return 0;
 }
@@ -1274,14 +1233,7 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
 {
 	int step;
 
-	if (unlikely(current->audit_context)) {
-		int success = AUDITSC_RESULT(regs.r10);
-		long result = regs.r8;
-
-		if (success != AUDITSC_SUCCESS)
-			result = -result;
-		audit_syscall_exit(success, result);
-	}
+	audit_syscall_exit(&regs);
 
 	step = test_thread_flag(TIF_SINGLESTEP);
 	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
@@ -2173,11 +2125,6 @@ static const struct user_regset_view user_ia64_view = {
 
 const struct user_regset_view *task_user_regset_view(struct task_struct *tsk)
 {
-#ifdef CONFIG_IA32_SUPPORT
-	extern const struct user_regset_view user_ia32_view;
-	if (IS_IA32_PROCESS(task_pt_regs(tsk)))
-		return &user_ia32_view;
-#endif
 	return &user_ia64_view;
 }
diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S
index 903babd22d6..c370e02f006 100644
--- a/arch/ia64/kernel/relocate_kernel.S
+++ b/arch/ia64/kernel/relocate_kernel.S
@@ -52,7 +52,7 @@ GLOBAL_ENTRY(relocate_new_kernel)
 	srlz.i
 	;;
 	mov ar.rnat=r18
-	rfi
+	rfi				// note: this unmasks MCA/INIT (psr.mc)
 	;;
 1:
 	//physical mode code begin
@@ -61,7 +61,7 @@ GLOBAL_ENTRY(relocate_new_kernel)
 
 	// purge all TC entries
 #define O(member)	IA64_CPUINFO_##member##_OFFSET
-	GET_THIS_PADDR(r2, cpu_info)	// load phys addr of cpu_info into r2
+	GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2
 	;;
 	addl r17=O(PTCE_STRIDE),r2
 	addl r2=O(PTCE_BASE),r2
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index ecb9eb78d68..ee9719eebb1 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -40,6 +40,7 @@
 #include <linux/cpu.h>
 #include <linux/types.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/module.h>
 #include <linux/smp.h>
 #include <linux/timer.h>
@@ -53,7 +54,7 @@ MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>");
 MODULE_DESCRIPTION("/proc interface to IA-64 SAL features");
 MODULE_LICENSE("GPL");
 
-static int salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data);
+static const struct file_operations proc_salinfo_fops;
 
 typedef struct {
 	const char		*name;		/* name of the proc entry */
@@ -65,7 +66,7 @@ typedef struct {
  * List {name,feature} pairs for every entry in /proc/sal/<feature>
  * that this module exports
  */
-static salinfo_entry_t salinfo_entries[]={
+static const salinfo_entry_t salinfo_entries[]={
 	{ "bus_lock",          IA64_SAL_PLATFORM_FEATURE_BUS_LOCK, },
 	{ "irq_redirection",   IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT, },
 	{ "ipi_redirection",   IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT, },
@@ -192,7 +193,7 @@ struct salinfo_platform_oemdata_parms {
 static void
 salinfo_work_to_do(struct salinfo_data *data)
 {
-	down_trylock(&data->mutex);
+ (void)(down_trylock(&data->mutex) ?: 0); up(&data->mutex); } @@ -301,9 +302,7 @@ salinfo_event_open(struct inode *inode, struct file *file) static ssize_t salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { - struct inode *inode = file->f_path.dentry->d_inode; - struct proc_dir_entry *entry = PDE(inode); - struct salinfo_data *data = entry->data; + struct salinfo_data *data = PDE_DATA(file_inode(file)); char cmd[32]; size_t size; int i, n, cpu = -1; @@ -317,7 +316,7 @@ retry: } n = data->cpu_check; - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) { if (cpu_isset(n, data->cpu_event)) { if (!cpu_online(n)) { cpu_clear(n, data->cpu_event); @@ -326,7 +325,7 @@ retry: cpu = n; break; } - if (++n == NR_CPUS) + if (++n == nr_cpu_ids) n = 0; } @@ -337,7 +336,7 @@ retry: /* for next read, start checking at next CPU */ data->cpu_check = cpu; - if (++data->cpu_check == NR_CPUS) + if (++data->cpu_check == nr_cpu_ids) data->cpu_check = 0; snprintf(cmd, sizeof(cmd), "read %d\n", cpu); @@ -354,13 +353,13 @@ retry: static const struct file_operations salinfo_event_fops = { .open = salinfo_event_open, .read = salinfo_event_read, + .llseek = noop_llseek, }; static int salinfo_log_open(struct inode *inode, struct file *file) { - struct proc_dir_entry *entry = PDE(inode); - struct salinfo_data *data = entry->data; + struct salinfo_data *data = PDE_DATA(inode); if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -385,8 +384,7 @@ salinfo_log_open(struct inode *inode, struct file *file) static int salinfo_log_release(struct inode *inode, struct file *file) { - struct proc_dir_entry *entry = PDE(inode); - struct salinfo_data *data = entry->data; + struct salinfo_data *data = PDE_DATA(inode); if (data->state == STATE_NO_DATA) { vfree(data->log_buffer); @@ -404,10 +402,9 @@ static void call_on_cpu(int cpu, void (*fn)(void *), void *arg) { cpumask_t save_cpus_allowed = current->cpus_allowed; - cpumask_t new_cpus_allowed = cpumask_of_cpu(cpu); - set_cpus_allowed(current, new_cpus_allowed); + set_cpus_allowed_ptr(current, cpumask_of(cpu)); (*fn)(arg); - set_cpus_allowed(current, save_cpus_allowed); + set_cpus_allowed_ptr(current, &save_cpus_allowed); } static void @@ -463,9 +460,7 @@ retry: static ssize_t salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { - struct inode *inode = file->f_path.dentry->d_inode; - struct proc_dir_entry *entry = PDE(inode); - struct salinfo_data *data = entry->data; + struct salinfo_data *data = PDE_DATA(file_inode(file)); u8 *buf; u64 bufsize; @@ -524,9 +519,7 @@ salinfo_log_clear(struct salinfo_data *data, int cpu) static ssize_t salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { - struct inode *inode = file->f_path.dentry->d_inode; - struct proc_dir_entry *entry = PDE(inode); - struct salinfo_data *data = entry->data; + struct salinfo_data *data = PDE_DATA(file_inode(file)); char cmd[32]; size_t size; u32 offset; @@ -572,9 +565,10 @@ static const struct file_operations salinfo_data_fops = { .release = salinfo_log_release, .read = salinfo_log_read, .write = salinfo_log_write, + .llseek = default_llseek, }; -static int __cpuinit +static int salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) { unsigned int i, cpu = (unsigned long)hcpu; @@ -615,7 +609,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu return NOTIFY_OK; } -static struct notifier_block salinfo_cpu_notifier __cpuinitdata = +static 
struct notifier_block salinfo_cpu_notifier = { .notifier_call = salinfo_cpu_callback, .priority = 0, @@ -636,14 +630,17 @@ salinfo_init(void) for (i=0; i < NR_SALINFO_ENTRIES; i++) { /* pass the feature bit in question as misc data */ - *sdir++ = create_proc_read_entry (salinfo_entries[i].name, 0, salinfo_dir, - salinfo_read, (void *)salinfo_entries[i].feature); + *sdir++ = proc_create_data(salinfo_entries[i].name, 0, salinfo_dir, + &proc_salinfo_fops, + (void *)salinfo_entries[i].feature); } + cpu_notifier_register_begin(); + for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) { data = salinfo_data + i; data->type = i; - init_MUTEX(&data->mutex); + sema_init(&data->mutex, 1); dir = proc_mkdir(salinfo_log_name[i], salinfo_dir); if (!dir) continue; @@ -674,7 +671,9 @@ salinfo_init(void) salinfo_timer.function = &salinfo_timeout; add_timer(&salinfo_timer); - register_hotcpu_notifier(&salinfo_cpu_notifier); + __register_hotcpu_notifier(&salinfo_cpu_notifier); + + cpu_notifier_register_done(); return 0; } @@ -683,22 +682,23 @@ salinfo_init(void) * 'data' contains an integer that corresponds to the feature we're * testing */ -static int -salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data) +static int proc_salinfo_show(struct seq_file *m, void *v) { - int len = 0; - - len = sprintf(page, (sal_platform_features & (unsigned long)data) ? "1\n" : "0\n"); - - if (len <= off+count) *eof = 1; - - *start = page + off; - len -= off; - - if (len>count) len = count; - if (len<0) len = 0; + unsigned long data = (unsigned long)v; + seq_puts(m, (sal_platform_features & data) ? "1\n" : "0\n"); + return 0; +} - return len; +static int proc_salinfo_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_salinfo_show, PDE_DATA(inode)); } +static const struct file_operations proc_salinfo_fops = { + .open = proc_salinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + module_init(salinfo_init); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 865af27c773..d86669bcdfb 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -46,12 +46,12 @@ #include <linux/kexec.h> #include <linux/crash_dump.h> -#include <asm/ia32.h> #include <asm/machvec.h> #include <asm/mca.h> #include <asm/meminit.h> #include <asm/page.h> #include <asm/paravirt.h> +#include <asm/paravirt_patch.h> #include <asm/patch.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -59,7 +59,6 @@ #include <asm/sections.h> #include <asm/setup.h> #include <asm/smp.h> -#include <asm/system.h> #include <asm/tlbflush.h> #include <asm/unistd.h> #include <asm/hpsim.h> @@ -73,7 +72,7 @@ unsigned long __per_cpu_offset[NR_CPUS]; EXPORT_SYMBOL(__per_cpu_offset); #endif -DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info); +DEFINE_PER_CPU(struct cpuinfo_ia64, ia64_cpu_info); DEFINE_PER_CPU(unsigned long, local_per_cpu_offset); unsigned long ia64_cycles_per_usec; struct ia64_boot_param *ia64_boot_param; @@ -98,12 +97,6 @@ static struct resource bss_resource = { unsigned long ia64_max_cacheline_size; -int dma_get_cache_alignment(void) -{ - return ia64_max_cacheline_size; -} -EXPORT_SYMBOL(dma_get_cache_alignment); - unsigned long ia64_iobase; /* virtual address for I/O accesses */ EXPORT_SYMBOL(ia64_iobase); struct io_space io_space[MAX_IO_SPACES]; @@ -150,9 +143,9 @@ int num_rsvd_regions __initdata; * This routine does not assume the incoming segments are sorted. 
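The salinfo.c conversion above is the canonical seq_file recipe for small read-only proc files: the private pointer is attached with proc_create_data(), fetched back with PDE_DATA(inode) in the open handler, and single_open()/seq_read() take over the offset and buffer bookkeeping that the old read_proc interface (page/start/off/count/eof) made every driver reimplement. A minimal sketch of the same pattern for a hypothetical entry follows; note that with single_open() the show callback's v argument is only the iterator token from single_start(), while the pointer passed at open time comes back as m->private:

    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static int example_show(struct seq_file *m, void *v)
    {
        /* the data pointer given to single_open(), not 'v' */
        unsigned long feature = (unsigned long)m->private;

        seq_puts(m, (sal_platform_features & feature) ? "1\n" : "0\n");
        return 0;
    }

    static int example_open(struct inode *inode, struct file *file)
    {
        /* PDE_DATA() returns the 'data' argument of proc_create_data() */
        return single_open(file, example_show, PDE_DATA(inode));
    }

    static const struct file_operations example_fops = {
        .open    = example_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,
    };

Registration is then the single proc_create_data(name, 0, parent, &example_fops, data) call seen in the salinfo_init() hunk.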
*/ int __init -filter_rsvd_memory (unsigned long start, unsigned long end, void *arg) +filter_rsvd_memory (u64 start, u64 end, void *arg) { - unsigned long range_start, range_end, prev_start; + u64 range_start, range_end, prev_start; void (*func)(unsigned long, unsigned long, int); int i; @@ -190,7 +183,7 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg) * are not filtered out. */ int __init -filter_memory(unsigned long start, unsigned long end, void *arg) +filter_memory(u64 start, u64 end, void *arg) { void (*func)(unsigned long, unsigned long, int); @@ -226,6 +219,23 @@ sort_regions (struct rsvd_region *rsvd_region, int max) } } +/* merge overlaps */ +static int __init +merge_regions (struct rsvd_region *rsvd_region, int max) +{ + int i; + for (i = 1; i < max; ++i) { + if (rsvd_region[i].start >= rsvd_region[i-1].end) + continue; + if (rsvd_region[i].end > rsvd_region[i-1].end) + rsvd_region[i-1].end = rsvd_region[i].end; + --max; + memmove(&rsvd_region[i], &rsvd_region[i+1], + (max - i) * sizeof(struct rsvd_region)); + } + return max; +} + /* * Request address space for all standard resources */ @@ -276,6 +286,7 @@ static void __init setup_crashkernel(unsigned long total, int *n) if (ret == 0 && size > 0) { if (!base) { sort_regions(rsvd_region, *n); + *n = merge_regions(rsvd_region, *n); base = kdump_find_rsvd_region(size, rsvd_region, *n); } @@ -379,6 +390,7 @@ reserve_memory (void) BUG_ON(IA64_MAX_RSVD_REGIONS + 1 < n); sort_regions(rsvd_region, num_rsvd_regions); + num_rsvd_regions = merge_regions(rsvd_region, num_rsvd_regions); } @@ -396,7 +408,7 @@ find_initrd (void) initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start); initrd_end = initrd_start+ia64_boot_param->initrd_size; - printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n", + printk(KERN_INFO "Initial ramdisk at: 0x%lx (%llu bytes)\n", initrd_start, ia64_boot_param->initrd_size); } #endif @@ -473,7 +485,7 @@ mark_bsp_online (void) { #ifdef CONFIG_SMP /* If we register an early console, allow CPU 0 to printk */ - cpu_set(smp_processor_id(), cpu_online_map); + set_cpu_online(smp_processor_id(), true); #endif } @@ -485,28 +497,10 @@ static __init int setup_nomca(char *s) } early_param("nomca", setup_nomca); -/* - * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by - * is_kdump_kernel() to determine if we are booting after a panic. Hence - * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE. - */ #ifdef CONFIG_CRASH_DUMP -/* elfcorehdr= specifies the location of elf core header - * stored by the crashed kernel. - */ -static int __init parse_elfcorehdr(char *arg) -{ - if (!arg) - return -EINVAL; - - elfcorehdr_addr = memparse(arg, &arg); - return 0; -} -early_param("elfcorehdr", parse_elfcorehdr); - -int __init reserve_elfcorehdr(unsigned long *start, unsigned long *end) +int __init reserve_elfcorehdr(u64 *start, u64 *end) { - unsigned long length; + u64 length; /* We get the address using the kernel command line, * but the size is extracted from the EFI tables. 
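The merge_regions() helper added above depends on sort_regions() having already ordered rsvd_region[] by start address: each entry is compared only against its immediate predecessor, overlaps are folded into the earlier entry, and the tail of the array is shifted down. Coalescing matters here because kdump_find_rsvd_region() looks for a free gap between consecutive reserved entries, and an overlapping pair would make such a gap appear larger than it really is. A small worked illustration (hypothetical addresses, not from the patch):

    /*
     * before (sorted by start):
     *   rsvd_region[] = { [0x1000,0x3000), [0x2000,0x4000), [0x9000,0xa000) }
     *
     * entry 1 starts below entry 0's end, so entry 0 is widened and
     * the tail is moved down:
     */
    num_rsvd_regions = merge_regions(rsvd_region, 3);
    /*
     * after:
     *   rsvd_region[] = { [0x1000,0x4000), [0x9000,0xa000) }
     *   num_rsvd_regions == 2
     */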
@@ -537,6 +531,7 @@ setup_arch (char **cmdline_p) paravirt_arch_setup_early(); ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist); + paravirt_patch_apply(); *cmdline_p = __va(ia64_boot_param->command_line); strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE); @@ -564,19 +559,18 @@ setup_arch (char **cmdline_p) early_acpi_boot_init(); # ifdef CONFIG_ACPI_NUMA acpi_numa_init(); -#ifdef CONFIG_ACPI_HOTPLUG_CPU +# ifdef CONFIG_ACPI_HOTPLUG_CPU prefill_possible_map(); -#endif +# endif per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ? 32 : cpus_weight(early_cpu_possible_map)), additional_cpus > 0 ? additional_cpus : 0); # endif -#else -# ifdef CONFIG_SMP - smp_build_cpu_map(); /* happens, e.g., with the Ski simulator */ -# endif #endif /* CONFIG_APCI_BOOT */ +#ifdef CONFIG_SMP + smp_build_cpu_map(); +#endif find_memory(); /* process SAL system table: */ @@ -586,7 +580,7 @@ setup_arch (char **cmdline_p) ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist); #else { - u64 num_phys_stacked; + unsigned long num_phys_stacked; if (ia64_pal_rse_info(&num_phys_stacked, 0) == 0 && num_phys_stacked > 96) ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist); @@ -600,10 +594,6 @@ setup_arch (char **cmdline_p) cpu_init(); /* initialize the bootstrap CPU */ mmu_context_init(); /* initialize context_id bitmap */ -#ifdef CONFIG_ACPI - acpi_boot_init(); -#endif - paravirt_banner(); paravirt_arch_setup_console(cmdline_p); @@ -730,10 +720,10 @@ static void * c_start (struct seq_file *m, loff_t *pos) { #ifdef CONFIG_SMP - while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map)) + while (*pos < nr_cpu_ids && !cpu_online(*pos)) ++*pos; #endif - return *pos < NR_CPUS ? cpu_data(*pos) : NULL; + return *pos < nr_cpu_ids ? cpu_data(*pos) : NULL; } static void * @@ -758,7 +748,7 @@ const struct seq_operations cpuinfo_op = { #define MAX_BRANDS 8 static char brandname[MAX_BRANDS][128]; -static char * __cpuinit +static char * get_model_name(__u8 family, __u8 model) { static int overflow; @@ -788,7 +778,7 @@ get_model_name(__u8 family, __u8 model) return "Unknown"; } -static void __cpuinit +static void identify_cpu (struct cpuinfo_ia64 *c) { union { @@ -853,12 +843,6 @@ identify_cpu (struct cpuinfo_ia64 *c) c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1)); } -void __init -setup_per_cpu_areas (void) -{ - /* start_kernel() requires this... */ -} - /* * Do the following calculations: * @@ -866,13 +850,13 @@ setup_per_cpu_areas (void) * 2. the minimum of the i-cache stride sizes for "flush_icache_range()". * 3. the minimum of the cache stride sizes for "clflush_cache_range()". 
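The c_start() change above is part of a pattern repeated throughout this series: loops bounded by the compile-time constant NR_CPUS (up to 4096 on some ia64 configurations) are cut down to nr_cpu_ids, the number of CPU ids actually possible on the booted system, and direct tests against cpu_online_map give way to the cpu_online()/set_cpu_online() accessors. A short sketch of the idiom:

    #include <linux/cpumask.h>

    unsigned int cpu;

    /* bounded scan, as in the converted c_start() */
    for (cpu = 0; cpu < nr_cpu_ids; cpu++)
        if (cpu_online(cpu))
            pr_info("cpu%u online\n", cpu);

    /* the usual shorthand when no explicit cursor is needed */
    for_each_online_cpu(cpu)
        pr_info("cpu%u online\n", cpu);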
*/ -static void __cpuinit +static void get_cache_info(void) { unsigned long line_size, max = 1; - u64 l, levels, unique_caches; - pal_cache_config_info_t cci; - s64 status; + unsigned long l, levels, unique_caches; + pal_cache_config_info_t cci; + long status; status = ia64_pal_cache_summary(&levels, &unique_caches); if (status != 0) { @@ -890,9 +874,9 @@ get_cache_info(void) /* cache_type (data_or_unified)=2 */ status = ia64_pal_cache_config_info(l, 2, &cci); if (status != 0) { - printk(KERN_ERR - "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n", - __func__, l, status); + printk(KERN_ERR "%s: ia64_pal_cache_config_info" + "(l=%lu, 2) failed (status=%ld)\n", + __func__, l, status); max = SMP_CACHE_BYTES; /* The safest setup for "flush_icache_range()" */ cci.pcci_stride = I_CACHE_STRIDE_SHIFT; @@ -912,10 +896,10 @@ get_cache_info(void) /* cache_type (instruction)=1*/ status = ia64_pal_cache_config_info(l, 1, &cci); if (status != 0) { - printk(KERN_ERR - "%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n", + printk(KERN_ERR "%s: ia64_pal_cache_config_info" + "(l=%lu, 1) failed (status=%ld)\n", __func__, l, status); - /* The safest setup for "flush_icache_range()" */ + /* The safest setup for flush_icache_range() */ cci.pcci_stride = I_CACHE_STRIDE_SHIFT; } } @@ -931,10 +915,10 @@ get_cache_info(void) * cpu_init() initializes state that is per-CPU. This function acts * as a 'CPU state barrier', nothing should get across. */ -void __cpuinit +void cpu_init (void) { - extern void __cpuinit ia64_mmu_init (void *); + extern void ia64_mmu_init(void *); static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG; unsigned long num_phys_stacked; pal_vm_info_2_u_t vmi; @@ -972,7 +956,7 @@ cpu_init (void) * depends on the data returned by identify_cpu(). We break the dependency by * accessing cpu_data() through the canonical per-CPU address. */ - cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start); + cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(ia64_cpu_info) - __per_cpu_start); identify_cpu(cpu_info); #ifdef CONFIG_MCKINLEY @@ -1016,16 +1000,11 @@ cpu_init (void) | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC)); atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; - if (current->mm) - BUG(); + BUG_ON(current->mm); ia64_mmu_init(ia64_imva(cpu_data)); ia64_mca_cpu_init(ia64_imva(cpu_data)); -#ifdef CONFIG_IA32_SUPPORT - ia32_cpu_init(); -#endif - /* Clear ITC to eliminate sched_clock() overflows in human time. 
*/ ia64_set_itc(0); @@ -1072,7 +1051,6 @@ cpu_init (void) max_num_phys_stacked = num_phys_stacked; } platform_cpu_init(); - pm_idle = default_idle; } void __init @@ -1085,6 +1063,8 @@ check_bugs (void) static int __init run_dmi_scan(void) { dmi_scan_machine(); + dmi_memdev_walk(); + dmi_set_dump_stack_arch_desc(); return 0; } core_initcall(run_dmi_scan); diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index e1821ca4c7d..33cab9a8adf 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -21,7 +21,6 @@ #include <linux/unistd.h> #include <linux/wait.h> -#include <asm/ia32.h> #include <asm/intrinsics.h> #include <asm/uaccess.h> #include <asm/rse.h> @@ -31,7 +30,6 @@ #define DEBUG_SIG 0 #define STACK_ALIGN 16 /* minimal alignment for stack pointer */ -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) #if _NSIG_WORDS > 1 # define PUT_SIGSET(k,u) __copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t)) @@ -41,14 +39,6 @@ # define GET_SIGSET(k,u) __get_user((k)->sig[0], &(u)->sig[0]) #endif -asmlinkage long -sys_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, long arg2, - long arg3, long arg4, long arg5, long arg6, long arg7, - struct pt_regs regs) -{ - return do_sigaltstack(uss, uoss, regs.r12); -} - static long restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr) { @@ -115,7 +105,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr) } int -copy_siginfo_to_user (siginfo_t __user *to, siginfo_t *from) +copy_siginfo_to_user (siginfo_t __user *to, const siginfo_t *from) { if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t))) return -EFAULT; @@ -201,14 +191,7 @@ ia64_rt_sigreturn (struct sigscratch *scr) if (GET_SIGSET(&set, &sc->sc_mask)) goto give_sigsegv; - sigdelsetmask(&set, ~_BLOCKABLE); - - spin_lock_irq(&current->sighand->siglock); - { - current->blocked = set; - recalc_sigpending(); - } - spin_unlock_irq(&current->sighand->siglock); + set_current_blocked(&set); if (restore_sigcontext(sc, scr)) goto give_sigsegv; @@ -217,11 +200,8 @@ ia64_rt_sigreturn (struct sigscratch *scr) printk("SIG return (%s:%d): sp=%lx ip=%lx\n", current->comm, current->pid, scr->pt.r12, scr->pt.cr_iip); #endif - /* - * It is more difficult to avoid calling this function than to - * call it and ignore errors. 
- */ - do_sigaltstack(&sc->sc_stack, NULL, scr->pt.r12); + if (restore_altstack(&sc->sc_stack)) + goto give_sigsegv; return retval; give_sigsegv: @@ -229,7 +209,7 @@ ia64_rt_sigreturn (struct sigscratch *scr) si.si_errno = 0; si.si_code = SI_KERNEL; si.si_pid = task_pid_vnr(current); - si.si_uid = current_uid(); + si.si_uid = from_kuid_munged(current_user_ns(), current_uid()); si.si_addr = sc; force_sig_info(SIGSEGV, &si, current); return retval; @@ -326,7 +306,7 @@ force_sigsegv_info (int sig, void __user *addr) si.si_errno = 0; si.si_code = SI_KERNEL; si.si_pid = task_pid_vnr(current); - si.si_uid = current_uid(); + si.si_uid = from_kuid_munged(current_user_ns(), current_uid()); si.si_addr = addr; force_sig_info(SIGSEGV, &si, current); return 0; @@ -385,9 +365,7 @@ setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, err |= copy_siginfo_to_user(&frame->info, info); - err |= __put_user(current->sas_ss_sp, &frame->sc.sc_stack.ss_sp); - err |= __put_user(current->sas_ss_size, &frame->sc.sc_stack.ss_size); - err |= __put_user(sas_ss_flags(scr->pt.r12), &frame->sc.sc_stack.ss_flags); + err |= __save_altstack(&frame->sc.sc_stack, scr->pt.r12); err |= setup_sigcontext(&frame->sc, set, scr); if (unlikely(err)) @@ -422,29 +400,13 @@ setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, } static long -handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, +handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, struct sigscratch *scr) { - if (IS_IA32_PROCESS(&scr->pt)) { - /* send signal to IA-32 process */ - if (!ia32_setup_frame1(sig, ka, info, oldset, &scr->pt)) - return 0; - } else - /* send signal to IA-64 process */ - if (!setup_frame(sig, ka, info, oldset, scr)) - return 0; - - spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(&current->blocked, sig); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + if (!setup_frame(sig, ka, info, sigmask_to_save(), scr)) + return 0; - /* - * Let tracing know that we've done the handler setup. - */ - tracehook_signal_handler(sig, info, ka, &scr->pt, + signal_delivered(sig, info, ka, &scr->pt, test_thread_flag(TIF_SINGLESTEP)); return 1; @@ -458,24 +420,9 @@ void ia64_do_signal (struct sigscratch *scr, long in_syscall) { struct k_sigaction ka; - sigset_t *oldset; siginfo_t info; long restart = in_syscall; long errno = scr->pt.r8; -# define ERR_CODE(c) (IS_IA32_PROCESS(&scr->pt) ? -(c) : (c)) - - /* - * In the ia64_leave_kernel code path, we want the common case to go fast, which - * is why we may in certain cases get here from kernel mode. Just return without - * doing anything if so. - */ - if (!user_mode(&scr->pt)) - return; - - if (current_thread_info()->status & TS_RESTORE_SIGMASK) - oldset = &current->saved_sigmask; - else - oldset = &current->blocked; /* * This only loops in the rare cases of handle_signal() failing, in which case we @@ -490,14 +437,7 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall) * inferior call), thus it's important to check for restarting _after_ * get_signal_to_deliver(). */ - if (IS_IA32_PROCESS(&scr->pt)) { - if (in_syscall) { - if (errno >= 0) - restart = 0; - else - errno = -errno; - } - } else if ((long) scr->pt.r10 != -1) /* * A system call has to be restarted only if one of the error codes * ERESTARTNOHAND, ERESTARTSYS, or ERESTARTNOINTR is returned. 
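The signal.c hunks above retire two open-coded sigmask updates: ia64_rt_sigreturn() now calls set_current_blocked(), and handle_signal() fetches the pre-handler mask through sigmask_to_save(), which hides the old TS_RESTORE_SIGMASK test on current->saved_sigmask versus current->blocked. A before/after fragment of the sigreturn path (the mask is assumed to have been copied in from the user signal frame):

    sigset_t set;       /* filled from the user frame by GET_SIGSET() */

    /* before: open-coded under the siglock */
    sigdelsetmask(&set, sigmask(SIGKILL) | sigmask(SIGSTOP));
    spin_lock_irq(&current->sighand->siglock);
    current->blocked = set;
    recalc_sigpending();
    spin_unlock_irq(&current->sighand->siglock);

    /* after: one helper that strips SIGKILL/SIGSTOP itself and also
     * retargets pending shared-queue signals as needed */
    set_current_blocked(&set);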
If r10 @@ -513,22 +453,18 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall) switch (errno) { case ERESTART_RESTARTBLOCK: case ERESTARTNOHAND: - scr->pt.r8 = ERR_CODE(EINTR); + scr->pt.r8 = EINTR; /* note: scr->pt.r10 is already -1 */ break; case ERESTARTSYS: if ((ka.sa.sa_flags & SA_RESTART) == 0) { - scr->pt.r8 = ERR_CODE(EINTR); + scr->pt.r8 = EINTR; /* note: scr->pt.r10 is already -1 */ break; } case ERESTARTNOINTR: - if (IS_IA32_PROCESS(&scr->pt)) { - scr->pt.r8 = scr->pt.r1; - scr->pt.cr_iip -= 2; - } else - ia64_decrement_ip(&scr->pt); + ia64_decrement_ip(&scr->pt); restart = 0; /* don't restart twice if handle_signal() fails... */ } } @@ -537,16 +473,8 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall) * Whee! Actually deliver the signal. If the delivery failed, we need to * continue to iterate in this loop so we can deliver the SIGSEGV... */ - if (handle_signal(signr, &ka, &info, oldset, scr)) { - /* - * A signal was successfully delivered; the saved - * sigmask will have been stored in the signal frame, - * and will be restored by sigreturn, so we can simply - * clear the TS_RESTORE_SIGMASK flag. - */ - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; + if (handle_signal(signr, &ka, &info, scr)) return; - } } /* Did we come from a system call? */ @@ -555,28 +483,18 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall) if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR || errno == ERESTART_RESTARTBLOCK) { - if (IS_IA32_PROCESS(&scr->pt)) { - scr->pt.r8 = scr->pt.r1; - scr->pt.cr_iip -= 2; - if (errno == ERESTART_RESTARTBLOCK) - scr->pt.r8 = 0; /* x86 version of __NR_restart_syscall */ - } else { - /* - * Note: the syscall number is in r15 which is saved in - * pt_regs so all we need to do here is adjust ip so that - * the "break" instruction gets re-executed. - */ - ia64_decrement_ip(&scr->pt); - if (errno == ERESTART_RESTARTBLOCK) - scr->pt.r15 = __NR_restart_syscall; - } + /* + * Note: the syscall number is in r15 which is saved in + * pt_regs so all we need to do here is adjust ip so that + * the "break" instruction gets re-executed. 
+ */ + ia64_decrement_ip(&scr->pt); + if (errno == ERESTART_RESTARTBLOCK) + scr->pt.r15 = __NR_restart_syscall; } } /* if there's no signal to deliver, we just put the saved sigmask * back */ - if (current_thread_info()->status & TS_RESTORE_SIGMASK) { - current_thread_info()->status &= ~TS_RESTORE_SIGMASK; - sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); - } + restore_saved_sigmask(); } diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index da8f020d82c..9fcd4e63048 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -32,7 +32,7 @@ #include <linux/bitops.h> #include <linux/kexec.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/current.h> #include <asm/delay.h> #include <asm/machvec.h> @@ -44,7 +44,6 @@ #include <asm/processor.h> #include <asm/ptrace.h> #include <asm/sal.h> -#include <asm/system.h> #include <asm/tlbflush.h> #include <asm/unistd.h> #include <asm/mca.h> @@ -58,7 +57,8 @@ static struct local_tlb_flush_counts { unsigned int count; } __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS]; -static DEFINE_PER_CPU(unsigned short, shadow_flush_counts[NR_CPUS]) ____cacheline_aligned; +static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned short [NR_CPUS], + shadow_flush_counts); #define IPI_CALL_FUNC 0 #define IPI_CPU_STOP 1 @@ -66,7 +66,7 @@ static DEFINE_PER_CPU(unsigned short, shadow_flush_counts[NR_CPUS]) ____cachelin #define IPI_KDUMP_CPU_STOP 3 /* This needs to be cacheline aligned because it is written to by *other* CPUs. */ -static DEFINE_PER_CPU_SHARED_ALIGNED(u64, ipi_operation); +static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, ipi_operation); extern void cpu_halt (void); @@ -76,7 +76,7 @@ stop_this_cpu(void) /* * Remove this CPU: */ - cpu_clear(smp_processor_id(), cpu_online_map); + set_cpu_online(smp_processor_id(), false); max_xtp(); local_irq_disable(); cpu_halt(); @@ -166,11 +166,11 @@ send_IPI_allbutself (int op) * Called with preemption disabled. */ static inline void -send_IPI_mask(cpumask_t mask, int op) +send_IPI_mask(const struct cpumask *mask, int op) { unsigned int cpu; - for_each_cpu_mask(cpu, mask) { + for_each_cpu(cpu, mask) { send_IPI_single(cpu, op); } } @@ -225,6 +225,7 @@ smp_send_reschedule (int cpu) { platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); } +EXPORT_SYMBOL_GPL(smp_send_reschedule); /* * Called with preemption disabled. @@ -291,6 +292,7 @@ smp_flush_tlb_all (void) void smp_flush_tlb_mm (struct mm_struct *mm) { + cpumask_var_t cpus; preempt_disable(); /* this happens for the common case of a single-threaded fork(): */ if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1)) @@ -299,16 +301,19 @@ smp_flush_tlb_mm (struct mm_struct *mm) preempt_enable(); return; } - + if (!alloc_cpumask_var(&cpus, GFP_ATOMIC)) { + smp_call_function((void (*)(void *))local_finish_flush_tlb_mm, + mm, 1); + } else { + cpumask_copy(cpus, mm_cpumask(mm)); + smp_call_function_many(cpus, + (void (*)(void *))local_finish_flush_tlb_mm, mm, 1); + free_cpumask_var(cpus); + } + local_irq_disable(); + local_finish_flush_tlb_mm(mm); + local_irq_enable(); preempt_enable(); - /* - * We could optimize this further by using mm->cpu_vm_mask to track which CPUs - * have been running in the address space. It's not clear that this is worth the - * trouble though: to avoid races, we have to raise the IPI on the target CPU - * anyhow, and once a CPU is interrupted, the cost of local_flush_tlb_all() is - * rather trivial. 
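The smp_flush_tlb_mm() rewrite above shows the cpumask_var_t discipline: with CONFIG_CPUMASK_OFFSTACK a cpumask_t is too large to live on the stack, so one is allocated with alloc_cpumask_var(), and since that atomic allocation can fail the code falls back to broadcasting the IPI with smp_call_function(). The explicit local flush with interrupts disabled is needed because smp_call_function_many() never runs the callback on the calling CPU. A sketch of the shape, as a hypothetical helper:

    #include <linux/cpumask.h>
    #include <linux/smp.h>

    static void run_on_mm_cpus(struct mm_struct *mm, void (*func)(void *))
    {
        cpumask_var_t cpus;

        if (!alloc_cpumask_var(&cpus, GFP_ATOMIC)) {
            /* no memory for a mask: IPI every CPU instead */
            smp_call_function(func, mm, 1);
        } else {
            /* snapshot, since mm_cpumask() can change under us */
            cpumask_copy(cpus, mm_cpumask(mm));
            smp_call_function_many(cpus, func, mm, 1);
            free_cpumask_var(cpus);
        }
        /* smp_call_function*() skip the local CPU; handle it here */
        local_irq_disable();
        func(mm);
        local_irq_enable();
    }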
- */ - on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1); } void arch_send_call_function_single_ipi(int cpu) @@ -316,7 +321,7 @@ void arch_send_call_function_single_ipi(int cpu) send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE); } -void arch_send_call_function_ipi(cpumask_t mask) +void arch_send_call_function_ipi_mask(const struct cpumask *mask) { send_IPI_mask(mask, IPI_CALL_FUNC); } diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 52290547c85..547a48d78bd 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -40,11 +40,10 @@ #include <linux/percpu.h> #include <linux/bitops.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/cache.h> #include <asm/current.h> #include <asm/delay.h> -#include <asm/ia32.h> #include <asm/io.h> #include <asm/irq.h> #include <asm/machvec.h> @@ -56,7 +55,6 @@ #include <asm/processor.h> #include <asm/ptrace.h> #include <asm/sal.h> -#include <asm/system.h> #include <asm/tlbflush.h> #include <asm/unistd.h> #include <asm/sn/arch.h> @@ -77,13 +75,6 @@ #endif /* - * Store all idle threads, this can be reused instead of creating - * a new thread. Also avoids complicated thread destroy functionality - * for idle threads. - */ -struct task_struct *idle_thread_array[NR_CPUS]; - -/* * Global array allocated for NR_CPUS at boot time */ struct sal_to_os_boot sal_boot_rendez_state[NR_CPUS]; @@ -96,13 +87,7 @@ struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0]; #define set_brendez_area(x) (sal_state_for_booting_cpu = &sal_boot_rendez_state[(x)]); -#define get_idle_for_cpu(x) (idle_thread_array[(x)]) -#define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p)) - #else - -#define get_idle_for_cpu(x) (NULL) -#define set_idle_for_cpu(x,p) #define set_brendez_area(x) #endif @@ -362,12 +347,11 @@ ia64_sync_itc (unsigned int master) /* * Ideally sets up per-cpu profiling hooks. Doesn't do much now... */ -static inline void __devinit -smp_setup_percpu_timer (void) +static inline void smp_setup_percpu_timer(void) { } -static void __cpuinit +static void smp_callin (void) { int cpuid, phys_id, itc_master; @@ -391,15 +375,19 @@ smp_callin (void) fix_b0_for_bsp(); - ipi_call_lock_irq(); + /* + * numa_node_id() works after this. + */ + set_numa_node(cpu_to_node_map[cpuid]); + set_numa_mem(local_memory_node(cpu_to_node_map[cpuid])); + spin_lock(&vector_lock); /* Setup the per cpu irq handling data structures */ __setup_vector_irq(cpuid); notify_cpu_starting(cpuid); - cpu_set(cpuid, cpu_online_map); + set_cpu_online(cpuid, true); per_cpu(cpu_state, cpuid) = CPU_ONLINE; spin_unlock(&vector_lock); - ipi_call_unlock_irq(); smp_setup_percpu_timer(); @@ -443,10 +431,6 @@ smp_callin (void) calibrate_delay(); local_cpu_data->loops_per_jiffy = loops_per_jiffy; -#ifdef CONFIG_IA32_SUPPORT - ia32_gdt_init(); -#endif - /* * Allow the master to continue. */ @@ -458,7 +442,7 @@ smp_callin (void) /* * Activate a secondary processor. head.S calls this. 
*/ -int __cpuinit +int start_secondary (void *unused) { /* Early console may use I/O ports */ @@ -471,66 +455,16 @@ start_secondary (void *unused) preempt_disable(); smp_callin(); - cpu_idle(); + cpu_startup_entry(CPUHP_ONLINE); return 0; } -struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) -{ - return NULL; -} - -struct create_idle { - struct work_struct work; - struct task_struct *idle; - struct completion done; - int cpu; -}; - -void __cpuinit -do_fork_idle(struct work_struct *work) -{ - struct create_idle *c_idle = - container_of(work, struct create_idle, work); - - c_idle->idle = fork_idle(c_idle->cpu); - complete(&c_idle->done); -} - -static int __cpuinit -do_boot_cpu (int sapicid, int cpu) +static int +do_boot_cpu (int sapicid, int cpu, struct task_struct *idle) { int timeout; - struct create_idle c_idle = { - .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle), - .cpu = cpu, - .done = COMPLETION_INITIALIZER(c_idle.done), - }; - - c_idle.idle = get_idle_for_cpu(cpu); - if (c_idle.idle) { - init_idle(c_idle.idle, cpu); - goto do_rest; - } - - /* - * We can't use kernel_thread since we must avoid to reschedule the child. - */ - if (!keventd_up() || current_is_keventd()) - c_idle.work.func(&c_idle.work); - else { - schedule_work(&c_idle.work); - wait_for_completion(&c_idle.done); - } - - if (IS_ERR(c_idle.idle)) - panic("failed fork for CPU %d", cpu); - - set_idle_for_cpu(cpu, c_idle.idle); - -do_rest: - task_for_booting_cpu = c_idle.idle; + task_for_booting_cpu = idle; Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid); set_brendez_area(cpu); @@ -550,7 +484,7 @@ do_rest: if (!cpu_isset(cpu, cpu_callin_map)) { printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid); ia64_cpu_to_sapicid[cpu] = -1; - cpu_clear(cpu, cpu_online_map); /* was set in smp_callin() */ + set_cpu_online(cpu, false); /* was set in smp_callin() */ return -EINVAL; } return 0; @@ -580,15 +514,14 @@ smp_build_cpu_map (void) } ia64_cpu_to_sapicid[0] = boot_cpu_id; - cpus_clear(cpu_present_map); - cpu_set(0, cpu_present_map); - cpu_set(0, cpu_possible_map); + init_cpu_present(cpumask_of(0)); + set_cpu_possible(0, true); for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) { sapicid = smp_boot_data.cpu_phys_id[i]; if (sapicid == boot_cpu_id) continue; - cpu_set(cpu, cpu_present_map); - cpu_set(cpu, cpu_possible_map); + set_cpu_present(cpu, true); + set_cpu_possible(cpu, true); ia64_cpu_to_sapicid[cpu] = sapicid; cpu++; } @@ -608,10 +541,6 @@ smp_prepare_cpus (unsigned int max_cpus) smp_setup_percpu_timer(); - /* - * We have the boot CPU online for sure. 
- */ - cpu_set(0, cpu_online_map); cpu_set(0, cpu_callin_map); local_cpu_data->loops_per_jiffy = loops_per_jiffy; @@ -626,20 +555,18 @@ smp_prepare_cpus (unsigned int max_cpus) */ if (!max_cpus) { printk(KERN_INFO "SMP mode deactivated.\n"); - cpus_clear(cpu_online_map); - cpus_clear(cpu_present_map); - cpus_clear(cpu_possible_map); - cpu_set(0, cpu_online_map); - cpu_set(0, cpu_present_map); - cpu_set(0, cpu_possible_map); + init_cpu_online(cpumask_of(0)); + init_cpu_present(cpumask_of(0)); + init_cpu_possible(cpumask_of(0)); return; } } -void __devinit smp_prepare_boot_cpu(void) +void smp_prepare_boot_cpu(void) { - cpu_set(smp_processor_id(), cpu_online_map); + set_cpu_online(smp_processor_id(), true); cpu_set(smp_processor_id(), cpu_callin_map); + set_numa_node(cpu_to_node_map[smp_processor_id()]); per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; paravirt_post_smp_prepare_boot_cpu(); } @@ -681,7 +608,7 @@ extern void fixup_irqs(void); int migrate_platform_irqs(unsigned int cpu) { int new_cpei_cpu; - irq_desc_t *desc = NULL; + struct irq_data *data = NULL; const struct cpumask *mask; int retval = 0; @@ -694,23 +621,23 @@ int migrate_platform_irqs(unsigned int cpu) /* * Now re-target the CPEI to a different processor */ - new_cpei_cpu = any_online_cpu(cpu_online_map); + new_cpei_cpu = cpumask_any(cpu_online_mask); mask = cpumask_of(new_cpei_cpu); set_cpei_target_cpu(new_cpei_cpu); - desc = irq_desc + ia64_cpe_irq; + data = irq_get_irq_data(ia64_cpe_irq); /* * Switch for now, immediately, we need to do fake intr * as other interrupts, but need to study CPEI behaviour with * polling before making changes. */ - if (desc) { - desc->chip->disable(ia64_cpe_irq); - desc->chip->set_affinity(ia64_cpe_irq, mask); - desc->chip->enable(ia64_cpe_irq); - printk ("Re-targetting CPEI to cpu %d\n", new_cpei_cpu); + if (data && data->chip) { + data->chip->irq_disable(data); + data->chip->irq_set_affinity(data, mask, false); + data->chip->irq_enable(data); + printk ("Re-targeting CPEI to cpu %d\n", new_cpei_cpu); } } - if (!desc) { + if (!data) { printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu); retval = -EBUSY; } @@ -736,10 +663,10 @@ int __cpu_disable(void) return -EBUSY; } - cpu_clear(cpu, cpu_online_map); + set_cpu_online(cpu, false); if (migrate_platform_irqs(cpu)) { - cpu_set(cpu, cpu_online_map); + set_cpu_online(cpu, true); return -EBUSY; } @@ -785,8 +712,7 @@ smp_cpus_done (unsigned int dummy) (int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100); } -static inline void __devinit -set_cpu_sibling_map(int cpu) +static inline void set_cpu_sibling_map(int cpu) { int i; @@ -802,8 +728,8 @@ set_cpu_sibling_map(int cpu) } } -int __cpuinit -__cpu_up (unsigned int cpu) +int +__cpu_up(unsigned int cpu, struct task_struct *tidle) { int ret; int sapicid; @@ -821,7 +747,7 @@ __cpu_up (unsigned int cpu) per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; /* Processor goes to start_secondary(), sets online flag */ - ret = do_boot_cpu(sapicid, cpu); + ret = do_boot_cpu(sapicid, cpu, tidle); if (ret < 0) return ret; @@ -865,10 +791,9 @@ init_smp_config(void) * identify_siblings(cpu) gets called from identify_cpu. This populates the * information related to logical execution units in per_cpu_data structure. 
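migrate_platform_irqs() above is converted from the old irq_desc_t/desc->chip interface to the genirq struct irq_data model, where the chip callbacks are irq_* methods taking the irq_data pointer. A hedged sketch of re-targeting one interrupt in that style (ordinary code would just call the high-level irq_set_affinity() helper and let the core deal with locking; driving the chip methods directly mirrors the CPEI special case above):

    #include <linux/irq.h>

    static int retarget_one_irq(unsigned int irq, int cpu)
    {
        struct irq_data *data = irq_get_irq_data(irq);

        if (!data || !data->chip || !data->chip->irq_set_affinity)
            return -ENOSYS;

        /* some chips leave irq_disable/irq_enable unset */
        if (data->chip->irq_disable)
            data->chip->irq_disable(data);
        data->chip->irq_set_affinity(data, cpumask_of(cpu), false);
        if (data->chip->irq_enable)
            data->chip->irq_enable(data);
        return 0;
    }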
*/ -void __devinit -identify_siblings(struct cpuinfo_ia64 *c) +void identify_siblings(struct cpuinfo_ia64 *c) { - s64 status; + long status; u16 pltid; pal_logical_to_physical_t info; diff --git a/arch/ia64/kernel/stacktrace.c b/arch/ia64/kernel/stacktrace.c new file mode 100644 index 00000000000..5af2783a87f --- /dev/null +++ b/arch/ia64/kernel/stacktrace.c @@ -0,0 +1,39 @@ +/* + * arch/ia64/kernel/stacktrace.c + * + * Stack trace management functions + * + */ +#include <linux/sched.h> +#include <linux/stacktrace.h> +#include <linux/module.h> + +static void +ia64_do_save_stack(struct unw_frame_info *info, void *arg) +{ + struct stack_trace *trace = arg; + unsigned long ip; + int skip = trace->skip; + + trace->nr_entries = 0; + do { + unw_get_ip(info, &ip); + if (ip == 0) + break; + if (skip == 0) { + trace->entries[trace->nr_entries++] = ip; + if (trace->nr_entries == trace->max_entries) + break; + } else + skip--; + } while (unw_unwind(info) >= 0); +} + +/* + * Save stack-backtrace addresses into a stack_trace buffer. + */ +void save_stack_trace(struct stack_trace *trace) +{ + unw_init_running(ia64_do_save_stack, trace); +} +EXPORT_SYMBOL(save_stack_trace); diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index 92ed83f3403..41e33f84c18 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -25,9 +25,9 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len unsigned long pgoff, unsigned long flags) { long map_shared = (flags & MAP_SHARED); - unsigned long start_addr, align_mask = PAGE_SIZE - 1; + unsigned long align_mask = 0; struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; + struct vm_unmapped_area_info info; if (len > RGN_MAP_LIMIT) return -ENOMEM; @@ -44,7 +44,7 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len addr = 0; #endif if (!addr) - addr = mm->free_area_cache; + addr = TASK_UNMAPPED_BASE; if (map_shared && (TASK_SIZE > 0xfffffffful)) /* @@ -53,28 +53,15 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len * tasks, we prefer to avoid exhausting the address space too quickly by * limiting alignment to a single page. */ - align_mask = SHMLBA - 1; - - full_search: - start_addr = addr = (addr + align_mask) & ~align_mask; - - for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (TASK_SIZE - len < addr || RGN_MAP_LIMIT - len < REGION_OFFSET(addr)) { - if (start_addr != TASK_UNMAPPED_BASE) { - /* Start a new search --- just in case we missed some holes. */ - addr = TASK_UNMAPPED_BASE; - goto full_search; - } - return -ENOMEM; - } - if (!vma || addr + len <= vma->vm_start) { - /* Remember the address where we stopped this search: */ - mm->free_area_cache = addr + len; - return addr; - } - addr = (vma->vm_end + align_mask) & ~align_mask; - } + align_mask = PAGE_MASK & (SHMLBA - 1); + + info.flags = 0; + info.length = len; + info.low_limit = addr; + info.high_limit = TASK_SIZE; + info.align_mask = align_mask; + info.align_offset = 0; + return vm_unmapped_area(&info); } asmlinkage long @@ -100,51 +87,7 @@ sys_getpagesize (void) asmlinkage unsigned long ia64_brk (unsigned long brk) { - unsigned long rlim, retval, newbrk, oldbrk; - struct mm_struct *mm = current->mm; - - /* - * Most of this replicates the code in sys_brk() except for an additional safety - * check and the clearing of r8. 
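In sys_ia64.c above, arch_get_unmapped_area() drops its hand-rolled find_vma() walk (and the free_area_cache it maintained) in favour of the shared vm_unmapped_area() allocator, which takes every constraint through struct vm_unmapped_area_info. A minimal sketch of building such a request; note that all six fields must be filled in, since the allocator reads each of them:

    #include <linux/mm.h>

    /* hypothetical: find 'len' bytes at or above 'addr', SHMLBA-aligned */
    static unsigned long find_area(unsigned long addr, unsigned long len)
    {
        struct vm_unmapped_area_info info;

        info.flags = 0;                 /* 0 = bottom-up search */
        info.length = len;
        info.low_limit = addr;
        info.high_limit = TASK_SIZE;
        info.align_mask = PAGE_MASK & (SHMLBA - 1);
        info.align_offset = 0;
        return vm_unmapped_area(&info); /* address, or -ENOMEM */
    }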
However, we can't call sys_brk() because we need - * to acquire the mmap_sem before we can do the test... - */ - down_write(&mm->mmap_sem); - - if (brk < mm->end_code) - goto out; - newbrk = PAGE_ALIGN(brk); - oldbrk = PAGE_ALIGN(mm->brk); - if (oldbrk == newbrk) - goto set_brk; - - /* Always allow shrinking brk. */ - if (brk <= mm->brk) { - if (!do_munmap(mm, newbrk, oldbrk-newbrk)) - goto set_brk; - goto out; - } - - /* Check against unimplemented/unmapped addresses: */ - if ((newbrk - oldbrk) > RGN_MAP_LIMIT || REGION_OFFSET(newbrk) > RGN_MAP_LIMIT) - goto out; - - /* Check against rlimit.. */ - rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; - if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim) - goto out; - - /* Check against existing mmap mappings. */ - if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE)) - goto out; - - /* Ok, looks good - let it rip. */ - if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk) - goto out; -set_brk: - mm->brk = brk; -out: - retval = mm->brk; - up_write(&mm->mmap_sem); + unsigned long retval = sys_brk(brk); force_successful_syscall_return(); return retval; } @@ -185,39 +128,6 @@ int ia64_mmap_check(unsigned long addr, unsigned long len, return 0; } -static inline unsigned long -do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff) -{ - struct file *file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - return -EBADF; - - if (!file->f_op || !file->f_op->mmap) { - addr = -ENODEV; - goto out; - } - } - - /* Careful about overflows.. */ - len = PAGE_ALIGN(len); - if (!len || len > TASK_SIZE) { - addr = -EINVAL; - goto out; - } - - down_write(&current->mm->mmap_sem); - addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(&current->mm->mmap_sem); - -out: if (file) - fput(file); - return addr; -} - /* * mmap2() is like mmap() except that the offset is expressed in units * of PAGE_SIZE (instead of bytes). 
This allows one to mmap2() (pieces @@ -226,7 +136,7 @@ out: if (file) asmlinkage unsigned long sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff) { - addr = do_mmap2(addr, len, prot, flags, fd, pgoff); + addr = sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); if (!IS_ERR((void *) addr)) force_successful_syscall_return(); return addr; @@ -238,7 +148,7 @@ sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, lo if (offset_in_page(off) != 0) return -EINVAL; - addr = do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + addr = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); if (!IS_ERR((void *) addr)) force_successful_syscall_return(); return addr; @@ -248,22 +158,9 @@ asmlinkage unsigned long ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr) { - extern unsigned long do_mremap (unsigned long addr, - unsigned long old_len, - unsigned long new_len, - unsigned long flags, - unsigned long new_addr); - - down_write(&current->mm->mmap_sem); - { - addr = do_mremap(addr, old_len, new_len, flags, new_addr); - } - up_write(&current->mm->mmap_sem); - - if (IS_ERR((void *) addr)) - return addr; - - force_successful_syscall_return(); + addr = sys_mremap(addr, old_len, new_len, flags, new_addr); + if (!IS_ERR((void *) addr)) + force_successful_syscall_return(); return addr; } diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index f0ebb342409..71c52bc7c28 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -19,7 +19,8 @@ #include <linux/interrupt.h> #include <linux/efi.h> #include <linux/timex.h> -#include <linux/clocksource.h> +#include <linux/timekeeper_internal.h> +#include <linux/platform_device.h> #include <asm/machvec.h> #include <asm/delay.h> @@ -28,15 +29,12 @@ #include <asm/ptrace.h> #include <asm/sal.h> #include <asm/sections.h> -#include <asm/system.h> #include "fsyscall_gtod_data.h" -static cycle_t itc_get_cycles(void); +static cycle_t itc_get_cycles(struct clocksource *cs); -struct fsyscall_gtod_data_t fsyscall_gtod_data = { - .lock = SEQLOCK_UNLOCKED, -}; +struct fsyscall_gtod_data_t fsyscall_gtod_data; struct itc_jitter_data_t itc_jitter_data; @@ -50,8 +48,17 @@ EXPORT_SYMBOL(last_cli_ip); #endif #ifdef CONFIG_PARAVIRT +/* We need to define a real function for sched_clock, to override the + weak default version */ +unsigned long long sched_clock(void) +{ + return paravirt_sched_clock(); +} +#endif + +#ifdef CONFIG_PARAVIRT static void -paravirt_clocksource_resume(void) +paravirt_clocksource_resume(struct clocksource *cs) { if (pv_time_ops.clocksource_resume) pv_time_ops.clocksource_resume(); @@ -63,8 +70,6 @@ static struct clocksource clocksource_itc = { .rating = 350, .read = itc_get_cycles, .mask = CLOCKSOURCE_MASK(64), - .mult = 0, /*to be calculated*/ - .shift = 16, .flags = CLOCK_SOURCE_IS_CONTINUOUS, #ifdef CONFIG_PARAVIRT .resume = paravirt_clocksource_resume, @@ -72,38 +77,35 @@ static struct clocksource clocksource_itc = { }; static struct clocksource *itc_clocksource; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #include <linux/kernel_stat.h> extern cputime_t cycle_to_cputime(u64 cyc); +void vtime_account_user(struct task_struct *tsk) +{ + cputime_t delta_utime; + struct thread_info *ti = task_thread_info(tsk); + + if (ti->ac_utime) { + delta_utime = cycle_to_cputime(ti->ac_utime); + account_user_time(tsk, delta_utime, delta_utime); + ti->ac_utime = 0; + } +} + /* * Called from 
the context switch with interrupts disabled, to charge all * accumulated times to the current process, and to prepare accounting on * the next process. */ -void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next) +void arch_vtime_task_switch(struct task_struct *prev) { struct thread_info *pi = task_thread_info(prev); - struct thread_info *ni = task_thread_info(next); - cputime_t delta_stime, delta_utime; - __u64 now; - - now = ia64_get_itc(); - - delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp)); - if (idle_task(smp_processor_id()) != prev) - account_system_time(prev, 0, delta_stime, delta_stime); - else - account_idle_time(delta_stime); - - if (pi->ac_utime) { - delta_utime = cycle_to_cputime(pi->ac_utime); - account_user_time(prev, delta_utime, delta_utime); - } + struct thread_info *ni = task_thread_info(current); - pi->ac_stamp = ni->ac_stamp = now; + pi->ac_stamp = ni->ac_stamp; ni->ac_stime = ni->ac_utime = 0; } @@ -111,54 +113,44 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next) * Account time for a transition between system, hard irq or soft irq state. * Note that this function is called with interrupts enabled. */ -void account_system_vtime(struct task_struct *tsk) +static cputime_t vtime_delta(struct task_struct *tsk) { struct thread_info *ti = task_thread_info(tsk); - unsigned long flags; cputime_t delta_stime; __u64 now; - local_irq_save(flags); + WARN_ON_ONCE(!irqs_disabled()); now = ia64_get_itc(); delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp)); - if (irq_count() || idle_task(smp_processor_id()) != tsk) - account_system_time(tsk, 0, delta_stime, delta_stime); - else - account_idle_time(delta_stime); ti->ac_stime = 0; - ti->ac_stamp = now; - local_irq_restore(flags); + return delta_stime; } -EXPORT_SYMBOL_GPL(account_system_vtime); -/* - * Called from the timer interrupt handler to charge accumulated user time - * to the current process. Must be called with interrupts disabled. - */ -void account_process_tick(struct task_struct *p, int user_tick) +void vtime_account_system(struct task_struct *tsk) { - struct thread_info *ti = task_thread_info(p); - cputime_t delta_utime; + cputime_t delta = vtime_delta(tsk); - if (ti->ac_utime) { - delta_utime = cycle_to_cputime(ti->ac_utime); - account_user_time(p, delta_utime, delta_utime); - ti->ac_utime = 0; - } + account_system_time(tsk, 0, delta, delta); } +EXPORT_SYMBOL_GPL(vtime_account_system); -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +void vtime_account_idle(struct task_struct *tsk) +{ + account_idle_time(vtime_delta(tsk)); +} + +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static irqreturn_t timer_interrupt (int irq, void *dev_id) { unsigned long new_itm; - if (unlikely(cpu_is_offline(smp_processor_id()))) { + if (cpu_is_offline(smp_processor_id())) { return IRQ_HANDLED; } @@ -180,19 +172,10 @@ timer_interrupt (int irq, void *dev_id) new_itm += local_cpu_data->itm_delta; - if (smp_processor_id() == time_keeper_id) { - /* - * Here we are in the timer irq handler. We have irqs locally - * disabled, but we don't know if the timer_bh is running on - * another CPU. We need to avoid to SMP race by acquiring the - * xtime_lock. 
- */ - write_seqlock(&xtime_lock); - do_timer(1); - local_cpu_data->itm_next = new_itm; - write_sequnlock(&xtime_lock); - } else - local_cpu_data->itm_next = new_itm; + if (smp_processor_id() == time_keeper_id) + xtime_update(1); + + local_cpu_data->itm_next = new_itm; if (time_after(new_itm, ia64_get_itc())) break; @@ -212,7 +195,7 @@ skip_process_time_accounting: * comfort, we increase the safety margin by * intentionally dropping the next tick(s). We do NOT * update itm.next because that would force us to call - * do_timer() which in turn would let our clock run + * xtime_update() which in turn would let our clock run * too fast (with the potentially devastating effect * of losing monotony of time). */ @@ -261,8 +244,7 @@ static int __init nojitter_setup(char *str) __setup("nojitter", nojitter_setup); -void __devinit -ia64_init_itm (void) +void ia64_init_itm(void) { unsigned long platform_base_freq, itc_freq; struct pal_freq_ratio itc_ratio, proc_ratio; @@ -364,18 +346,15 @@ ia64_init_itm (void) ia64_cpu_local_tick(); if (!itc_clocksource) { - /* Sort out mult/shift values: */ - clocksource_itc.mult = - clocksource_hz2mult(local_cpu_data->itc_freq, - clocksource_itc.shift); - clocksource_register(&clocksource_itc); + clocksource_register_hz(&clocksource_itc, + local_cpu_data->itc_freq); itc_clocksource = &clocksource_itc; } } -static cycle_t itc_get_cycles(void) +static cycle_t itc_get_cycles(struct clocksource *cs) { - u64 lcycle, now, ret; + unsigned long lcycle, now, ret; if (!itc_jitter_data.itc_jitter) return get_cycles(); @@ -401,22 +380,35 @@ static cycle_t itc_get_cycles(void) static struct irqaction timer_irqaction = { .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_IRQPOLL, + .flags = IRQF_IRQPOLL, .name = "timer" }; +static struct platform_device rtc_efi_dev = { + .name = "rtc-efi", + .id = -1, +}; + +static int __init rtc_init(void) +{ + if (platform_device_register(&rtc_efi_dev) < 0) + printk(KERN_ERR "unable to register rtc device...\n"); + + /* not necessarily an error */ + return 0; +} +module_init(rtc_init); + +void read_persistent_clock(struct timespec *ts) +{ + efi_gettimeofday(ts); +} + void __init time_init (void) { register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction); - efi_gettimeofday(&xtime); ia64_init_itm(); - - /* - * Initialize wall_to_monotonic such that adding it to xtime will yield zero, the - * tv_nsec field must be normalized (i.e., 0 <= nsec < NSEC_PER_SEC). 
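In the ia64_init_itm() hunk above, the driver stops computing mult/shift itself with clocksource_hz2mult() and instead hands the raw counter frequency to clocksource_register_hz(), letting the timekeeping core choose the factors that best balance precision against multiplication overflow. A sketch of registering a hypothetical free-running counter in this style (read_hw_counter() and counter_freq_hz are stand-ins, not from the patch):

    #include <linux/clocksource.h>

    extern u64 read_hw_counter(void);       /* hypothetical hardware read */
    extern u32 counter_freq_hz;             /* hypothetical, in Hz */

    static cycle_t example_read(struct clocksource *cs)
    {
        return (cycle_t)read_hw_counter();
    }

    static struct clocksource example_cs = {
        .name   = "example",
        .rating = 300,
        .read   = example_read,
        .mask   = CLOCKSOURCE_MASK(64),
        .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
        /* no .mult/.shift: derived by the core from the rate */
    };

    /* at init time: */
    clocksource_register_hz(&example_cs, counter_freq_hz);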
- */ - set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); } /* @@ -448,25 +440,24 @@ void update_vsyscall_tz(void) { } -void update_vsyscall(struct timespec *wall, struct clocksource *c) +void update_vsyscall_old(struct timespec *wall, struct timespec *wtm, + struct clocksource *c, u32 mult) { - unsigned long flags; - - write_seqlock_irqsave(&fsyscall_gtod_data.lock, flags); + write_seqcount_begin(&fsyscall_gtod_data.seq); /* copy fsyscall clock data */ fsyscall_gtod_data.clk_mask = c->mask; - fsyscall_gtod_data.clk_mult = c->mult; + fsyscall_gtod_data.clk_mult = mult; fsyscall_gtod_data.clk_shift = c->shift; - fsyscall_gtod_data.clk_fsys_mmio = c->fsys_mmio; + fsyscall_gtod_data.clk_fsys_mmio = c->archdata.fsys_mmio; fsyscall_gtod_data.clk_cycle_last = c->cycle_last; /* copy kernel time structures */ fsyscall_gtod_data.wall_time.tv_sec = wall->tv_sec; fsyscall_gtod_data.wall_time.tv_nsec = wall->tv_nsec; - fsyscall_gtod_data.monotonic_time.tv_sec = wall_to_monotonic.tv_sec + fsyscall_gtod_data.monotonic_time.tv_sec = wtm->tv_sec + wall->tv_sec; - fsyscall_gtod_data.monotonic_time.tv_nsec = wall_to_monotonic.tv_nsec + fsyscall_gtod_data.monotonic_time.tv_nsec = wtm->tv_nsec + wall->tv_nsec; /* normalize */ @@ -475,6 +466,6 @@ void update_vsyscall(struct timespec *wall, struct clocksource *c) fsyscall_gtod_data.monotonic_time.tv_sec++; } - write_sequnlock_irqrestore(&fsyscall_gtod_data.lock, flags); + write_seqcount_end(&fsyscall_gtod_data.seq); } diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index a8d61a3e9a9..f295f9abba4 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -17,10 +17,12 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/node.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/bootmem.h> #include <linux/nodemask.h> #include <linux/notifier.h> +#include <linux/export.h> #include <asm/mmzone.h> #include <asm/numa.h> #include <asm/cpu.h> @@ -42,7 +44,7 @@ int __ref arch_register_cpu(int num) { #ifdef CONFIG_ACPI /* - * If CPEI can be re-targetted or if this is not + * If CPEI can be re-targeted or if this is not * CPEI target, then it is hotpluggable */ if (can_cpei_retarget() || !is_cpu_cpei_target(num)) @@ -133,11 +135,11 @@ struct cpu_cache_info { struct kobject kobj; }; -static struct cpu_cache_info all_cpu_cache_info[NR_CPUS] __cpuinitdata; +static struct cpu_cache_info all_cpu_cache_info[NR_CPUS]; #define LEAF_KOBJECT_PTR(x,y) (&all_cpu_cache_info[x].cache_leaves[y]) #ifdef CONFIG_SMP -static void __cpuinit cache_shared_cpu_map_setup( unsigned int cpu, +static void cache_shared_cpu_map_setup(unsigned int cpu, struct cache_info * this_leaf) { pal_cache_shared_info_t csi; @@ -172,7 +174,7 @@ static void __cpuinit cache_shared_cpu_map_setup( unsigned int cpu, &csi) == PAL_STATUS_SUCCESS); } #else -static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, +static void cache_shared_cpu_map_setup(unsigned int cpu, struct cache_info * this_leaf) { cpu_set(cpu, this_leaf->shared_cpu_map); @@ -218,7 +220,8 @@ static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf) ssize_t len; cpumask_t shared_cpu_map; - cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map); + cpumask_and(&shared_cpu_map, + &this_leaf->shared_cpu_map, cpu_online_mask); len = cpumask_scnprintf(buf, NR_CPUS+1, &shared_cpu_map); len += sprintf(buf+len, "\n"); return len; @@ -272,7 +275,7 @@ static struct attribute * cache_default_attrs[] = { #define to_object(k) 
container_of(k, struct cache_info, kobj) #define to_attr(a) container_of(a, struct cache_attr, attr) -static ssize_t cache_show(struct kobject * kobj, struct attribute * attr, char * buf) +static ssize_t ia64_cache_show(struct kobject * kobj, struct attribute * attr, char * buf) { struct cache_attr *fattr = to_attr(attr); struct cache_info *this_leaf = to_object(kobj); @@ -282,8 +285,8 @@ static ssize_t cache_show(struct kobject * kobj, struct attribute * attr, char * return ret; } -static struct sysfs_ops cache_sysfs_ops = { - .show = cache_show +static const struct sysfs_ops cache_sysfs_ops = { + .show = ia64_cache_show }; static struct kobj_type cache_ktype = { @@ -295,7 +298,7 @@ static struct kobj_type cache_ktype_percpu_entry = { .sysfs_ops = &cache_sysfs_ops, }; -static void __cpuinit cpu_cache_sysfs_exit(unsigned int cpu) +static void cpu_cache_sysfs_exit(unsigned int cpu) { kfree(all_cpu_cache_info[cpu].cache_leaves); all_cpu_cache_info[cpu].cache_leaves = NULL; @@ -304,12 +307,12 @@ static void __cpuinit cpu_cache_sysfs_exit(unsigned int cpu) return; } -static int __cpuinit cpu_cache_sysfs_init(unsigned int cpu) +static int cpu_cache_sysfs_init(unsigned int cpu) { - u64 i, levels, unique_caches; + unsigned long i, levels, unique_caches; pal_cache_config_info_t cci; int j; - s64 status; + long status; struct cache_info *this_cache; int num_cache_leaves = 0; @@ -348,7 +351,7 @@ static int __cpuinit cpu_cache_sysfs_init(unsigned int cpu) } /* Add cache interface for CPU device */ -static int __cpuinit cache_add_dev(struct sys_device * sys_dev) +static int cache_add_dev(struct device *sys_dev) { unsigned int cpu = sys_dev->id; unsigned long i, j; @@ -360,18 +363,22 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) return 0; oldmask = current->cpus_allowed; - retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); + retval = set_cpus_allowed_ptr(current, cpumask_of(cpu)); if (unlikely(retval)) return retval; retval = cpu_cache_sysfs_init(cpu); - set_cpus_allowed(current, oldmask); + set_cpus_allowed_ptr(current, &oldmask); if (unlikely(retval < 0)) return retval; retval = kobject_init_and_add(&all_cpu_cache_info[cpu].kobj, &cache_ktype_percpu_entry, &sys_dev->kobj, "%s", "cache"); + if (unlikely(retval < 0)) { + cpu_cache_sysfs_exit(cpu); + return retval; + } for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) { this_object = LEAF_KOBJECT_PTR(cpu,i); @@ -385,7 +392,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) } kobject_put(&all_cpu_cache_info[cpu].kobj); cpu_cache_sysfs_exit(cpu); - break; + return retval; } kobject_uevent(&(this_object->kobj), KOBJ_ADD); } @@ -394,7 +401,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) } /* Remove cache interface for CPU device */ -static int __cpuinit cache_remove_dev(struct sys_device * sys_dev) +static int cache_remove_dev(struct device *sys_dev) { unsigned int cpu = sys_dev->id; unsigned long i; @@ -418,13 +425,13 @@ static int __cpuinit cache_remove_dev(struct sys_device * sys_dev) * When a cpu is hot-plugged, do a check and initiate * cache kobject if necessary */ -static int __cpuinit cache_cpu_callback(struct notifier_block *nfb, +static int cache_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct sys_device *sys_dev; + struct device *sys_dev; - sys_dev = get_cpu_sysdev(cpu); + sys_dev = get_cpu_device(cpu); switch (action) { case CPU_ONLINE: case CPU_ONLINE_FROZEN: @@ -438,7 +445,7 @@ static int 
__cpuinit cache_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata cache_cpu_notifier = +static struct notifier_block cache_cpu_notifier = { .notifier_call = cache_cpu_callback }; @@ -447,12 +454,16 @@ static int __init cache_sysfs_init(void) { int i; + cpu_notifier_register_begin(); + for_each_online_cpu(i) { - struct sys_device *sys_dev = get_cpu_sysdev((unsigned int)i); + struct device *sys_dev = get_cpu_device((unsigned int)i); cache_add_dev(sys_dev); } - register_hotcpu_notifier(&cache_cpu_notifier); + __register_hotcpu_notifier(&cache_cpu_notifier); + + cpu_notifier_register_done(); return 0; } diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index f0cda765e68..d3636e67a98 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -19,10 +19,10 @@ #include <linux/kdebug.h> #include <asm/fpswa.h> -#include <asm/ia32.h> #include <asm/intrinsics.h> #include <asm/processor.h> #include <asm/uaccess.h> +#include <asm/setup.h> fpswa_interface_t *fpswa_interface; EXPORT_SYMBOL(fpswa_interface); @@ -72,7 +72,7 @@ die (const char *str, struct pt_regs *regs, long err) bust_spinlocks(0); die.lock_owner = -1; - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irq(&die.lock); if (!regs) @@ -626,21 +626,13 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, break; case 45: -#ifdef CONFIG_IA32_SUPPORT - if (ia32_exception(&regs, isr) == 0) - return; -#endif printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n"); printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n", iip, ifa, isr); force_sig(SIGSEGV, current); - break; + return; case 46: -#ifdef CONFIG_IA32_SUPPORT - if (ia32_intercept(&regs, isr) == 0) - return; -#endif printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n"); printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n", iip, ifa, isr, iim); diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 6db08599ebb..622772b7fb6 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -17,6 +17,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/tty.h> +#include <linux/ratelimit.h> #include <asm/intrinsics.h> #include <asm/processor.h> @@ -60,7 +61,6 @@ dump (const char *str, void *vp, size_t len) */ int no_unaligned_warning; int unaligned_dump_stack; -static int noprint_warning; /* * For M-unit: @@ -1284,24 +1284,9 @@ emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs) /* * Make sure we log the unaligned access, so that user/sysadmin can notice it and * eventually fix the program. However, we don't want to do that for every access so we - * pace it with jiffies. This isn't really MP-safe, but it doesn't really have to be - * either... + * pace it with jiffies. */ -static int -within_logging_rate_limit (void) -{ - static unsigned long count, last_time; - - if (time_after(jiffies, last_time + 5 * HZ)) - count = 0; - if (count < 5) { - last_time = jiffies; - count++; - return 1; - } - return 0; - -} +static DEFINE_RATELIMIT_STATE(logging_rate_limit, 5 * HZ, 5); void ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) @@ -1338,7 +1323,7 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) if (!no_unaligned_warning && !(current->thread.flags & IA64_THREAD_UAC_NOPRINT) && - within_logging_rate_limit()) + __ratelimit(&logging_rate_limit)) { char buf[200]; /* comm[] is at most 16 bytes... 
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index 6db08599ebb..622772b7fb6 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/tty.h>
+#include <linux/ratelimit.h>
 
 #include <asm/intrinsics.h>
 #include <asm/processor.h>
@@ -60,7 +61,6 @@ dump (const char *str, void *vp, size_t len)
  */
 int no_unaligned_warning;
 int unaligned_dump_stack;
-static int noprint_warning;
 
 /*
  * For M-unit:
@@ -1284,24 +1284,9 @@ emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
 /*
  * Make sure we log the unaligned access, so that user/sysadmin can notice it and
  * eventually fix the program.  However, we don't want to do that for every access so we
- * pace it with jiffies.  This isn't really MP-safe, but it doesn't really have to be
- * either...
+ * pace it with jiffies.
  */
-static int
-within_logging_rate_limit (void)
-{
-	static unsigned long count, last_time;
-
-	if (time_after(jiffies, last_time + 5 * HZ))
-		count = 0;
-	if (count < 5) {
-		last_time = jiffies;
-		count++;
-		return 1;
-	}
-	return 0;
-
-}
+static DEFINE_RATELIMIT_STATE(logging_rate_limit, 5 * HZ, 5);
 
 void
 ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
@@ -1338,7 +1323,7 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
 
 	if (!no_unaligned_warning &&
 	    !(current->thread.flags & IA64_THREAD_UAC_NOPRINT) &&
-	    within_logging_rate_limit())
+	    __ratelimit(&logging_rate_limit))
 	{
 		char buf[200];	/* comm[] is at most 16 bytes... */
 		size_t len;
@@ -1357,9 +1342,8 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
 			/* watch for command names containing %s */
 			printk(KERN_WARNING "%s", buf);
 		} else {
-			if (no_unaligned_warning && !noprint_warning) {
-				noprint_warning = 1;
-				printk(KERN_WARNING "%s(%d) encountered an "
+			if (no_unaligned_warning) {
+				printk_once(KERN_WARNING "%s(%d) encountered an "
 				       "unaligned exception which required\n"
 				       "kernel assistance, which degrades "
 				       "the performance of the application.\n"
@@ -1372,7 +1356,7 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
 			}
 		}
 	} else {
-		if (within_logging_rate_limit()) {
+		if (__ratelimit(&logging_rate_limit)) {
 			printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
 			       ifa, regs->cr_iip + ipsr->ri);
 			if (unaligned_dump_stack)
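The unaligned.c conversion above swaps a hand-rolled jiffies counter for the generic ratelimit helper: DEFINE_RATELIMIT_STATE(name, 5 * HZ, 5) allows a burst of up to five messages per five-second window, and __ratelimit() returns nonzero while the burst budget lasts. Unlike the removed within_logging_rate_limit(), the shared state carries its own spinlock, so the "isn't really MP-safe" caveat in the old comment goes away. A short sketch of the same usage (demo_* names are illustrative only):

	#include <linux/kernel.h>
	#include <linux/ratelimit.h>

	/* at most 5 warnings per 5*HZ jiffies, matching the patch above */
	static DEFINE_RATELIMIT_STATE(demo_rs, 5 * HZ, 5);

	static void demo_warn_unaligned(unsigned long ifa)
	{
		if (__ratelimit(&demo_rs))
			pr_warn("unaligned access to 0x%016lx\n", ifa);
	}
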
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index 8eff8c1d40a..20e8a9b21d7 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -18,14 +18,13 @@
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/string.h>
-#include <linux/slab.h>
 #include <linux/efi.h>
 #include <linux/genalloc.h>
+#include <linux/gfp.h>
 #include <asm/page.h>
 #include <asm/pal.h>
-#include <asm/system.h>
 #include <asm/pgtable.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <asm/tlbflush.h>
 #include <asm/sn/arch.h>
 
@@ -98,7 +97,8 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
 
 	/* attempt to allocate a granule's worth of cached memory pages */
 
-	page = alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
+	page = alloc_pages_exact_node(nid,
+				GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
 				IA64_GRANULE_SHIFT-PAGE_SHIFT);
 	if (!page) {
 		mutex_unlock(&uc_pool->add_chunk_mutex);
@@ -249,8 +249,7 @@ EXPORT_SYMBOL(uncached_free_page);
  * Called at boot time to build a map of pages that can be used for
  * memory special operations.
  */
-static int __init uncached_build_memmap(unsigned long uc_start,
-					unsigned long uc_end, void *arg)
+static int __init uncached_build_memmap(u64 uc_start, u64 uc_end, void *arg)
 {
 	int nid = paddr_to_nid(uc_start - __IA64_UNCACHED_OFFSET);
 	struct gen_pool *pool = uncached_pools[nid].pool;
diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
index b6c0e63a0bf..8f66195999e 100644
--- a/arch/ia64/kernel/unwind.c
+++ b/arch/ia64/kernel/unwind.c
@@ -41,7 +41,6 @@
 #include <asm/ptrace_offsets.h>
 #include <asm/rse.h>
 #include <asm/sections.h>
-#include <asm/system.h>
 #include <asm/uaccess.h>
 
 #include "entry.h"
@@ -1204,10 +1203,10 @@ desc_spill_sprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word
 static inline unw_hash_index_t
 hash (unsigned long ip)
 {
-#	define hashmagic	0x9e3779b97f4a7c16UL	/* based on (sqrt(5)/2-1)*2^64 */
+	/* magic number = ((sqrt(5)-1)/2)*2^64 */
+	static const unsigned long hashmagic = 0x9e3779b97f4a7c16UL;
 
-	return (ip >> 4)*hashmagic >> (64 - UNW_LOG_HASH_SIZE);
-#undef hashmagic
+	return (ip >> 4) * hashmagic >> (64 - UNW_LOG_HASH_SIZE);
 }
 
 static inline long
@@ -1531,7 +1530,7 @@ build_script (struct unw_frame_info *info)
 	struct unw_labeled_state *ls, *next;
 	unsigned long ip = info->ip;
 	struct unw_state_record sr;
-	struct unw_table *table;
+	struct unw_table *table, *prev;
 	struct unw_reg_info *r;
 	struct unw_insn insn;
 	u8 *dp, *desc_end;
@@ -1560,11 +1559,26 @@ build_script (struct unw_frame_info *info)
 
 	STAT(parse_start = ia64_get_itc());
 
+	prev = NULL;
 	for (table = unw.tables; table; table = table->next) {
 		if (ip >= table->start && ip < table->end) {
+			/*
+			 * Leave the kernel unwind table at the very front,
+			 * lest moving it breaks some assumption elsewhere.
+			 * Otherwise, move the matching table to the second
+			 * position in the list so that traversals can benefit
+			 * from commonality in backtrace paths.
+			 */
+			if (prev && prev != unw.tables) {
+				/* unw is safe - we're already spinlocked */
+				prev->next = table->next;
+				table->next = unw.tables->next;
+				unw.tables->next = table;
+			}
 			e = lookup(table, ip - table->segment_base);
 			break;
 		}
+		prev = table;
 	}
 	if (!e) {
 		/* no info, return default unwinder (leaf proc, no mem stack, no saved regs) */
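The build_script() change above adds a self-organizing twist to the linear unwind-table search: a table that matches deep in the list is unlinked and reinserted right behind the head, so frequently hit user tables are found quickly on later backtraces while the kernel table stays pinned first (the list is only mutated under the unwind spinlock, as the in-diff comment notes). A standalone sketch of the same promote-to-second heuristic (types and names are illustrative, not the kernel's):

	#include <stdio.h>

	struct tbl {
		unsigned long start, end;
		struct tbl *next;
	};

	static struct tbl *lookup_table(struct tbl *head, unsigned long ip)
	{
		struct tbl *table, *prev = NULL;

		for (table = head; table; prev = table, table = table->next) {
			if (ip >= table->start && ip < table->end) {
				/* promote a deep hit to the second slot;
				 * the head itself stays where it is */
				if (prev && prev != head) {
					prev->next = table->next;
					table->next = head->next;
					head->next = table;
				}
				return table;
			}
		}
		return NULL;
	}

	int main(void)
	{
		struct tbl c = { 300, 400, NULL };
		struct tbl b = { 200, 300, &c };
		struct tbl a = { 100, 200, &b };

		lookup_table(&a, 350);	/* hits c, third in the list */
		printf("second entry now [%lu,%lu)\n",
		       a.next->start, a.next->end);	/* [300,400) */
		return 0;
	}
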
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 10a7d47e851..84f8a52ac5a 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -1,299 +1,248 @@
 #include <asm/cache.h>
 #include <asm/ptrace.h>
-#include <asm/system.h>
 #include <asm/pgtable.h>
 #include <asm-generic/vmlinux.lds.h>
 
-#define IVT_TEXT							\
-		VMLINUX_SYMBOL(__start_ivt_text) = .;			\
-		*(.text.ivt)						\
-		VMLINUX_SYMBOL(__end_ivt_text) = .;
-
 OUTPUT_FORMAT("elf64-ia64-little")
 OUTPUT_ARCH(ia64)
 ENTRY(phys_start)
 jiffies = jiffies_64;
+
 PHDRS {
-  code   PT_LOAD;
-  percpu PT_LOAD;
-  data   PT_LOAD;
-  note   PT_NOTE;
-  unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */
+	code   PT_LOAD;
+	percpu PT_LOAD;
+	data   PT_LOAD;
+	note   PT_NOTE;
+	unwind 0x70000001;	/* PT_IA_64_UNWIND, but ld doesn't match the name */
 }
-SECTIONS
-{
-  /* Sections to be discarded */
-  /DISCARD/ : {
-	EXIT_TEXT
-	EXIT_DATA
-	*(.exitcall.exit)
-	*(.IA_64.unwind.exit.text)
-	*(.IA_64.unwind_info.exit.text)
-  }
-  v = PAGE_OFFSET;	/* this symbol is here to make debugging easier... */
-  phys_start = _start - LOAD_OFFSET;
-
-  code : { } :code
-  . = KERNEL_START;
-
-  _text = .;
-  _stext = .;
-
-  .text : AT(ADDR(.text) - LOAD_OFFSET)
-    {
-	IVT_TEXT
-	TEXT_TEXT
-	SCHED_TEXT
-	LOCK_TEXT
-	KPROBES_TEXT
-	*(.gnu.linkonce.t*)
-    }
-  .text.head : AT(ADDR(.text.head) - LOAD_OFFSET)
-	{ *(.text.head) }
-  .text2 : AT(ADDR(.text2) - LOAD_OFFSET)
-	{ *(.text2) }
-#ifdef CONFIG_SMP
-  .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET)
-	{ *(.text.lock) }
-#endif
-  _etext = .;
-
-  /* Read-only data */
-
-  NOTES :code :note /* put .notes in text and mark in PT_NOTE  */
-  code_continues : {} :code /* switch back to regular program...  */
+SECTIONS {
+	/*
+	 * unwind exit sections must be discarded before
+	 * the rest of the sections get included.
+	 */
+	/DISCARD/ : {
+		*(.IA_64.unwind.exit.text)
+		*(.IA_64.unwind_info.exit.text)
+		*(.comment)
+		*(.note)
+	}
 
-  /* Exception table */
-  . = ALIGN(16);
-  __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET)
-    {
-	__start___ex_table = .;
-	*(__ex_table)
-	__stop___ex_table = .;
+	v = PAGE_OFFSET;	/* this symbol is here to make debugging easier... */
+	phys_start = _start - LOAD_OFFSET;
+
+	code : {
+	} :code
+	. = KERNEL_START;
+
+	_text = .;
+	_stext = .;
+
+	.text : AT(ADDR(.text) - LOAD_OFFSET) {
+		__start_ivt_text = .;
+		*(.text..ivt)
+		__end_ivt_text = .;
+		TEXT_TEXT
+		SCHED_TEXT
+		LOCK_TEXT
+		KPROBES_TEXT
+		*(.gnu.linkonce.t*)
 	}
 
-  /* MCA table */
-  . = ALIGN(16);
-  __mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET)
-    {
-	__start___mca_table = .;
-	*(__mca_table)
-	__stop___mca_table = .;
+	.text2 : AT(ADDR(.text2) - LOAD_OFFSET)	{
+		*(.text2)
 	}
-  .data.patch.phys_stack_reg : AT(ADDR(.data.patch.phys_stack_reg) - LOAD_OFFSET)
-    {
-	__start___phys_stack_reg_patchlist = .;
-	*(.data.patch.phys_stack_reg)
-	__end___phys_stack_reg_patchlist = .;
+#ifdef CONFIG_SMP
+	.text..lock : AT(ADDR(.text..lock) - LOAD_OFFSET) {
+		*(.text..lock)
+	}
+#endif
+	_etext = .;
+
+	/*
+	 * Read-only data
+	 */
+	NOTES :code :note	/* put .notes in text and mark in PT_NOTE  */
+	code_continues : {
+	} : code	       /* switch back to regular program...  */
+
+	EXCEPTION_TABLE(16)
+
+	/* MCA table */
+	. = ALIGN(16);
+	__mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET) {
+		__start___mca_table = .;
+		*(__mca_table)
+		__stop___mca_table = .;
 	}
 
-  /* Global data */
-  _data = .;
-
-  /* Unwind info & table: */
-  . = ALIGN(8);
-  .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET)
-	{ *(.IA_64.unwind_info*) }
-  .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET)
-	{
-	__start_unwind = .;
-	*(.IA_64.unwind*)
-	__end_unwind = .;
-	} :code :unwind
-  code_continues2 : {} : code
+	.data..patch.phys_stack_reg : AT(ADDR(.data..patch.phys_stack_reg) - LOAD_OFFSET) {
+		__start___phys_stack_reg_patchlist = .;
+		*(.data..patch.phys_stack_reg)
+		__end___phys_stack_reg_patchlist = .;
+	}
 
-  RODATA
+	/*
+	 * Global data
+	 */
+	_data = .;
 
-  .opd : AT(ADDR(.opd) - LOAD_OFFSET)
-	{ *(.opd) }
+	/* Unwind info & table: */
+	. = ALIGN(8);
+	.IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET) {
+		*(.IA_64.unwind_info*)
+	}
+	.IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET) {
+		__start_unwind = .;
+		*(.IA_64.unwind*)
+		__end_unwind = .;
+	} :code :unwind
+	code_continues2 : {
+	} : code
 
-  /* Initialization code and data: */
+	RODATA
 
-  . = ALIGN(PAGE_SIZE);
-  __init_begin = .;
-  .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
-	{
-	 _sinittext = .;
-	 INIT_TEXT
-	 _einittext = .;
+	.opd : AT(ADDR(.opd) - LOAD_OFFSET) {
+		*(.opd)
 	}
-  .init.data : AT(ADDR(.init.data) - LOAD_OFFSET)
-	{ INIT_DATA }
+	/*
+	 * Initialization code and data:
+	 */
+	. = ALIGN(PAGE_SIZE);
+	__init_begin = .;
 
-#ifdef CONFIG_BLK_DEV_INITRD
-  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET)
-	{
-	  __initramfs_start = .;
-	  *(.init.ramfs)
-	  __initramfs_end = .;
-	}
-#endif
+	INIT_TEXT_SECTION(PAGE_SIZE)
+	INIT_DATA_SECTION(16)
 
-  . = ALIGN(16);
-  .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET)
-	{
-	__setup_start = .;
-	*(.init.setup)
-	__setup_end = .;
-	}
-  .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET)
-	{
-	__initcall_start = .;
-	INITCALLS
-	__initcall_end = .;
+	.data..patch.vtop : AT(ADDR(.data..patch.vtop) - LOAD_OFFSET) {
+		__start___vtop_patchlist = .;
+		*(.data..patch.vtop)
+		__end___vtop_patchlist = .;
 	}
-  .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET)
-	{
-	__start___vtop_patchlist = .;
-	*(.data.patch.vtop)
-	__end___vtop_patchlist = .;
+	.data..patch.rse : AT(ADDR(.data..patch.rse) - LOAD_OFFSET) {
+		__start___rse_patchlist = .;
+		*(.data..patch.rse)
+		__end___rse_patchlist = .;
 	}
-  .data.patch.rse : AT(ADDR(.data.patch.rse) - LOAD_OFFSET)
-	{
-	__start___rse_patchlist = .;
-	*(.data.patch.rse)
-	__end___rse_patchlist = .;
+	.data..patch.mckinley_e9 : AT(ADDR(.data..patch.mckinley_e9) - LOAD_OFFSET) {
+		__start___mckinley_e9_bundles = .;
+		*(.data..patch.mckinley_e9)
+		__end___mckinley_e9_bundles = .;
 	}
-  .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET)
-	{
-	__start___mckinley_e9_bundles = .;
-	*(.data.patch.mckinley_e9)
-	__end___mckinley_e9_bundles = .;
+#if defined(CONFIG_PARAVIRT)
+	. = ALIGN(16);
+	.paravirt_bundles : AT(ADDR(.paravirt_bundles) - LOAD_OFFSET) {
+		__start_paravirt_bundles = .;
+		*(.paravirt_bundles)
+		__stop_paravirt_bundles = .;
 	}
-
-#if defined(CONFIG_IA64_GENERIC)
-  /* Machine Vector */
-  . = ALIGN(16);
-  .machvec : AT(ADDR(.machvec) - LOAD_OFFSET)
-	{
-	machvec_start = .;
-	*(.machvec)
-	machvec_end = .;
+	. = ALIGN(16);
+	.paravirt_insts : AT(ADDR(.paravirt_insts) - LOAD_OFFSET) {
+		__start_paravirt_insts = .;
+		*(.paravirt_insts)
+		__stop_paravirt_insts = .;
+	}
+	. = ALIGN(16);
+	.paravirt_branches : AT(ADDR(.paravirt_branches) - LOAD_OFFSET) {
+		__start_paravirt_branches = .;
+		*(.paravirt_branches)
+		__stop_paravirt_branches = .;
 	}
 #endif
 
-  . = ALIGN(8);
-  __con_initcall_start = .;
-  .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET)
-	{ *(.con_initcall.init) }
-  __con_initcall_end = .;
-  __security_initcall_start = .;
-  .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET)
-	{ *(.security_initcall.init) }
-  __security_initcall_end = .;
-  . = ALIGN(PAGE_SIZE);
-  __init_end = .;
-
-  /* The initial task and kernel stack */
-  .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET)
-	{ *(.data.init_task) }
-
-  .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET)
-	{ *(__special_page_section)
-	__start_gate_section = .;
-	*(.data.gate)
-	__stop_gate_section = .;
-	}
-  . = ALIGN(PAGE_SIZE);		/* make sure the gate page doesn't expose
-				 * kernel data
-				 */
-
-  .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET)
-	{ *(.data.read_mostly) }
-
-  .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET)
-	{ *(.data.cacheline_aligned) }
-
-  /* Per-cpu data: */
-  percpu : { } :percpu
-  . = ALIGN(PERCPU_PAGE_SIZE);
-  __phys_per_cpu_start = .;
-  .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
-	{
-		__per_cpu_start = .;
-		*(.data.percpu)
-		*(.data.percpu.shared_aligned)
-		__per_cpu_end = .;
+#if defined(CONFIG_IA64_GENERIC)
+	/* Machine Vector */
+	. = ALIGN(16);
+	.machvec : AT(ADDR(.machvec) - LOAD_OFFSET) {
+		machvec_start = .;
+		*(.machvec)
+		machvec_end = .;
 	}
-  . = __phys_per_cpu_start + PERCPU_PAGE_SIZE;	/* ensure percpu data fits
-						 * into percpu page size
-						 */
+#endif
 
-  data : { } :data
-  .data : AT(ADDR(.data) - LOAD_OFFSET)
-	{
 #ifdef CONFIG_SMP
-  . = ALIGN(PERCPU_PAGE_SIZE);
-  __cpu0_per_cpu = .;
-  . = . + PERCPU_PAGE_SIZE;	/* cpu0 per-cpu space */
+	. = ALIGN(PERCPU_PAGE_SIZE);
+	__cpu0_per_cpu = .;
+	. = . + PERCPU_PAGE_SIZE;	/* cpu0 per-cpu space */
 #endif
+
+	. = ALIGN(PAGE_SIZE);
+	__init_end = .;
+
+	.data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) {
+		PAGE_ALIGNED_DATA(PAGE_SIZE)
+		. = ALIGN(PAGE_SIZE);
+		__start_gate_section = .;
+		*(.data..gate)
+		__stop_gate_section = .;
+	}
+	/*
+	 * make sure the gate page doesn't expose
+	 * kernel data
+	 */
+	. = ALIGN(PAGE_SIZE);
+
+	/* Per-cpu data: */
+	. = ALIGN(PERCPU_PAGE_SIZE);
+	PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu)
+	__phys_per_cpu_start = __per_cpu_load;
+	/*
+	 * ensure percpu data fits
+	 * into percpu page size
+	 */
+	. = __phys_per_cpu_start + PERCPU_PAGE_SIZE;
+
+	data : {
+	} :data
+	.data : AT(ADDR(.data) - LOAD_OFFSET) {
+		_sdata = .;
+		INIT_TASK_DATA(PAGE_SIZE)
+		CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES)
+		READ_MOSTLY_DATA(SMP_CACHE_BYTES)
 		DATA_DATA
 		*(.data1)
 		*(.gnu.linkonce.d*)
 		CONSTRUCTORS
 	}
 
-  . = ALIGN(16);	/* gp must be 16-byte aligned for exc. table */
-  .got : AT(ADDR(.got) - LOAD_OFFSET)
-	{ *(.got.plt) *(.got) }
-  __gp = ADDR(.got) + 0x200000;
-  /* We want the small data sections together, so single-instruction offsets
-     can access them all, and initialized data all before uninitialized, so
-     we can shorten the on-disk segment size.  */
-  .sdata : AT(ADDR(.sdata) - LOAD_OFFSET)
-	{ *(.sdata) *(.sdata1) *(.srdata) }
-  _edata = .;
-  __bss_start = .;
-  .sbss : AT(ADDR(.sbss) - LOAD_OFFSET)
-	{ *(.sbss) *(.scommon) }
-  .bss : AT(ADDR(.bss) - LOAD_OFFSET)
-	{ *(.bss) *(COMMON) }
-  __bss_stop = .;
-
-  _end = .;
-
-  code : { } :code
-  /* Stabs debugging sections.  */
-  .stab 0 : { *(.stab) }
-  .stabstr 0 : { *(.stabstr) }
-  .stab.excl 0 : { *(.stab.excl) }
-  .stab.exclstr 0 : { *(.stab.exclstr) }
-  .stab.index 0 : { *(.stab.index) }
-  .stab.indexstr 0 : { *(.stab.indexstr) }
-  /* DWARF debug sections.
-     Symbols in the DWARF debugging sections are relative to the beginning
-     of the section so we begin them at 0.  */
-  /* DWARF 1 */
-  .debug 0 : { *(.debug) }
-  .line 0 : { *(.line) }
-  /* GNU DWARF 1 extensions */
-  .debug_srcinfo 0 : { *(.debug_srcinfo) }
-  .debug_sfnames 0 : { *(.debug_sfnames) }
-  /* DWARF 1.1 and DWARF 2 */
-  .debug_aranges 0 : { *(.debug_aranges) }
-  .debug_pubnames 0 : { *(.debug_pubnames) }
-  /* DWARF 2 */
-  .debug_info 0 : { *(.debug_info) }
-  .debug_abbrev 0 : { *(.debug_abbrev) }
-  .debug_line 0 : { *(.debug_line) }
-  .debug_frame 0 : { *(.debug_frame) }
-  .debug_str 0 : { *(.debug_str) }
-  .debug_loc 0 : { *(.debug_loc) }
-  .debug_macinfo 0 : { *(.debug_macinfo) }
-  /* SGI/MIPS DWARF 2 extensions */
-  .debug_weaknames 0 : { *(.debug_weaknames) }
-  .debug_funcnames 0 : { *(.debug_funcnames) }
-  .debug_typenames 0 : { *(.debug_typenames) }
-  .debug_varnames 0 : { *(.debug_varnames) }
-  /* These must appear regardless of  .  */
-  /DISCARD/ : { *(.comment) }
-  /DISCARD/ : { *(.note) }
+	. = ALIGN(16);	/* gp must be 16-byte aligned for exc. table */
+	.got : AT(ADDR(.got) - LOAD_OFFSET) {
+		*(.got.plt)
+		*(.got)
+	}
+	__gp = ADDR(.got) + 0x200000;
+
+	/*
+	 * We want the small data sections together,
+	 * so single-instruction offsets can access
+	 * them all, and initialized data all before
+	 * uninitialized, so we can shorten the
+	 * on-disk segment size.
+	 */
+	.sdata : AT(ADDR(.sdata) - LOAD_OFFSET) {
+		*(.sdata)
+		*(.sdata1)
+		*(.srdata)
+	}
+	_edata = .;
+
+	BSS_SECTION(0, 0, 0)
+
+	_end = .;
+
+	code : {
+	} :code
+
+	STABS_DEBUG
+	DWARF_DEBUG
+
+	/* Default discards */
+	DISCARDS
 }
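Most of the marker symbols the rewritten linker script defines (__start_unwind/__end_unwind, __start_gate_section/__stop_gate_section, machvec_start/machvec_end, ...) are consumed from C as bare addresses. A short reminder of that idiom, with a hypothetical helper name: linker symbols carry no storage of their own, so C code conventionally declares them as arrays and uses only their addresses, never their "values":

	/* declarations matching marker symbols defined by the script */
	extern char __start_unwind[], __end_unwind[];

	static unsigned long unwind_table_bytes(void)
	{
		/* pointer difference, not a dereference: the symbols
		 * only mark the section boundaries */
		return (unsigned long)(__end_unwind - __start_unwind);
	}
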
