diff options
Diffstat (limited to 'arch/ia64')
74 files changed, 987 insertions, 551 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index e19185d2655..de1bff65996 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -14,6 +14,7 @@ config IA64 select PCI if (!IA64_HP_SIM) select ACPI if (!IA64_HP_SIM) select PM if (!IA64_HP_SIM) + select ARCH_SUPPORTS_MSI default y help The Itanium Processor Family is Intel's 64-bit successor to @@ -30,6 +31,10 @@ config ZONE_DMA def_bool y depends on !IA64_SGI_SN2 +config QUICKLIST + bool + default y + config MMU bool default y @@ -438,6 +443,16 @@ config IA64_PALINFO To use this option, you have to ensure that the "/proc file system support" (CONFIG_PROC_FS) is enabled, too. +config IA64_MC_ERR_INJECT + tristate "MC error injection support" + help + Selets whether support for MC error injection. By enabling the + support, kernel provide sysfs interface for user application to + call MC error injection PAL procedure to inject various errors. + This is a useful tool for MCA testing. + + If you're unsure, do not select this option. + config SGI_SN def_bool y if (IA64_SGI_SN2 || IA64_GENERIC) @@ -457,7 +472,7 @@ config KEXEC help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot - but it is indepedent of the system firmware. And like a reboot + but it is independent of the system firmware. And like a reboot you can start any kernel with it, not just Linux. The name comes from the similiarity to the exec system call. diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig index 153bfdc0182..90bd9601cdd 100644 --- a/arch/ia64/defconfig +++ b/arch/ia64/defconfig @@ -164,6 +164,7 @@ CONFIG_COMPAT=y CONFIG_IA64_MCA_RECOVERY=y CONFIG_PERFMON=y CONFIG_IA64_PALINFO=y +# CONFIG_MC_ERR_INJECT is not set CONFIG_SGI_SN=y # CONFIG_IA64_ESI is not set diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c index 2153bcacbe6..94e57109fad 100644 --- a/arch/ia64/hp/common/hwsw_iommu.c +++ b/arch/ia64/hp/common/hwsw_iommu.c @@ -63,7 +63,7 @@ use_swiotlb (struct device *dev) return dev && dev->dma_mask && !hwiommu_dma_supported(dev, *dev->dma_mask); } -void +void __init hwsw_init (void) { /* default to a smallish 2MB sw I/O TLB */ diff --git a/arch/ia64/hp/sim/boot/fw-emu.c b/arch/ia64/hp/sim/boot/fw-emu.c index 5a0a7afcfc3..300acd913d9 100644 --- a/arch/ia64/hp/sim/boot/fw-emu.c +++ b/arch/ia64/hp/sim/boot/fw-emu.c @@ -287,7 +287,7 @@ sys_fw_init (const char *args, int arglen) memset(efi_systab, 0, sizeof(efi_systab)); efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE; - efi_systab->hdr.revision = EFI_SYSTEM_TABLE_REVISION; + efi_systab->hdr.revision = ((1 << 16) | 00); efi_systab->hdr.headersize = sizeof(efi_systab->hdr); efi_systab->fw_vendor = __pa("H\0e\0w\0l\0e\0t\0t\0-\0P\0a\0c\0k\0a\0r\0d\0\0"); efi_systab->fw_revision = 1; diff --git a/arch/ia64/ia32/audit.c b/arch/ia64/ia32/audit.c index 92d7d0c8d93..8850fe40ea3 100644 --- a/arch/ia64/ia32/audit.c +++ b/arch/ia64/ia32/audit.c @@ -20,6 +20,11 @@ unsigned ia32_read_class[] = { ~0U }; +unsigned ia32_signal_class[] = { +#include <asm-generic/audit_signal.h> +~0U +}; + int ia32_classify_syscall(unsigned syscall) { switch(syscall) { diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index 687e5fdc968..99b665e2b1d 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -52,43 +52,6 @@ ENTRY(ia32_clone) br.ret.sptk.many rp END(ia32_clone) -ENTRY(sys32_rt_sigsuspend) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs - mov loc0=rp - mov out0=in0 // mask - mov out1=in1 // sigsetsize - mov out2=sp // out2 = &sigscratch - .fframe 16 - adds sp=-16,sp // allocate dummy "sigscratch" - ;; - .body - br.call.sptk.many rp=ia32_rt_sigsuspend -1: .restore sp - adds sp=16,sp - mov rp=loc0 - mov ar.pfs=loc1 - br.ret.sptk.many rp -END(sys32_rt_sigsuspend) - -ENTRY(sys32_sigsuspend) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs - mov loc0=rp - mov out0=in2 // mask (first two args are ignored) - ;; - mov out1=sp // out1 = &sigscratch - .fframe 16 - adds sp=-16,sp // allocate dummy "sigscratch" - .body - br.call.sptk.many rp=ia32_sigsuspend -1: .restore sp - adds sp=16,sp - mov rp=loc0 - mov ar.pfs=loc1 - br.ret.sptk.many rp -END(sys32_sigsuspend) - GLOBAL_ENTRY(ia32_ret_from_clone) PT_REGS_UNWIND_INFO(0) { /* @@ -389,7 +352,7 @@ ia32_syscall_table: data8 sys_rt_sigpending data8 compat_sys_rt_sigtimedwait data8 sys32_rt_sigqueueinfo - data8 sys32_rt_sigsuspend + data8 compat_sys_rt_sigsuspend data8 sys32_pread /* 180 */ data8 sys32_pwrite data8 sys_chown /* 16-bit version */ diff --git a/arch/ia64/ia32/ia32_ldt.c b/arch/ia64/ia32/ia32_ldt.c index a152738c7d0..16d51c14684 100644 --- a/arch/ia64/ia32/ia32_ldt.c +++ b/arch/ia64/ia32/ia32_ldt.c @@ -10,7 +10,6 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/vmalloc.h> #include <asm/uaccess.h> diff --git a/arch/ia64/ia32/ia32_signal.c b/arch/ia64/ia32/ia32_signal.c index b3355a9ca2c..85e82f32e48 100644 --- a/arch/ia64/ia32/ia32_signal.c +++ b/arch/ia64/ia32/ia32_signal.c @@ -18,7 +18,6 @@ #include <linux/sched.h> #include <linux/signal.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/stddef.h> #include <linux/syscalls.h> #include <linux/unistd.h> @@ -452,59 +451,20 @@ sigact_set_handler (struct k_sigaction *sa, unsigned int handler, unsigned int r sa->sa.sa_handler = (__sighandler_t) (((unsigned long) restorer << 32) | handler); } -long -__ia32_rt_sigsuspend (compat_sigset_t *sset, unsigned int sigsetsize, struct sigscratch *scr) +asmlinkage long +sys32_sigsuspend (int history0, int history1, old_sigset_t mask) { - extern long ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall); - sigset_t oldset, set; - - scr->scratch_unat = 0; /* avoid leaking kernel bits to user level */ - memset(&set, 0, sizeof(set)); - - memcpy(&set.sig, &sset->sig, sigsetsize); - - sigdelsetmask(&set, ~_BLOCKABLE); - + mask &= _BLOCKABLE; spin_lock_irq(¤t->sighand->siglock); - { - oldset = current->blocked; - current->blocked = set; - recalc_sigpending(); - } + current->saved_sigmask = current->blocked; + siginitset(¤t->blocked, mask); + recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - /* - * The return below usually returns to the signal handler. We need to pre-set the - * correct error code here to ensure that the right values get saved in sigcontext - * by ia64_do_signal. - */ - scr->pt.r8 = -EINTR; - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - if (ia64_do_signal(&oldset, scr, 1)) - return -EINTR; - } -} - -asmlinkage long -ia32_rt_sigsuspend (compat_sigset_t __user *uset, unsigned int sigsetsize, struct sigscratch *scr) -{ - compat_sigset_t set; - - if (sigsetsize > sizeof(compat_sigset_t)) - return -EINVAL; - - if (copy_from_user(&set.sig, &uset->sig, sigsetsize)) - return -EFAULT; - - return __ia32_rt_sigsuspend(&set, sigsetsize, scr); -} - -asmlinkage long -ia32_sigsuspend (unsigned int mask, struct sigscratch *scr) -{ - return __ia32_rt_sigsuspend((compat_sigset_t *) &mask, sizeof(mask), scr); + current->state = TASK_INTERRUPTIBLE; + schedule(); + set_thread_flag(TIF_RESTORE_SIGMASK); + return -ERESTARTNOHAND; } asmlinkage long @@ -811,7 +771,11 @@ get_sigframe (struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) } /* Legacy stack switching not supported */ - return (void __user *)((esp - frame_size) & -8ul); + esp -= frame_size; + /* Align the stack pointer according to the i386 ABI, + * i.e. so that on function entry ((sp + 4) & 15) == 0. */ + esp = ((esp + 4) & -16ul) - 4; + return (void __user *) esp; } static int diff --git a/arch/ia64/ia32/ia32_support.c b/arch/ia64/ia32/ia32_support.c index 6af400a12ca..beea7a0b9dc 100644 --- a/arch/ia64/ia32/ia32_support.c +++ b/arch/ia64/ia32/ia32_support.c @@ -252,10 +252,8 @@ ia32_init (void) extern struct kmem_cache *partial_page_cachep; partial_page_cachep = kmem_cache_create("partial_page_cache", - sizeof(struct partial_page), 0, 0, - NULL, NULL); - if (!partial_page_cachep) - panic("Cannot create partial page SLAB cache"); + sizeof(struct partial_page), + 0, SLAB_PANIC, NULL, NULL); } #endif return 0; diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 098ee605bf5..33e5a598672 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_PCI_MSI) += msi_ia64.o mca_recovery-y += mca_drv.o mca_drv_asm.o +obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o obj-$(CONFIG_IA64_ESI) += esi.o ifneq ($(CONFIG_IA64_ESI),) diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c index 4d4993a47e5..5a216c01992 100644 --- a/arch/ia64/kernel/acpi-processor.c +++ b/arch/ia64/kernel/acpi-processor.c @@ -44,7 +44,7 @@ static void init_intel_pdc(struct acpi_processor *pr) buf[0] = ACPI_PDC_REVISION_ID; buf[1] = 1; - buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; + buf[2] = ACPI_PDC_EST_CAPABILITY_SMP; obj->type = ACPI_TYPE_BUFFER; obj->buffer.length = 12; diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 3549c94467b..103dd8edda7 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -67,7 +67,8 @@ EXPORT_SYMBOL(pm_power_off); unsigned int acpi_cpei_override; unsigned int acpi_cpei_phys_cpuid; -const char *acpi_get_sysname(void) +const char __init * +acpi_get_sysname(void) { #ifdef CONFIG_IA64_GENERIC unsigned long rsdp_phys; @@ -791,7 +792,7 @@ static __init int setup_additional_cpus(char *s) early_param("additional_cpus", setup_additional_cpus); /* - * cpu_possible_map should be static, it cannot change as cpu's + * cpu_possible_map should be static, it cannot change as CPUs * are onlined, or offlined. The reason is per-cpu data-structures * are allocated by some modules at init time, and dont expect to * do this dynamically on cpu arrival/departure. diff --git a/arch/ia64/kernel/audit.c b/arch/ia64/kernel/audit.c index 04682555a28..f3802ae89b1 100644 --- a/arch/ia64/kernel/audit.c +++ b/arch/ia64/kernel/audit.c @@ -23,6 +23,20 @@ static unsigned chattr_class[] = { ~0U }; +static unsigned signal_class[] = { +#include <asm-generic/audit_signal.h> +~0U +}; + +int audit_classify_arch(int arch) +{ +#ifdef CONFIG_IA32_SUPPORT + if (arch == AUDIT_ARCH_I386) + return 1; +#endif + return 0; +} + int audit_classify_syscall(int abi, unsigned syscall) { #ifdef CONFIG_IA32_SUPPORT @@ -49,15 +63,18 @@ static int __init audit_classes_init(void) extern __u32 ia32_write_class[]; extern __u32 ia32_read_class[]; extern __u32 ia32_chattr_class[]; + extern __u32 ia32_signal_class[]; audit_register_class(AUDIT_CLASS_WRITE_32, ia32_write_class); audit_register_class(AUDIT_CLASS_READ_32, ia32_read_class); audit_register_class(AUDIT_CLASS_DIR_WRITE_32, ia32_dir_class); audit_register_class(AUDIT_CLASS_CHATTR_32, ia32_chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL_32, ia32_signal_class); #endif audit_register_class(AUDIT_CLASS_WRITE, write_class); audit_register_class(AUDIT_CLASS_READ, read_class); audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); audit_register_class(AUDIT_CLASS_CHATTR, chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL, signal_class); return 0; } diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index 80a94e70782..1d64ef478dd 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c @@ -16,8 +16,8 @@ #include <linux/elfcore.h> #include <linux/sysctl.h> #include <linux/init.h> +#include <linux/kdebug.h> -#include <asm/kdebug.h> #include <asm/mca.h> int kdump_status[NR_CPUS]; @@ -74,7 +74,7 @@ crash_save_this_cpu(void) buf = (u64 *) per_cpu_ptr(crash_notes, cpu); if (!buf) return; - buf = append_elf_note(buf, "CORE", NT_PRSTATUS, prstatus, + buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, prstatus, sizeof(*prstatus)); final_note(buf); } @@ -156,24 +156,30 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) if (!kdump_on_init) return NOTIFY_DONE; - if (val != DIE_INIT_MONARCH_ENTER && - val != DIE_INIT_SLAVE_ENTER && + if (val != DIE_INIT_MONARCH_LEAVE && + val != DIE_INIT_SLAVE_LEAVE && + val != DIE_INIT_MONARCH_PROCESS && val != DIE_MCA_RENDZVOUS_LEAVE && val != DIE_MCA_MONARCH_LEAVE) return NOTIFY_DONE; nd = (struct ia64_mca_notify_die *)args->err; - /* Reason code 1 means machine check rendezous*/ - if ((val == DIE_INIT_MONARCH_ENTER || val == DIE_INIT_SLAVE_ENTER) && - nd->sos->rv_rc == 1) + /* Reason code 1 means machine check rendezvous*/ + if ((val == DIE_INIT_MONARCH_LEAVE || val == DIE_INIT_SLAVE_LEAVE + || val == DIE_INIT_MONARCH_PROCESS) && nd->sos->rv_rc == 1) return NOTIFY_DONE; switch (val) { - case DIE_INIT_MONARCH_ENTER: + case DIE_INIT_MONARCH_PROCESS: + atomic_set(&kdump_in_progress, 1); + *(nd->monarch_cpu) = -1; + break; + case DIE_INIT_MONARCH_LEAVE: machine_kdump_on_init(); break; - case DIE_INIT_SLAVE_ENTER: - unw_init_running(kdump_cpu_freeze, NULL); + case DIE_INIT_SLAVE_LEAVE: + if (atomic_read(&kdump_in_progress)) + unw_init_running(kdump_cpu_freeze, NULL); break; case DIE_MCA_RENDZVOUS_LEAVE: if (atomic_read(&kdump_in_progress)) @@ -215,8 +221,10 @@ static ctl_table sys_table[] = { static int machine_crash_setup(void) { + /* be notified before default_monarch_init_process */ static struct notifier_block kdump_init_notifier_nb = { .notifier_call = kdump_init_notifier, + .priority = 1, }; int ret; if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0) diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index f45f91d38ca..75ec3478d8a 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -445,11 +445,11 @@ efi_init (void) panic("Woah! Can't find EFI system table.\n"); if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) panic("Woah! EFI system table signature incorrect\n"); - if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0) - printk(KERN_WARNING "Warning: EFI system table major version mismatch: " - "got %d.%02d, expected %d.%02d\n", - efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, - EFI_SYSTEM_TABLE_REVISION >> 16, EFI_SYSTEM_TABLE_REVISION & 0xffff); + if ((efi.systab->hdr.revision >> 16) == 0) + printk(KERN_WARNING "Warning: EFI system table version " + "%d.%02d, expected 1.00 or greater\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff); config_tables = __va(efi.systab->tables); @@ -660,6 +660,29 @@ efi_memory_descriptor (unsigned long phys_addr) return NULL; } +static int +efi_memmap_intersects (unsigned long phys_addr, unsigned long size) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + unsigned long end; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + end = phys_addr + size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + + if (md->phys_addr < end && efi_md_end(md) > phys_addr) + return 1; + } + return 0; +} + u32 efi_mem_type (unsigned long phys_addr) { @@ -766,11 +789,28 @@ valid_phys_addr_range (unsigned long phys_addr, unsigned long size) int valid_mmap_phys_addr_range (unsigned long pfn, unsigned long size) { + unsigned long phys_addr = pfn << PAGE_SHIFT; + u64 attr; + + attr = efi_mem_attribute(phys_addr, size); + /* - * MMIO regions are often missing from the EFI memory map. - * We must allow mmap of them for programs like X, so we - * currently can't do any useful validation. + * /dev/mem mmap uses normal user pages, so we don't need the entire + * granule, but the entire region we're mapping must support the same + * attribute. */ + if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC) + return 1; + + /* + * Intel firmware doesn't tell us about all the MMIO regions, so + * in general we have to allow mmap requests. But if EFI *does* + * tell us about anything inside this region, we should deny it. + * The user can always map a smaller region to avoid the overlap. + */ + if (efi_memmap_intersects(phys_addr, size)) + return 0; + return 1; } diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index e7873eeae44..95f51751523 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -767,7 +767,7 @@ ENTRY(ia64_leave_syscall) ld8.fill r15=[r3] // M0|1 restore r15 mov b6=r18 // I0 restore b6 - addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A + LOAD_PHYS_STACK_REG_SIZE(r17) mov f9=f0 // F clear f9 (pKStk) br.cond.dpnt.many skip_rbs_switch // B @@ -775,7 +775,6 @@ ENTRY(ia64_leave_syscall) shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition cover // B add current frame into dirty partition & set cr.ifs ;; -(pUStk) ld4 r17=[r17] // M0|1 r17 = cpu_data->phys_stacked_size_p8 mov r19=ar.bsp // M2 get new backing store pointer mov f10=f0 // F clear f10 @@ -953,9 +952,7 @@ GLOBAL_ENTRY(ia64_leave_kernel) shr.u r18=r19,16 // get byte size of existing "dirty" partition ;; mov r16=ar.bsp // get existing backing store pointer - addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 - ;; - ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8 + LOAD_PHYS_STACK_REG_SIZE(r17) (pKStk) br.cond.dpnt skip_rbs_switch /* @@ -1202,32 +1199,6 @@ ENTRY(notify_resume_user) br.ret.sptk.many rp END(notify_resume_user) -GLOBAL_ENTRY(sys_rt_sigsuspend) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) - alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart! - mov r9=ar.unat - mov loc0=rp // save return address - mov out0=in0 // mask - mov out1=in1 // sigsetsize - adds out2=8,sp // out2=&sigscratch->ar_pfs - ;; - .fframe 16 - .spillsp ar.unat, 16 - st8 [sp]=r9,-16 // allocate space for ar.unat and save it - st8 [out2]=loc1,-8 // save ar.pfs, out2=&sigscratch - .body - br.call.sptk.many rp=ia64_rt_sigsuspend -.ret17: .restore sp - adds sp=16,sp // pop scratch stack space - ;; - ld8 r9=[sp] // load new unat from sw->caller_unat - mov rp=loc0 - ;; - mov ar.unat=r9 - mov ar.pfs=loc1 - br.ret.sptk.many rp -END(sys_rt_sigsuspend) - ENTRY(sys_rt_sigreturn) PT_REGS_UNWIND_INFO(0) /* @@ -1601,8 +1572,8 @@ sys_call_table: data8 sys_readlinkat data8 sys_fchmodat data8 sys_faccessat - data8 sys_ni_syscall // reserved for pselect - data8 sys_ni_syscall // 1295 reserved for ppoll + data8 sys_pselect6 + data8 sys_ppoll data8 sys_unshare data8 sys_splice data8 sys_set_robust_list @@ -1612,5 +1583,10 @@ sys_call_table: data8 sys_vmsplice data8 sys_ni_syscall // reserved for move_pages data8 sys_getcpu + data8 sys_epoll_pwait // 1305 + data8 sys_utimensat + data8 sys_signalfd + data8 sys_timerfd + data8 sys_eventfd .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c new file mode 100644 index 00000000000..b642648cc2a --- /dev/null +++ b/arch/ia64/kernel/err_inject.c @@ -0,0 +1,295 @@ +/* + * err_inject.c - + * 1.) Inject errors to a processor. + * 2.) Query error injection capabilities. + * This driver along with user space code can be acting as an error + * injection tool. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Written by: Fenghua Yu <fenghua.yu@intel.com>, Intel Corporation + * Copyright (C) 2006, Intel Corp. All rights reserved. + * + */ +#include <linux/sysdev.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/cpu.h> +#include <linux/module.h> + +#define ERR_INJ_DEBUG + +#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte; + +#define define_one_ro(name) \ +static SYSDEV_ATTR(name, 0444, show_##name, NULL) + +#define define_one_rw(name) \ +static SYSDEV_ATTR(name, 0644, show_##name, store_##name) + +static u64 call_start[NR_CPUS]; +static u64 phys_addr[NR_CPUS]; +static u64 err_type_info[NR_CPUS]; +static u64 err_struct_info[NR_CPUS]; +static struct { + u64 data1; + u64 data2; + u64 data3; +} __attribute__((__aligned__(16))) err_data_buffer[NR_CPUS]; +static s64 status[NR_CPUS]; +static u64 capabilities[NR_CPUS]; +static u64 resources[NR_CPUS]; + +#define show(name) \ +static ssize_t \ +show_##name(struct sys_device *dev, char *buf) \ +{ \ + u32 cpu=dev->id; \ + return sprintf(buf, "%lx\n", name[cpu]); \ +} + +#define store(name) \ +static ssize_t \ +store_##name(struct sys_device *dev, const char *buf, size_t size) \ +{ \ + unsigned int cpu=dev->id; \ + name[cpu] = simple_strtoull(buf, NULL, 16); \ + return size; \ +} + +show(call_start) + +/* It's user's responsibility to call the PAL procedure on a specific + * processor. The cpu number in driver is only used for storing data. + */ +static ssize_t +store_call_start(struct sys_device *dev, const char *buf, size_t size) +{ + unsigned int cpu=dev->id; + unsigned long call_start = simple_strtoull(buf, NULL, 16); + +#ifdef ERR_INJ_DEBUG + printk(KERN_DEBUG "pal_mc_err_inject for cpu%d:\n", cpu); + printk(KERN_DEBUG "err_type_info=%lx,\n", err_type_info[cpu]); + printk(KERN_DEBUG "err_struct_info=%lx,\n", err_struct_info[cpu]); + printk(KERN_DEBUG "err_data_buffer=%lx, %lx, %lx.\n", + err_data_buffer[cpu].data1, + err_data_buffer[cpu].data2, + err_data_buffer[cpu].data3); +#endif + switch (call_start) { + case 0: /* Do nothing. */ + break; + case 1: /* Call pal_mc_error_inject in physical mode. */ + status[cpu]=ia64_pal_mc_error_inject_phys(err_type_info[cpu], + err_struct_info[cpu], + ia64_tpa(&err_data_buffer[cpu]), + &capabilities[cpu], + &resources[cpu]); + break; + case 2: /* Call pal_mc_error_inject in virtual mode. */ + status[cpu]=ia64_pal_mc_error_inject_virt(err_type_info[cpu], + err_struct_info[cpu], + ia64_tpa(&err_data_buffer[cpu]), + &capabilities[cpu], + &resources[cpu]); + break; + default: + status[cpu] = -EINVAL; + break; + } + +#ifdef ERR_INJ_DEBUG + printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]); + printk(KERN_DEBUG "capapbilities=%lx,\n", capabilities[cpu]); + printk(KERN_DEBUG "resources=%lx\n", resources[cpu]); +#endif + return size; +} + +show(err_type_info) +store(err_type_info) + +static ssize_t +show_virtual_to_phys(struct sys_device *dev, char *buf) +{ + unsigned int cpu=dev->id; + return sprintf(buf, "%lx\n", phys_addr[cpu]); +} + +static ssize_t +store_virtual_to_phys(struct sys_device *dev, const char *buf, size_t size) +{ + unsigned int cpu=dev->id; + u64 virt_addr=simple_strtoull(buf, NULL, 16); + int ret; + + ret = get_user_pages(current, current->mm, virt_addr, + 1, VM_READ, 0, NULL, NULL); + if (ret<=0) { +#ifdef ERR_INJ_DEBUG + printk("Virtual address %lx is not existing.\n",virt_addr); +#endif + return -EINVAL; + } + + phys_addr[cpu] = ia64_tpa(virt_addr); + return size; +} + +show(err_struct_info) +store(err_struct_info) + +static ssize_t +show_err_data_buffer(struct sys_device *dev, char *buf) +{ + unsigned int cpu=dev->id; + + return sprintf(buf, "%lx, %lx, %lx\n", + err_data_buffer[cpu].data1, + err_data_buffer[cpu].data2, + err_data_buffer[cpu].data3); +} + +static ssize_t +store_err_data_buffer(struct sys_device *dev, const char *buf, size_t size) +{ + unsigned int cpu=dev->id; + int ret; + +#ifdef ERR_INJ_DEBUG + printk("write err_data_buffer=[%lx,%lx,%lx] on cpu%d\n", + err_data_buffer[cpu].data1, + err_data_buffer[cpu].data2, + err_data_buffer[cpu].data3, + cpu); +#endif + ret=sscanf(buf, "%lx, %lx, %lx", + &err_data_buffer[cpu].data1, + &err_data_buffer[cpu].data2, + &err_data_buffer[cpu].data3); + if (ret!=ERR_DATA_BUFFER_SIZE) + return -EINVAL; + + return size; +} + +show(status) +show(capabilities) +show(resources) + +define_one_rw(call_start); +define_one_rw(err_type_info); +define_one_rw(err_struct_info); +define_one_rw(err_data_buffer); +define_one_rw(virtual_to_phys); +define_one_ro(status); +define_one_ro(capabilities); +define_one_ro(resources); + +static struct attribute *default_attrs[] = { + &attr_call_start.attr, + &attr_virtual_to_phys.attr, + &attr_err_type_info.attr, + &attr_err_struct_info.attr, + &attr_err_data_buffer.attr, + &attr_status.attr, + &attr_capabilities.attr, + &attr_resources.attr, + NULL +}; + +static struct attribute_group err_inject_attr_group = { + .attrs = default_attrs, + .name = "err_inject" +}; +/* Add/Remove err_inject interface for CPU device */ +static int __cpuinit err_inject_add_dev(struct sys_device * sys_dev) +{ + return sysfs_create_group(&sys_dev->kobj, &err_inject_attr_group); +} + +static int __cpuinit err_inject_remove_dev(struct sys_device * sys_dev) +{ + sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group); + return 0; +} +static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + err_inject_add_dev(sys_dev); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + err_inject_remove_dev(sys_dev); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata err_inject_cpu_notifier = +{ + .notifier_call = err_inject_cpu_callback, +}; + +static int __init +err_inject_init(void) +{ + int i; + +#ifdef ERR_INJ_DEBUG + printk(KERN_INFO "Enter error injection driver.\n"); +#endif + for_each_online_cpu(i) { + err_inject_cpu_callback(&err_inject_cpu_notifier, CPU_ONLINE, + (void *)(long)i); + } + + register_hotcpu_notifier(&err_inject_cpu_notifier); + + return 0; +} + +static void __exit +err_inject_exit(void) +{ + int i; + struct sys_device *sys_dev; + +#ifdef ERR_INJ_DEBUG + printk(KERN_INFO "Exit error injection driver.\n"); +#endif + for_each_online_cpu(i) { + sys_dev = get_cpu_sysdev(i); + sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group); + } + unregister_hotcpu_notifier(&err_inject_cpu_notifier); +} + +module_init(err_inject_init); +module_exit(err_inject_exit); + +MODULE_AUTHOR("Fenghua Yu <fenghua.yu@intel.com>"); +MODULE_DESCRIPTION("MC error injection kernel sysfs interface"); +MODULE_LICENSE("GPL"); diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index dcfbf3e7a9e..37f46527d23 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -87,7 +87,6 @@ #include <linux/list.h> #include <linux/pci.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/string.h> #include <linux/bootmem.h> @@ -1013,7 +1012,7 @@ iosapic_register_platform_intr (u32 int_type, unsigned int gsi, /* * ACPI calls this when it finds an entry for a legacy ISA IRQ override. */ -void __init +void __devinit iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, unsigned long polarity, unsigned long trigger) diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index ce49c85c928..407b4587048 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -4,7 +4,7 @@ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar * * This file contains the code used by various IRQ handling routines: - * asking for different IRQ's should be done through these routines + * asking for different IRQs should be done through these routines * instead of just grabbing them. Thus setups with different IRQ numbers * shouldn't result in any weird surprises, and installing new handlers * should be easier. @@ -12,7 +12,7 @@ * Copyright (C) Ashok Raj<ashok.raj@intel.com>, Intel Corporation 2004 * * 4/14/2004: Added code to handle cpu migration and do safe irq - * migration without lossing interrupts for iosapic + * migration without losing interrupts for iosapic * architecture. */ @@ -104,6 +104,17 @@ void set_irq_affinity_info (unsigned int irq, int hwid, int redir) irq_redir[irq] = (char) (redir & 0xff); } } + +bool is_affinity_mask_valid(cpumask_t cpumask) +{ + if (ia64_platform_is("sn2")) { + /* Only allow one CPU to be specified in the smp_affinity mask */ + if (cpus_weight(cpumask) != 1) + return false; + } + return true; +} + #endif /* CONFIG_SMP */ #ifdef CONFIG_HOTPLUG_CPU @@ -179,7 +190,7 @@ void fixup_irqs(void) } /* - * Phase 1: Locate irq's bound to this cpu and + * Phase 1: Locate IRQs bound to this cpu and * relocate them for cpu removal. */ migrate_irqs(); diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 456f57b087c..bc47049f060 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -27,7 +27,6 @@ #include <linux/random.h> /* for rand_initialize_irq() */ #include <linux/signal.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/threads.h> #include <linux/bitops.h> #include <linux/irq.h> @@ -39,6 +38,7 @@ #include <asm/machvec.h> #include <asm/pgtable.h> #include <asm/system.h> +#include <asm/tlbflush.h> #ifdef CONFIG_PERFMON # include <asm/perfmon.h> @@ -127,8 +127,10 @@ void destroy_irq(unsigned int irq) #ifdef CONFIG_SMP # define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE) +# define IS_LOCAL_TLB_FLUSH(vec) (vec == IA64_IPI_LOCAL_TLB_FLUSH) #else # define IS_RESCHEDULE(vec) (0) +# define IS_LOCAL_TLB_FLUSH(vec) (0) #endif /* * That's where the IVT branches when we get an external @@ -180,8 +182,11 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); ia64_srlz_d(); while (vector != IA64_SPURIOUS_INT_VECTOR) { - if (unlikely(IS_RESCHEDULE(vector))) - kstat_this_cpu.irqs[vector]++; + if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { + smp_local_flush_tlb(); + kstat_this_cpu.irqs[vector]++; + } else if (unlikely(IS_RESCHEDULE(vector))) + kstat_this_cpu.irqs[vector]++; else { ia64_setreg(_IA64_REG_CR_TPR, vector); ia64_srlz_d(); @@ -227,8 +232,11 @@ void ia64_process_pending_intr(void) * Perform normal interrupt style processing */ while (vector != IA64_SPURIOUS_INT_VECTOR) { - if (unlikely(IS_RESCHEDULE(vector))) - kstat_this_cpu.irqs[vector]++; + if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { + smp_local_flush_tlb(); + kstat_this_cpu.irqs[vector]++; + } else if (unlikely(IS_RESCHEDULE(vector))) + kstat_this_cpu.irqs[vector]++; else { struct pt_regs *old_regs = set_irq_regs(NULL); @@ -260,12 +268,12 @@ void ia64_process_pending_intr(void) #ifdef CONFIG_SMP -extern irqreturn_t handle_IPI (int irq, void *dev_id); static irqreturn_t dummy_handler (int irq, void *dev_id) { BUG(); } +extern irqreturn_t handle_IPI (int irq, void *dev_id); static struct irqaction ipi_irqaction = { .handler = handle_IPI, @@ -278,6 +286,13 @@ static struct irqaction resched_irqaction = { .flags = IRQF_DISABLED, .name = "resched" }; + +static struct irqaction tlb_irqaction = { + .handler = dummy_handler, + .flags = IRQF_DISABLED, + .name = "tlb_flush" +}; + #endif void @@ -303,6 +318,7 @@ init_IRQ (void) #ifdef CONFIG_SMP register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction); register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction); + register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &tlb_irqaction); #endif #ifdef CONFIG_PERFMON pfm_init_percpu(); diff --git a/arch/ia64/kernel/irq_lsapic.c b/arch/ia64/kernel/irq_lsapic.c index c2f07beb175..e56a7a36aca 100644 --- a/arch/ia64/kernel/irq_lsapic.c +++ b/arch/ia64/kernel/irq_lsapic.c @@ -23,7 +23,7 @@ lsapic_noop_startup (unsigned int irq) static void lsapic_noop (unsigned int irq) { - /* nuthing to do... */ + /* nothing to do... */ } static int lsapic_retrigger(unsigned int irq) diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index 6b7fcbd3f6f..34f44d8be00 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -374,6 +374,7 @@ ENTRY(alt_dtlb_miss) movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) mov r21=cr.ipsr mov r31=pr + mov r24=PERCPU_ADDR ;; #ifdef CONFIG_DISABLE_VHPT shr.u r22=r16,61 // get the region number into r21 @@ -386,22 +387,30 @@ ENTRY(alt_dtlb_miss) (p8) mov r29=b0 // save b0 (p8) br.cond.dptk dtlb_fault #endif + cmp.ge p10,p11=r16,r24 // access to per_cpu_data? + tbit.z p12,p0=r16,61 // access to region 6? + mov r25=PERCPU_PAGE_SHIFT << 2 + mov r26=PERCPU_PAGE_SIZE + nop.m 0 + nop.b 0 + ;; +(p10) mov r19=IA64_KR(PER_CPU_DATA) +(p11) and r19=r19,r16 // clear non-ppn fields extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? - shr.u r18=r16,57 // move address bit 61 to bit 4 - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? ;; - andcm r18=0x10,r18 // bit 4=~address-bit(61) +(p10) sub r19=r19,r26 +(p10) mov cr.itir=r25 cmp.ne p8,p0=r0,r23 (p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field +(p12) dep r17=-1,r17,4,1 // set ma=UC for region 6 addr (p8) br.cond.spnt page_fault dep r21=-1,r21,IA64_PSR_ED_BIT,1 - or r19=r19,r17 // insert PTE control bits into r19 ;; - or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 + or r19=r19,r17 // insert PTE control bits into r19 (p6) mov cr.ipsr=r21 ;; (p7) itc.d r19 // insert the TLB entry diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 6cb56dd4056..5bc46f15134 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -29,9 +29,9 @@ #include <linux/slab.h> #include <linux/preempt.h> #include <linux/moduleloader.h> +#include <linux/kdebug.h> #include <asm/pgtable.h> -#include <asm/kdebug.h> #include <asm/sections.h> #include <asm/uaccess.h> @@ -151,12 +151,12 @@ static uint __kprobes is_cmp_ctype_unc_inst(uint template, uint slot, cmp_inst.l = kprobe_inst; if ((cmp_inst.f.x2 == 0) || (cmp_inst.f.x2 == 1)) { - /* Integere compare - Register Register (A6 type)*/ + /* Integer compare - Register Register (A6 type)*/ if ((cmp_inst.f.tb == 0) && (cmp_inst.f.ta == 0) &&(cmp_inst.f.c == 1)) ctype_unc = 1; } else if ((cmp_inst.f.x2 == 2)||(cmp_inst.f.x2 == 3)) { - /* Integere compare - Immediate Register (A8 type)*/ + /* Integer compare - Immediate Register (A8 type)*/ if ((cmp_inst.f.ta == 0) &&(cmp_inst.f.c == 1)) ctype_unc = 1; } @@ -370,14 +370,18 @@ static int __kprobes valid_kprobe_addr(int template, int slot, static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) { - kcb->prev_kprobe.kp = kprobe_running(); - kcb->prev_kprobe.status = kcb->kprobe_status; + unsigned int i; + i = atomic_add_return(1, &kcb->prev_kprobe_index); + kcb->prev_kprobe[i-1].kp = kprobe_running(); + kcb->prev_kprobe[i-1].status = kcb->kprobe_status; } static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; - kcb->kprobe_status = kcb->prev_kprobe.status; + unsigned int i; + i = atomic_sub_return(1, &kcb->prev_kprobe_index); + __get_cpu_var(current_kprobe) = kcb->prev_kprobe[i].kp; + kcb->kprobe_status = kcb->prev_kprobe[i].status; } static void __kprobes set_current_kprobe(struct kprobe *p, @@ -444,7 +448,8 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) break; } - BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); + kretprobe_assert(ri, orig_ret_address, trampoline_address); + regs->cr_iip = orig_ret_address; reset_current_kprobe(); @@ -464,23 +469,13 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) } /* Called with kretprobe_lock held */ -void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { - struct kretprobe_instance *ri; - - if ((ri = get_free_rp_inst(rp)) != NULL) { - ri->rp = rp; - ri->task = current; - ri->ret_addr = (kprobe_opcode_t *)regs->b0; - - /* Replace the return addr with trampoline addr */ - regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip; + ri->ret_addr = (kprobe_opcode_t *)regs->b0; - add_rp_inst(ri); - } else { - rp->nmissed++; - } + /* Replace the return addr with trampoline addr */ + regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip; } int __kprobes arch_prepare_kprobe(struct kprobe *p) @@ -825,7 +820,7 @@ out: return 1; } -static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr) +int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -909,13 +904,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, if (post_kprobes_handler(args->regs)) ret = NOTIFY_STOP; break; - case DIE_PAGE_FAULT: - /* kprobe_running() needs smp_processor_id() */ - preempt_disable(); - if (kprobe_running() && - kprobes_fault_handler(args->regs, args->trapnr)) - ret = NOTIFY_STOP; - preempt_enable(); default: break; } @@ -959,7 +947,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) /* * Callee owns the argument space and could overwrite it, eg * tail call optimization. So to be absolutely safe - * we save the argument space before transfering the control + * we save the argument space before transferring the control * to instrumented jprobe function which runs in * the process context */ @@ -1021,3 +1009,12 @@ int __init arch_init_kprobes(void) (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip; return register_kprobe(&trampoline_p); } + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + if (p->addr == + (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip) + return 1; + + return 0; +} diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c index 9620822270a..13df337508e 100644 --- a/arch/ia64/kernel/machvec.c +++ b/arch/ia64/kernel/machvec.c @@ -35,7 +35,7 @@ lookup_machvec (const char *name) return 0; } -void +void __init machvec_init (const char *name) { struct ia64_machine_vector *mv; diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 491687f84fb..1ead5ea6c5c 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -63,7 +63,6 @@ #include <linux/sched.h> #include <linux/interrupt.h> #include <linux/irq.h> -#include <linux/smp_lock.h> #include <linux/bootmem.h> #include <linux/acpi.h> #include <linux/timer.h> @@ -72,9 +71,9 @@ #include <linux/smp.h> #include <linux/workqueue.h> #include <linux/cpumask.h> +#include <linux/kdebug.h> #include <asm/delay.h> -#include <asm/kdebug.h> #include <asm/machvec.h> #include <asm/meminit.h> #include <asm/page.h> @@ -119,7 +118,9 @@ static ia64_mc_info_t ia64_mc_info; #define CPE_HISTORY_LENGTH 5 #define CMC_HISTORY_LENGTH 5 +#ifdef CONFIG_ACPI static struct timer_list cpe_poll_timer; +#endif static struct timer_list cmc_poll_timer; /* * This variable tells whether we are currently in polling mode. @@ -272,7 +273,6 @@ static void ia64_mlogbuf_finish(int wait) mlogbuf_finished = 1; } -EXPORT_SYMBOL(ia64_mlogbuf_finish); /* * Print buffered messages from INIT context. @@ -1476,6 +1476,10 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi struct task_struct *g, *t; if (val != DIE_INIT_MONARCH_PROCESS) return NOTIFY_DONE; +#ifdef CONFIG_KEXEC + if (atomic_read(&kdump_in_progress)) + return NOTIFY_DONE; +#endif /* * FIXME: mlogbuf will brim over with INIT stack dumps. @@ -1690,7 +1694,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset, ti->preempt_count = 1; ti->task = p; ti->cpu = cpu; - p->thread_info = ti; + p->stack = ti; p->state = TASK_UNINTERRUPTIBLE; cpu_set(cpu, p->cpus_allowed); INIT_LIST_HEAD(&p->tasks); diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index c6b607c00de..8c9c26aa6ae 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -101,14 +101,6 @@ ia64_do_tlb_purge: ;; srlz.d ;; - // 2. Purge DTR for PERCPU data. - movl r16=PERCPU_ADDR - mov r18=PERCPU_PAGE_SHIFT<<2 - ;; - ptr.d r16,r18 - ;; - srlz.d - ;; // 3. Purge ITR for PAL code. GET_THIS_PADDR(r2, ia64_mca_pal_base) ;; @@ -196,22 +188,6 @@ ia64_reload_tr: srlz.i srlz.d ;; - // 2. Reload DTR register for PERCPU data. - GET_THIS_PADDR(r2, ia64_mca_per_cpu_pte) - ;; - movl r16=PERCPU_ADDR // vaddr - movl r18=PERCPU_PAGE_SHIFT<<2 - ;; - mov cr.itir=r18 - mov cr.ifa=r16 - ;; - ld8 r18=[r2] // load per-CPU PTE - mov r16=IA64_TR_PERCPU_DATA; - ;; - itr.d dtr[r16]=r18 - ;; - srlz.d - ;; // 3. Reload ITR for PAL code. GET_THIS_PADDR(r2, ia64_mca_pal_pte) ;; diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index 832cf1e647e..aba813c2c15 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -14,7 +14,6 @@ #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/kallsyms.h> -#include <linux/smp_lock.h> #include <linux/bootmem.h> #include <linux/acpi.h> #include <linux/timer.h> @@ -439,7 +438,7 @@ is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci, * @peidx: pointer of index of processor error section * * Return value: - * target address on Success / 0 on Failue + * target address on Success / 0 on Failure */ static u64 get_target_identifier(peidx_table_t *peidx) @@ -702,7 +701,7 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, return fatal_mca("External bus check fatal status"); /* - * This is a local MCA and estimated as a recoverble error. + * This is a local MCA and estimated as a recoverable error. */ if (platform) return recover_from_platform_error(slidx, peidx, pbci, sos); diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index 158e3c51bb7..196287928ba 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -861,7 +861,7 @@ apply_relocate (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex, /* * Modules contain a single unwind table which covers both the core and the init text * sections but since the two are not contiguous, we need to split this table up such that - * we can register (and unregister) each "segment" seperately. Fortunately, this sounds + * we can register (and unregister) each "segment" separately. Fortunately, this sounds * more complicated than it really is. */ static void diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index a71df9ae039..85829e27785 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -975,9 +975,11 @@ static int palinfo_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: create_palinfo_proc_entries(hotcpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: remove_palinfo_proc_entries(hotcpu); break; } diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c index bc11bb096f5..e796e29f8e1 100644 --- a/arch/ia64/kernel/patch.c +++ b/arch/ia64/kernel/patch.c @@ -195,3 +195,23 @@ ia64_patch_gate (void) ia64_patch_vtop(START(vtop), END(vtop)); ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9)); } + +void ia64_patch_phys_stack_reg(unsigned long val) +{ + s32 * offp = (s32 *) __start___phys_stack_reg_patchlist; + s32 * end = (s32 *) __end___phys_stack_reg_patchlist; + u64 ip, mask, imm; + + /* see instruction format A4: adds r1 = imm13, r3 */ + mask = (0x3fUL << 27) | (0x7f << 13); + imm = (((val >> 7) & 0x3f) << 27) | (val & 0x7f) << 13; + + while (offp < end) { + ip = (u64) offp + *offp; + ia64_patch(ip, mask, imm); + ia64_fc(ip); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index abc7ad03588..b7133cabdbe 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -23,7 +23,6 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/interrupt.h> -#include <linux/smp_lock.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/init.h> @@ -1319,7 +1318,7 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) { unsigned long flags; /* - * validy checks on cpu_mask have been done upstream + * validity checks on cpu_mask have been done upstream */ LOCK_PFS(flags); @@ -1385,7 +1384,7 @@ pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu) { unsigned long flags; /* - * validy checks on cpu_mask have been done upstream + * validity checks on cpu_mask have been done upstream */ LOCK_PFS(flags); @@ -1836,7 +1835,7 @@ pfm_flush(struct file *filp, fl_owner_t id) /* * remove our file from the async queue, if we use this mode. * This can be done without the context being protected. We come - * here when the context has become unreacheable by other tasks. + * here when the context has become unreachable by other tasks. * * We may still have active monitoring at this point and we may * end up in pfm_overflow_handler(). However, fasync_helper() @@ -2133,7 +2132,7 @@ doit: filp->private_data = NULL; /* - * if we free on the spot, the context is now completely unreacheable + * if we free on the spot, the context is now completely unreachable * from the callers side. The monitored task side is also cut, so we * can freely cut. * @@ -2563,7 +2562,7 @@ pfm_reset_pmu_state(pfm_context_t *ctx) ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1; /* - * bitmask of all PMDs that are accesible to this context + * bitmask of all PMDs that are accessible to this context */ ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0]; @@ -3396,7 +3395,7 @@ pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) if (unlikely(!PMD_IS_IMPL(cnum))) goto error; /* * we can only read the register that we use. That includes - * the one we explicitely initialize AND the one we want included + * the one we explicitly initialize AND the one we want included * in the sampling buffer (smpl_regs). * * Having this restriction allows optimization in the ctxsw routine @@ -3716,7 +3715,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) * if non-blocking, then we ensure that the task will go into * pfm_handle_work() before returning to user mode. * - * We cannot explicitely reset another task, it MUST always + * We cannot explicitly reset another task, it MUST always * be done by the task itself. This works for system wide because * the tool that is controlling the session is logically doing * "self-monitoring". @@ -4645,7 +4644,7 @@ pfm_exit_thread(struct task_struct *task) switch(state) { case PFM_CTX_UNLOADED: /* - * only comes to thios function if pfm_context is not NULL, i.e., cannot + * only comes to this function if pfm_context is not NULL, i.e., cannot * be in unloaded state */ printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid); @@ -5248,7 +5247,7 @@ pfm_end_notify_user(pfm_context_t *ctx) /* * main overflow processing routine. - * it can be called from the interrupt path or explicitely during the context switch code + * it can be called from the interrupt path or explicitly during the context switch code */ static void pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs) diff --git a/arch/ia64/kernel/perfmon_mckinley.h b/arch/ia64/kernel/perfmon_mckinley.h index 9becccda289..c4bec7a9d18 100644 --- a/arch/ia64/kernel/perfmon_mckinley.h +++ b/arch/ia64/kernel/perfmon_mckinley.h @@ -181,7 +181,7 @@ static pmu_config_t pmu_conf_mck={ .pmc_desc = pfm_mck_pmc_desc, .num_ibrs = 8, .num_dbrs = 8, - .use_rr_dbregs = 1 /* debug register are use for range retrictions */ + .use_rr_dbregs = 1 /* debug register are use for range restrictions */ }; diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index ae96d417699..af73b8dfde2 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -20,20 +20,19 @@ #include <linux/personality.h> #include <linux/sched.h> #include <linux/slab.h> -#include <linux/smp_lock.h> #include <linux/stddef.h> #include <linux/thread_info.h> #include <linux/unistd.h> #include <linux/efi.h> #include <linux/interrupt.h> #include <linux/delay.h> +#include <linux/kdebug.h> #include <asm/cpu.h> #include <asm/delay.h> #include <asm/elf.h> #include <asm/ia32.h> #include <asm/irq.h> -#include <asm/kdebug.h> #include <asm/kexec.h> #include <asm/pgalloc.h> #include <asm/processor.h> @@ -156,7 +155,7 @@ show_regs (struct pt_regs *regs) } void -do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall) +do_notify_resume_user (sigset_t *unused, struct sigscratch *scr, long in_syscall) { if (fsys_mode(current, &scr->pt)) { /* defer signal-handling etc. until we return to privilege-level 0. */ @@ -171,8 +170,8 @@ do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall #endif /* deal with pending signal delivery */ - if (test_thread_flag(TIF_SIGPENDING)) - ia64_do_signal(oldset, scr, in_syscall); + if (test_thread_flag(TIF_SIGPENDING)||test_thread_flag(TIF_RESTORE_SIGMASK)) + ia64_do_signal(scr, in_syscall); } static int pal_halt = 1; @@ -237,6 +236,7 @@ void cpu_idle_wait(void) { unsigned int cpu, this_cpu = get_cpu(); cpumask_t map; + cpumask_t tmp = current->cpus_allowed; set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); put_cpu(); @@ -258,6 +258,7 @@ void cpu_idle_wait(void) } cpus_and(map, map, cpu_online_map); } while (!cpus_empty(map)); + set_cpus_allowed(current, tmp); } EXPORT_SYMBOL_GPL(cpu_idle_wait); @@ -762,6 +763,9 @@ get_wchan (struct task_struct *p) unsigned long ip; int count = 0; + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + /* * Note: p may not be a blocked task (it could be current or * another process running on some other CPU. Rather than @@ -772,6 +776,8 @@ get_wchan (struct task_struct *p) */ unw_init_from_blocked_task(&info, p); do { + if (p->state == TASK_RUNNING) + return 0; if (unw_unwind(&info) < 0) return 0; unw_get_ip(&info, &ip); diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S index ae473e3f2a0..903babd22d6 100644 --- a/arch/ia64/kernel/relocate_kernel.S +++ b/arch/ia64/kernel/relocate_kernel.S @@ -94,7 +94,7 @@ GLOBAL_ENTRY(relocate_new_kernel) 4: srlz.i ;; - //purge TR entry for kernel text and data + // purge TR entry for kernel text and data movl r16=KERNEL_START mov r18=KERNEL_TR_PAGE_SHIFT<<2 ;; @@ -104,15 +104,6 @@ GLOBAL_ENTRY(relocate_new_kernel) srlz.i ;; - // purge TR entry for percpu data - movl r16=PERCPU_ADDR - mov r18=PERCPU_PAGE_SHIFT<<2 - ;; - ptr.d r16,r18 - ;; - srlz.d - ;; - // purge TR entry for pal code mov r16=in3 mov r18=IA64_GRANULE_SHIFT<<2 diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c index 37c876f95db..27c2ef445a5 100644 --- a/arch/ia64/kernel/sal.c +++ b/arch/ia64/kernel/sal.c @@ -134,7 +134,7 @@ set_smp_redirect (int flag) * interrupt redirection. The reason is this would require that * All interrupts be stopped and hard bind the irq to a cpu. * Later when the interrupt is fired we need to set the redir hint - * on again in the vector. This is combersome for something that the + * on again in the vector. This is cumbersome for something that the * user mode irq balancer will solve anyways. */ no_int_routing=1; diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index af9f8754d84..25cd75f50ab 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -42,7 +42,6 @@ #include <linux/proc_fs.h> #include <linux/module.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/timer.h> #include <linux/vmalloc.h> @@ -163,7 +162,7 @@ static DEFINE_SPINLOCK(data_saved_lock); /** salinfo_platform_oemdata - optional callback to decode oemdata from an error * record. * @sect_header: pointer to the start of the section to decode. - * @oemdata: returns vmalloc area containing the decded output. + * @oemdata: returns vmalloc area containing the decoded output. * @oemdata_size: returns length of decoded output (strlen). * * Description: If user space asks for oem data to be decoded by the kernel @@ -583,6 +582,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu struct salinfo_data *data; switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: spin_lock_irqsave(&data_saved_lock, flags); for (i = 0, data = salinfo_data; i < ARRAY_SIZE(salinfo_data); @@ -593,6 +593,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu spin_unlock_irqrestore(&data_saved_lock, flags); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: spin_lock_irqsave(&data_saved_lock, flags); for (i = 0, data = salinfo_data; i < ARRAY_SIZE(salinfo_data); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index dc7dd7648ec..eaa6a24bc0b 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -75,7 +75,6 @@ extern void ia64_setup_printk_clock(void); DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info); DEFINE_PER_CPU(unsigned long, local_per_cpu_offset); -DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8); unsigned long ia64_cycles_per_usec; struct ia64_boot_param *ia64_boot_param; struct screen_info screen_info; @@ -577,7 +576,7 @@ setup_arch (char **cmdline_p) } /* - * Display cpu info for all cpu's. + * Display cpu info for all CPUs. */ static int show_cpuinfo (struct seq_file *m, void *v) @@ -762,7 +761,7 @@ identify_cpu (struct cpuinfo_ia64 *c) c->cpu = smp_processor_id(); /* below default values will be overwritten by identify_siblings() - * for Multi-Threading/Multi-Core capable cpu's + * for Multi-Threading/Multi-Core capable CPUs */ c->threads_per_core = c->cores_per_socket = c->num_log = 1; c->socket_id = -1; @@ -787,7 +786,7 @@ identify_cpu (struct cpuinfo_ia64 *c) c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1)); } -void +void __init setup_per_cpu_areas (void) { /* start_kernel() requires this... */ @@ -869,6 +868,7 @@ void __cpuinit cpu_init (void) { extern void __cpuinit ia64_mmu_init (void *); + static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG; unsigned long num_phys_stacked; pal_vm_info_2_u_t vmi; unsigned int max_ctx; @@ -947,7 +947,7 @@ cpu_init (void) ia32_cpu_init(); #endif - /* Clear ITC to eliminiate sched_clock() overflows in human time. */ + /* Clear ITC to eliminate sched_clock() overflows in human time. */ ia64_set_itc(0); /* disable all local interrupt sources: */ @@ -982,7 +982,10 @@ cpu_init (void) num_phys_stacked = 96; } /* size of physical stacked register partition plus 8 bytes: */ - __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8; + if (num_phys_stacked > max_num_phys_stacked) { + ia64_patch_phys_stack_reg(num_phys_stacked*8 + 8); + max_num_phys_stacked = num_phys_stacked; + } platform_cpu_init(); pm_idle = default_idle; } diff --git a/arch/ia64/kernel/sigframe.h b/arch/ia64/kernel/sigframe.h index 37b986cb86e..9fd9a1933b3 100644 --- a/arch/ia64/kernel/sigframe.h +++ b/arch/ia64/kernel/sigframe.h @@ -22,4 +22,4 @@ struct sigframe { struct sigcontext sc; }; -extern long ia64_do_signal (sigset_t *, struct sigscratch *, long); +extern void ia64_do_signal (struct sigscratch *, long); diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 77f8b49c788..aeec8184e86 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -14,7 +14,6 @@ #include <linux/sched.h> #include <linux/signal.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/stddef.h> #include <linux/tty.h> #include <linux/binfmts.h> @@ -41,47 +40,6 @@ # define GET_SIGSET(k,u) __get_user((k)->sig[0], &(u)->sig[0]) #endif -long -ia64_rt_sigsuspend (sigset_t __user *uset, size_t sigsetsize, struct sigscratch *scr) -{ - sigset_t oldset, set; - - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - - if (!access_ok(VERIFY_READ, uset, sigsetsize)) - return -EFAULT; - - if (GET_SIGSET(&set, uset)) - return -EFAULT; - - sigdelsetmask(&set, ~_BLOCKABLE); - - spin_lock_irq(¤t->sighand->siglock); - { - oldset = current->blocked; - current->blocked = set; - recalc_sigpending(); - } - spin_unlock_irq(¤t->sighand->siglock); - - /* - * The return below usually returns to the signal handler. We need to - * pre-set the correct error code here to ensure that the right values - * get saved in sigcontext by ia64_do_signal. - */ - scr->pt.r8 = EINTR; - scr->pt.r10 = -1; - - while (1) { - current->state = TASK_INTERRUPTIBLE; - schedule(); - if (ia64_do_signal(&oldset, scr, 1)) - return -EINTR; - } -} - asmlinkage long sys_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, long arg2, long arg3, long arg4, long arg5, long arg6, long arg7, @@ -478,10 +436,11 @@ handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigse * Note that `init' is a special process: it doesn't get signals it doesn't want to * handle. Thus you cannot kill init even with a SIGKILL even by mistake. */ -long -ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) +void +ia64_do_signal (struct sigscratch *scr, long in_syscall) { struct k_sigaction ka; + sigset_t *oldset; siginfo_t info; long restart = in_syscall; long errno = scr->pt.r8; @@ -493,9 +452,11 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) * doing anything if so. */ if (!user_mode(&scr->pt)) - return 0; + return; - if (!oldset) + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + oldset = ¤t->saved_sigmask; + else oldset = ¤t->blocked; /* @@ -558,8 +519,15 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) * Whee! Actually deliver the signal. If the delivery failed, we need to * continue to iterate in this loop so we can deliver the SIGSEGV... */ - if (handle_signal(signr, &ka, &info, oldset, scr)) - return 1; + if (handle_signal(signr, &ka, &info, oldset, scr)) { + /* a signal was successfully delivered; the saved + * sigmask will have been stored in the signal frame, + * and will be restored by sigreturn, so we can simply + * clear the TIF_RESTORE_SIGMASK flag */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + clear_thread_flag(TIF_RESTORE_SIGMASK); + return; + } } /* Did we come from a system call? */ @@ -585,5 +553,11 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) } } } - return 0; + + /* if there's no signal to deliver, we just put the saved sigmask + * back */ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) { + clear_thread_flag(TIF_RESTORE_SIGMASK); + sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); + } } diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 55ddd809b02..b3a47f986e1 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -50,6 +50,18 @@ #include <asm/mca.h> /* + * Note: alignment of 4 entries/cacheline was empirically determined + * to be a good tradeoff between hot cachelines & spreading the array + * across too many cacheline. + */ +static struct local_tlb_flush_counts { + unsigned int count; +} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS]; + +static DEFINE_PER_CPU(unsigned int, shadow_flush_counts[NR_CPUS]) ____cacheline_aligned; + + +/* * Structure and data for smp_call_function(). This is designed to minimise static memory * requirements. It also looks cleaner. */ @@ -174,7 +186,7 @@ handle_IPI (int irq, void *dev_id) } /* - * Called with preeemption disabled. + * Called with preemption disabled. */ static inline void send_IPI_single (int dest_cpu, int op) @@ -184,7 +196,7 @@ send_IPI_single (int dest_cpu, int op) } /* - * Called with preeemption disabled. + * Called with preemption disabled. */ static inline void send_IPI_allbutself (int op) @@ -198,7 +210,7 @@ send_IPI_allbutself (int op) } /* - * Called with preeemption disabled. + * Called with preemption disabled. */ static inline void send_IPI_all (int op) @@ -211,7 +223,7 @@ send_IPI_all (int op) } /* - * Called with preeemption disabled. + * Called with preemption disabled. */ static inline void send_IPI_self (int op) @@ -240,7 +252,7 @@ kdump_smp_send_init(void) } #endif /* - * Called with preeemption disabled. + * Called with preemption disabled. */ void smp_send_reschedule (int cpu) @@ -248,6 +260,62 @@ smp_send_reschedule (int cpu) platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); } +/* + * Called with preemption disabled. + */ +static void +smp_send_local_flush_tlb (int cpu) +{ + platform_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0); +} + +void +smp_local_flush_tlb(void) +{ + /* + * Use atomic ops. Otherwise, the load/increment/store sequence from + * a "++" operation can have the line stolen between the load & store. + * The overhead of the atomic op in negligible in this case & offers + * significant benefit for the brief periods where lots of cpus + * are simultaneously flushing TLBs. + */ + ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq); + local_flush_tlb_all(); +} + +#define FLUSH_DELAY 5 /* Usec backoff to eliminate excessive cacheline bouncing */ + +void +smp_flush_tlb_cpumask(cpumask_t xcpumask) +{ + unsigned int *counts = __ia64_per_cpu_var(shadow_flush_counts); + cpumask_t cpumask = xcpumask; + int mycpu, cpu, flush_mycpu = 0; + + preempt_disable(); + mycpu = smp_processor_id(); + + for_each_cpu_mask(cpu, cpumask) + counts[cpu] = local_tlb_flush_counts[cpu].count; + + mb(); + for_each_cpu_mask(cpu, cpumask) { + if (cpu == mycpu) + flush_mycpu = 1; + else + smp_send_local_flush_tlb(cpu); + } + + if (flush_mycpu) + smp_local_flush_tlb(); + + for_each_cpu_mask(cpu, cpumask) + while(counts[cpu] == local_tlb_flush_counts[cpu].count) + udelay(FLUSH_DELAY); + + preempt_enable(); +} + void smp_flush_tlb_all (void) { diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index ff7df439da6..3c9d8e6089c 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -35,7 +35,6 @@ #include <linux/mm.h> #include <linux/notifier.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/spinlock.h> #include <linux/efi.h> #include <linux/percpu.h> @@ -371,7 +370,7 @@ smp_setup_percpu_timer (void) { } -static void __devinit +static void __cpuinit smp_callin (void) { int cpuid, phys_id, itc_master; @@ -457,7 +456,7 @@ smp_callin (void) /* * Activate a secondary processor. head.S calls this. */ -int __devinit +int __cpuinit start_secondary (void *unused) { /* Early console may use I/O ports */ @@ -695,7 +694,7 @@ int migrate_platform_irqs(unsigned int cpu) set_cpei_target_cpu(new_cpei_cpu); desc = irq_desc + ia64_cpe_irq; /* - * Switch for now, immediatly, we need to do fake intr + * Switch for now, immediately, we need to do fake intr * as other interrupts, but need to study CPEI behaviour with * polling before making changes. */ @@ -841,7 +840,7 @@ __cpu_up (unsigned int cpu) } /* - * Assume that CPU's have been discovered by some platform-dependent interface. For + * Assume that CPUs have been discovered by some platform-dependent interface. For * SoftSDV/Lion, that would be ACPI. * * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP(). @@ -855,7 +854,7 @@ init_smp_config(void) } *ap_startup; long sal_ret; - /* Tell SAL where to drop the AP's. */ + /* Tell SAL where to drop the APs. */ ap_startup = (struct fptr *) start_ap; sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ, ia64_tpa(ap_startup->fp), ia64_tpa(ap_startup->gp), 0, 0, 0, 0); diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index 9ef62a3fbfa..1eda194b955 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -13,7 +13,6 @@ #include <linux/shm.h> #include <linux/file.h> /* doh, must come after sched.h... */ #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/syscalls.h> #include <linux/highuid.h> #include <linux/hugetlb.h> @@ -33,6 +32,13 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len if (len > RGN_MAP_LIMIT) return -ENOMEM; + /* handle fixed mapping: prevent overlap with huge pages */ + if (flags & MAP_FIXED) { + if (is_hugepage_only_range(mm, addr, len)) + return -EINVAL; + return addr; + } + #ifdef CONFIG_HUGETLB_PAGE if (REGION_NUMBER(addr) == RGN_HPAGE) addr = 0; diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 39e0cd3a088..a06667c7acc 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -235,7 +235,7 @@ ia64_init_itm (void) static struct irqaction timer_irqaction = { .handler = timer_interrupt, - .flags = IRQF_DISABLED, + .flags = IRQF_DISABLED | IRQF_IRQPOLL, .name = "timer" }; diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 687500ddb4b..94ae3c87d82 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -412,9 +412,11 @@ static int __cpuinit cache_cpu_callback(struct notifier_block *nfb, sys_dev = get_cpu_sysdev(cpu); switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: cache_add_dev(sys_dev); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: cache_remove_dev(sys_dev); break; } diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 765cbe5ba6a..15ad85da15a 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -16,33 +16,17 @@ #include <linux/hardirq.h> #include <linux/kprobes.h> #include <linux/delay.h> /* for ssleep() */ +#include <linux/kdebug.h> #include <asm/fpswa.h> #include <asm/ia32.h> #include <asm/intrinsics.h> #include <asm/processor.h> #include <asm/uaccess.h> -#include <asm/kdebug.h> fpswa_interface_t *fpswa_interface; EXPORT_SYMBOL(fpswa_interface); -ATOMIC_NOTIFIER_HEAD(ia64die_chain); - -int -register_die_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&ia64die_chain, nb); -} -EXPORT_SYMBOL_GPL(register_die_notifier); - -int -unregister_die_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&ia64die_chain, nb); -} -EXPORT_SYMBOL_GPL(unregister_die_notifier); - void __init trap_init (void) { @@ -59,9 +43,9 @@ die (const char *str, struct pt_regs *regs, long err) u32 lock_owner; int lock_owner_depth; } die = { - .lock = SPIN_LOCK_UNLOCKED, - .lock_owner = -1, - .lock_owner_depth = 0 + .lock = __SPIN_LOCK_UNLOCKED(die.lock), + .lock_owner = -1, + .lock_owner_depth = 0 }; static int die_counter; int cpu = get_cpu(); @@ -320,7 +304,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) * Lower 4 bits are used as a count. Upper bits are a sequence * number that is updated when count is reset. The cmpxchg will * fail is seqno has changed. This minimizes mutiple cpus - * reseting the count. + * resetting the count. */ if (current_jiffies > last.time) (void) cmpxchg_acq(&last.count, count, 16 + (count & ~15)); diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 1e357550c77..fe6aa5a9f8f 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -15,7 +15,6 @@ */ #include <linux/kernel.h> #include <linux/sched.h> -#include <linux/smp_lock.h> #include <linux/tty.h> #include <asm/intrinsics.h> diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c index 93d5a3b41f6..b0b08b5f3ec 100644 --- a/arch/ia64/kernel/unwind.c +++ b/arch/ia64/kernel/unwind.c @@ -2,7 +2,7 @@ * Copyright (C) 1999-2004 Hewlett-Packard Co * David Mosberger-Tang <davidm@hpl.hp.com> * Copyright (C) 2003 Fenghua Yu <fenghua.yu@intel.com> - * - Change pt_regs_off() to make it less dependant on pt_regs structure. + * - Change pt_regs_off() to make it less dependent on pt_regs structure. */ /* * This file implements call frame unwind support for the Linux @@ -60,6 +60,7 @@ # define UNW_DEBUG_ON(n) unw_debug_level >= n /* Do not code a printk level, not all debug lines end in newline */ # define UNW_DPRINT(n, ...) if (UNW_DEBUG_ON(n)) printk(__VA_ARGS__) +# undef inline # define inline #else /* !UNW_DEBUG */ # define UNW_DEBUG_ON(n) 0 @@ -145,7 +146,7 @@ static struct { # endif } unw = { .tables = &unw.kernel_table, - .lock = SPIN_LOCK_UNLOCKED, + .lock = __SPIN_LOCK_UNLOCKED(unw.lock), .save_order = { UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR, UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR @@ -1859,7 +1860,7 @@ int unw_unwind (struct unw_frame_info *info) { unsigned long prev_ip, prev_sp, prev_bsp; - unsigned long ip, pr, num_regs; + unsigned long ip, pr, num_regs, rp_loc, pfs_loc; STAT(unsigned long start, flags;) int retval; @@ -1869,14 +1870,16 @@ unw_unwind (struct unw_frame_info *info) prev_sp = info->sp; prev_bsp = info->bsp; - /* restore the ip */ - if (!info->rp_loc) { + /* validate the return IP pointer */ + rp_loc = (unsigned long) info->rp_loc; + if ((rp_loc < info->regstk.limit) || (rp_loc > info->regstk.top)) { /* FIXME: should really be level 0 but it occurs too often. KAO */ UNW_DPRINT(1, "unwind.%s: failed to locate return link (ip=0x%lx)!\n", __FUNCTION__, info->ip); STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); return -1; } + /* restore the ip */ ip = info->ip = *info->rp_loc; if (ip < GATE_ADDR) { UNW_DPRINT(2, "unwind.%s: reached user-space (ip=0x%lx)\n", __FUNCTION__, ip); @@ -1884,12 +1887,14 @@ unw_unwind (struct unw_frame_info *info) return -1; } - /* restore the cfm: */ - if (!info->pfs_loc) { + /* validate the previous stack frame pointer */ + pfs_loc = (unsigned long) info->pfs_loc; + if ((pfs_loc < info->regstk.limit) || (pfs_loc > info->regstk.top)) { UNW_DPRINT(0, "unwind.%s: failed to locate ar.pfs!\n", __FUNCTION__); STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); return -1; } + /* restore the cfm: */ info->cfm_loc = info->pfs_loc; /* restore the bsp: */ @@ -1943,9 +1948,9 @@ EXPORT_SYMBOL(unw_unwind); int unw_unwind_to_user (struct unw_frame_info *info) { - unsigned long ip, sp, pr = 0; + unsigned long ip, sp, pr = info->pr; - while (unw_unwind(info) >= 0) { + do { unw_get_sp(info, &sp); if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp) < IA64_PT_REGS_SIZE) { @@ -1963,7 +1968,7 @@ unw_unwind_to_user (struct unw_frame_info *info) __FUNCTION__, ip); return -1; } - } + } while (unw_unwind(info) >= 0); unw_get_ip(info, &ip); UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n", __FUNCTION__, ip); @@ -1991,13 +1996,16 @@ init_frame_info (struct unw_frame_info *info, struct task_struct *t, memset(info, 0, sizeof(*info)); rbslimit = (unsigned long) t + IA64_RBS_OFFSET; + stklimit = (unsigned long) t + IA64_STK_OFFSET; + rbstop = sw->ar_bspstore; - if (rbstop - (unsigned long) t >= IA64_STK_OFFSET) + if (rbstop > stklimit || rbstop < rbslimit) rbstop = rbslimit; - stklimit = (unsigned long) t + IA64_STK_OFFSET; if (stktop <= rbstop) stktop = rbstop; + if (stktop > stklimit) + stktop = stklimit; info->regstk.limit = rbslimit; info->regstk.top = rbstop; diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 25dd55e4db2..5a65965c8b5 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -44,7 +44,7 @@ SECTIONS .text : AT(ADDR(.text) - LOAD_OFFSET) { IVT_TEXT - *(.text) + TEXT_TEXT SCHED_TEXT LOCK_TEXT KPROBES_TEXT @@ -78,6 +78,13 @@ SECTIONS __stop___mca_table = .; } + .data.patch.phys_stack_reg : AT(ADDR(.data.patch.phys_stack_reg) - LOAD_OFFSET) + { + __start___phys_stack_reg_patchlist = .; + *(.data.patch.phys_stack_reg) + __end___phys_stack_reg_patchlist = .; + } + /* Global data */ _data = .; @@ -207,7 +214,12 @@ SECTIONS data : { } :data .data : AT(ADDR(.data) - LOAD_OFFSET) - { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } + { + DATA_DATA + *(.data1) + *(.gnu.linkonce.d*) + CONSTRUCTORS + } . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ .got : AT(ADDR(.got) - LOAD_OFFSET) diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c index 503dfe6d145..118daf5a063 100644 --- a/arch/ia64/lib/csum_partial_copy.c +++ b/arch/ia64/lib/csum_partial_copy.c @@ -128,6 +128,8 @@ csum_partial_copy_from_user(const void __user *src, void *dst, return (__force __wsum)result; } +EXPORT_SYMBOL(csum_partial_copy_from_user); + __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum) { diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c index 44ce5ed9444..7ac8592a35b 100644 --- a/arch/ia64/mm/contig.c +++ b/arch/ia64/mm/contig.c @@ -88,7 +88,7 @@ void show_mem(void) printk(KERN_INFO "%d pages shared\n", total_shared); printk(KERN_INFO "%d pages swap cached\n", total_cached); printk(KERN_INFO "Total of %ld pages in page table cache\n", - pgtable_quicklist_total_size()); + quicklist_total_size()); printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages()); } diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 872da7a2acc..0dbf0e81f8c 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -317,7 +317,7 @@ static void __meminit scatter_node_data(void) * node_online_map is not set for hot-added nodes at this time, * because we are halfway through initialization of the new node's * structures. If for_each_online_node() is used, a new node's - * pg_data_ptrs will be not initialized. Insted of using it, + * pg_data_ptrs will be not initialized. Instead of using it, * pgdat_list[] is checked. */ for_each_node(node) { @@ -561,7 +561,7 @@ void show_mem(void) printk(KERN_INFO "%d pages shared\n", total_shared); printk(KERN_INFO "%d pages swap cached\n", total_cached); printk(KERN_INFO "Total of %ld pages in page table cache\n", - pgtable_quicklist_total_size()); + quicklist_total_size()); printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages()); } @@ -693,6 +693,7 @@ void __init paging_init(void) zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); } +#ifdef CONFIG_MEMORY_HOTPLUG pg_data_t *arch_alloc_nodedata(int nid) { unsigned long size = compute_pernodesize(nid); @@ -710,3 +711,4 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat) pgdat_list[update_node] = update_pgdat; scatter_node_data(); } +#endif diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 59f3ab93761..b87f785c241 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -7,49 +7,36 @@ #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> -#include <linux/smp_lock.h> #include <linux/interrupt.h> #include <linux/kprobes.h> +#include <linux/kdebug.h> #include <asm/pgtable.h> #include <asm/processor.h> #include <asm/system.h> #include <asm/uaccess.h> -#include <asm/kdebug.h> extern void die (char *, struct pt_regs *, long); #ifdef CONFIG_KPROBES -ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); - -/* Hook to register for page fault notifications */ -int register_page_fault_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); -} - -int unregister_page_fault_notifier(struct notifier_block *nb) +static inline int notify_page_fault(struct pt_regs *regs, int trap) { - return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); -} + int ret = 0; + + if (!user_mode(regs)) { + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); + if (kprobe_running() && kprobes_fault_handler(regs, trap)) + ret = 1; + preempt_enable(); + } -static inline int notify_page_fault(enum die_val val, const char *str, - struct pt_regs *regs, long err, int trap, int sig) -{ - struct die_args args = { - .regs = regs, - .str = str, - .err = err, - .trapnr = trap, - .signr = sig - }; - return atomic_notifier_call_chain(¬ify_page_fault_chain, val, &args); + return ret; } #else -static inline int notify_page_fault(enum die_val val, const char *str, - struct pt_regs *regs, long err, int trap, int sig) +static inline int notify_page_fault(struct pt_regs *regs, int trap) { - return NOTIFY_DONE; + return 0; } #endif @@ -118,8 +105,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re /* * This is to handle the kprobes on user space access instructions */ - if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, code, TRAP_BRKPT, - SIGSEGV) == NOTIFY_STOP) + if (notify_page_fault(regs, TRAP_BRKPT)) return; down_read(&mm->mmap_sem); diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index 0c7e94edc20..1346b7f0539 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -13,7 +13,6 @@ #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/pagemap.h> -#include <linux/smp_lock.h> #include <linux/slab.h> #include <linux/sysctl.h> #include <asm/mman.h> @@ -148,6 +147,14 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u return -ENOMEM; if (len & ~HPAGE_MASK) return -EINVAL; + + /* Handle MAP_FIXED */ + if (flags & MAP_FIXED) { + if (prepare_hugepage_range(addr, len, pgoff)) + return -EINVAL; + return addr; + } + /* This code assumes that RGN_HPAGE != 0. */ if ((REGION_NUMBER(addr) != RGN_HPAGE) || (addr & (HPAGE_SIZE - 1))) addr = HPAGE_REGION_BASE; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 4f36987eea7..c14abefabaf 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -39,9 +39,6 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist); -DEFINE_PER_CPU(long, __pgtable_quicklist_size); - extern void ia64_tlb_init (void); unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL; @@ -56,54 +53,6 @@ EXPORT_SYMBOL(vmem_map); struct page *zero_page_memmap_ptr; /* map entry for zero page */ EXPORT_SYMBOL(zero_page_memmap_ptr); -#define MIN_PGT_PAGES 25UL -#define MAX_PGT_FREES_PER_PASS 16L -#define PGT_FRACTION_OF_NODE_MEM 16 - -static inline long -max_pgt_pages(void) -{ - u64 node_free_pages, max_pgt_pages; - -#ifndef CONFIG_NUMA - node_free_pages = nr_free_pages(); -#else - node_free_pages = node_page_state(numa_node_id(), NR_FREE_PAGES); -#endif - max_pgt_pages = node_free_pages / PGT_FRACTION_OF_NODE_MEM; - max_pgt_pages = max(max_pgt_pages, MIN_PGT_PAGES); - return max_pgt_pages; -} - -static inline long -min_pages_to_free(void) -{ - long pages_to_free; - - pages_to_free = pgtable_quicklist_size - max_pgt_pages(); - pages_to_free = min(pages_to_free, MAX_PGT_FREES_PER_PASS); - return pages_to_free; -} - -void -check_pgt_cache(void) -{ - long pages_to_free; - - if (unlikely(pgtable_quicklist_size <= MIN_PGT_PAGES)) - return; - - preempt_disable(); - while (unlikely((pages_to_free = min_pages_to_free()) > 0)) { - while (pages_to_free--) { - free_page((unsigned long)pgtable_quicklist_alloc()); - } - preempt_enable(); - preempt_disable(); - } - preempt_enable(); -} - void lazy_mmu_prot_update (pte_t pte) { @@ -121,7 +70,7 @@ lazy_mmu_prot_update (pte_t pte) return; /* i-cache is already coherent with d-cache */ if (PageCompound(page)) { - order = (unsigned long) (page[1].lru.prev); + order = compound_order(page); flush_icache_range(addr, addr + (1UL << order << PAGE_SHIFT)); } else @@ -355,7 +304,7 @@ setup_gate (void) void __devinit ia64_mmu_init (void *my_cpu_data) { - unsigned long psr, pta, impl_va_bits; + unsigned long pta, impl_va_bits; extern void __devinit tlb_init (void); #ifdef CONFIG_DISABLE_VHPT @@ -364,15 +313,6 @@ ia64_mmu_init (void *my_cpu_data) # define VHPT_ENABLE_BIT 1 #endif - /* Pin mapping for percpu area into TLB */ - psr = ia64_clear_ic(); - ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR, - pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)), - PERCPU_PAGE_SHIFT); - - ia64_set_psr(psr); - ia64_srlz_i(); - /* * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped * address space. The IA-64 architecture guarantees that at least 50 bits of diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c index 4280c074d64..2a140627dfd 100644 --- a/arch/ia64/mm/ioremap.c +++ b/arch/ia64/mm/ioremap.c @@ -1,5 +1,5 @@ /* - * (c) Copyright 2006 Hewlett-Packard Development Company, L.P. + * (c) Copyright 2006, 2007 Hewlett-Packard Development Company, L.P. * Bjorn Helgaas <bjorn.helgaas@hp.com> * * This program is free software; you can redistribute it and/or modify @@ -10,51 +10,101 @@ #include <linux/compiler.h> #include <linux/module.h> #include <linux/efi.h> +#include <linux/io.h> +#include <linux/vmalloc.h> #include <asm/io.h> #include <asm/meminit.h> static inline void __iomem * -__ioremap (unsigned long offset, unsigned long size) +__ioremap (unsigned long phys_addr) { - return (void __iomem *) (__IA64_UNCACHED_OFFSET | offset); + return (void __iomem *) (__IA64_UNCACHED_OFFSET | phys_addr); } void __iomem * -ioremap (unsigned long offset, unsigned long size) +ioremap (unsigned long phys_addr, unsigned long size) { + void __iomem *addr; + struct vm_struct *area; + unsigned long offset; + pgprot_t prot; u64 attr; unsigned long gran_base, gran_size; + unsigned long page_base; /* * For things in kern_memmap, we must use the same attribute * as the rest of the kernel. For more details, see * Documentation/ia64/aliasing.txt. */ - attr = kern_mem_attribute(offset, size); + attr = kern_mem_attribute(phys_addr, size); if (attr & EFI_MEMORY_WB) - return (void __iomem *) phys_to_virt(offset); + return (void __iomem *) phys_to_virt(phys_addr); else if (attr & EFI_MEMORY_UC) - return __ioremap(offset, size); + return __ioremap(phys_addr); /* * Some chipsets don't support UC access to memory. If * WB is supported for the whole granule, we prefer that. */ - gran_base = GRANULEROUNDDOWN(offset); - gran_size = GRANULEROUNDUP(offset + size) - gran_base; + gran_base = GRANULEROUNDDOWN(phys_addr); + gran_size = GRANULEROUNDUP(phys_addr + size) - gran_base; if (efi_mem_attribute(gran_base, gran_size) & EFI_MEMORY_WB) - return (void __iomem *) phys_to_virt(offset); + return (void __iomem *) phys_to_virt(phys_addr); - return __ioremap(offset, size); + /* + * WB is not supported for the whole granule, so we can't use + * the region 7 identity mapping. If we can safely cover the + * area with kernel page table mappings, we can use those + * instead. + */ + page_base = phys_addr & PAGE_MASK; + size = PAGE_ALIGN(phys_addr + size) - page_base; + if (efi_mem_attribute(page_base, size) & EFI_MEMORY_WB) { + prot = PAGE_KERNEL; + + /* + * Mappings have to be page-aligned + */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + + /* + * Ok, go for it.. + */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) + return NULL; + + area->phys_addr = phys_addr; + addr = (void __iomem *) area->addr; + if (ioremap_page_range((unsigned long) addr, + (unsigned long) addr + size, phys_addr, prot)) { + vunmap((void __force *) addr); + return NULL; + } + + return (void __iomem *) (offset + (char __iomem *)addr); + } + + return __ioremap(phys_addr); } EXPORT_SYMBOL(ioremap); void __iomem * -ioremap_nocache (unsigned long offset, unsigned long size) +ioremap_nocache (unsigned long phys_addr, unsigned long size) { - if (kern_mem_attribute(offset, size) & EFI_MEMORY_WB) + if (kern_mem_attribute(phys_addr, size) & EFI_MEMORY_WB) return NULL; - return __ioremap(offset, size); + return __ioremap(phys_addr); } EXPORT_SYMBOL(ioremap_nocache); + +void +iounmap (volatile void __iomem *addr) +{ + if (REGION_NUMBER(addr) == RGN_GATE) + vunmap((void *) ((unsigned long) addr & PAGE_MASK)); +} +EXPORT_SYMBOL(iounmap); diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index ffad7624436..fa4e6d4810f 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -32,9 +32,9 @@ static struct { } purge; struct ia64_ctx ia64_ctx = { - .lock = SPIN_LOCK_UNLOCKED, - .next = 1, - .max_ctx = ~0U + .lock = __SPIN_LOCK_UNLOCKED(ia64_ctx.lock), + .next = 1, + .max_ctx = ~0U }; DEFINE_PER_CPU(u8, ia64_need_tlb_flush); diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 0e83f3b419b..73696b4a2ee 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -18,7 +18,6 @@ #include <linux/init.h> #include <linux/ioport.h> #include <linux/slab.h> -#include <linux/smp_lock.h> #include <linux/spinlock.h> #include <asm/machvec.h> @@ -355,10 +354,13 @@ pci_acpi_scan_root(struct acpi_device *device, int domain, int bus) acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window, &windows); - controller->window = kmalloc_node(sizeof(*controller->window) * windows, - GFP_KERNEL, controller->node); - if (!controller->window) - goto out2; + if (windows) { + controller->window = + kmalloc_node(sizeof(*controller->window) * windows, + GFP_KERNEL, controller->node); + if (!controller->window) + goto out2; + } name = kmalloc(16, GFP_KERNEL); if (!name) @@ -659,8 +661,6 @@ pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma) return -EINVAL; prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size, vma->vm_page_prot); - if (pgprot_val(prot) != pgprot_val(pgprot_noncached(vma->vm_page_prot))) - return -EINVAL; addr = pci_get_legacy_mem(bus); if (IS_ERR(addr)) diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index ff1c5560117..b362d6d6a8c 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -63,7 +63,7 @@ static inline void bte_start_transfer(struct bteinfo_s *bte, u64 len, u64 mode) * Use the block transfer engine to move kernel memory from src to dest * using the assigned mode. * - * Paramaters: + * Parameters: * src - physical address of the transfer source. * dest - physical address of the transfer destination. * len - number of bytes to transfer from source to dest. @@ -247,7 +247,7 @@ EXPORT_SYMBOL(bte_copy); * use the block transfer engine to move kernel * memory from src to dest using the assigned mode. * - * Paramaters: + * Parameters: * src - physical address of the transfer source. * dest - physical address of the transfer destination. * len - number of bytes to transfer from source to dest. @@ -255,7 +255,7 @@ EXPORT_SYMBOL(bte_copy); * for IBCT0/1 in the SGI documentation. * * NOTE: If the source, dest, and len are all cache line aligned, - * then it would be _FAR_ preferrable to use bte_copy instead. + * then it would be _FAR_ preferable to use bte_copy instead. */ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode) { @@ -300,7 +300,7 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode) * a standard bte copy. * * One nasty exception to the above rule is when the - * source and destination are not symetrically + * source and destination are not symmetrically * mis-aligned. If the source offset from the first * cache line is different from the destination offset, * we make the first section be the entire transfer @@ -337,7 +337,7 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode) if (footBcopyDest == (headBcopyDest + headBcopyLen)) { /* - * We have two contigous bcopy + * We have two contiguous bcopy * blocks. Merge them. */ headBcopyLen += footBcopyLen; @@ -375,7 +375,7 @@ bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode) } else { /* - * The transfer is not symetric, we will + * The transfer is not symmetric, we will * allocate a buffer large enough for all the * data, bte_copy into that buffer and then * bcopy to the destination. diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c index b6fcf8164f2..27c5936ccfe 100644 --- a/arch/ia64/sn/kernel/bte_error.c +++ b/arch/ia64/sn/kernel/bte_error.c @@ -105,7 +105,7 @@ int shub1_bte_error_handler(unsigned long _nodepda) } BTE_PRINTK(("eh:%p:%d Cleaning up\n", err_nodepda, smp_processor_id())); - /* Reenable both bte interfaces */ + /* Re-enable both bte interfaces */ imem.ii_imem_regval = REMOTE_HUB_L(nasid, IIO_IMEM); imem.ii_imem_fld_s.i_b0_esd = imem.ii_imem_fld_s.i_b1_esd = 1; REMOTE_HUB_S(nasid, IIO_IMEM, imem.ii_imem_regval); @@ -243,7 +243,7 @@ bte_crb_error_handler(cnodeid_t cnode, int btenum, /* * The caller has already figured out the error type, we save that - * in the bte handle structure for the thread excercising the + * in the bte handle structure for the thread exercising the * interface to consume. */ bte->bh_error = ioe->ie_errortype + BTEFAIL_OFFSET; diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c index fcf7f93c4b6..2c3f9dfca78 100644 --- a/arch/ia64/sn/kernel/huberror.c +++ b/arch/ia64/sn/kernel/huberror.c @@ -8,7 +8,6 @@ #include <linux/types.h> #include <linux/interrupt.h> -#include <linux/pci.h> #include <asm/delay.h> #include <asm/sn/sn_sal.h> #include "ioerror.h" diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c index d48bcd83253..787ed642dd4 100644 --- a/arch/ia64/sn/kernel/io_common.c +++ b/arch/ia64/sn/kernel/io_common.c @@ -364,7 +364,7 @@ void sn_bus_store_sysdata(struct pci_dev *dev) element = kzalloc(sizeof(struct sysdata_el), GFP_KERNEL); if (!element) { - dev_dbg(dev, "%s: out of memory!\n", __FUNCTION__); + dev_dbg(&dev->dev, "%s: out of memory!\n", __FUNCTION__); return; } element->sysdata = SN_PCIDEV_INFO(dev); @@ -479,7 +479,7 @@ sn_io_early_init(void) } /* - * prime sn_pci_provider[]. Individial provider init routines will + * prime sn_pci_provider[]. Individual provider init routines will * override their respective default entries. */ diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 8d2a1bfbfe7..7f6d2360a26 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -59,6 +59,22 @@ void sn_intr_free(nasid_t local_nasid, int local_widget, (u64) sn_irq_info->irq_cookie, 0, 0); } +u64 sn_intr_redirect(nasid_t local_nasid, int local_widget, + struct sn_irq_info *sn_irq_info, + nasid_t req_nasid, int req_slice) +{ + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT, + (u64) SAL_INTR_REDIRECT, (u64) local_nasid, + (u64) local_widget, __pa(sn_irq_info), + (u64) req_nasid, (u64) req_slice, 0); + + return ret_stuff.status; +} + static unsigned int sn_startup_irq(unsigned int irq) { return 0; @@ -127,15 +143,8 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, struct sn_irq_info *new_irq_info; struct sn_pcibus_provider *pci_provider; - new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC); - if (new_irq_info == NULL) - return NULL; - - memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info)); - - bridge = (u64) new_irq_info->irq_bridge; + bridge = (u64) sn_irq_info->irq_bridge; if (!bridge) { - kfree(new_irq_info); return NULL; /* irq is not a device interrupt */ } @@ -145,8 +154,25 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, local_widget = TIO_SWIN_WIDGETNUM(bridge); else local_widget = SWIN_WIDGETNUM(bridge); - vector = sn_irq_info->irq_irq; + + /* Make use of SAL_INTR_REDIRECT if PROM supports it */ + status = sn_intr_redirect(local_nasid, local_widget, sn_irq_info, nasid, slice); + if (!status) { + new_irq_info = sn_irq_info; + goto finish_up; + } + + /* + * PROM does not support SAL_INTR_REDIRECT, or it failed. + * Revert to old method. + */ + new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC); + if (new_irq_info == NULL) + return NULL; + + memcpy(new_irq_info, sn_irq_info, sizeof(struct sn_irq_info)); + /* Free the old PROM new_irq_info structure */ sn_intr_free(local_nasid, local_widget, new_irq_info); unregister_intr_pda(new_irq_info); @@ -162,11 +188,18 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, return NULL; } + register_intr_pda(new_irq_info); + spin_lock(&sn_irq_info_lock); + list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); + spin_unlock(&sn_irq_info_lock); + call_rcu(&sn_irq_info->rcu, sn_irq_info_free); + + +finish_up: /* Update kernels new_irq_info with new target info */ cpuid = nasid_slice_to_cpuid(new_irq_info->irq_nasid, new_irq_info->irq_slice); new_irq_info->irq_cpuid = cpuid; - register_intr_pda(new_irq_info); pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type]; @@ -178,11 +211,6 @@ struct sn_irq_info *sn_retarget_vector(struct sn_irq_info *sn_irq_info, pci_provider && pci_provider->target_interrupt) (pci_provider->target_interrupt)(new_irq_info); - spin_lock(&sn_irq_info_lock); - list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); - spin_unlock(&sn_irq_info_lock); - call_rcu(&sn_irq_info->rcu, sn_irq_info_free); - #ifdef CONFIG_SMP cpuphys = cpu_physical_id(cpuid); set_irq_affinity_info((vector & 0xff), cpuphys, 0); diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c index 49873aa4a37..83f190ffe35 100644 --- a/arch/ia64/sn/kernel/msi_sn.c +++ b/arch/ia64/sn/kernel/msi_sn.c @@ -87,7 +87,6 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry) if (irq < 0) return irq; - set_irq_msi(irq, entry); /* * Set up the vector plumbing. Let the prom (via sn_intr_alloc) * decide which cpu to direct this msi at by default. @@ -144,10 +143,11 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry) */ msg.data = 0x100 + irq; + set_irq_msi(irq, entry); write_msi_msg(irq, &msg); set_irq_chip_and_handler(irq, &sn_msi_chip, handle_edge_irq); - return irq; + return 0; } #ifdef CONFIG_SMP diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index a9bed5ca2ed..684b1c984a4 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -167,7 +167,7 @@ void __init early_sn_setup(void) * IO on SN2 is done via SAL calls, early_printk won't work without this. * * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c. - * Any changes to those file may have to be made hereas well. + * Any changes to those file may have to be made here as well. */ efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab); config_tables = __va(efi_systab->tables); @@ -194,7 +194,7 @@ void __init early_sn_setup(void) } extern int platform_intr_list[]; -static int __initdata shub_1_1_found; +static int __cpuinitdata shub_1_1_found; /* * sn_check_for_wars diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index 601747b1e22..033c8a9f000 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -46,6 +46,9 @@ DECLARE_PER_CPU(struct ptc_stats, ptcstats); static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); +/* 0 = old algorithm (no IPI flushes), 1 = ipi deadlock flush, 2 = ipi instead of SHUB ptc, >2 = always ipi */ +static int sn2_flush_opt = 0; + extern unsigned long sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long, @@ -76,6 +79,8 @@ struct ptc_stats { unsigned long shub_itc_clocks; unsigned long shub_itc_clocks_max; unsigned long shub_ptc_flushes_not_my_mm; + unsigned long shub_ipi_flushes; + unsigned long shub_ipi_flushes_itc_clocks; }; #define sn2_ptctest 0 @@ -99,7 +104,7 @@ static inline unsigned long wait_piowc(void) * * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order. * Context switching user threads which have memory-mapped MMIO may cause - * PIOs to issue from seperate CPUs, thus the PIO writes must be drained + * PIOs to issue from separate CPUs, thus the PIO writes must be drained * from the previous CPU's Shub before execution resumes on the new CPU. */ void sn_migrate(struct task_struct *task) @@ -121,6 +126,18 @@ void sn_tlb_migrate_finish(struct mm_struct *mm) flush_tlb_mm(mm); } +static void +sn2_ipi_flush_all_tlb(struct mm_struct *mm) +{ + unsigned long itc; + + itc = ia64_get_itc(); + smp_flush_tlb_cpumask(mm->cpu_vm_mask); + itc = ia64_get_itc() - itc; + __get_cpu_var(ptcstats).shub_ipi_flushes_itc_clocks += itc; + __get_cpu_var(ptcstats).shub_ipi_flushes++; +} + /** * sn2_global_tlb_purge - globally purge translation cache of virtual address range * @mm: mm_struct containing virtual address range @@ -154,7 +171,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0; short nasids[MAX_NUMNODES], nix; nodemask_t nodes_flushed; - int active, max_active, deadlock; + int active, max_active, deadlock, flush_opt = sn2_flush_opt; + + if (flush_opt > 2) { + sn2_ipi_flush_all_tlb(mm); + return; + } nodes_clear(nodes_flushed); i = 0; @@ -189,6 +211,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, return; } + if (flush_opt == 2) { + sn2_ipi_flush_all_tlb(mm); + preempt_enable(); + return; + } + itc = ia64_get_itc(); nix = 0; for_each_node_mask(cnode, nodes_flushed) @@ -256,6 +284,8 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, } if (active >= max_active || i == (nix - 1)) { if ((deadlock = wait_piowc())) { + if (flush_opt == 1) + goto done; sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1); if (reset_max_active_on_deadlock()) max_active = 1; @@ -267,6 +297,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, start += (1UL << nbits); } while (start < end); +done: itc2 = ia64_get_itc() - itc2; __get_cpu_var(ptcstats).shub_itc_clocks += itc2; if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max) @@ -279,6 +310,11 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, spin_unlock_irqrestore(PTC_LOCK(shub1), flags); + if (flush_opt == 1 && deadlock) { + __get_cpu_var(ptcstats).deadlocks++; + sn2_ipi_flush_all_tlb(mm); + } + preempt_enable(); } @@ -425,24 +461,42 @@ static int sn2_ptc_seq_show(struct seq_file *file, void *data) if (!cpu) { seq_printf(file, - "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2\n"); - seq_printf(file, "# ptctest %d\n", sn2_ptctest); + "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2 ipi_fluches ipi_nsec\n"); + seq_printf(file, "# ptctest %d, flushopt %d\n", sn2_ptctest, sn2_flush_opt); } if (cpu < NR_CPUS && cpu_online(cpu)) { stat = &per_cpu(ptcstats, cpu); - seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, + seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed, stat->deadlocks, 1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, 1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, 1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec, stat->shub_ptc_flushes_not_my_mm, - stat->deadlocks2); + stat->deadlocks2, + stat->shub_ipi_flushes, + 1000 * stat->shub_ipi_flushes_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec); } return 0; } +static ssize_t sn2_ptc_proc_write(struct file *file, const char __user *user, size_t count, loff_t *data) +{ + int cpu; + char optstr[64]; + + if (copy_from_user(optstr, user, count)) + return -EFAULT; + optstr[count - 1] = '\0'; + sn2_flush_opt = simple_strtoul(optstr, NULL, 0); + + for_each_online_cpu(cpu) + memset(&per_cpu(ptcstats, cpu), 0, sizeof(struct ptc_stats)); + + return count; +} + static struct seq_operations sn2_ptc_seq_ops = { .start = sn2_ptc_seq_start, .next = sn2_ptc_seq_next, @@ -458,6 +512,7 @@ static int sn2_ptc_proc_open(struct inode *inode, struct file *file) static const struct file_operations proc_sn2_ptc_operations = { .open = sn2_ptc_proc_open, .read = seq_read, + .write = sn2_ptc_proc_write, .llseek = seq_lseek, .release = seq_release, }; diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c index c08db9c2375..44ccc0d789c 100644 --- a/arch/ia64/sn/kernel/xpc_channel.c +++ b/arch/ia64/sn/kernel/xpc_channel.c @@ -293,7 +293,7 @@ xpc_pull_remote_cachelines(struct xpc_partition *part, void *dst, /* - * Pull the remote per partititon specific variables from the specified + * Pull the remote per partition specific variables from the specified * partition. */ enum xpc_retval @@ -461,7 +461,7 @@ xpc_allocate_local_msgqueue(struct xpc_channel *ch) // >>> may want to check for ch->flags & XPC_C_DISCONNECTING between // >>> iterations of the for-loop, bail if set? - // >>> should we impose a minumum #of entries? like 4 or 8? + // >>> should we impose a minimum #of entries? like 4 or 8? for (nentries = ch->local_nentries; nentries > 0; nentries--) { nbytes = nentries * ch->msg_size; @@ -514,7 +514,7 @@ xpc_allocate_remote_msgqueue(struct xpc_channel *ch) // >>> may want to check for ch->flags & XPC_C_DISCONNECTING between // >>> iterations of the for-loop, bail if set? - // >>> should we impose a minumum #of entries? like 4 or 8? + // >>> should we impose a minimum #of entries? like 4 or 8? for (nentries = ch->remote_nentries; nentries > 0; nentries--) { nbytes = nentries * ch->msg_size; @@ -1478,7 +1478,7 @@ xpc_teardown_infrastructure(struct xpc_partition *part) /* - * Before proceding with the teardown we have to wait until all + * Before proceeding with the teardown we have to wait until all * existing references cease. */ wait_event(part->teardown_wq, (atomic_read(&part->references) == 0)); diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c index 68355ef6f84..e336e1692a7 100644 --- a/arch/ia64/sn/kernel/xpc_main.c +++ b/arch/ia64/sn/kernel/xpc_main.c @@ -55,9 +55,9 @@ #include <linux/delay.h> #include <linux/reboot.h> #include <linux/completion.h> +#include <linux/kdebug.h> #include <asm/sn/intr.h> #include <asm/sn/sn_sal.h> -#include <asm/kdebug.h> #include <asm/uaccess.h> #include <asm/sn/xpc.h> @@ -1332,7 +1332,7 @@ xpc_init(void) dev_warn(xpc_part, "can't register reboot notifier\n"); } - /* add ourselves to the die_notifier list (i.e., ia64die_chain) */ + /* add ourselves to the die_notifier list */ ret = register_die_notifier(&xpc_die_notifier); if (ret != 0) { dev_warn(xpc_part, "can't register die notifier\n"); diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c index 57c723f5cba..7ba403232cb 100644 --- a/arch/ia64/sn/kernel/xpc_partition.c +++ b/arch/ia64/sn/kernel/xpc_partition.c @@ -574,7 +574,7 @@ xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version, u64 remote_vars_pa, struct xpc_vars *remote_vars) { part->remote_rp_version = remote_rp_version; - dev_dbg(xpc_part, " remote_rp_version = 0x%016lx\n", + dev_dbg(xpc_part, " remote_rp_version = 0x%016x\n", part->remote_rp_version); part->remote_rp_stamp = *remote_rp_stamp; diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c index 5419acb89a8..e58fcadff2e 100644 --- a/arch/ia64/sn/kernel/xpnet.c +++ b/arch/ia64/sn/kernel/xpnet.c @@ -24,7 +24,6 @@ #include <linux/module.h> #include <linux/kernel.h> -#include <linux/pci.h> #include <linux/init.h> #include <linux/ioport.h> #include <linux/netdevice.h> @@ -344,8 +343,8 @@ xpnet_dev_open(struct net_device *dev) enum xpc_retval ret; - dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %d, " - "%d)\n", XPC_NET_CHANNEL, xpnet_connection_activity, + dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, " + "%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity, XPNET_MSG_SIZE, XPNET_MSG_NENTRIES, XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS); @@ -532,7 +531,7 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) dev_dbg(xpnet, "destination Partitions mask (dp) = 0x%lx\n", dp); /* - * If we wanted to allow promiscous mode to work like an + * If we wanted to allow promiscuous mode to work like an * unswitched network, this would be a good point to OR in a * mask of partitions which should be receiving all packets. */ diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index 7a291a27151..d79ddacfba2 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -333,7 +333,7 @@ int sn_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size) /* * First, try the SN_SAL_IOIF_PCI_SAFE SAL call which can work * around hw issues at the pci bus level. SGI proms older than - * 4.10 don't implment this. + * 4.10 don't implement this. */ SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE, @@ -348,7 +348,7 @@ int sn_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size) /* * If the above failed, retry using the SAL_PROBE call which should * be present in all proms (but which cannot work round PCI chipset - * bugs). This code is retained for compatability with old + * bugs). This code is retained for compatibility with old * pre-4.10 proms, and should be removed at some point in the future. */ @@ -379,7 +379,7 @@ int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size) /* * First, try the SN_SAL_IOIF_PCI_SAFE SAL call which can work * around hw issues at the pci bus level. SGI proms older than - * 4.10 don't implment this. + * 4.10 don't implement this. */ SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE, @@ -394,7 +394,7 @@ int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size) /* * If the above failed, retry using the SAL_PROBE call which should * be present in all proms (but which cannot work round PCI chipset - * bugs). This code is retained for compatability with old + * bugs). This code is retained for compatibility with old * pre-4.10 proms, and should be removed at some point in the future. */ diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c index 935029fc400..239b3cedcf2 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c @@ -30,7 +30,7 @@ static void mark_ate(struct ate_resource *ate_resource, int start, int number, /* * find_free_ate: Find the first free ate index starting from the given - * index for the desired consequtive count. + * index for the desired consecutive count. */ static int find_free_ate(struct ate_resource *ate_resource, int start, int count) @@ -88,7 +88,7 @@ static inline int alloc_ate_resource(struct ate_resource *ate_resource, return -1; /* - * Find the required number of free consequtive ates. + * Find the required number of free consecutive ates. */ start_index = find_free_ate(ate_resource, ate_resource->lowest_free_index, @@ -105,7 +105,7 @@ static inline int alloc_ate_resource(struct ate_resource *ate_resource, /* * Allocate "count" contiguous Bridge Address Translation Entries * on the specified bridge to be used for PCI to XTALK mappings. - * Indices in rm map range from 1..num_entries. Indicies returned + * Indices in rm map range from 1..num_entries. Indices returned * to caller range from 0..num_entries-1. * * Return the start index on success, -1 on failure. diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index 95af40cb22f..e626e50a938 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -201,7 +201,7 @@ pcibr_dmatrans_direct32(struct pcidev_info * info, } /* - * Wrapper routine for free'ing DMA maps + * Wrapper routine for freeing DMA maps * DMA mappings for Direct 64 and 32 do not have any DMA maps. */ void diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c index 8a2cb4e691f..b9bedbd6e1d 100644 --- a/arch/ia64/sn/pci/tioca_provider.c +++ b/arch/ia64/sn/pci/tioca_provider.c @@ -223,7 +223,7 @@ tioca_fastwrite_enable(struct tioca_kernel *tioca_kern) /* * Scan all vga controllers on this bus making sure they all - * suport FW. If not, return. + * support FW. If not, return. */ list_for_each_entry(pdev, tioca_kern->ca_devices, bus_list) { @@ -364,7 +364,7 @@ tioca_dma_d48(struct pci_dev *pdev, u64 paddr) * @req_size: len (bytes) to map * * Map @paddr into CA address space using the GART mechanism. The mapped - * dma_addr_t is guarenteed to be contiguous in CA bus space. + * dma_addr_t is guaranteed to be contiguous in CA bus space. */ static dma_addr_t tioca_dma_mapped(struct pci_dev *pdev, u64 paddr, size_t req_size) @@ -526,7 +526,7 @@ tioca_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, int dma_flags) return 0; /* - * If card is 64 or 48 bit addresable, use a direct mapping. 32 + * If card is 64 or 48 bit addressable, use a direct mapping. 32 * bit direct is so restrictive w.r.t. where the memory resides that * we don't use it even though CA has some support. */ diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c index 35f854fb612..f4c0b961a93 100644 --- a/arch/ia64/sn/pci/tioce_provider.c +++ b/arch/ia64/sn/pci/tioce_provider.c @@ -256,9 +256,9 @@ pcidev_to_tioce(struct pci_dev *pdev, struct tioce __iomem **base, * @ct_addr: the coretalk address to map * @len: number of bytes to map * - * Given the addressing type, set up various paramaters that define the + * Given the addressing type, set up various parameters that define the * ATE pool to use. Search for a contiguous block of entries to cover the - * length, and if enough resources exist, fill in the ATE's and construct a + * length, and if enough resources exist, fill in the ATEs and construct a * tioce_dmamap struct to track the mapping. */ static u64 @@ -581,8 +581,8 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count, */ if (!mapaddr && !barrier && dma_mask >= 0xffffffffffUL) { /* - * We have two options for 40-bit mappings: 16GB "super" ATE's - * and 64MB "regular" ATE's. We'll try both if needed for a + * We have two options for 40-bit mappings: 16GB "super" ATEs + * and 64MB "regular" ATEs. We'll try both if needed for a * given mapping but which one we try first depends on the * size. For requests >64MB, prefer to use a super page with * regular as the fallback. Otherwise, try in the reverse order. @@ -687,8 +687,8 @@ tioce_error_intr_handler(int irq, void *arg) } /** - * tioce_reserve_m32 - reserve M32 ate's for the indicated address range - * @tioce_kernel: TIOCE context to reserve ate's for + * tioce_reserve_m32 - reserve M32 ATEs for the indicated address range + * @tioce_kernel: TIOCE context to reserve ATEs for * @base: starting bus address to reserve * @limit: last bus address to reserve * @@ -763,7 +763,7 @@ tioce_kern_init(struct tioce_common *tioce_common) /* * Set PMU pagesize to the largest size available, and zero out - * the ate's. + * the ATEs. */ tioce_mmr = (struct tioce __iomem *)tioce_common->ce_pcibus.bs_base; @@ -784,7 +784,7 @@ tioce_kern_init(struct tioce_common *tioce_common) } /* - * Reserve ATE's corresponding to reserved address ranges. These + * Reserve ATEs corresponding to reserved address ranges. These * include: * * Memory space covered by each PPB mem base/limit register |