Diffstat (limited to 'arch/ia64/kernel/smp.c')
-rw-r--r--   arch/ia64/kernel/smp.c   331
1 file changed, 147 insertions, 184 deletions
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 657ac99a451..9fcd4e63048 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -30,8 +30,9 @@
 #include <linux/delay.h>
 #include <linux/efi.h>
 #include <linux/bitops.h>
+#include <linux/kexec.h>

-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <asm/current.h>
 #include <asm/delay.h>
 #include <asm/machvec.h>
@@ -43,54 +44,39 @@
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/sal.h>
-#include <asm/system.h>
 #include <asm/tlbflush.h>
 #include <asm/unistd.h>
 #include <asm/mca.h>

 /*
- * Structure and data for smp_call_function(). This is designed to minimise static memory
- * requirements. It also looks cleaner.
+ * Note: alignment of 4 entries/cacheline was empirically determined
+ * to be a good tradeoff between hot cachelines & spreading the array
+ * across too many cacheline.
  */
-static __cacheline_aligned DEFINE_SPINLOCK(call_lock);
+static struct local_tlb_flush_counts {
+        unsigned int count;
+} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS];

-struct call_data_struct {
-        void (*func) (void *info);
-        void *info;
-        long wait;
-        atomic_t started;
-        atomic_t finished;
-};
-
-static volatile struct call_data_struct *call_data;
+static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned short [NR_CPUS],
+                                     shadow_flush_counts);

 #define IPI_CALL_FUNC           0
 #define IPI_CPU_STOP            1
+#define IPI_CALL_FUNC_SINGLE    2
+#define IPI_KDUMP_CPU_STOP      3

 /* This needs to be cacheline aligned because it is written to by *other* CPUs.  */
-static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned;
+static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, ipi_operation);

 extern void cpu_halt (void);

-void
-lock_ipi_calllock(void)
-{
-        spin_lock_irq(&call_lock);
-}
-
-void
-unlock_ipi_calllock(void)
-{
-        spin_unlock_irq(&call_lock);
-}
-
 static void
-stop_this_cpu (void)
+stop_this_cpu(void)
 {
         /*
          * Remove this CPU:
          */
-        cpu_clear(smp_processor_id(), cpu_online_map);
+        set_cpu_online(smp_processor_id(), false);
         max_xtp();
         local_irq_disable();
         cpu_halt();
@@ -108,7 +94,7 @@ cpu_die(void)
 }

 irqreturn_t
-handle_IPI (int irq, void *dev_id, struct pt_regs *regs)
+handle_IPI (int irq, void *dev_id)
 {
         int this_cpu = get_cpu();
         unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation);
@@ -124,40 +110,23 @@ handle_IPI (int irq, void *dev_id, struct pt_regs *regs)
                         ops &= ~(1 << which);

                         switch (which) {
-                              case IPI_CALL_FUNC:
-                              {
-                                      struct call_data_struct *data;
-                                      void (*func)(void *info);
-                                      void *info;
-                                      int wait;
-
-                                      /* release the 'pointer lock' */
-                                      data = (struct call_data_struct *) call_data;
-                                      func = data->func;
-                                      info = data->info;
-                                      wait = data->wait;
-
-                                      mb();
-                                      atomic_inc(&data->started);
-                                      /*
-                                       * At this point the structure may be gone unless
-                                       * wait is true.
-                                       */
-                                      (*func)(info);
-
-                                      /* Notify the sending CPU that the task is done.  */
-                                      mb();
-                                      if (wait)
-                                              atomic_inc(&data->finished);
-                              }
-                              break;
-
-                              case IPI_CPU_STOP:
+                        case IPI_CPU_STOP:
                                 stop_this_cpu();
                                 break;
-
-                              default:
-                                printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
+                        case IPI_CALL_FUNC:
+                                generic_smp_call_function_interrupt();
+                                break;
+                        case IPI_CALL_FUNC_SINGLE:
+                                generic_smp_call_function_single_interrupt();
+                                break;
+#ifdef CONFIG_KEXEC
+                        case IPI_KDUMP_CPU_STOP:
+                                unw_init_running(kdump_cpu_freeze, NULL);
+                                break;
+#endif
+                        default:
+                                printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n",
+                                                this_cpu, which);
                                 break;
                         }
                 } while (ops);
@@ -167,8 +136,10 @@ handle_IPI (int irq, void *dev_id, struct pt_regs *regs)
         return IRQ_HANDLED;
 }

+
+
 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
  */
 static inline void
 send_IPI_single (int dest_cpu, int op)
@@ -178,7 +149,7 @@ send_IPI_single (int dest_cpu, int op)
 }

 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
  */
 static inline void
 send_IPI_allbutself (int op)
@@ -192,7 +163,20 @@ send_IPI_allbutself (int op)
 }

 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
+ */
+static inline void
+send_IPI_mask(const struct cpumask *mask, int op)
+{
+        unsigned int cpu;
+
+        for_each_cpu(cpu, mask) {
+                send_IPI_single(cpu, op);
+        }
+}
+
+/*
+ * Called with preemption disabled.
  */
 static inline void
 send_IPI_all (int op)
@@ -205,7 +189,7 @@ send_IPI_all (int op)
 }

 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
  */
 static inline void
 send_IPI_self (int op)
@@ -213,155 +197,134 @@ send_IPI_self (int op)
         send_IPI_single(smp_processor_id(), op);
 }

+#ifdef CONFIG_KEXEC
+void
+kdump_smp_send_stop(void)
+{
+        send_IPI_allbutself(IPI_KDUMP_CPU_STOP);
+}
+
+void
+kdump_smp_send_init(void)
+{
+        unsigned int cpu, self_cpu;
+        self_cpu = smp_processor_id();
+        for_each_online_cpu(cpu) {
+                if (cpu != self_cpu) {
+                        if(kdump_status[cpu] == 0)
+                                platform_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0);
+                }
+        }
+}
+#endif
 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
  */
 void
 smp_send_reschedule (int cpu)
 {
         platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
 }
+EXPORT_SYMBOL_GPL(smp_send_reschedule);

-void
-smp_flush_tlb_all (void)
+/*
+ * Called with preemption disabled.
+ */
+static void
+smp_send_local_flush_tlb (int cpu)
 {
-        on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
+        platform_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0);
 }

 void
-smp_flush_tlb_mm (struct mm_struct *mm)
+smp_local_flush_tlb(void)
 {
-        preempt_disable();
-        /* this happens for the common case of a single-threaded fork():  */
-        if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1))
-        {
-                local_finish_flush_tlb_mm(mm);
-                preempt_enable();
-                return;
-        }
-
-        preempt_enable();
         /*
-         * We could optimize this further by using mm->cpu_vm_mask to track which CPUs
-         * have been running in the address space.  It's not clear that this is worth the
-         * trouble though: to avoid races, we have to raise the IPI on the target CPU
-         * anyhow, and once a CPU is interrupted, the cost of local_flush_tlb_all() is
-         * rather trivial.
+         * Use atomic ops. Otherwise, the load/increment/store sequence from
+         * a "++" operation can have the line stolen between the load & store.
+         * The overhead of the atomic op in negligible in this case & offers
+         * significant benefit for the brief periods where lots of cpus
+         * are simultaneously flushing TLBs.
          */
-        on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1, 1);
+        ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq);
+        local_flush_tlb_all();
 }

-/*
- * Run a function on another CPU
- *  <func>      The function to run. This must be fast and non-blocking.
- *  <info>      An arbitrary pointer to pass to the function.
- *  <nonatomic> Currently unused.
- *  <wait>      If true, wait until function has completed on other CPUs.
- *  [RETURNS]   0 on success, else a negative status code.
- *
- * Does not return until the remote CPU is nearly ready to execute <func>
- * or is or has executed.
- */
+#define FLUSH_DELAY     5 /* Usec backoff to eliminate excessive cacheline bouncing */

-int
-smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int nonatomic,
-                          int wait)
+void
+smp_flush_tlb_cpumask(cpumask_t xcpumask)
 {
-        struct call_data_struct data;
-        int cpus = 1;
-        int me = get_cpu(); /* prevent preemption and reschedule on another processor */
-
-        if (cpuid == me) {
-                printk(KERN_INFO "%s: trying to call self\n", __FUNCTION__);
-                put_cpu();
-                return -EBUSY;
-        }
+        unsigned short *counts = __ia64_per_cpu_var(shadow_flush_counts);
+        cpumask_t cpumask = xcpumask;
+        int mycpu, cpu, flush_mycpu = 0;

-        data.func = func;
-        data.info = info;
-        atomic_set(&data.started, 0);
-        data.wait = wait;
-        if (wait)
-                atomic_set(&data.finished, 0);
+        preempt_disable();
+        mycpu = smp_processor_id();

-        spin_lock_bh(&call_lock);
+        for_each_cpu_mask(cpu, cpumask)
+                counts[cpu] = local_tlb_flush_counts[cpu].count & 0xffff;

-        call_data = &data;
-        mb();   /* ensure store to call_data precedes setting of IPI_CALL_FUNC */
-        send_IPI_single(cpuid, IPI_CALL_FUNC);
+        mb();
+        for_each_cpu_mask(cpu, cpumask) {
+                if (cpu == mycpu)
+                        flush_mycpu = 1;
+                else
+                        smp_send_local_flush_tlb(cpu);
+        }

-        /* Wait for response */
-        while (atomic_read(&data.started) != cpus)
-                cpu_relax();
+        if (flush_mycpu)
+                smp_local_flush_tlb();

-        if (wait)
-                while (atomic_read(&data.finished) != cpus)
-                        cpu_relax();
-        call_data = NULL;
+        for_each_cpu_mask(cpu, cpumask)
+                while(counts[cpu] == (local_tlb_flush_counts[cpu].count & 0xffff))
+                        udelay(FLUSH_DELAY);

-        spin_unlock_bh(&call_lock);
-        put_cpu();
-        return 0;
+        preempt_enable();
 }
-EXPORT_SYMBOL(smp_call_function_single);

-/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-
-/*
- *  [SUMMARY]   Run a function on all other CPUs.
- *  <func>      The function to run. This must be fast and non-blocking.
- *  <info>      An arbitrary pointer to pass to the function.
- *  <nonatomic> currently unused.
- *  <wait>      If true, wait (atomically) until function has completed on other CPUs.
- *  [RETURNS]   0 on success, else a negative status code.
- *
- * Does not return until remote CPUs are nearly ready to execute <func> or are or have
- * executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int
-smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait)
+void
+smp_flush_tlb_all (void)
 {
-        struct call_data_struct data;
-        int cpus = num_online_cpus()-1;
-
-        if (!cpus)
-                return 0;
-
-        /* Can deadlock when called with interrupts disabled */
-        WARN_ON(irqs_disabled());
-
-        data.func = func;
-        data.info = info;
-        atomic_set(&data.started, 0);
-        data.wait = wait;
-        if (wait)
-                atomic_set(&data.finished, 0);
-
-        spin_lock(&call_lock);
-
-        call_data = &data;
-        mb();   /* ensure store to call_data precedes setting of IPI_CALL_FUNC */
-        send_IPI_allbutself(IPI_CALL_FUNC);
+        on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1);
+}

-        /* Wait for response */
-        while (atomic_read(&data.started) != cpus)
-                cpu_relax();
+void
+smp_flush_tlb_mm (struct mm_struct *mm)
+{
+        cpumask_var_t cpus;
+        preempt_disable();
+        /* this happens for the common case of a single-threaded fork():  */
+        if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1))
+        {
+                local_finish_flush_tlb_mm(mm);
+                preempt_enable();
+                return;
+        }
+        if (!alloc_cpumask_var(&cpus, GFP_ATOMIC)) {
+                smp_call_function((void (*)(void *))local_finish_flush_tlb_mm,
+                        mm, 1);
+        } else {
+                cpumask_copy(cpus, mm_cpumask(mm));
+                smp_call_function_many(cpus,
+                        (void (*)(void *))local_finish_flush_tlb_mm, mm, 1);
+                free_cpumask_var(cpus);
+        }
+        local_irq_disable();
+        local_finish_flush_tlb_mm(mm);
+        local_irq_enable();
+        preempt_enable();
+}

-        if (wait)
-                while (atomic_read(&data.finished) != cpus)
-                        cpu_relax();
-        call_data = NULL;
+void arch_send_call_function_single_ipi(int cpu)
+{
+        send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE);
+}

-        spin_unlock(&call_lock);
-        return 0;
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+        send_IPI_mask(mask, IPI_CALL_FUNC);
 }
-EXPORT_SYMBOL(smp_call_function);

 /*
  * this function calls the 'stop' function on all other CPUs in the system.
@@ -372,7 +335,7 @@ smp_send_stop (void)
         send_IPI_allbutself(IPI_CPU_STOP);
 }

-int __init
+int
 setup_profiling_timer (unsigned int multiplier)
 {
         return -EINVAL;
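
The most interesting new mechanism in this diff is the remote-TLB-flush handshake: smp_flush_tlb_cpumask() snapshots each target CPU's local_tlb_flush_counts entry into its per-CPU shadow_flush_counts array, sends IA64_IPI_LOCAL_TLB_FLUSH to the targets, and then polls with a udelay(FLUSH_DELAY) backoff until every snapshot has gone stale, which proves each target has since run smp_local_flush_tlb() and bumped its own counter. The sketch below is a minimal user-space illustration of that counter handshake, not kernel code: it substitutes C11 atomics and pthreads for IPIs and ia64_fetchadd(), and the names fake_cpu, request, stop and FLUSH_BACKOFF_US are invented for the demo.

/*
 * Illustrative user-space sketch of the "snapshot counter, kick remote,
 * wait for the counter to advance" pattern used by smp_flush_tlb_cpumask().
 * Build with: cc -pthread demo.c
 */
#include <stdatomic.h>
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>

#define NCPUS            4
#define FLUSH_BACKOFF_US 5      /* mirrors FLUSH_DELAY in the patch */

struct fake_cpu {
        atomic_uint flush_count;  /* only its owner thread ever writes this */
        atomic_int  request;      /* stands in for the flush IPI */
        atomic_int  stop;
};

static struct fake_cpu cpus[NCPUS];

/* Each "CPU" flushes on request and then advances its own counter. */
static void *cpu_loop(void *arg)
{
        struct fake_cpu *c = arg;

        while (!atomic_load(&c->stop)) {
                if (atomic_exchange(&c->request, 0)) {
                        /* local_flush_tlb_all() would run here */
                        atomic_fetch_add(&c->flush_count, 1); /* like ia64_fetchadd */
                }
        }
        return NULL;
}

/* Requester side: snapshot, kick the target, wait for its counter to move. */
static void flush_remote(int target)
{
        unsigned int snap = atomic_load(&cpus[target].flush_count);

        atomic_store(&cpus[target].request, 1);         /* "send the IPI" */
        while (atomic_load(&cpus[target].flush_count) == snap)
                usleep(FLUSH_BACKOFF_US);               /* cheap backoff */
}

int main(void)
{
        pthread_t tid[NCPUS];

        for (int i = 0; i < NCPUS; i++)
                pthread_create(&tid[i], NULL, cpu_loop, &cpus[i]);

        for (int i = 0; i < NCPUS; i++)
                flush_remote(i);
        printf("all %d fake CPUs acknowledged a flush\n", NCPUS);

        for (int i = 0; i < NCPUS; i++)
                atomic_store(&cpus[i].stop, 1);
        for (int i = 0; i < NCPUS; i++)
                pthread_join(tid[i], NULL);
        return 0;
}

The point of the design is that each counter is written only by its owning CPU while the requester merely reads it, so there is no shared completion flag bouncing between caches; that is also why the kernel version can keep only a 16-bit snapshot per target (hence the "& 0xffff" against the unsigned short shadow array) and a plain fetch-add on the owner side.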
