Diffstat (limited to 'arch/ia64/kernel/smp.c')
-rw-r--r--	arch/ia64/kernel/smp.c	311
1 file changed, 123 insertions(+), 188 deletions(-)
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index b1b9aa4364b..9fcd4e63048 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -32,7 +32,7 @@
 #include <linux/bitops.h>
 #include <linux/kexec.h>
 
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <asm/current.h>
 #include <asm/delay.h>
 #include <asm/machvec.h>
@@ -44,55 +44,39 @@
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/sal.h>
-#include <asm/system.h>
 #include <asm/tlbflush.h>
 #include <asm/unistd.h>
 #include <asm/mca.h>
 
 /*
- * Structure and data for smp_call_function(). This is designed to minimise static memory
- * requirements. It also looks cleaner.
+ * Note: alignment of 4 entries/cacheline was empirically determined
+ * to be a good tradeoff between hot cachelines & spreading the array
+ * across too many cacheline.
  */
-static __cacheline_aligned DEFINE_SPINLOCK(call_lock);
+static struct local_tlb_flush_counts {
+	unsigned int count;
+} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS];
 
-struct call_data_struct {
-	void (*func) (void *info);
-	void *info;
-	long wait;
-	atomic_t started;
-	atomic_t finished;
-};
-
-static volatile struct call_data_struct *call_data;
+static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned short [NR_CPUS],
+				     shadow_flush_counts);
 
 #define IPI_CALL_FUNC		0
 #define IPI_CPU_STOP		1
+#define IPI_CALL_FUNC_SINGLE	2
 #define IPI_KDUMP_CPU_STOP	3
 
 /* This needs to be cacheline aligned because it is written to by *other* CPUs.  */
-static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned;
+static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, ipi_operation);
 
 extern void cpu_halt (void);
 
-void
-lock_ipi_calllock(void)
-{
-	spin_lock_irq(&call_lock);
-}
-
-void
-unlock_ipi_calllock(void)
-{
-	spin_unlock_irq(&call_lock);
-}
-
 static void
-stop_this_cpu (void)
+stop_this_cpu(void)
 {
 	/*
 	 * Remove this CPU:
 	 */
-	cpu_clear(smp_processor_id(), cpu_online_map);
+	set_cpu_online(smp_processor_id(), false);
 	max_xtp();
 	local_irq_disable();
 	cpu_halt();
@@ -126,44 +110,23 @@ handle_IPI (int irq, void *dev_id)
 			ops &= ~(1 << which);
 
 			switch (which) {
-			case IPI_CALL_FUNC:
-			{
-				struct call_data_struct *data;
-				void (*func)(void *info);
-				void *info;
-				int wait;
-
-				/* release the 'pointer lock' */
-				data = (struct call_data_struct *) call_data;
-				func = data->func;
-				info = data->info;
-				wait = data->wait;
-
-				mb();
-				atomic_inc(&data->started);
-				/*
-				 * At this point the structure may be gone unless
-				 * wait is true.
-				 */
-				(*func)(info);
-
-				/* Notify the sending CPU that the task is done.  */
-				mb();
-				if (wait)
-					atomic_inc(&data->finished);
-			}
-			break;
-
-			case IPI_CPU_STOP:
+			case IPI_CPU_STOP:
 				stop_this_cpu();
 				break;
-#ifdef CONFIG_CRASH_DUMP
-			case IPI_KDUMP_CPU_STOP:
+			case IPI_CALL_FUNC:
+				generic_smp_call_function_interrupt();
+				break;
+			case IPI_CALL_FUNC_SINGLE:
+				generic_smp_call_function_single_interrupt();
+				break;
+#ifdef CONFIG_KEXEC
+			case IPI_KDUMP_CPU_STOP:
 				unw_init_running(kdump_cpu_freeze, NULL);
 				break;
 #endif
-			default:
-				printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
+			default:
+				printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n",
+						this_cpu, which);
 				break;
 			}
 		} while (ops);
@@ -173,8 +136,10 @@ handle_IPI (int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+
+
 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
  */
 static inline void
 send_IPI_single (int dest_cpu, int op)
@@ -184,7 +149,7 @@ send_IPI_single (int dest_cpu, int op)
 }
 
 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
  */
 static inline void
 send_IPI_allbutself (int op)
@@ -198,7 +163,20 @@ send_IPI_allbutself (int op)
 }
 
 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
+ */
+static inline void
+send_IPI_mask(const struct cpumask *mask, int op)
+{
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask) {
+			send_IPI_single(cpu, op);
+	}
+}
+
+/*
+ * Called with preemption disabled.
  */
 static inline void
 send_IPI_all (int op)
@@ -211,7 +189,7 @@ send_IPI_all (int op)
 }
 
 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
  */
 static inline void
 send_IPI_self (int op)
@@ -219,15 +197,15 @@ send_IPI_self (int op)
 	send_IPI_single(smp_processor_id(), op);
 }
 
-#ifdef CONFIG_CRASH_DUMP
+#ifdef CONFIG_KEXEC
 void
-kdump_smp_send_stop()
+kdump_smp_send_stop(void)
 {
 	send_IPI_allbutself(IPI_KDUMP_CPU_STOP);
 }
 
 void
-kdump_smp_send_init()
+kdump_smp_send_init(void)
 {
 	unsigned int cpu, self_cpu;
 	self_cpu = smp_processor_id();
@@ -240,156 +218,113 @@ kdump_smp_send_init()
 }
 #endif
 
 /*
- * Called with preeemption disabled.
+ * Called with preemption disabled.
  */
 void
 smp_send_reschedule (int cpu)
 {
 	platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
 }
+EXPORT_SYMBOL_GPL(smp_send_reschedule);
 
-void
-smp_flush_tlb_all (void)
+/*
+ * Called with preemption disabled.
+ */
+static void
+smp_send_local_flush_tlb (int cpu)
 {
-	on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
+	platform_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0);
 }
 
 void
-smp_flush_tlb_mm (struct mm_struct *mm)
+smp_local_flush_tlb(void)
 {
-	preempt_disable();
-	/* this happens for the common case of a single-threaded fork():  */
-	if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1))
-	{
-		local_finish_flush_tlb_mm(mm);
-		preempt_enable();
-		return;
-	}
-
-	preempt_enable();
 	/*
-	 * We could optimize this further by using mm->cpu_vm_mask to track which CPUs
-	 * have been running in the address space.  It's not clear that this is worth the
-	 * trouble though: to avoid races, we have to raise the IPI on the target CPU
-	 * anyhow, and once a CPU is interrupted, the cost of local_flush_tlb_all() is
-	 * rather trivial.
+	 * Use atomic ops. Otherwise, the load/increment/store sequence from
+	 * a "++" operation can have the line stolen between the load & store.
+	 * The overhead of the atomic op in negligible in this case & offers
+	 * significant benefit for the brief periods where lots of cpus
+	 * are simultaneously flushing TLBs.
 	 */
-	on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1, 1);
+	ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq);
+	local_flush_tlb_all();
 }
 
-/*
- * Run a function on another CPU
- *  <func>	The function to run. This must be fast and non-blocking.
- *  <info>	An arbitrary pointer to pass to the function.
- *  <nonatomic>	Currently unused.
- *  <wait>	If true, wait until function has completed on other CPUs.
- *  [RETURNS]	0 on success, else a negative status code.
- *
- * Does not return until the remote CPU is nearly ready to execute <func>
- * or is or has executed.
- */
+#define FLUSH_DELAY	5	/* Usec backoff to eliminate excessive cacheline bouncing */
 
-int
-smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int nonatomic,
-			  int wait)
+void
+smp_flush_tlb_cpumask(cpumask_t xcpumask)
 {
-	struct call_data_struct data;
-	int cpus = 1;
-	int me = get_cpu(); /* prevent preemption and reschedule on another processor */
-
-	if (cpuid == me) {
-		printk(KERN_INFO "%s: trying to call self\n", __FUNCTION__);
-		put_cpu();
-		return -EBUSY;
-	}
+	unsigned short *counts = __ia64_per_cpu_var(shadow_flush_counts);
+	cpumask_t cpumask = xcpumask;
+	int mycpu, cpu, flush_mycpu = 0;
 
-	data.func = func;
-	data.info = info;
-	atomic_set(&data.started, 0);
-	data.wait = wait;
-	if (wait)
-		atomic_set(&data.finished, 0);
+	preempt_disable();
+	mycpu = smp_processor_id();
 
-	spin_lock_bh(&call_lock);
+	for_each_cpu_mask(cpu, cpumask)
+		counts[cpu] = local_tlb_flush_counts[cpu].count & 0xffff;
 
-	call_data = &data;
-	mb();	/* ensure store to call_data precedes setting of IPI_CALL_FUNC */
-	send_IPI_single(cpuid, IPI_CALL_FUNC);
+	mb();
+	for_each_cpu_mask(cpu, cpumask) {
+		if (cpu == mycpu)
+			flush_mycpu = 1;
+		else
+			smp_send_local_flush_tlb(cpu);
+	}
 
-	/* Wait for response */
-	while (atomic_read(&data.started) != cpus)
-		cpu_relax();
+	if (flush_mycpu)
+		smp_local_flush_tlb();
 
-	if (wait)
-		while (atomic_read(&data.finished) != cpus)
-			cpu_relax();
-	call_data = NULL;
+	for_each_cpu_mask(cpu, cpumask)
+		while(counts[cpu] == (local_tlb_flush_counts[cpu].count & 0xffff))
+			udelay(FLUSH_DELAY);
 
-	spin_unlock_bh(&call_lock);
-	put_cpu();
-	return 0;
+	preempt_enable();
 }
-EXPORT_SYMBOL(smp_call_function_single);
 
-/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
+void
+smp_flush_tlb_all (void)
+{
+	on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1);
+}
 
-/*
- * [SUMMARY]	Run a function on all other CPUs.
- *  <func>	The function to run. This must be fast and non-blocking.
- *  <info>	An arbitrary pointer to pass to the function.
- *  <nonatomic>	currently unused.
- *  <wait>	If true, wait (atomically) until function has completed on other CPUs.
- *  [RETURNS]	0 on success, else a negative status code.
- *
- * Does not return until remote CPUs are nearly ready to execute <func> or are or have
- * executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int
-smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait)
+void
+smp_flush_tlb_mm (struct mm_struct *mm)
 {
-	struct call_data_struct data;
-	int cpus;
-
-	spin_lock(&call_lock);
-	cpus = num_online_cpus() - 1;
-	if (!cpus) {
-		spin_unlock(&call_lock);
-		return 0;
+	cpumask_var_t cpus;
+	preempt_disable();
+	/* this happens for the common case of a single-threaded fork():  */
+	if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1))
+	{
+		local_finish_flush_tlb_mm(mm);
+		preempt_enable();
+		return;
 	}
+	if (!alloc_cpumask_var(&cpus, GFP_ATOMIC)) {
+		smp_call_function((void (*)(void *))local_finish_flush_tlb_mm,
+			mm, 1);
+	} else {
+		cpumask_copy(cpus, mm_cpumask(mm));
+		smp_call_function_many(cpus,
+			(void (*)(void *))local_finish_flush_tlb_mm, mm, 1);
+		free_cpumask_var(cpus);
+	}
+	local_irq_disable();
+	local_finish_flush_tlb_mm(mm);
+	local_irq_enable();
+	preempt_enable();
+}
 
-	/* Can deadlock when called with interrupts disabled */
-	WARN_ON(irqs_disabled());
-
-	data.func = func;
-	data.info = info;
-	atomic_set(&data.started, 0);
-	data.wait = wait;
-	if (wait)
-		atomic_set(&data.finished, 0);
-
-	call_data = &data;
-	mb();	/* ensure store to call_data precedes setting of IPI_CALL_FUNC */
-	send_IPI_allbutself(IPI_CALL_FUNC);
-
-	/* Wait for response */
-	while (atomic_read(&data.started) != cpus)
-		cpu_relax();
-
-	if (wait)
-		while (atomic_read(&data.finished) != cpus)
-			cpu_relax();
-	call_data = NULL;
+void arch_send_call_function_single_ipi(int cpu)
+{
+	send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE);
+}
 
-	spin_unlock(&call_lock);
-	return 0;
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	send_IPI_mask(mask, IPI_CALL_FUNC);
 }
-EXPORT_SYMBOL(smp_call_function);
 
 /*
  * this function calls the 'stop' function on all other CPUs in the system.
@@ -400,7 +335,7 @@ smp_send_stop (void)
 	send_IPI_allbutself(IPI_CPU_STOP);
 }
 
-int __init
+int
 setup_profiling_timer (unsigned int multiplier)
 {
 	return -EINVAL;
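
The least obvious part of the new code is the handshake in smp_flush_tlb_cpumask(): the initiator snapshots each target CPU's local_tlb_flush_counts[] value into a per-cpu shadow array, sends IA64_IPI_LOCAL_TLB_FLUSH, and then polls until every snapshot goes stale, backing off FLUSH_DELAY microseconds between polls, while each target bumps its own counter with ia64_fetchadd() from smp_local_flush_tlb(). The following is a minimal userspace sketch of that pattern in plain C11 with pthreads; it is not kernel code, and the names ipi_pending[], cpu_thread() and flush_tlb_cpumask_demo() are invented for this illustration, with an atomic flag standing in for the real IPI.

/*
 * Userspace sketch only -- NOT kernel code.  Models the shadow-count
 * handshake used by smp_flush_tlb_cpumask() above: snapshot each
 * target's flush count, "send" the IPI, then poll until the count
 * moves past the snapshot, backing off FLUSH_DELAY usec per poll.
 * ipi_pending[], cpu_thread() and flush_tlb_cpumask_demo() are
 * invented names for this demo.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

#define NCPUS		4
#define FLUSH_DELAY	5	/* usec backoff, same constant as the patch */

static atomic_uint flush_count[NCPUS];	/* plays local_tlb_flush_counts[].count */
static atomic_int  ipi_pending[NCPUS];	/* plays the local-flush IPI */

static void *cpu_thread(void *arg)	/* one thread per simulated target CPU */
{
	int cpu = (int)(long)arg;

	for (;;) {
		if (atomic_exchange(&ipi_pending[cpu], 0)) {
			/* a real CPU would run local_flush_tlb_all() here */
			atomic_fetch_add(&flush_count[cpu], 1);	/* like ia64_fetchadd(1, ..., acq) */
		}
		usleep(100);
	}
	return NULL;
}

static void flush_tlb_cpumask_demo(void)
{
	unsigned int counts[NCPUS];
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++)	/* snapshot current counts first */
		counts[cpu] = atomic_load(&flush_count[cpu]);

	for (cpu = 0; cpu < NCPUS; cpu++)	/* then "send" the flush IPIs */
		atomic_store(&ipi_pending[cpu], 1);

	for (cpu = 0; cpu < NCPUS; cpu++)	/* wait for every count to move */
		while (counts[cpu] == atomic_load(&flush_count[cpu]))
			usleep(FLUSH_DELAY);

	printf("all %d simulated CPUs acknowledged the flush\n", NCPUS);
}

int main(void)
{
	pthread_t tid[NCPUS];
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++)
		pthread_create(&tid[cpu], NULL, cpu_thread, (void *)(long)cpu);

	flush_tlb_cpumask_demo();
	return 0;	/* worker threads are torn down on process exit */
}

Because the snapshot is taken before the IPI is raised, any movement of a target's counter shows that the target has completed at least one full TLB flush after the request, which is all the initiator needs before returning. (The kernel routine additionally masks the counters with 0xffff because the shadow array stores 16-bit snapshots of the 32-bit counts; the demo keeps full-width values for simplicity.)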
