Diffstat (limited to 'kernel/irq_work.c')
| -rw-r--r-- | kernel/irq_work.c | 221 |
1 files changed, 131 insertions, 90 deletions
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index f16763ff848..a82170e2fa7 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -5,66 +5,49 @@
  * context. The enqueueing is NMI-safe.
  */
 
+#include <linux/bug.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/irq_work.h>
+#include <linux/percpu.h>
 #include <linux/hardirq.h>
+#include <linux/irqflags.h>
+#include <linux/sched.h>
+#include <linux/tick.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+#include <asm/processor.h>
 
-/*
- * An entry can be in one of four states:
- *
- * free	     NULL, 0 -> {claimed}       : free to be used
- * claimed   NULL, 3 -> {pending}       : claimed to be enqueued
- * pending   next, 3 -> {busy}          : queued, pending callback
- * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
- *
- * We use the lower two bits of the next pointer to keep PENDING and BUSY
- * flags.
- */
 
-#define IRQ_WORK_PENDING	1UL
-#define IRQ_WORK_BUSY		2UL
-#define IRQ_WORK_FLAGS		3UL
-
-static inline bool irq_work_is_set(struct irq_work *entry, int flags)
-{
-	return (unsigned long)entry->next & flags;
-}
-
-static inline struct irq_work *irq_work_next(struct irq_work *entry)
-{
-	unsigned long next = (unsigned long)entry->next;
-	next &= ~IRQ_WORK_FLAGS;
-	return (struct irq_work *)next;
-}
-
-static inline struct irq_work *next_flags(struct irq_work *entry, int flags)
-{
-	unsigned long next = (unsigned long)entry;
-	next |= flags;
-	return (struct irq_work *)next;
-}
-
-static DEFINE_PER_CPU(struct irq_work *, irq_work_list);
+static DEFINE_PER_CPU(struct llist_head, irq_work_list);
+static DEFINE_PER_CPU(int, irq_work_raised);
 
 /*
  * Claim the entry so that no one else will poke at it.
  */
-static bool irq_work_claim(struct irq_work *entry)
+static bool irq_work_claim(struct irq_work *work)
 {
-	struct irq_work *next, *nflags;
+	unsigned long flags, oflags, nflags;
 
-	do {
-		next = entry->next;
-		if ((unsigned long)next & IRQ_WORK_PENDING)
+	/*
+	 * Start with our best wish as a premise but only trust any
+	 * flag value after cmpxchg() result.
+	 */
+	flags = work->flags & ~IRQ_WORK_PENDING;
+	for (;;) {
+		nflags = flags | IRQ_WORK_FLAGS;
+		oflags = cmpxchg(&work->flags, flags, nflags);
+		if (oflags == flags)
+			break;
+		if (oflags & IRQ_WORK_PENDING)
 			return false;
-		nflags = next_flags(next, IRQ_WORK_FLAGS);
-	} while (cmpxchg(&entry->next, next, nflags) != next);
+		flags = oflags;
+		cpu_relax();
+	}
 
 	return true;
 }
 
-
 void __weak arch_irq_work_raise(void)
 {
 	/*
@@ -73,92 +56,150 @@ void __weak arch_irq_work_raise(void)
 }
 
 /*
- * Queue the entry and raise the IPI if needed.
+ * Enqueue the irq_work @entry unless it's already pending
+ * somewhere.
+ *
+ * Can be re-enqueued while the callback is still in progress.
  */
-static void __irq_work_queue(struct irq_work *entry)
+bool irq_work_queue(struct irq_work *work)
 {
-	struct irq_work **head, *next;
+	/* Only queue if not already pending */
+	if (!irq_work_claim(work))
+		return false;
 
-	head = &get_cpu_var(irq_work_list);
+	/* Queue the entry and raise the IPI if needed. */
+	preempt_disable();
 
-	do {
-		next = *head;
-		/* Can assign non-atomic because we keep the flags set. */
-		entry->next = next_flags(next, IRQ_WORK_FLAGS);
-	} while (cmpxchg(head, next, entry) != next);
+	llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
+
+	/*
+	 * If the work is not "lazy" or the tick is stopped, raise the irq
+	 * work interrupt (if supported by the arch), otherwise, just wait
+	 * for the next tick.
+	 */
+	if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
+		if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
+			arch_irq_work_raise();
+	}
 
-	/* The list was empty, raise self-interrupt to start processing. */
-	if (!irq_work_next(entry))
-		arch_irq_work_raise();
+	preempt_enable();
 
-	put_cpu_var(irq_work_list);
+	return true;
 }
+EXPORT_SYMBOL_GPL(irq_work_queue);
 
-/*
- * Enqueue the irq_work @entry, returns true on success, failure when the
- * @entry was already enqueued by someone else.
- *
- * Can be re-enqueued while the callback is still in progress.
- */
-bool irq_work_queue(struct irq_work *entry)
+bool irq_work_needs_cpu(void)
 {
-	if (!irq_work_claim(entry)) {
-		/*
-		 * Already enqueued, can't do!
-		 */
+	struct llist_head *this_list;
+
+	this_list = &__get_cpu_var(irq_work_list);
+	if (llist_empty(this_list))
 		return false;
-	}
 
-	__irq_work_queue(entry);
+	/* All work should have been flushed before going offline */
+	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
+
 	return true;
 }
-EXPORT_SYMBOL_GPL(irq_work_queue);
 
-/*
- * Run the irq_work entries on this cpu. Requires to be ran from hardirq
- * context with local IRQs disabled.
- */
-void irq_work_run(void)
+static void __irq_work_run(void)
 {
-	struct irq_work *list, **head;
+	unsigned long flags;
+	struct irq_work *work;
+	struct llist_head *this_list;
+	struct llist_node *llnode;
 
-	head = &__get_cpu_var(irq_work_list);
-	if (*head == NULL)
+
+	/*
+	 * Reset the "raised" state right before we check the list because
+	 * an NMI may enqueue after we find the list empty from the runner.
+	 */
+	__this_cpu_write(irq_work_raised, 0);
+	barrier();
+
+	this_list = &__get_cpu_var(irq_work_list);
+	if (llist_empty(this_list))
 		return;
 
-	BUG_ON(!in_irq());
 	BUG_ON(!irqs_disabled());
 
-	list = xchg(head, NULL);
-	while (list != NULL) {
-		struct irq_work *entry = list;
+	llnode = llist_del_all(this_list);
+	while (llnode != NULL) {
+		work = llist_entry(llnode, struct irq_work, llnode);
 
-		list = irq_work_next(list);
+		llnode = llist_next(llnode);
 
 		/*
-		 * Clear the PENDING bit, after this point the @entry
+		 * Clear the PENDING bit, after this point the @work
		 * can be re-used.
+		 * Make it immediately visible so that other CPUs trying
+		 * to claim that work don't rely on us to handle their data
+		 * while we are in the middle of the func.
 		 */
-		entry->next = next_flags(NULL, IRQ_WORK_BUSY);
-		entry->func(entry);
+		flags = work->flags & ~IRQ_WORK_PENDING;
+		xchg(&work->flags, flags);
+
+		work->func(work);
 		/*
 		 * Clear the BUSY bit and return to the free state if
 		 * no-one else claimed it meanwhile.
 		 */
-		cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL);
+		(void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
 	}
 }
+
+/*
+ * Run the irq_work entries on this cpu. Requires to be ran from hardirq
+ * context with local IRQs disabled.
+ */
+void irq_work_run(void)
+{
+	BUG_ON(!in_irq());
+	__irq_work_run();
+}
 EXPORT_SYMBOL_GPL(irq_work_run);
 
 /*
  * Synchronize against the irq_work @entry, ensures the entry is not
  * currently in use.
  */
-void irq_work_sync(struct irq_work *entry)
+void irq_work_sync(struct irq_work *work)
 {
 	WARN_ON_ONCE(irqs_disabled());
 
-	while (irq_work_is_set(entry, IRQ_WORK_BUSY))
+	while (work->flags & IRQ_WORK_BUSY)
 		cpu_relax();
 }
 EXPORT_SYMBOL_GPL(irq_work_sync);
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int irq_work_cpu_notify(struct notifier_block *self,
+			       unsigned long action, void *hcpu)
+{
+	long cpu = (long)hcpu;
+
+	switch (action) {
+	case CPU_DYING:
+		/* Called from stop_machine */
+		if (WARN_ON_ONCE(cpu != smp_processor_id()))
+			break;
+		__irq_work_run();
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cpu_notify;
+
+static __init int irq_work_init_cpu_notifier(void)
+{
+	cpu_notify.notifier_call = irq_work_cpu_notify;
+	cpu_notify.priority = 0;
+	register_cpu_notifier(&cpu_notify);
+	return 0;
+}
+device_initcall(irq_work_init_cpu_notifier);
+
+#endif /* CONFIG_HOTPLUG_CPU */
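For orientation, here is a minimal usage sketch of the API as it looks after this change (a hypothetical module; the `my_work` and `my_irq_work_func` names are invented and not part of the patch): `irq_work_queue()` now returns `false` when the claim fails because the item is already pending, the callback runs with IRQs disabled from the irq_work interrupt (or from the tick for `IRQ_WORK_LAZY` items while the tick is running), and `irq_work_sync()` spins until a busy callback has finished.

```c
/* Hypothetical example, not part of this commit. */
#include <linux/module.h>
#include <linux/irq_work.h>
#include <linux/printk.h>

static void my_irq_work_func(struct irq_work *work)
{
	/* Runs with IRQs disabled; must not sleep, keep it short. */
	pr_info("irq_work callback ran\n");
}

static struct irq_work my_work;

static int __init my_module_init(void)
{
	init_irq_work(&my_work, my_irq_work_func);

	/*
	 * Safe from NMI and hardirq context: returns true if the work
	 * was claimed and queued, false if it was already pending.
	 */
	if (!irq_work_queue(&my_work))
		pr_info("work was already pending\n");

	return 0;
}

static void __exit my_module_exit(void)
{
	/* Make sure the callback is no longer running before unload. */
	irq_work_sync(&my_work);
}

module_init(my_module_init);
module_exit(my_module_exit);
MODULE_LICENSE("GPL");
```

A lazy item would instead be initialized with `IRQ_WORK_LAZY` set in `.flags` alongside `.func`, so the queueing path above defers it to the next tick rather than raising the irq_work interrupt, unless the tick is stopped.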
