aboutsummaryrefslogtreecommitdiff
path: root/kernel/irq_work.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/irq_work.c')
-rw-r--r--kernel/irq_work.c221
1 files changed, 131 insertions, 90 deletions
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index f16763ff848..a82170e2fa7 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -5,66 +5,49 @@
* context. The enqueueing is NMI-safe.
*/
+#include <linux/bug.h>
#include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/irq_work.h>
+#include <linux/percpu.h>
#include <linux/hardirq.h>
+#include <linux/irqflags.h>
+#include <linux/sched.h>
+#include <linux/tick.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+#include <asm/processor.h>
-/*
- * An entry can be in one of four states:
- *
- * free NULL, 0 -> {claimed} : free to be used
- * claimed NULL, 3 -> {pending} : claimed to be enqueued
- * pending next, 3 -> {busy} : queued, pending callback
- * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed
- *
- * We use the lower two bits of the next pointer to keep PENDING and BUSY
- * flags.
- */
-#define IRQ_WORK_PENDING 1UL
-#define IRQ_WORK_BUSY 2UL
-#define IRQ_WORK_FLAGS 3UL
-
-static inline bool irq_work_is_set(struct irq_work *entry, int flags)
-{
- return (unsigned long)entry->next & flags;
-}
-
-static inline struct irq_work *irq_work_next(struct irq_work *entry)
-{
- unsigned long next = (unsigned long)entry->next;
- next &= ~IRQ_WORK_FLAGS;
- return (struct irq_work *)next;
-}
-
-static inline struct irq_work *next_flags(struct irq_work *entry, int flags)
-{
- unsigned long next = (unsigned long)entry;
- next |= flags;
- return (struct irq_work *)next;
-}
-
-static DEFINE_PER_CPU(struct irq_work *, irq_work_list);
+static DEFINE_PER_CPU(struct llist_head, irq_work_list);
+static DEFINE_PER_CPU(int, irq_work_raised);
/*
* Claim the entry so that no one else will poke at it.
*/
-static bool irq_work_claim(struct irq_work *entry)
+static bool irq_work_claim(struct irq_work *work)
{
- struct irq_work *next, *nflags;
+ unsigned long flags, oflags, nflags;
- do {
- next = entry->next;
- if ((unsigned long)next & IRQ_WORK_PENDING)
+ /*
+ * Start with our best wish as a premise but only trust any
+ * flag value after cmpxchg() result.
+ */
+ flags = work->flags & ~IRQ_WORK_PENDING;
+ for (;;) {
+ nflags = flags | IRQ_WORK_FLAGS;
+ oflags = cmpxchg(&work->flags, flags, nflags);
+ if (oflags == flags)
+ break;
+ if (oflags & IRQ_WORK_PENDING)
return false;
- nflags = next_flags(next, IRQ_WORK_FLAGS);
- } while (cmpxchg(&entry->next, next, nflags) != next);
+ flags = oflags;
+ cpu_relax();
+ }
return true;
}
-
void __weak arch_irq_work_raise(void)
{
/*
@@ -73,92 +56,150 @@ void __weak arch_irq_work_raise(void)
}
/*
- * Queue the entry and raise the IPI if needed.
+ * Enqueue the irq_work @entry unless it's already pending
+ * somewhere.
+ *
+ * Can be re-enqueued while the callback is still in progress.
*/
-static void __irq_work_queue(struct irq_work *entry)
+bool irq_work_queue(struct irq_work *work)
{
- struct irq_work **head, *next;
+ /* Only queue if not already pending */
+ if (!irq_work_claim(work))
+ return false;
- head = &get_cpu_var(irq_work_list);
+ /* Queue the entry and raise the IPI if needed. */
+ preempt_disable();
- do {
- next = *head;
- /* Can assign non-atomic because we keep the flags set. */
- entry->next = next_flags(next, IRQ_WORK_FLAGS);
- } while (cmpxchg(head, next, entry) != next);
+ llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
+
+ /*
+ * If the work is not "lazy" or the tick is stopped, raise the irq
+ * work interrupt (if supported by the arch), otherwise, just wait
+ * for the next tick.
+ */
+ if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
+ if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
+ arch_irq_work_raise();
+ }
- /* The list was empty, raise self-interrupt to start processing. */
- if (!irq_work_next(entry))
- arch_irq_work_raise();
+ preempt_enable();
- put_cpu_var(irq_work_list);
+ return true;
}
+EXPORT_SYMBOL_GPL(irq_work_queue);
-/*
- * Enqueue the irq_work @entry, returns true on success, failure when the
- * @entry was already enqueued by someone else.
- *
- * Can be re-enqueued while the callback is still in progress.
- */
-bool irq_work_queue(struct irq_work *entry)
+bool irq_work_needs_cpu(void)
{
- if (!irq_work_claim(entry)) {
- /*
- * Already enqueued, can't do!
- */
+ struct llist_head *this_list;
+
+ this_list = &__get_cpu_var(irq_work_list);
+ if (llist_empty(this_list))
return false;
- }
- __irq_work_queue(entry);
+ /* All work should have been flushed before going offline */
+ WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
+
return true;
}
-EXPORT_SYMBOL_GPL(irq_work_queue);
-/*
- * Run the irq_work entries on this cpu. Requires to be ran from hardirq
- * context with local IRQs disabled.
- */
-void irq_work_run(void)
+static void __irq_work_run(void)
{
- struct irq_work *list, **head;
+ unsigned long flags;
+ struct irq_work *work;
+ struct llist_head *this_list;
+ struct llist_node *llnode;
- head = &__get_cpu_var(irq_work_list);
- if (*head == NULL)
+
+ /*
+ * Reset the "raised" state right before we check the list because
+ * an NMI may enqueue after we find the list empty from the runner.
+ */
+ __this_cpu_write(irq_work_raised, 0);
+ barrier();
+
+ this_list = &__get_cpu_var(irq_work_list);
+ if (llist_empty(this_list))
return;
- BUG_ON(!in_irq());
BUG_ON(!irqs_disabled());
- list = xchg(head, NULL);
- while (list != NULL) {
- struct irq_work *entry = list;
+ llnode = llist_del_all(this_list);
+ while (llnode != NULL) {
+ work = llist_entry(llnode, struct irq_work, llnode);
- list = irq_work_next(list);
+ llnode = llist_next(llnode);
/*
- * Clear the PENDING bit, after this point the @entry
+ * Clear the PENDING bit, after this point the @work
* can be re-used.
+ * Make it immediately visible so that other CPUs trying
+ * to claim that work don't rely on us to handle their data
+ * while we are in the middle of the func.
*/
- entry->next = next_flags(NULL, IRQ_WORK_BUSY);
- entry->func(entry);
+ flags = work->flags & ~IRQ_WORK_PENDING;
+ xchg(&work->flags, flags);
+
+ work->func(work);
/*
* Clear the BUSY bit and return to the free state if
* no-one else claimed it meanwhile.
*/
- cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL);
+ (void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
}
}
+
+/*
+ * Run the irq_work entries on this cpu. Requires to be ran from hardirq
+ * context with local IRQs disabled.
+ */
+void irq_work_run(void)
+{
+ BUG_ON(!in_irq());
+ __irq_work_run();
+}
EXPORT_SYMBOL_GPL(irq_work_run);
/*
* Synchronize against the irq_work @entry, ensures the entry is not
* currently in use.
*/
-void irq_work_sync(struct irq_work *entry)
+void irq_work_sync(struct irq_work *work)
{
WARN_ON_ONCE(irqs_disabled());
- while (irq_work_is_set(entry, IRQ_WORK_BUSY))
+ while (work->flags & IRQ_WORK_BUSY)
cpu_relax();
}
EXPORT_SYMBOL_GPL(irq_work_sync);
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int irq_work_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ long cpu = (long)hcpu;
+
+ switch (action) {
+ case CPU_DYING:
+ /* Called from stop_machine */
+ if (WARN_ON_ONCE(cpu != smp_processor_id()))
+ break;
+ __irq_work_run();
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block cpu_notify;
+
+static __init int irq_work_init_cpu_notifier(void)
+{
+ cpu_notify.notifier_call = irq_work_cpu_notify;
+ cpu_notify.priority = 0;
+ register_cpu_notifier(&cpu_notify);
+ return 0;
+}
+device_initcall(irq_work_init_cpu_notifier);
+
+#endif /* CONFIG_HOTPLUG_CPU */