aboutsummaryrefslogtreecommitdiff
path: root/kernel/workqueue.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--kernel/workqueue.c1375
1 files changed, 914 insertions, 461 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 81f2457811e..cecd4ffe2c4 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -41,7 +41,9 @@
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <linux/idr.h>
+#include <linux/jhash.h>
#include <linux/hashtable.h>
+#include <linux/rculist.h>
#include "workqueue_internal.h"
@@ -63,7 +65,6 @@ enum {
* create_worker() is in progress.
*/
POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
- POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */
POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
POOL_FREEZING = 1 << 3, /* freeze in progress */
@@ -80,6 +81,7 @@ enum {
NR_STD_WORKER_POOLS = 2, /* # standard pools per cpu */
+ UNBOUND_POOL_HASH_ORDER = 6, /* hashed by pool->attrs */
BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */
MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */
@@ -118,13 +120,18 @@ enum {
* F: wq->flush_mutex protected.
*
* W: workqueue_lock protected.
+ *
+ * R: workqueue_lock protected for writes. Sched-RCU protected for reads.
+ *
+ * FR: wq->flush_mutex and workqueue_lock protected for writes. Sched-RCU
+ * protected for reads.
*/
/* struct worker is defined in workqueue_internal.h */
struct worker_pool {
spinlock_t lock; /* the pool lock */
- unsigned int cpu; /* I: the associated cpu */
+ int cpu; /* I: the associated cpu */
int id; /* I: pool ID */
unsigned int flags; /* X: flags */
@@ -142,15 +149,26 @@ struct worker_pool {
DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
/* L: hash of busy workers */
+ struct mutex manager_arb; /* manager arbitration */
struct mutex assoc_mutex; /* protect POOL_DISASSOCIATED */
struct ida worker_ida; /* L: for worker IDs */
+ struct workqueue_attrs *attrs; /* I: worker attributes */
+ struct hlist_node hash_node; /* R: unbound_pool_hash node */
+ int refcnt; /* refcnt for unbound pools */
+
/*
* The current concurrency level. As it's likely to be accessed
* from other CPUs during try_to_wake_up(), put it in a separate
* cacheline.
*/
atomic_t nr_running ____cacheline_aligned_in_smp;
+
+ /*
+ * Destruction of pool is sched-RCU protected to allow dereferences
+ * from get_work_pool().
+ */
+ struct rcu_head rcu;
} ____cacheline_aligned_in_smp;
/*
@@ -164,12 +182,24 @@ struct pool_workqueue {
struct workqueue_struct *wq; /* I: the owning workqueue */
int work_color; /* L: current color */
int flush_color; /* L: flushing color */
+ int refcnt; /* L: reference count */
int nr_in_flight[WORK_NR_COLORS];
/* L: nr of in_flight works */
int nr_active; /* L: nr of active works */
int max_active; /* L: max active works */
struct list_head delayed_works; /* L: delayed works */
-};
+ struct list_head pwqs_node; /* FR: node on wq->pwqs */
+ struct list_head mayday_node; /* W: node on wq->maydays */
+
+ /*
+ * Release of unbound pwq is punted to system_wq. See put_pwq()
+ * and pwq_unbound_release_workfn() for details. pool_workqueue
+ * itself is also sched-RCU protected so that the first pwq can be
+ * determined without grabbing workqueue_lock.
+ */
+ struct work_struct unbound_release_work;
+ struct rcu_head rcu;
+} __aligned(1 << WORK_STRUCT_FLAG_BITS);
/*
* Structure used to wait for workqueue flush.
@@ -181,37 +211,13 @@ struct wq_flusher {
};
/*
- * All cpumasks are assumed to be always set on UP and thus can't be
- * used to determine whether there's something to be done.
- */
-#ifdef CONFIG_SMP
-typedef cpumask_var_t mayday_mask_t;
-#define mayday_test_and_set_cpu(cpu, mask) \
- cpumask_test_and_set_cpu((cpu), (mask))
-#define mayday_clear_cpu(cpu, mask) cpumask_clear_cpu((cpu), (mask))
-#define for_each_mayday_cpu(cpu, mask) for_each_cpu((cpu), (mask))
-#define alloc_mayday_mask(maskp, gfp) zalloc_cpumask_var((maskp), (gfp))
-#define free_mayday_mask(mask) free_cpumask_var((mask))
-#else
-typedef unsigned long mayday_mask_t;
-#define mayday_test_and_set_cpu(cpu, mask) test_and_set_bit(0, &(mask))
-#define mayday_clear_cpu(cpu, mask) clear_bit(0, &(mask))
-#define for_each_mayday_cpu(cpu, mask) if ((cpu) = 0, (mask))
-#define alloc_mayday_mask(maskp, gfp) true
-#define free_mayday_mask(mask) do { } while (0)
-#endif
-
-/*
* The externally visible workqueue abstraction is an array of
* per-CPU workqueues:
*/
struct workqueue_struct {
unsigned int flags; /* W: WQ_* flags */
- union {
- struct pool_workqueue __percpu *pcpu;
- struct pool_workqueue *single;
- unsigned long v;
- } pool_wq; /* I: pwq's */
+ struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwq's */
+ struct list_head pwqs; /* FR: all pwqs of this wq */
struct list_head list; /* W: list of all workqueues */
struct mutex flush_mutex; /* protects wq flushing */
@@ -222,7 +228,7 @@ struct workqueue_struct {
struct list_head flusher_queue; /* F: flush waiters */
struct list_head flusher_overflow; /* F: flush overflow list */
- mayday_mask_t mayday_mask; /* cpus requesting rescue */
+ struct list_head maydays; /* W: pwqs requesting rescue */
struct worker *rescuer; /* I: rescue worker */
int nr_drainers; /* W: drain in progress */
@@ -233,6 +239,13 @@ struct workqueue_struct {
char name[]; /* I: workqueue name */
};
+static struct kmem_cache *pwq_cache;
+
+/* hash of all unbound pools keyed by pool->attrs */
+static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);
+
+static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
+
struct workqueue_struct *system_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_wq);
struct workqueue_struct *system_highpri_wq __read_mostly;
@@ -247,61 +260,52 @@ EXPORT_SYMBOL_GPL(system_freezable_wq);
#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>
-#define for_each_std_worker_pool(pool, cpu) \
- for ((pool) = &std_worker_pools(cpu)[0]; \
- (pool) < &std_worker_pools(cpu)[NR_STD_WORKER_POOLS]; (pool)++)
+#define assert_rcu_or_wq_lock() \
+ rcu_lockdep_assert(rcu_read_lock_sched_held() || \
+ lockdep_is_held(&workqueue_lock), \
+ "sched RCU or workqueue lock should be held")
+
+#define for_each_cpu_worker_pool(pool, cpu) \
+ for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
+ (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
+ (pool)++)
#define for_each_busy_worker(worker, i, pool) \
hash_for_each(pool->busy_hash, i, worker, hentry)
-static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
- unsigned int sw)
-{
- if (cpu < nr_cpu_ids) {
- if (sw & 1) {
- cpu = cpumask_next(cpu, mask);
- if (cpu < nr_cpu_ids)
- return cpu;
- }
- if (sw & 2)
- return WORK_CPU_UNBOUND;
- }
- return WORK_CPU_END;
-}
-
-static inline int __next_pwq_cpu(int cpu, const struct cpumask *mask,
- struct workqueue_struct *wq)
-{
- return __next_wq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2);
-}
-
-/*
- * CPU iterators
+/**
+ * for_each_pool - iterate through all worker_pools in the system
+ * @pool: iteration cursor
+ * @id: integer used for iteration
*
- * An extra cpu number is defined using an invalid cpu number
- * (WORK_CPU_UNBOUND) to host workqueues which are not bound to any
- * specific CPU. The following iterators are similar to for_each_*_cpu()
- * iterators but also considers the unbound CPU.
+ * This must be called either with workqueue_lock held or sched RCU read
+ * locked. If the pool needs to be used beyond the locking in effect, the
+ * caller is responsible for guaranteeing that the pool stays online.
*
- * for_each_wq_cpu() : possible CPUs + WORK_CPU_UNBOUND
- * for_each_online_wq_cpu() : online CPUs + WORK_CPU_UNBOUND
- * for_each_pwq_cpu() : possible CPUs for bound workqueues,
- * WORK_CPU_UNBOUND for unbound workqueues
+ * The if/else clause exists only for the lockdep assertion and can be
+ * ignored.
*/
-#define for_each_wq_cpu(cpu) \
- for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, 3); \
- (cpu) < WORK_CPU_END; \
- (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, 3))
-
-#define for_each_online_wq_cpu(cpu) \
- for ((cpu) = __next_wq_cpu(-1, cpu_online_mask, 3); \
- (cpu) < WORK_CPU_END; \
- (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3))
+#define for_each_pool(pool, id) \
+ idr_for_each_entry(&worker_pool_idr, pool, id) \
+ if (({ assert_rcu_or_wq_lock(); false; })) { } \
+ else
-#define for_each_pwq_cpu(cpu, wq) \
- for ((cpu) = __next_pwq_cpu(-1, cpu_possible_mask, (wq)); \
- (cpu) < WORK_CPU_END; \
- (cpu) = __next_pwq_cpu((cpu), cpu_possible_mask, (wq)))
+/**
+ * for_each_pwq - iterate through all pool_workqueues of the specified workqueue
+ * @pwq: iteration cursor
+ * @wq: the target workqueue
+ *
+ * This must be called either with workqueue_lock held or sched RCU read
+ * locked. If the pwq needs to be used beyond the locking in effect, the
+ * caller is responsible for guaranteeing that the pwq stays online.
+ *
+ * The if/else clause exists only for the lockdep assertion and can be
+ * ignored.
+ */
+#define for_each_pwq(pwq, wq) \
+ list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node) \
+ if (({ assert_rcu_or_wq_lock(); false; })) { } \
+ else
#ifdef CONFIG_DEBUG_OBJECTS_WORK
@@ -429,66 +433,46 @@ static bool workqueue_freezing; /* W: have wqs started freezing? */
* POOL_DISASSOCIATED set, and their workers have WORKER_UNBOUND set.
*/
static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
- cpu_std_worker_pools);
-static struct worker_pool unbound_std_worker_pools[NR_STD_WORKER_POOLS];
+ cpu_worker_pools);
-/* idr of all pools */
-static DEFINE_MUTEX(worker_pool_idr_mutex);
+/*
+ * idr of all pools. Modifications are protected by workqueue_lock. Read
+ * accesses are protected by sched-RCU protected.
+ */
static DEFINE_IDR(worker_pool_idr);
static int worker_thread(void *__worker);
-static struct worker_pool *std_worker_pools(int cpu)
-{
- if (cpu != WORK_CPU_UNBOUND)
- return per_cpu(cpu_std_worker_pools, cpu);
- else
- return unbound_std_worker_pools;
-}
-
-static int std_worker_pool_pri(struct worker_pool *pool)
-{
- return pool - std_worker_pools(pool->cpu);
-}
-
/* allocate ID and assign it to @pool */
static int worker_pool_assign_id(struct worker_pool *pool)
{
int ret;
- mutex_lock(&worker_pool_idr_mutex);
- idr_pre_get(&worker_pool_idr, GFP_KERNEL);
- ret = idr_get_new(&worker_pool_idr, pool, &pool->id);
- mutex_unlock(&worker_pool_idr_mutex);
+ do {
+ if (!idr_pre_get(&worker_pool_idr, GFP_KERNEL))
+ return -ENOMEM;
+
+ spin_lock_irq(&workqueue_lock);
+ ret = idr_get_new(&worker_pool_idr, pool, &pool->id);
+ spin_unlock_irq(&workqueue_lock);
+ } while (ret == -EAGAIN);
return ret;
}
-/*
- * Lookup worker_pool by id. The idr currently is built during boot and
- * never modified. Don't worry about locking for now.
+/**
+ * first_pwq - return the first pool_workqueue of the specified workqueue
+ * @wq: the target workqueue
+ *
+ * This must be called either with workqueue_lock held or sched RCU read
+ * locked. If the pwq needs to be used beyond the locking in effect, the
+ * caller is responsible for guaranteeing that the pwq stays online.
*/
-static struct worker_pool *worker_pool_by_id(int pool_id)
+static struct pool_workqueue *first_pwq(struct workqueue_struct *wq)
{
- return idr_find(&worker_pool_idr, pool_id);
-}
-
-static struct worker_pool *get_std_worker_pool(int cpu, bool highpri)
-{
- struct worker_pool *pools = std_worker_pools(cpu);
-
- return &pools[highpri];
-}
-
-static struct pool_workqueue *get_pwq(unsigned int cpu,
- struct workqueue_struct *wq)
-{
- if (!(wq->flags & WQ_UNBOUND)) {
- if (likely(cpu < nr_cpu_ids))
- return per_cpu_ptr(wq->pool_wq.pcpu, cpu);
- } else if (likely(cpu == WORK_CPU_UNBOUND))
- return wq->pool_wq.single;
- return NULL;
+ assert_rcu_or_wq_lock();
+ return list_first_or_null_rcu(&wq->pwqs, struct pool_workqueue,
+ pwqs_node);
}
static unsigned int work_color_to_flags(int color)
@@ -530,7 +514,7 @@ static int work_next_color(int color)
static inline void set_work_data(struct work_struct *work, unsigned long data,
unsigned long flags)
{
- BUG_ON(!work_pending(work));
+ WARN_ON_ONCE(!work_pending(work));
atomic_long_set(&work->data, data | flags | work_static(work));
}
@@ -582,13 +566,23 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work)
* @work: the work item of interest
*
* Return the worker_pool @work was last associated with. %NULL if none.
+ *
+ * Pools are created and destroyed under workqueue_lock, and allows read
+ * access under sched-RCU read lock. As such, this function should be
+ * called under workqueue_lock or with preemption disabled.
+ *
+ * All fields of the returned pool are accessible as long as the above
+ * mentioned locking is in effect. If the returned pool needs to be used
+ * beyond the critical section, the caller is responsible for ensuring the
+ * returned pool is and stays online.
*/
static struct worker_pool *get_work_pool(struct work_struct *work)
{
unsigned long data = atomic_long_read(&work->data);
- struct worker_pool *pool;
int pool_id;
+ assert_rcu_or_wq_lock();
+
if (data & WORK_STRUCT_PWQ)
return ((struct pool_workqueue *)
(data & WORK_STRUCT_WQ_DATA_MASK))->pool;
@@ -597,9 +591,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work)
if (pool_id == WORK_OFFQ_POOL_NONE)
return NULL;
- pool = worker_pool_by_id(pool_id);
- WARN_ON_ONCE(!pool);
- return pool;
+ return idr_find(&worker_pool_idr, pool_id);
}
/**
@@ -688,7 +680,7 @@ static bool need_to_manage_workers(struct worker_pool *pool)
/* Do we have too many workers and should some go away? */
static bool too_many_workers(struct worker_pool *pool)
{
- bool managing = pool->flags & POOL_MANAGING_WORKERS;
+ bool managing = mutex_is_locked(&pool->manager_arb);
int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
int nr_busy = pool->nr_workers - nr_idle;
@@ -743,7 +735,7 @@ static void wake_up_worker(struct worker_pool *pool)
* CONTEXT:
* spin_lock_irq(rq->lock)
*/
-void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
+void wq_worker_waking_up(struct task_struct *task, int cpu)
{
struct worker *worker = kthread_data(task);
@@ -768,8 +760,7 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
* RETURNS:
* Worker task on @cpu to wake up, %NULL if none.
*/
-struct task_struct *wq_worker_sleeping(struct task_struct *task,
- unsigned int cpu)
+struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
{
struct worker *worker = kthread_data(task), *to_wakeup = NULL;
struct worker_pool *pool;
@@ -785,7 +776,8 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task,
pool = worker->pool;
/* this can only happen on the local cpu */
- BUG_ON(cpu != raw_smp_processor_id());
+ if (WARN_ON_ONCE(cpu != raw_smp_processor_id()))
+ return NULL;
/*
* The counterpart of the following dec_and_test, implied mb,
@@ -960,6 +952,45 @@ static void move_linked_works(struct work_struct *work, struct list_head *head,
*nextp = n;
}
+/**
+ * get_pwq - get an extra reference on the specified pool_workqueue
+ * @pwq: pool_workqueue to get
+ *
+ * Obtain an extra reference on @pwq. The caller should guarantee that
+ * @pwq has positive refcnt and be holding the matching pool->lock.
+ */
+static void get_pwq(struct pool_workqueue *pwq)
+{
+ lockdep_assert_held(&pwq->pool->lock);
+ WARN_ON_ONCE(pwq->refcnt <= 0);
+ pwq->refcnt++;
+}
+
+/**
+ * put_pwq - put a pool_workqueue reference
+ * @pwq: pool_workqueue to put
+ *
+ * Drop a reference of @pwq. If its refcnt reaches zero, schedule its
+ * destruction. The caller should be holding the matching pool->lock.
+ */
+static void put_pwq(struct pool_workqueue *pwq)
+{
+ lockdep_assert_held(&pwq->pool->lock);
+ if (likely(--pwq->refcnt))
+ return;
+ if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
+ return;
+ /*
+ * @pwq can't be released under pool->lock, bounce to
+ * pwq_unbound_release_workfn(). This never recurses on the same
+ * pool->lock as this path is taken only for unbound workqueues and
+ * the release work item is scheduled on a per-cpu workqueue. To
+ * avoid lockdep warning, unbound pool->locks are given lockdep
+ * subclass of 1 in get_unbound_pool().
+ */
+ schedule_work(&pwq->unbound_release_work);
+}
+
static void pwq_activate_delayed_work(struct work_struct *work)
{
struct pool_workqueue *pwq = get_work_pwq(work);
@@ -991,9 +1022,9 @@ static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
*/
static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
{
- /* ignore uncolored works */
+ /* uncolored work items don't participate in flushing or nr_active */
if (color == WORK_NO_COLOR)
- return;
+ goto out_put;
pwq->nr_in_flight[color]--;
@@ -1006,11 +1037,11 @@ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
/* is flush in progress and are we at the flushing tip? */
if (likely(pwq->flush_color != color))
- return;
+ goto out_put;
/* are there still in-flight works? */
if (pwq->nr_in_flight[color])
- return;
+ goto out_put;
/* this pwq is done, clear flush_color */
pwq->flush_color = -1;
@@ -1021,6 +1052,8 @@ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
*/
if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
complete(&pwq->wq->first_flusher->done);
+out_put:
+ put_pwq(pwq);
}
/**
@@ -1143,6 +1176,7 @@ static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
/* we own @work, set data and link */
set_work_pwq(work, pwq, extra_flags);
list_add_tail(&work->entry, head);
+ get_pwq(pwq);
/*
* Ensure either worker_sched_deactivated() sees the above
@@ -1171,10 +1205,11 @@ static bool is_chained_work(struct workqueue_struct *wq)
return worker && worker->current_pwq->wq == wq;
}
-static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
+static void __queue_work(int cpu, struct workqueue_struct *wq,
struct work_struct *work)
{
struct pool_workqueue *pwq;
+ struct worker_pool *last_pool;
struct list_head *worklist;
unsigned int work_flags;
unsigned int req_cpu = cpu;
@@ -1190,48 +1225,62 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
debug_work_activate(work);
/* if dying, only works from the same workqueue are allowed */
- if (unlikely(wq->flags & WQ_DRAINING) &&
+ if (unlikely(wq->flags & __WQ_DRAINING) &&
WARN_ON_ONCE(!is_chained_work(wq)))
return;
-
- /* determine the pwq to use */
+retry:
+ /* pwq which will be used unless @work is executing elsewhere */
if (!(wq->flags & WQ_UNBOUND)) {
- struct worker_pool *last_pool;
-
if (cpu == WORK_CPU_UNBOUND)
cpu = raw_smp_processor_id();
+ pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
+ } else {
+ pwq = first_pwq(wq);
+ }
- /*
- * It's multi cpu. If @work was previously on a different
- * cpu, it might still be running there, in which case the
- * work needs to be queued on that cpu to guarantee
- * non-reentrancy.
- */
- pwq = get_pwq(cpu, wq);
- last_pool = get_work_pool(work);
-
- if (last_pool && last_pool != pwq->pool) {
- struct worker *worker;
+ /*
+ * If @work was previously on a different pool, it might still be
+ * running there, in which case the work needs to be queued on that
+ * pool to guarantee non-reentrancy.
+ */
+ last_pool = get_work_pool(work);
+ if (last_pool && last_pool != pwq->pool) {
+ struct worker *worker;
- spin_lock(&last_pool->lock);
+ spin_lock(&last_pool->lock);
- worker = find_worker_executing_work(last_pool, work);
+ worker = find_worker_executing_work(last_pool, work);
- if (worker && worker->current_pwq->wq == wq) {
- pwq = get_pwq(last_pool->cpu, wq);
- } else {
- /* meh... not running there, queue here */
- spin_unlock(&last_pool->lock);
- spin_lock(&pwq->pool->lock);
- }
+ if (worker && worker->current_pwq->wq == wq) {
+ pwq = worker->current_pwq;
} else {
+ /* meh... not running there, queue here */
+ spin_unlock(&last_pool->lock);
spin_lock(&pwq->pool->lock);
}
} else {
- pwq = get_pwq(WORK_CPU_UNBOUND, wq);
spin_lock(&pwq->pool->lock);
}
+ /*
+ * pwq is determined and locked. For unbound pools, we could have
+ * raced with pwq release and it could already be dead. If its
+ * refcnt is zero, repeat pwq selection. Note that pwqs never die
+ * without another pwq replacing it as the first pwq or while a
+ * work item is executing on it, so the retying is guaranteed to
+ * make forward-progress.
+ */
+ if (unlikely(!pwq->refcnt)) {
+ if (wq->flags & WQ_UNBOUND) {
+ spin_unlock(&pwq->pool->lock);
+ cpu_relax();
+ goto retry;
+ }
+ /* oops */
+ WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
+ wq->name, cpu);
+ }
+
/* pwq determined, queue */
trace_workqueue_queue_work(req_cpu, pwq, work);
@@ -1458,9 +1507,10 @@ static void worker_enter_idle(struct worker *worker)
{
struct worker_pool *pool = worker->pool;
- BUG_ON(worker->flags & WORKER_IDLE);
- BUG_ON(!list_empty(&worker->entry) &&
- (worker->hentry.next || worker->hentry.pprev));
+ if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) ||
+ WARN_ON_ONCE(!list_empty(&worker->entry) &&
+ (worker->hentry.next || worker->hentry.pprev)))
+ return;
/* can't use worker_set_flags(), also called from start_worker() */
worker->flags |= WORKER_IDLE;
@@ -1497,22 +1547,25 @@ static void worker_leave_idle(struct worker *worker)
{
struct worker_pool *pool = worker->pool;
- BUG_ON(!(worker->flags & WORKER_IDLE));
+ if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE)))
+ return;
worker_clr_flags(worker, WORKER_IDLE);
pool->nr_idle--;
list_del_init(&worker->entry);
}
/**
- * worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock pool
- * @worker: self
+ * worker_maybe_bind_and_lock - try to bind %current to worker_pool and lock it
+ * @pool: target worker_pool
+ *
+ * Bind %current to the cpu of @pool if it is associated and lock @pool.
*
* Works which are scheduled while the cpu is online must at least be
* scheduled to a worker which is bound to the cpu so that if they are
* flushed from cpu callbacks while cpu is going down, they are
* guaranteed to execute on the cpu.
*
- * This function is to be used by rogue workers and rescuers to bind
+ * This function is to be used by unbound workers and rescuers to bind
* themselves to the target cpu and may race with cpu going down or
* coming online. kthread_bind() can't be used because it may put the
* worker to already dead cpu and set_cpus_allowed_ptr() can't be used
@@ -1533,12 +1586,9 @@ static void worker_leave_idle(struct worker *worker)
* %true if the associated pool is online (@worker is successfully
* bound), %false if offline.
*/
-static bool worker_maybe_bind_and_lock(struct worker *worker)
+static bool worker_maybe_bind_and_lock(struct worker_pool *pool)
__acquires(&pool->lock)
{
- struct worker_pool *pool = worker->pool;
- struct task_struct *task = worker->task;
-
while (true) {
/*
* The following call may fail, succeed or succeed
@@ -1547,14 +1597,13 @@ __acquires(&pool->lock)
* against POOL_DISASSOCIATED.
*/
if (!(pool->flags & POOL_DISASSOCIATED))
- set_cpus_allowed_ptr(task, get_cpu_mask(pool->cpu));
+ set_cpus_allowed_ptr(current, pool->attrs->cpumask);
spin_lock_irq(&pool->lock);
if (pool->flags & POOL_DISASSOCIATED)
return false;
- if (task_cpu(task) == pool->cpu &&
- cpumask_equal(&current->cpus_allowed,
- get_cpu_mask(pool->cpu)))
+ if (task_cpu(current) == pool->cpu &&
+ cpumask_equal(&current->cpus_allowed, pool->attrs->cpumask))
return true;
spin_unlock_irq(&pool->lock);
@@ -1576,7 +1625,7 @@ __acquires(&pool->lock)
static void idle_worker_rebind(struct worker *worker)
{
/* CPU may go down again inbetween, clear UNBOUND only on success */
- if (worker_maybe_bind_and_lock(worker))
+ if (worker_maybe_bind_and_lock(worker->pool))
worker_clr_flags(worker, WORKER_UNBOUND);
/* rebind complete, become available again */
@@ -1594,7 +1643,7 @@ static void busy_worker_rebind_fn(struct work_struct *work)
{
struct worker *worker = container_of(work, struct worker, rebind_work);
- if (worker_maybe_bind_and_lock(worker))
+ if (worker_maybe_bind_and_lock(worker->pool))
worker_clr_flags(worker, WORKER_UNBOUND);
spin_unlock_irq(&worker->pool->lock);
@@ -1660,12 +1709,12 @@ static void rebind_workers(struct worker_pool *pool)
* wq doesn't really matter but let's keep @worker->pool
* and @pwq->pool consistent for sanity.
*/
- if (std_worker_pool_pri(worker->pool))
+ if (worker->pool->attrs->nice < 0)
wq = system_highpri_wq;
else
wq = system_wq;
- insert_work(get_pwq(pool->cpu, wq), rebind_work,
+ insert_work(per_cpu_ptr(wq->cpu_pwqs, pool->cpu), rebind_work,
worker->scheduled.next,
work_color_to_flags(WORK_NO_COLOR));
}
@@ -1702,7 +1751,7 @@ static struct worker *alloc_worker(void)
*/
static struct worker *create_worker(struct worker_pool *pool)
{
- const char *pri = std_worker_pool_pri(pool) ? "H" : "";
+ const char *pri = pool->attrs->nice < 0 ? "H" : "";
struct worker *worker = NULL;
int id = -1;
@@ -1722,34 +1771,34 @@ static struct worker *create_worker(struct worker_pool *pool)
worker->pool = pool;
worker->id = id;
- if (pool->cpu != WORK_CPU_UNBOUND)
+ if (pool->cpu >= 0)
worker->task = kthread_create_on_node(worker_thread,
worker, cpu_to_node(pool->cpu),
- "kworker/%u:%d%s", pool->cpu, id, pri);
+ "kworker/%d:%d%s", pool->cpu, id, pri);
else
worker->task = kthread_create(worker_thread, worker,
- "kworker/u:%d%s", id, pri);
+ "kworker/u%d:%d%s",
+ pool->id, id, pri);
if (IS_ERR(worker->task))
goto fail;
- if (std_worker_pool_pri(pool))
- set_user_nice(worker->task, HIGHPRI_NICE_LEVEL);
+ set_user_nice(worker->task, pool->attrs->nice);
+ set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
/*
- * Determine CPU binding of the new worker depending on
- * %POOL_DISASSOCIATED. The caller is responsible for ensuring the
- * flag remains stable across this function. See the comments
- * above the flag definition for details.
- *
- * As an unbound worker may later become a regular one if CPU comes
- * online, make sure every worker has %PF_THREAD_BOUND set.
+ * %PF_THREAD_BOUND is used to prevent userland from meddling with
+ * cpumask of workqueue workers. This is an abuse. We need
+ * %PF_NO_SETAFFINITY.
*/
- if (!(pool->flags & POOL_DISASSOCIATED)) {
- kthread_bind(worker->task, pool->cpu);
- } else {
- worker->task->flags |= PF_THREAD_BOUND;
+ worker->task->flags |= PF_THREAD_BOUND;
+
+ /*
+ * The caller is responsible for ensuring %POOL_DISASSOCIATED
+ * remains stable across this function. See the comments above the
+ * flag definition for details.
+ */
+ if (pool->flags & POOL_DISASSOCIATED)
worker->flags |= WORKER_UNBOUND;
- }
return worker;
fail:
@@ -1794,8 +1843,9 @@ static void destroy_worker(struct worker *worker)
int id = worker->id;
/* sanity check frenzy */
- BUG_ON(worker->current_work);
- BUG_ON(!list_empty(&worker->scheduled));
+ if (WARN_ON(worker->current_work) ||
+ WARN_ON(!list_empty(&worker->scheduled)))
+ return;
if (worker->flags & WORKER_STARTED)
pool->nr_workers--;
@@ -1840,23 +1890,21 @@ static void idle_worker_timeout(unsigned long __pool)
spin_unlock_irq(&pool->lock);
}
-static bool send_mayday(struct work_struct *work)
+static void send_mayday(struct work_struct *work)
{
struct pool_workqueue *pwq = get_work_pwq(work);
struct workqueue_struct *wq = pwq->wq;
- unsigned int cpu;
- if (!(wq->flags & WQ_RESCUER))
- return false;
+ lockdep_assert_held(&workqueue_lock);
+
+ if (!wq->rescuer)
+ return;
/* mayday mayday mayday */
- cpu = pwq->pool->cpu;
- /* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */
- if (cpu == WORK_CPU_UNBOUND)
- cpu = 0;
- if (!mayday_test_and_set_cpu(cpu, wq->mayday_mask))
+ if (list_empty(&pwq->mayday_node)) {
+ list_add_tail(&pwq->mayday_node, &wq->maydays);
wake_up_process(wq->rescuer->task);
- return true;
+ }
}
static void pool_mayday_timeout(unsigned long __pool)
@@ -1864,7 +1912,8 @@ static void pool_mayday_timeout(unsigned long __pool)
struct worker_pool *pool = (void *)__pool;
struct work_struct *work;
- spin_lock_irq(&pool->lock);
+ spin_lock_irq(&workqueue_lock); /* for wq->maydays */
+ spin_lock(&pool->lock);
if (need_to_create_worker(pool)) {
/*
@@ -1877,7 +1926,8 @@ static void pool_mayday_timeout(unsigned long __pool)
send_mayday(work);
}
- spin_unlock_irq(&pool->lock);
+ spin_unlock(&pool->lock);
+ spin_unlock_irq(&workqueue_lock);
mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
}
@@ -1924,7 +1974,8 @@ restart:
del_timer_sync(&pool->mayday_timer);
spin_lock_irq(&pool->lock);
start_worker(worker);
- BUG_ON(need_to_create_worker(pool));
+ if (WARN_ON_ONCE(need_to_create_worker(pool)))
+ goto restart;
return true;
}
@@ -2008,19 +2059,17 @@ static bool manage_workers(struct worker *worker)
struct worker_pool *pool = worker->pool;
bool ret = false;
- if (pool->flags & POOL_MANAGING_WORKERS)
+ if (!mutex_trylock(&pool->manager_arb))
return ret;
- pool->flags |= POOL_MANAGING_WORKERS;
-
/*
* To simplify both worker management and CPU hotplug, hold off
* management while hotplug is in progress. CPU hotplug path can't
- * grab %POOL_MANAGING_WORKERS to achieve this because that can
- * lead to idle worker depletion (all become busy thinking someone
- * else is managing) which in turn can result in deadlock under
- * extreme circumstances. Use @pool->assoc_mutex to synchronize
- * manager against CPU hotplug.
+ * grab @pool->manager_arb to achieve this because that can lead to
+ * idle worker depletion (all become busy thinking someone else is
+ * managing) which in turn can result in deadlock under extreme
+ * circumstances. Use @pool->assoc_mutex to synchronize manager
+ * against CPU hotplug.
*
* assoc_mutex would always be free unless CPU hotplug is in
* progress. trylock first without dropping @pool->lock.
@@ -2039,7 +2088,7 @@ static bool manage_workers(struct worker *worker)
* on @pool's current state. Try it and adjust
* %WORKER_UNBOUND accordingly.
*/
- if (worker_maybe_bind_and_lock(worker))
+ if (worker_maybe_bind_and_lock(pool))
worker->flags &= ~WORKER_UNBOUND;
else
worker->flags |= WORKER_UNBOUND;
@@ -2056,8 +2105,8 @@ static bool manage_workers(struct worker *worker)
ret |= maybe_destroy_workers(pool);
ret |= maybe_create_worker(pool);
- pool->flags &= ~POOL_MANAGING_WORKERS;
mutex_unlock(&pool->assoc_mutex);
+ mutex_unlock(&pool->manager_arb);
return ret;
}
@@ -2257,7 +2306,7 @@ recheck:
* preparing to process a work or actually processing it.
* Make sure nobody diddled with it while I was sleeping.
*/
- BUG_ON(!list_empty(&worker->scheduled));
+ WARN_ON_ONCE(!list_empty(&worker->scheduled));
/*
* When control reaches this point, we're guaranteed to have
@@ -2306,7 +2355,7 @@ sleep:
* @__rescuer: self
*
* Workqueue rescuer thread function. There's one rescuer for each
- * workqueue which has WQ_RESCUER set.
+ * workqueue which has WQ_MEM_RECLAIM set.
*
* Regular work processing on a pool may block trying to create a new
* worker which uses GFP_KERNEL allocation which has slight chance of
@@ -2325,8 +2374,6 @@ static int rescuer_thread(void *__rescuer)
struct worker *rescuer = __rescuer;
struct workqueue_struct *wq = rescuer->rescue_wq;
struct list_head *scheduled = &rescuer->scheduled;
- bool is_unbound = wq->flags & WQ_UNBOUND;
- unsigned int cpu;
set_user_nice(current, RESCUER_NICE_LEVEL);
@@ -2344,28 +2391,29 @@ repeat:
return 0;
}
- /*
- * See whether any cpu is asking for help. Unbounded
- * workqueues use cpu 0 in mayday_mask for CPU_UNBOUND.
- */
- for_each_mayday_cpu(cpu, wq->mayday_mask) {
- unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu;
- struct pool_workqueue *pwq = get_pwq(tcpu, wq);
+ /* see whether any pwq is asking for help */
+ spin_lock_irq(&workqueue_lock);
+
+ while (!list_empty(&wq->maydays)) {
+ struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
+ struct pool_workqueue, mayday_node);
struct worker_pool *pool = pwq->pool;
struct work_struct *work, *n;
__set_current_state(TASK_RUNNING);
- mayday_clear_cpu(cpu, wq->mayday_mask);
+ list_del_init(&pwq->mayday_node);
+
+ spin_unlock_irq(&workqueue_lock);
/* migrate to the target cpu if possible */
+ worker_maybe_bind_and_lock(pool);
rescuer->pool = pool;
- worker_maybe_bind_and_lock(rescuer);
/*
* Slurp in all works issued via this workqueue and
* process'em.
*/
- BUG_ON(!list_empty(&rescuer->scheduled));
+ WARN_ON_ONCE(!list_empty(&rescuer->scheduled));
list_for_each_entry_safe(work, n, &pool->worklist, entry)
if (get_work_pwq(work) == pwq)
move_linked_works(work, scheduled, &n);
@@ -2380,9 +2428,13 @@ repeat:
if (keep_working(pool))
wake_up_worker(pool);
- spin_unlock_irq(&pool->lock);
+ rescuer->pool = NULL;
+ spin_unlock(&pool->lock);
+ spin_lock(&workqueue_lock);
}
+ spin_unlock_irq(&workqueue_lock);
+
/* rescuers should never participate in concurrency management */
WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
schedule();
@@ -2496,21 +2548,22 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
int flush_color, int work_color)
{
bool wait = false;
- unsigned int cpu;
+ struct pool_workqueue *pwq;
if (flush_color >= 0) {
- BUG_ON(atomic_read(&wq->nr_pwqs_to_flush));
+ WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
atomic_set(&wq->nr_pwqs_to_flush, 1);
}
- for_each_pwq_cpu(cpu, wq) {
- struct pool_workqueue *pwq = get_pwq(cpu, wq);
+ local_irq_disable();
+
+ for_each_pwq(pwq, wq) {
struct worker_pool *pool = pwq->pool;
- spin_lock_irq(&pool->lock);
+ spin_lock(&pool->lock);
if (flush_color >= 0) {
- BUG_ON(pwq->flush_color != -1);
+ WARN_ON_ONCE(pwq->flush_color != -1);
if (pwq->nr_in_flight[flush_color]) {
pwq->flush_color = flush_color;
@@ -2520,13 +2573,15 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
}
if (work_color >= 0) {
- BUG_ON(work_color != work_next_color(pwq->work_color));
+ WARN_ON_ONCE(work_color != work_next_color(pwq->work_color));
pwq->work_color = work_color;
}
- spin_unlock_irq(&pool->lock);
+ spin_unlock(&pool->lock);
}
+ local_irq_enable();
+
if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
complete(&wq->first_flusher->done);
@@ -2568,13 +2623,13 @@ void flush_workqueue(struct workqueue_struct *wq)
* becomes our flush_color and work_color is advanced
* by one.