aboutsummaryrefslogtreecommitdiff
path: root/kernel/workqueue.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--kernel/workqueue.c2828
1 files changed, 1967 insertions, 861 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index b48cd597145..154aa12af48 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -41,7 +41,11 @@
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <linux/idr.h>
+#include <linux/jhash.h>
#include <linux/hashtable.h>
+#include <linux/rculist.h>
+#include <linux/nodemask.h>
+#include <linux/moduleparam.h>
#include "workqueue_internal.h"
@@ -58,12 +62,11 @@ enum {
* %WORKER_UNBOUND set and concurrency management disabled, and may
* be executing on any CPU. The pool behaves as an unbound one.
*
- * Note that DISASSOCIATED can be flipped only while holding
- * assoc_mutex to avoid changing binding state while
+ * Note that DISASSOCIATED should be flipped only while holding
+ * manager_mutex to avoid changing binding state while
* create_worker() is in progress.
*/
POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
- POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */
POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
POOL_FREEZING = 1 << 3, /* freeze in progress */
@@ -74,12 +77,14 @@ enum {
WORKER_PREP = 1 << 3, /* preparing to run works */
WORKER_CPU_INTENSIVE = 1 << 6, /* cpu intensive */
WORKER_UNBOUND = 1 << 7, /* worker is unbound */
+ WORKER_REBOUND = 1 << 8, /* worker was rebound */
- WORKER_NOT_RUNNING = WORKER_PREP | WORKER_UNBOUND |
- WORKER_CPU_INTENSIVE,
+ WORKER_NOT_RUNNING = WORKER_PREP | WORKER_CPU_INTENSIVE |
+ WORKER_UNBOUND | WORKER_REBOUND,
NR_STD_WORKER_POOLS = 2, /* # standard pools per cpu */
+ UNBOUND_POOL_HASH_ORDER = 6, /* hashed by pool->attrs */
BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */
MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */
@@ -97,6 +102,8 @@ enum {
*/
RESCUER_NICE_LEVEL = -20,
HIGHPRI_NICE_LEVEL = -20,
+
+ WQ_NAME_LEN = 24,
};
/*
@@ -115,16 +122,26 @@ enum {
* cpu or grabbing pool->lock is enough for read access. If
* POOL_DISASSOCIATED is set, it's identical to L.
*
- * F: wq->flush_mutex protected.
+ * MG: pool->manager_mutex and pool->lock protected. Writes require both
+ * locks. Reads can happen under either lock.
+ *
+ * PL: wq_pool_mutex protected.
+ *
+ * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads.
+ *
+ * WQ: wq->mutex protected.
*
- * W: workqueue_lock protected.
+ * WR: wq->mutex protected for writes. Sched-RCU protected for reads.
+ *
+ * MD: wq_mayday_lock protected.
*/
/* struct worker is defined in workqueue_internal.h */
struct worker_pool {
spinlock_t lock; /* the pool lock */
- unsigned int cpu; /* I: the associated cpu */
+ int cpu; /* I: the associated cpu */
+ int node; /* I: the associated node ID */
int id; /* I: pool ID */
unsigned int flags; /* X: flags */
@@ -138,12 +155,18 @@ struct worker_pool {
struct timer_list idle_timer; /* L: worker idle timeout */
struct timer_list mayday_timer; /* L: SOS timer for workers */
- /* workers are chained either in busy_hash or idle_list */
+ /* a workers is either on busy_hash or idle_list, or the manager */
DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
/* L: hash of busy workers */
- struct mutex assoc_mutex; /* protect POOL_DISASSOCIATED */
- struct ida worker_ida; /* L: for worker IDs */
+ /* see manage_workers() for details on the two manager mutexes */
+ struct mutex manager_arb; /* manager arbitration */
+ struct mutex manager_mutex; /* manager exclusion */
+ struct idr worker_idr; /* MG: worker IDs and iteration */
+
+ struct workqueue_attrs *attrs; /* I: worker attributes */
+ struct hlist_node hash_node; /* PL: unbound_pool_hash node */
+ int refcnt; /* PL: refcnt for unbound pools */
/*
* The current concurrency level. As it's likely to be accessed
@@ -151,6 +174,12 @@ struct worker_pool {
* cacheline.
*/
atomic_t nr_running ____cacheline_aligned_in_smp;
+
+ /*
+ * Destruction of pool is sched-RCU protected to allow dereferences
+ * from get_work_pool().
+ */
+ struct rcu_head rcu;
} ____cacheline_aligned_in_smp;
/*
@@ -164,75 +193,107 @@ struct pool_workqueue {
struct workqueue_struct *wq; /* I: the owning workqueue */
int work_color; /* L: current color */
int flush_color; /* L: flushing color */
+ int refcnt; /* L: reference count */
int nr_in_flight[WORK_NR_COLORS];
/* L: nr of in_flight works */
int nr_active; /* L: nr of active works */
int max_active; /* L: max active works */
struct list_head delayed_works; /* L: delayed works */
-};
+ struct list_head pwqs_node; /* WR: node on wq->pwqs */
+ struct list_head mayday_node; /* MD: node on wq->maydays */
+
+ /*
+ * Release of unbound pwq is punted to system_wq. See put_pwq()
+ * and pwq_unbound_release_workfn() for details. pool_workqueue
+ * itself is also sched-RCU protected so that the first pwq can be
+ * determined without grabbing wq->mutex.
+ */
+ struct work_struct unbound_release_work;
+ struct rcu_head rcu;
+} __aligned(1 << WORK_STRUCT_FLAG_BITS);
/*
* Structure used to wait for workqueue flush.
*/
struct wq_flusher {
- struct list_head list; /* F: list of flushers */
- int flush_color; /* F: flush color waiting for */
+ struct list_head list; /* WQ: list of flushers */
+ int flush_color; /* WQ: flush color waiting for */
struct completion done; /* flush completion */
};
-/*
- * All cpumasks are assumed to be always set on UP and thus can't be
- * used to determine whether there's something to be done.
- */
-#ifdef CONFIG_SMP
-typedef cpumask_var_t mayday_mask_t;
-#define mayday_test_and_set_cpu(cpu, mask) \
- cpumask_test_and_set_cpu((cpu), (mask))
-#define mayday_clear_cpu(cpu, mask) cpumask_clear_cpu((cpu), (mask))
-#define for_each_mayday_cpu(cpu, mask) for_each_cpu((cpu), (mask))
-#define alloc_mayday_mask(maskp, gfp) zalloc_cpumask_var((maskp), (gfp))
-#define free_mayday_mask(mask) free_cpumask_var((mask))
-#else
-typedef unsigned long mayday_mask_t;
-#define mayday_test_and_set_cpu(cpu, mask) test_and_set_bit(0, &(mask))
-#define mayday_clear_cpu(cpu, mask) clear_bit(0, &(mask))
-#define for_each_mayday_cpu(cpu, mask) if ((cpu) = 0, (mask))
-#define alloc_mayday_mask(maskp, gfp) true
-#define free_mayday_mask(mask) do { } while (0)
-#endif
+struct wq_device;
/*
- * The externally visible workqueue abstraction is an array of
- * per-CPU workqueues:
+ * The externally visible workqueue. It relays the issued work items to
+ * the appropriate worker_pool through its pool_workqueues.
*/
struct workqueue_struct {
- unsigned int flags; /* W: WQ_* flags */
- union {
- struct pool_workqueue __percpu *pcpu;
- struct pool_workqueue *single;
- unsigned long v;
- } pool_wq; /* I: pwq's */
- struct list_head list; /* W: list of all workqueues */
-
- struct mutex flush_mutex; /* protects wq flushing */
- int work_color; /* F: current work color */
- int flush_color; /* F: current flush color */
+ struct list_head pwqs; /* WR: all pwqs of this wq */
+ struct list_head list; /* PL: list of all workqueues */
+
+ struct mutex mutex; /* protects this wq */
+ int work_color; /* WQ: current work color */
+ int flush_color; /* WQ: current flush color */
atomic_t nr_pwqs_to_flush; /* flush in progress */
- struct wq_flusher *first_flusher; /* F: first flusher */
- struct list_head flusher_queue; /* F: flush waiters */
- struct list_head flusher_overflow; /* F: flush overflow list */
+ struct wq_flusher *first_flusher; /* WQ: first flusher */
+ struct list_head flusher_queue; /* WQ: flush waiters */
+ struct list_head flusher_overflow; /* WQ: flush overflow list */
- mayday_mask_t mayday_mask; /* cpus requesting rescue */
+ struct list_head maydays; /* MD: pwqs requesting rescue */
struct worker *rescuer; /* I: rescue worker */
- int nr_drainers; /* W: drain in progress */
- int saved_max_active; /* W: saved pwq max_active */
+ int nr_drainers; /* WQ: drain in progress */
+ int saved_max_active; /* WQ: saved pwq max_active */
+
+ struct workqueue_attrs *unbound_attrs; /* WQ: only for unbound wqs */
+ struct pool_workqueue *dfl_pwq; /* WQ: only for unbound wqs */
+
+#ifdef CONFIG_SYSFS
+ struct wq_device *wq_dev; /* I: for sysfs interface */
+#endif
#ifdef CONFIG_LOCKDEP
struct lockdep_map lockdep_map;
#endif
- char name[]; /* I: workqueue name */
+ char name[WQ_NAME_LEN]; /* I: workqueue name */
+
+ /* hot fields used during command issue, aligned to cacheline */
+ unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */
+ struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
+ struct pool_workqueue __rcu *numa_pwq_tbl[]; /* FR: unbound pwqs indexed by node */
};
+static struct kmem_cache *pwq_cache;
+
+static int wq_numa_tbl_len; /* highest possible NUMA node id + 1 */
+static cpumask_var_t *wq_numa_possible_cpumask;
+ /* possible CPUs of each node */
+
+static bool wq_disable_numa;
+module_param_named(disable_numa, wq_disable_numa, bool, 0444);
+
+static bool wq_numa_enabled; /* unbound NUMA affinity enabled */
+
+/* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
+static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
+
+static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */
+static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
+
+static LIST_HEAD(workqueues); /* PL: list of all workqueues */
+static bool workqueue_freezing; /* PL: have wqs started freezing? */
+
+/* the per-cpu worker pools */
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
+ cpu_worker_pools);
+
+static DEFINE_IDR(worker_pool_idr); /* PR: idr of all pools */
+
+/* PL: hash of all unbound pools keyed by pool->attrs */
+static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);
+
+/* I: attributes used when instantiating standard unbound pools on demand */
+static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
+
struct workqueue_struct *system_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_wq);
struct workqueue_struct *system_highpri_wq __read_mostly;
@@ -244,64 +305,87 @@ EXPORT_SYMBOL_GPL(system_unbound_wq);
struct workqueue_struct *system_freezable_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_wq);
+static int worker_thread(void *__worker);
+static void copy_workqueue_attrs(struct workqueue_attrs *to,
+ const struct workqueue_attrs *from);
+
#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>
-#define for_each_std_worker_pool(pool, cpu) \
- for ((pool) = &std_worker_pools(cpu)[0]; \
- (pool) < &std_worker_pools(cpu)[NR_STD_WORKER_POOLS]; (pool)++)
+#define assert_rcu_or_pool_mutex() \
+ rcu_lockdep_assert(rcu_read_lock_sched_held() || \
+ lockdep_is_held(&wq_pool_mutex), \
+ "sched RCU or wq_pool_mutex should be held")
-#define for_each_busy_worker(worker, i, pool) \
- hash_for_each(pool->busy_hash, i, worker, hentry)
+#define assert_rcu_or_wq_mutex(wq) \
+ rcu_lockdep_assert(rcu_read_lock_sched_held() || \
+ lockdep_is_held(&wq->mutex), \
+ "sched RCU or wq->mutex should be held")
-static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
- unsigned int sw)
-{
- if (cpu < nr_cpu_ids) {
- if (sw & 1) {
- cpu = cpumask_next(cpu, mask);
- if (cpu < nr_cpu_ids)
- return cpu;
- }
- if (sw & 2)
- return WORK_CPU_UNBOUND;
- }
- return WORK_CPU_END;
-}
+#ifdef CONFIG_LOCKDEP
+#define assert_manager_or_pool_lock(pool) \
+ WARN_ONCE(debug_locks && \
+ !lockdep_is_held(&(pool)->manager_mutex) && \
+ !lockdep_is_held(&(pool)->lock), \
+ "pool->manager_mutex or ->lock should be held")
+#else
+#define assert_manager_or_pool_lock(pool) do { } while (0)
+#endif
-static inline int __next_pwq_cpu(int cpu, const struct cpumask *mask,
- struct workqueue_struct *wq)
-{
- return __next_wq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2);
-}
+#define for_each_cpu_worker_pool(pool, cpu) \
+ for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
+ (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
+ (pool)++)
-/*
- * CPU iterators
+/**
+ * for_each_pool - iterate through all worker_pools in the system
+ * @pool: iteration cursor
+ * @pi: integer used for iteration
*
- * An extra cpu number is defined using an invalid cpu number
- * (WORK_CPU_UNBOUND) to host workqueues which are not bound to any
- * specific CPU. The following iterators are similar to for_each_*_cpu()
- * iterators but also considers the unbound CPU.
+ * This must be called either with wq_pool_mutex held or sched RCU read
+ * locked. If the pool needs to be used beyond the locking in effect, the
+ * caller is responsible for guaranteeing that the pool stays online.
*
- * for_each_wq_cpu() : possible CPUs + WORK_CPU_UNBOUND
- * for_each_online_wq_cpu() : online CPUs + WORK_CPU_UNBOUND
- * for_each_pwq_cpu() : possible CPUs for bound workqueues,
- * WORK_CPU_UNBOUND for unbound workqueues
+ * The if/else clause exists only for the lockdep assertion and can be
+ * ignored.
*/
-#define for_each_wq_cpu(cpu) \
- for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, 3); \
- (cpu) < WORK_CPU_END; \
- (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, 3))
+#define for_each_pool(pool, pi) \
+ idr_for_each_entry(&worker_pool_idr, pool, pi) \
+ if (({ assert_rcu_or_pool_mutex(); false; })) { } \
+ else
-#define for_each_online_wq_cpu(cpu) \
- for ((cpu) = __next_wq_cpu(-1, cpu_online_mask, 3); \
- (cpu) < WORK_CPU_END; \
- (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3))
+/**
+ * for_each_pool_worker - iterate through all workers of a worker_pool
+ * @worker: iteration cursor
+ * @wi: integer used for iteration
+ * @pool: worker_pool to iterate workers of
+ *
+ * This must be called with either @pool->manager_mutex or ->lock held.
+ *
+ * The if/else clause exists only for the lockdep assertion and can be
+ * ignored.
+ */
+#define for_each_pool_worker(worker, wi, pool) \
+ idr_for_each_entry(&(pool)->worker_idr, (worker), (wi)) \
+ if (({ assert_manager_or_pool_lock((pool)); false; })) { } \
+ else
-#define for_each_pwq_cpu(cpu, wq) \
- for ((cpu) = __next_pwq_cpu(-1, cpu_possible_mask, (wq)); \
- (cpu) < WORK_CPU_END; \
- (cpu) = __next_pwq_cpu((cpu), cpu_possible_mask, (wq)))
+/**
+ * for_each_pwq - iterate through all pool_workqueues of the specified workqueue
+ * @pwq: iteration cursor
+ * @wq: the target workqueue
+ *
+ * This must be called either with wq->mutex held or sched RCU read locked.
+ * If the pwq needs to be used beyond the locking in effect, the caller is
+ * responsible for guaranteeing that the pwq stays online.
+ *
+ * The if/else clause exists only for the lockdep assertion and can be
+ * ignored.
+ */
+#define for_each_pwq(pwq, wq) \
+ list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node) \
+ if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
+ else
#ifdef CONFIG_DEBUG_OBJECTS_WORK
@@ -419,77 +503,35 @@ static inline void debug_work_activate(struct work_struct *work) { }
static inline void debug_work_deactivate(struct work_struct *work) { }
#endif
-/* Serializes the accesses to the list of workqueues. */
-static DEFINE_SPINLOCK(workqueue_lock);
-static LIST_HEAD(workqueues);
-static bool workqueue_freezing; /* W: have wqs started freezing? */
-
-/*
- * The CPU and unbound standard worker pools. The unbound ones have
- * POOL_DISASSOCIATED set, and their workers have WORKER_UNBOUND set.
- */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
- cpu_std_worker_pools);
-static struct worker_pool unbound_std_worker_pools[NR_STD_WORKER_POOLS];
-
-/* idr of all pools */
-static DEFINE_MUTEX(worker_pool_idr_mutex);
-static DEFINE_IDR(worker_pool_idr);
-
-static int worker_thread(void *__worker);
-
-static struct worker_pool *std_worker_pools(int cpu)
-{
- if (cpu != WORK_CPU_UNBOUND)
- return per_cpu(cpu_std_worker_pools, cpu);
- else
- return unbound_std_worker_pools;
-}
-
-static int std_worker_pool_pri(struct worker_pool *pool)
-{
- return pool - std_worker_pools(pool->cpu);
-}
-
/* allocate ID and assign it to @pool */
static int worker_pool_assign_id(struct worker_pool *pool)
{
int ret;
- mutex_lock(&worker_pool_idr_mutex);
+ lockdep_assert_held(&wq_pool_mutex);
+
ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_KERNEL);
- if (ret >= 0)
+ if (ret >= 0) {
pool->id = ret;
- mutex_unlock(&worker_pool_idr_mutex);
-
- return ret < 0 ? ret : 0;
+ return 0;
+ }
+ return ret;
}
-/*
- * Lookup worker_pool by id. The idr currently is built during boot and
- * never modified. Don't worry about locking for now.
+/**
+ * unbound_pwq_by_node - return the unbound pool_workqueue for the given node
+ * @wq: the target workqueue
+ * @node: the node ID
+ *
+ * This must be called either with pwq_lock held or sched RCU read locked.
+ * If the pwq needs to be used beyond the locking in effect, the caller is
+ * responsible for guaranteeing that the pwq stays online.
*/
-static struct worker_pool *worker_pool_by_id(int pool_id)
+static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
+ int node)
{
- return idr_find(&worker_pool_idr, pool_id);
-}
-
-static struct worker_pool *get_std_worker_pool(int cpu, bool highpri)
-{
- struct worker_pool *pools = std_worker_pools(cpu);
-
- return &pools[highpri];
-}
-
-static struct pool_workqueue *get_pwq(unsigned int cpu,
- struct workqueue_struct *wq)
-{
- if (!(wq->flags & WQ_UNBOUND)) {
- if (likely(cpu < nr_cpu_ids))
- return per_cpu_ptr(wq->pool_wq.pcpu, cpu);
- } else if (likely(cpu == WORK_CPU_UNBOUND))
- return wq->pool_wq.single;
- return NULL;
+ assert_rcu_or_wq_mutex(wq);
+ return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
}
static unsigned int work_color_to_flags(int color)
@@ -531,7 +573,7 @@ static int work_next_color(int color)
static inline void set_work_data(struct work_struct *work, unsigned long data,
unsigned long flags)
{
- BUG_ON(!work_pending(work));
+ WARN_ON_ONCE(!work_pending(work));
atomic_long_set(&work->data, data | flags | work_static(work));
}
@@ -583,13 +625,23 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work)
* @work: the work item of interest
*
* Return the worker_pool @work was last associated with. %NULL if none.
+ *
+ * Pools are created and destroyed under wq_pool_mutex, and allows read
+ * access under sched-RCU read lock. As such, this function should be
+ * called under wq_pool_mutex or with preemption disabled.
+ *
+ * All fields of the returned pool are accessible as long as the above
+ * mentioned locking is in effect. If the returned pool needs to be used
+ * beyond the critical section, the caller is responsible for ensuring the
+ * returned pool is and stays online.
*/
static struct worker_pool *get_work_pool(struct work_struct *work)
{
unsigned long data = atomic_long_read(&work->data);
- struct worker_pool *pool;
int pool_id;
+ assert_rcu_or_pool_mutex();
+
if (data & WORK_STRUCT_PWQ)
return ((struct pool_workqueue *)
(data & WORK_STRUCT_WQ_DATA_MASK))->pool;
@@ -598,9 +650,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work)
if (pool_id == WORK_OFFQ_POOL_NONE)
return NULL;
- pool = worker_pool_by_id(pool_id);
- WARN_ON_ONCE(!pool);
- return pool;
+ return idr_find(&worker_pool_idr, pool_id);
}
/**
@@ -689,7 +739,7 @@ static bool need_to_manage_workers(struct worker_pool *pool)
/* Do we have too many workers and should some go away? */
static bool too_many_workers(struct worker_pool *pool)
{
- bool managing = pool->flags & POOL_MANAGING_WORKERS;
+ bool managing = mutex_is_locked(&pool->manager_arb);
int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
int nr_busy = pool->nr_workers - nr_idle;
@@ -744,7 +794,7 @@ static void wake_up_worker(struct worker_pool *pool)
* CONTEXT:
* spin_lock_irq(rq->lock)
*/
-void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
+void wq_worker_waking_up(struct task_struct *task, int cpu)
{
struct worker *worker = kthread_data(task);
@@ -769,8 +819,7 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
* RETURNS:
* Worker task on @cpu to wake up, %NULL if none.
*/
-struct task_struct *wq_worker_sleeping(struct task_struct *task,
- unsigned int cpu)
+struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
{
struct worker *worker = kthread_data(task), *to_wakeup = NULL;
struct worker_pool *pool;
@@ -786,7 +835,8 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task,
pool = worker->pool;
/* this can only happen on the local cpu */
- BUG_ON(cpu != raw_smp_processor_id());
+ if (WARN_ON_ONCE(cpu != raw_smp_processor_id()))
+ return NULL;
/*
* The counterpart of the following dec_and_test, implied mb,
@@ -891,13 +941,12 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
* recycled work item as currently executing and make it wait until the
* current execution finishes, introducing an unwanted dependency.
*
- * This function checks the work item address, work function and workqueue
- * to avoid false positives. Note that this isn't complete as one may
- * construct a work function which can introduce dependency onto itself
- * through a recycled work item. Well, if somebody wants to shoot oneself
- * in the foot that badly, there's only so much we can do, and if such
- * deadlock actually occurs, it should be easy to locate the culprit work
- * function.
+ * This function checks the work item address and work function to avoid
+ * false positives. Note that this isn't complete as one may construct a
+ * work function which can introduce dependency onto itself through a
+ * recycled work item. Well, if somebody wants to shoot oneself in the
+ * foot that badly, there's only so much we can do, and if such deadlock
+ * actually occurs, it should be easy to locate the culprit work function.
*
* CONTEXT:
* spin_lock_irq(pool->lock).
@@ -961,6 +1010,64 @@ static void move_linked_works(struct work_struct *work, struct list_head *head,
*nextp = n;
}
+/**
+ * get_pwq - get an extra reference on the specified pool_workqueue
+ * @pwq: pool_workqueue to get
+ *
+ * Obtain an extra reference on @pwq. The caller should guarantee that
+ * @pwq has positive refcnt and be holding the matching pool->lock.
+ */
+static void get_pwq(struct pool_workqueue *pwq)
+{
+ lockdep_assert_held(&pwq->pool->lock);
+ WARN_ON_ONCE(pwq->refcnt <= 0);
+ pwq->refcnt++;
+}
+
+/**
+ * put_pwq - put a pool_workqueue reference
+ * @pwq: pool_workqueue to put
+ *
+ * Drop a reference of @pwq. If its refcnt reaches zero, schedule its
+ * destruction. The caller should be holding the matching pool->lock.
+ */
+static void put_pwq(struct pool_workqueue *pwq)
+{
+ lockdep_assert_held(&pwq->pool->lock);
+ if (likely(--pwq->refcnt))
+ return;
+ if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
+ return;
+ /*
+ * @pwq can't be released under pool->lock, bounce to
+ * pwq_unbound_release_workfn(). This never recurses on the same
+ * pool->lock as this path is taken only for unbound workqueues and
+ * the release work item is scheduled on a per-cpu workqueue. To
+ * avoid lockdep warning, unbound pool->locks are given lockdep
+ * subclass of 1 in get_unbound_pool().
+ */
+ schedule_work(&pwq->unbound_release_work);
+}
+
+/**
+ * put_pwq_unlocked - put_pwq() with surrounding pool lock/unlock
+ * @pwq: pool_workqueue to put (can be %NULL)
+ *
+ * put_pwq() with locking. This function also allows %NULL @pwq.
+ */
+static void put_pwq_unlocked(struct pool_workqueue *pwq)
+{
+ if (pwq) {
+ /*
+ * As both pwqs and pools are sched-RCU protected, the
+ * following lock operations are safe.
+ */
+ spin_lock_irq(&pwq->pool->lock);
+ put_pwq(pwq);
+ spin_unlock_irq(&pwq->pool->lock);
+ }
+}
+
static void pwq_activate_delayed_work(struct work_struct *work)
{
struct pool_workqueue *pwq = get_work_pwq(work);
@@ -992,9 +1099,9 @@ static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
*/
static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
{
- /* ignore uncolored works */
+ /* uncolored work items don't participate in flushing or nr_active */
if (color == WORK_NO_COLOR)
- return;
+ goto out_put;
pwq->nr_in_flight[color]--;
@@ -1007,11 +1114,11 @@ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
/* is flush in progress and are we at the flushing tip? */
if (likely(pwq->flush_color != color))
- return;
+ goto out_put;
/* are there still in-flight works? */
if (pwq->nr_in_flight[color])
- return;
+ goto out_put;
/* this pwq is done, clear flush_color */
pwq->flush_color = -1;
@@ -1022,6 +1129,8 @@ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
*/
if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
complete(&pwq->wq->first_flusher->done);
+out_put:
+ put_pwq(pwq);
}
/**
@@ -1144,11 +1253,12 @@ static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
/* we own @work, set data and link */
set_work_pwq(work, pwq, extra_flags);
list_add_tail(&work->entry, head);
+ get_pwq(pwq);
/*
- * Ensure either worker_sched_deactivated() sees the above
- * list_add_tail() or we see zero nr_running to avoid workers
- * lying around lazily while there are works to be processed.
+ * Ensure either wq_worker_sleeping() sees the above
+ * list_add_tail() or we see zero nr_running to avoid workers lying
+ * around lazily while there are works to be processed.
*/
smp_mb();
@@ -1172,10 +1282,11 @@ static bool is_chained_work(struct workqueue_struct *wq)
return worker && worker->current_pwq->wq == wq;
}
-static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
+static void __queue_work(int cpu, struct workqueue_struct *wq,
struct work_struct *work)
{
struct pool_workqueue *pwq;
+ struct worker_pool *last_pool;
struct list_head *worklist;
unsigned int work_flags;
unsigned int req_cpu = cpu;
@@ -1191,48 +1302,62 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
debug_work_activate(work);
/* if dying, only works from the same workqueue are allowed */
- if (unlikely(wq->flags & WQ_DRAINING) &&
+ if (unlikely(wq->flags & __WQ_DRAINING) &&
WARN_ON_ONCE(!is_chained_work(wq)))
return;
+retry:
+ if (req_cpu == WORK_CPU_UNBOUND)
+ cpu = raw_smp_processor_id();
- /* determine the pwq to use */
- if (!(wq->flags & WQ_UNBOUND)) {
- struct worker_pool *last_pool;
-
- if (cpu == WORK_CPU_UNBOUND)
- cpu = raw_smp_processor_id();
-
- /*
- * It's multi cpu. If @work was previously on a different
- * cpu, it might still be running there, in which case the
- * work needs to be queued on that cpu to guarantee
- * non-reentrancy.
- */
- pwq = get_pwq(cpu, wq);
- last_pool = get_work_pool(work);
+ /* pwq which will be used unless @work is executing elsewhere */
+ if (!(wq->flags & WQ_UNBOUND))
+ pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
+ else
+ pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
- if (last_pool && last_pool != pwq->pool) {
- struct worker *worker;
+ /*
+ * If @work was previously on a different pool, it might still be
+ * running there, in which case the work needs to be queued on that
+ * pool to guarantee non-reentrancy.
+ */
+ last_pool = get_work_pool(work);
+ if (last_pool && last_pool != pwq->pool) {
+ struct worker *worker;
- spin_lock(&last_pool->lock);
+ spin_lock(&last_pool->lock);
- worker = find_worker_executing_work(last_pool, work);
+ worker = find_worker_executing_work(last_pool, work);
- if (worker && worker->current_pwq->wq == wq) {
- pwq = get_pwq(last_pool->cpu, wq);
- } else {
- /* meh... not running there, queue here */
- spin_unlock(&last_pool->lock);
- spin_lock(&pwq->pool->lock);
- }
+ if (worker && worker->current_pwq->wq == wq) {
+ pwq = worker->current_pwq;
} else {
+ /* meh... not running there, queue here */
+ spin_unlock(&last_pool->lock);
spin_lock(&pwq->pool->lock);
}
} else {
- pwq = get_pwq(WORK_CPU_UNBOUND, wq);
spin_lock(&pwq->pool->lock);
}
+ /*
+ * pwq is determined and locked. For unbound pools, we could have
+ * raced with pwq release and it could already be dead. If its
+ * refcnt is zero, repeat pwq selection. Note that pwqs never die
+ * without another pwq replacing it in the numa_pwq_tbl or while
+ * work items are executing on it, so the retrying is guaranteed to
+ * make forward-progress.
+ */
+ if (unlikely(!pwq->refcnt)) {
+ if (wq->flags & WQ_UNBOUND) {
+ spin_unlock(&pwq->pool->lock);
+ cpu_relax();
+ goto retry;
+ }
+ /* oops */
+ WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
+ wq->name, cpu);
+ }
+
/* pwq determined, queue */
trace_workqueue_queue_work(req_cpu, pwq, work);
@@ -1287,22 +1412,6 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
}
EXPORT_SYMBOL_GPL(queue_work_on);
-/**
- * queue_work - queue work on a workqueue
- * @wq: workqueue to use
- * @work: work to queue
- *
- * Returns %false if @work was already on a queue, %true otherwise.
- *
- * We queue the work to the CPU on which it was submitted, but if the CPU dies
- * it can be processed by another CPU.
- */
-bool queue_work(struct workqueue_struct *wq, struct work_struct *work)
-{
- return queue_work_on(WORK_CPU_UNBOUND, wq, work);
-}
-EXPORT_SYMBOL_GPL(queue_work);
-
void delayed_work_timer_fn(unsigned long __data)
{
struct delayed_work *dwork = (struct delayed_work *)__data;
@@ -1378,21 +1487,6 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
EXPORT_SYMBOL_GPL(queue_delayed_work_on);
/**
- * queue_delayed_work - queue work on a workqueue after delay
- * @wq: workqueue to use
- * @dwork: delayable work to queue
- * @delay: number of jiffies to wait before queueing
- *
- * Equivalent to queue_delayed_work_on() but tries to use the local CPU.
- */
-bool queue_delayed_work(struct workqueue_struct *wq,
- struct delayed_work *dwork, unsigned long delay)
-{
- return queue_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay);
-}
-EXPORT_SYMBOL_GPL(queue_delayed_work);
-
-/**
* mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
* @cpu: CPU number to execute work on
* @wq: workqueue to use
@@ -1431,21 +1525,6 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
EXPORT_SYMBOL_GPL(mod_delayed_work_on);
/**
- * mod_delayed_work - modify delay of or queue a delayed work
- * @wq: workqueue to use
- * @dwork: work to queue
- * @delay: number of jiffies to wait before queueing
- *
- * mod_delayed_work_on() on local CPU.
- */
-bool mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork,
- unsigned long delay)
-{
- return mod_delayed_work_on(WORK_CPU_UNBOUND, wq, dwork, delay);
-}
-EXPORT_SYMBOL_GPL(mod_delayed_work);
-
-/**
* worker_enter_idle - enter idle state
* @worker: worker which is entering idle state
*
@@ -1459,9 +1538,10 @@ static void worker_enter_idle(struct worker *worker)
{
struct worker_pool *pool = worker->pool;
- BUG_ON(worker->flags & WORKER_IDLE);
- BUG_ON(!list_empty(&worker->entry) &&
- (worker->hentry.next || worker->hentry.pprev));
+ if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) ||
+ WARN_ON_ONCE(!list_empty(&worker->entry) &&
+ (worker->hentry.next || worker->hentry.pprev)))
+ return;
/* can't use worker_set_flags(), also called from start_worker() */
worker->flags |= WORKER_IDLE;
@@ -1498,22 +1578,25 @@ static void worker_leave_idle(struct worker *worker)
{
struct worker_pool *pool = worker->pool;
- BUG_ON(!(worker->flags & WORKER_IDLE));
+ if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE)))
+ return;
worker_clr_flags(worker, WORKER_IDLE);
pool->nr_idle--;
list_del_init(&worker->entry);
}
/**
- * worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock pool
- * @worker: self
+ * worker_maybe_bind_and_lock - try to bind %current to worker_pool and lock it
+ * @pool: target worker_pool
+ *
+ * Bind %current to the cpu of @pool if it is associated and lock @pool.
*
* Works which are scheduled while the cpu is online must at least be
* scheduled to a worker which is bound to the cpu so that if they are
* flushed from cpu callbacks while cpu is going down, they are
* guaranteed to execute on the cpu.
*
- * This function is to be used by rogue workers and rescuers to bind
+ * This function is to be used by unbound workers and rescuers to bind
* themselves to the target cpu and may race with cpu going down or
* coming online. kthread_bind() can't be used because it may put the
* worker to already dead cpu and set_cpus_allowed_ptr() can't be used
@@ -1534,12 +1617,9 @@ static void worker_leave_idle(struct worker *worker)
* %true if the associated pool is online (@worker is successfully
* bound), %false if offline.
*/
-static bool worker_maybe_bind_and_lock(struct worker *worker)
+static bool worker_maybe_bind_and_lock(struct worker_pool *pool)
__acquires(&pool->lock)
{
- struct worker_pool *pool = worker->pool;
- struct task_struct *task = worker->task;
-
while (true) {
/*
* The following call may fail, succeed or succeed
@@ -1548,14 +1628,13 @@ __acquires(&pool->lock)
* against POOL_DISASSOCIATED.
*/
if (!(pool->flags & POOL_DISASSOCIATED))
- set_cpus_allowed_ptr(task, get_cpu_mask(pool->cpu));
+ set_cpus_allowed_ptr(current, pool->attrs->cpumask);
spin_lock_irq(&pool->lock);
if (pool->flags & POOL_DISASSOCIATED)
return false;
- if (task_cpu(task) == pool->cpu &&
- cpumask_equal(&current->cpus_allowed,
- get_cpu_mask(pool->cpu)))
+ if (task_cpu(current) == pool->cpu &&
+ cpumask_equal(&current->cpus_allowed, pool->attrs->cpumask))
return true;
spin_unlock_irq(&pool->lock);
@@ -1570,108 +1649,6 @@ __acquires(&pool->lock)
}
}
-/*
- * Rebind an idle @worker to its CPU. worker_thread() will test
- * list_empty(@worker->entry) before leaving idle and call this function.
- */
-static void idle_worker_rebind(struct worker *worker)
-{
- /* CPU may go down again inbetween, clear UNBOUND only on success */
- if (worker_maybe_bind_and_lock(worker))
- worker_clr_flags(worker, WORKER_UNBOUND);
-
- /* rebind complete, become available again */
- list_add(&worker->entry, &worker->pool->idle_list);
- spin_unlock_irq(&worker->pool->lock);
-}
-
-/*
- * Function for @worker->rebind.work used to rebind unbound busy workers to
- * the associated cpu which is coming back online. This is scheduled by
- * cpu up but can race with other cpu hotplug operations and may be
- * executed twice without intervening cpu down.
- */
-static void busy_worker_rebind_fn(struct work_struct *work)
-{
- struct worker *worker = container_of(work, struct worker, rebind_work);
-
- if (worker_maybe_bind_and_lock(worker))
- worker_clr_flags(worker, WORKER_UNBOUND);
-
- spin_unlock_irq(&worker->pool->lock);
-}
-
-/**
- * rebind_workers - rebind all workers of a pool to the associated CPU
- * @pool: pool of interest
- *
- * @pool->cpu is coming online. Rebind all workers to the CPU. Rebinding
- * is different for idle and busy ones.
- *
- * Idle ones will be removed from the idle_list and woken up. They will
- * add themselves back after completing rebind. This ensures that the
- * idle_list doesn't contain any unbound workers when re-bound busy workers
- * try to perform local wake-ups for concurrency management.
- *
- * Busy workers can rebind after they finish their current work items.
- * Queueing the rebind work item at the head of the scheduled list is
- * enough. Note that nr_running will be properly bumped as busy workers
- * rebind.
- *
- * On return, all non-manager workers are scheduled for rebind - see
- * manage_workers() for the manager special case. Any idle worker
- * including the manager will not appear on @idle_list until rebind is
- * complete, making local wake-ups safe.
- */
-static void rebind_workers(struct worker_pool *pool)
-{
- struct worker *worker, *n;
- int i;
-
- lockdep_assert_held(&pool->assoc_mutex);
- lockdep_assert_held(&pool->lock);
-