aboutsummaryrefslogtreecommitdiff
path: root/kernel/workqueue.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--kernel/workqueue.c732
1 files changed, 360 insertions, 372 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ee8e29a2320..35974ac6960 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -16,9 +16,10 @@
*
* This is the generic async execution mechanism. Work items as are
* executed in process context. The worker pool is shared and
- * automatically managed. There is one worker pool for each CPU and
- * one extra for works which are better served by workers which are
- * not bound to any specific CPU.
+ * automatically managed. There are two worker pools for each CPU (one for
+ * normal work items and the other for high priority ones) and some extra
+ * pools for workqueues which are not bound to any specific CPU - the
+ * number of these backing pools is dynamic.
*
* Please read Documentation/workqueue.txt for details.
*/
@@ -64,15 +65,12 @@ enum {
* be executing on any CPU. The pool behaves as an unbound one.
*
* Note that DISASSOCIATED should be flipped only while holding
- * manager_mutex to avoid changing binding state while
- * create_worker() is in progress.
+ * attach_mutex to avoid changing binding state while
+ * worker_attach_to_pool() is in progress.
*/
- POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
- POOL_FREEZING = 1 << 3, /* freeze in progress */
/* worker flags */
- WORKER_STARTED = 1 << 0, /* started */
WORKER_DIE = 1 << 1, /* die die die */
WORKER_IDLE = 1 << 2, /* is idle */
WORKER_PREP = 1 << 3, /* preparing to run works */
@@ -99,10 +97,10 @@ enum {
/*
* Rescue workers are used only on emergencies and shared by
- * all cpus. Give -20.
+ * all cpus. Give MIN_NICE.
*/
- RESCUER_NICE_LEVEL = -20,
- HIGHPRI_NICE_LEVEL = -20,
+ RESCUER_NICE_LEVEL = MIN_NICE,
+ HIGHPRI_NICE_LEVEL = MIN_NICE,
WQ_NAME_LEN = 24,
};
@@ -123,8 +121,7 @@ enum {
* cpu or grabbing pool->lock is enough for read access. If
* POOL_DISASSOCIATED is set, it's identical to L.
*
- * MG: pool->manager_mutex and pool->lock protected. Writes require both
- * locks. Reads can happen under either lock.
+ * A: pool->attach_mutex protected.
*
* PL: wq_pool_mutex protected.
*
@@ -162,8 +159,11 @@ struct worker_pool {
/* see manage_workers() for details on the two manager mutexes */
struct mutex manager_arb; /* manager arbitration */
- struct mutex manager_mutex; /* manager exclusion */
- struct idr worker_idr; /* MG: worker IDs and iteration */
+ struct mutex attach_mutex; /* attach/detach exclusion */
+ struct list_head workers; /* A: attached workers */
+ struct completion *detach_completion; /* all workers detached */
+
+ struct ida worker_ida; /* worker IDs for task name */
struct workqueue_attrs *attrs; /* I: worker attributes */
struct hlist_node hash_node; /* PL: unbound_pool_hash node */
@@ -272,6 +272,15 @@ static cpumask_var_t *wq_numa_possible_cpumask;
static bool wq_disable_numa;
module_param_named(disable_numa, wq_disable_numa, bool, 0444);
+/* see the comment above the definition of WQ_POWER_EFFICIENT */
+#ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT
+static bool wq_power_efficient = true;
+#else
+static bool wq_power_efficient;
+#endif
+
+module_param_named(power_efficient, wq_power_efficient, bool, 0444);
+
static bool wq_numa_enabled; /* unbound NUMA affinity enabled */
/* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
@@ -295,6 +304,9 @@ static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);
/* I: attributes used when instantiating standard unbound pools on demand */
static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
+/* I: attributes used when instantiating ordered pools on demand */
+static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];
+
struct workqueue_struct *system_wq __read_mostly;
EXPORT_SYMBOL(system_wq);
struct workqueue_struct *system_highpri_wq __read_mostly;
@@ -305,6 +317,10 @@ struct workqueue_struct *system_unbound_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_unbound_wq);
struct workqueue_struct *system_freezable_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_wq);
+struct workqueue_struct *system_power_efficient_wq __read_mostly;
+EXPORT_SYMBOL_GPL(system_power_efficient_wq);
+struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
+EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
static int worker_thread(void *__worker);
static void copy_workqueue_attrs(struct workqueue_attrs *to,
@@ -323,16 +339,6 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
lockdep_is_held(&wq->mutex), \
"sched RCU or wq->mutex should be held")
-#ifdef CONFIG_LOCKDEP
-#define assert_manager_or_pool_lock(pool) \
- WARN_ONCE(debug_locks && \
- !lockdep_is_held(&(pool)->manager_mutex) && \
- !lockdep_is_held(&(pool)->lock), \
- "pool->manager_mutex or ->lock should be held")
-#else
-#define assert_manager_or_pool_lock(pool) do { } while (0)
-#endif
-
#define for_each_cpu_worker_pool(pool, cpu) \
for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
(pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
@@ -358,17 +364,16 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
/**
* for_each_pool_worker - iterate through all workers of a worker_pool
* @worker: iteration cursor
- * @wi: integer used for iteration
* @pool: worker_pool to iterate workers of
*
- * This must be called with either @pool->manager_mutex or ->lock held.
+ * This must be called with @pool->attach_mutex.
*
* The if/else clause exists only for the lockdep assertion and can be
* ignored.
*/
-#define for_each_pool_worker(worker, wi, pool) \
- idr_for_each_entry(&(pool)->worker_idr, (worker), (wi)) \
- if (({ assert_manager_or_pool_lock((pool)); false; })) { } \
+#define for_each_pool_worker(worker, pool) \
+ list_for_each_entry((worker), &(pool)->workers, node) \
+ if (({ lockdep_assert_held(&pool->attach_mutex); false; })) { } \
else
/**
@@ -499,19 +504,33 @@ void destroy_work_on_stack(struct work_struct *work)
}
EXPORT_SYMBOL_GPL(destroy_work_on_stack);
+void destroy_delayed_work_on_stack(struct delayed_work *work)
+{
+ destroy_timer_on_stack(&work->timer);
+ debug_object_free(&work->work, &work_debug_descr);
+}
+EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);
+
#else
static inline void debug_work_activate(struct work_struct *work) { }
static inline void debug_work_deactivate(struct work_struct *work) { }
#endif
-/* allocate ID and assign it to @pool */
+/**
+ * worker_pool_assign_id - allocate ID and assing it to @pool
+ * @pool: the pool pointer of interest
+ *
+ * Returns 0 if ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned
+ * successfully, -errno on failure.
+ */
static int worker_pool_assign_id(struct worker_pool *pool)
{
int ret;
lockdep_assert_held(&wq_pool_mutex);
- ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_KERNEL);
+ ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE,
+ GFP_KERNEL);
if (ret >= 0) {
pool->id = ret;
return 0;
@@ -527,6 +546,8 @@ static int worker_pool_assign_id(struct worker_pool *pool)
* This must be called either with pwq_lock held or sched RCU read locked.
* If the pwq needs to be used beyond the locking in effect, the caller is
* responsible for guaranteeing that the pwq stays online.
+ *
+ * Return: The unbound pool_workqueue for @node.
*/
static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
int node)
@@ -625,8 +646,6 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work)
* get_work_pool - return the worker_pool a given work was associated with
* @work: the work item of interest
*
- * Return the worker_pool @work was last associated with. %NULL if none.
- *
* Pools are created and destroyed under wq_pool_mutex, and allows read
* access under sched-RCU read lock. As such, this function should be
* called under wq_pool_mutex or with preemption disabled.
@@ -635,6 +654,8 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work)
* mentioned locking is in effect. If the returned pool needs to be used
* beyond the critical section, the caller is responsible for ensuring the
* returned pool is and stays online.
+ *
+ * Return: The worker_pool @work was last associated with. %NULL if none.
*/
static struct worker_pool *get_work_pool(struct work_struct *work)
{
@@ -658,7 +679,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work)
* get_work_pool_id - return the worker pool ID a given work is associated with
* @work: the work item of interest
*
- * Return the worker_pool ID @work was last associated with.
+ * Return: The worker_pool ID @work was last associated with.
* %WORK_OFFQ_POOL_NONE if none.
*/
static int get_work_pool_id(struct work_struct *work)
@@ -730,13 +751,6 @@ static bool need_to_create_worker(struct worker_pool *pool)
return need_more_worker(pool) && !may_start_working(pool);
}
-/* Do I need to be the manager? */
-static bool need_to_manage_workers(struct worker_pool *pool)
-{
- return need_to_create_worker(pool) ||
- (pool->flags & POOL_MANAGE_WORKERS);
-}
-
/* Do we have too many workers and should some go away? */
static bool too_many_workers(struct worker_pool *pool)
{
@@ -758,8 +772,8 @@ static bool too_many_workers(struct worker_pool *pool)
* Wake up functions.
*/
-/* Return the first worker. Safe with preemption disabled */
-static struct worker *first_worker(struct worker_pool *pool)
+/* Return the first idle worker. Safe with preemption disabled */
+static struct worker *first_idle_worker(struct worker_pool *pool)
{
if (unlikely(list_empty(&pool->idle_list)))
return NULL;
@@ -778,7 +792,7 @@ static struct worker *first_worker(struct worker_pool *pool)
*/
static void wake_up_worker(struct worker_pool *pool)
{
- struct worker *worker = first_worker(pool);
+ struct worker *worker = first_idle_worker(pool);
if (likely(worker))
wake_up_process(worker->task);
@@ -817,7 +831,7 @@ void wq_worker_waking_up(struct task_struct *task, int cpu)
* CONTEXT:
* spin_lock_irq(rq->lock)
*
- * RETURNS:
+ * Return:
* Worker task on @cpu to wake up, %NULL if none.
*/
struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
@@ -852,7 +866,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
*/
if (atomic_dec_and_test(&pool->nr_running) &&
!list_empty(&pool->worklist))
- to_wakeup = first_worker(pool);
+ to_wakeup = first_idle_worker(pool);
return to_wakeup ? to_wakeup->task : NULL;
}
@@ -952,8 +966,8 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
* CONTEXT:
* spin_lock_irq(pool->lock).
*
- * RETURNS:
- * Pointer to worker which is executing @work if found, NULL
+ * Return:
+ * Pointer to worker which is executing @work if found, %NULL
* otherwise.
*/
static struct worker *find_worker_executing_work(struct worker_pool *pool,
@@ -1141,14 +1155,16 @@ out_put:
* @flags: place to store irq state
*
* Try to grab PENDING bit of @work. This function can handle @work in any
- * stable state - idle, on timer or on worklist. Return values are
+ * stable state - idle, on timer or on worklist.
*
+ * Return:
* 1 if @work was pending and we successfully stole PENDING
* 0 if @work was idle and we claimed PENDING
* -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry
* -ENOENT if someone else is canceling @work, this state may persist
* for arbitrarily long
*
+ * Note:
* On >= 0 return, the caller owns @work's PENDING bit. To avoid getting
* interrupted while holding PENDING and @work off queue, irq must be
* disabled on entry. This, combined with delayed_work->timer being
@@ -1302,7 +1318,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
debug_work_activate(work);
- /* if dying, only works from the same workqueue are allowed */
+ /* if draining, only works from the same workqueue are allowed */
if (unlikely(wq->flags & __WQ_DRAINING) &&
WARN_ON_ONCE(!is_chained_work(wq)))
return;
@@ -1390,10 +1406,10 @@ retry:
* @wq: workqueue to use
* @work: work to queue
*
- * Returns %false if @work was already on a queue, %true otherwise.
- *
* We queue the work to a specific CPU, the caller must ensure it
* can't go away.
+ *
+ * Return: %false if @work was already on a queue, %true otherwise.
*/
bool queue_work_on(int cpu, struct workqueue_struct *wq,
struct work_struct *work)
@@ -1463,7 +1479,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
* @dwork: work to queue
* @delay: number of jiffies to wait before queueing
*
- * Returns %false if @work was already on a queue, %true otherwise. If
+ * Return: %false if @work was already on a queue, %true otherwise. If
* @delay is zero and @dwork is idle, it will be scheduled for immediate
* execution.
*/
@@ -1499,7 +1515,7 @@ EXPORT_SYMBOL(queue_delayed_work_on);
* zero, @work is guaranteed to be scheduled immediately regardless of its
* current state.
*
- * Returns %false if @dwork was idle and queued, %true if @dwork was
+ * Return: %false if @dwork was idle and queued, %true if @dwork was
* pending and its timer was modified.
*
* This function is safe to call from any context including IRQ handler.
@@ -1586,70 +1602,6 @@ static void worker_leave_idle(struct worker *worker)
list_del_init(&worker->entry);
}
-/**
- * worker_maybe_bind_and_lock - try to bind %current to worker_pool and lock it
- * @pool: target worker_pool
- *
- * Bind %current to the cpu of @pool if it is associated and lock @pool.
- *
- * Works which are scheduled while the cpu is online must at least be
- * scheduled to a worker which is bound to the cpu so that if they are
- * flushed from cpu callbacks while cpu is going down, they are
- * guaranteed to execute on the cpu.
- *
- * This function is to be used by unbound workers and rescuers to bind
- * themselves to the target cpu and may race with cpu going down or
- * coming online. kthread_bind() can't be used because it may put the
- * worker to already dead cpu and set_cpus_allowed_ptr() can't be used
- * verbatim as it's best effort and blocking and pool may be
- * [dis]associated in the meantime.
- *
- * This function tries set_cpus_allowed() and locks pool and verifies the
- * binding against %POOL_DISASSOCIATED which is set during
- * %CPU_DOWN_PREPARE and cleared during %CPU_ONLINE, so if the worker
- * enters idle state or fetches works without dropping lock, it can
- * guarantee the scheduling requirement described in the first paragraph.
- *
- * CONTEXT:
- * Might sleep. Called without any lock but returns with pool->lock
- * held.
- *
- * RETURNS:
- * %true if the associated pool is online (@worker is successfully
- * bound), %false if offline.
- */
-static bool worker_maybe_bind_and_lock(struct worker_pool *pool)
-__acquires(&pool->lock)
-{
- while (true) {
- /*
- * The following call may fail, succeed or succeed
- * without actually migrating the task to the cpu if
- * it races with cpu hotunplug operation. Verify
- * against POOL_DISASSOCIATED.
- */
- if (!(pool->flags & POOL_DISASSOCIATED))
- set_cpus_allowed_ptr(current, pool->attrs->cpumask);
-
- spin_lock_irq(&pool->lock);
- if (pool->flags & POOL_DISASSOCIATED)
- return false;
- if (task_cpu(current) == pool->cpu &&
- cpumask_equal(&current->cpus_allowed, pool->attrs->cpumask))
- return true;
- spin_unlock_irq(&pool->lock);
-
- /*
- * We've raced with CPU hot[un]plug. Give it a breather
- * and retry migration. cond_resched() is required here;
- * otherwise, we might deadlock against cpu_stop trying to
- * bring down the CPU on non-preemptive kernel.
- */
- cpu_relax();
- cond_resched();
- }
-}
-
static struct worker *alloc_worker(void)
{
struct worker *worker;
@@ -1658,6 +1610,7 @@ static struct worker *alloc_worker(void)
if (worker) {
INIT_LIST_HEAD(&worker->entry);
INIT_LIST_HEAD(&worker->scheduled);
+ INIT_LIST_HEAD(&worker->node);
/* on creation a worker is in !idle && prep state */
worker->flags = WORKER_PREP;
}
@@ -1665,17 +1618,73 @@ static struct worker *alloc_worker(void)
}
/**
+ * worker_attach_to_pool() - attach a worker to a pool
+ * @worker: worker to be attached
+ * @pool: the target pool
+ *
+ * Attach @worker to @pool. Once attached, the %WORKER_UNBOUND flag and
+ * cpu-binding of @worker are kept coordinated with the pool across
+ * cpu-[un]hotplugs.
+ */
+static void worker_attach_to_pool(struct worker *worker,
+ struct worker_pool *pool)
+{
+ mutex_lock(&pool->attach_mutex);
+
+ /*
+ * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
+ * online CPUs. It'll be re-applied when any of the CPUs come up.
+ */
+ set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
+
+ /*
+ * The pool->attach_mutex ensures %POOL_DISASSOCIATED remains
+ * stable across this function. See the comments above the
+ * flag definition for details.
+ */
+ if (pool->flags & POOL_DISASSOCIATED)
+ worker->flags |= WORKER_UNBOUND;
+
+ list_add_tail(&worker->node, &pool->workers);
+
+ mutex_unlock(&pool->attach_mutex);
+}
+
+/**
+ * worker_detach_from_pool() - detach a worker from its pool
+ * @worker: worker which is attached to its pool
+ * @pool: the pool @worker is attached to
+ *
+ * Undo the attaching which had been done in worker_attach_to_pool(). The
+ * caller worker shouldn't access to the pool after detached except it has
+ * other reference to the pool.
+ */
+static void worker_detach_from_pool(struct worker *worker,
+ struct worker_pool *pool)
+{
+ struct completion *detach_completion = NULL;
+
+ mutex_lock(&pool->attach_mutex);
+ list_del(&worker->node);
+ if (list_empty(&pool->workers))
+ detach_completion = pool->detach_completion;
+ mutex_unlock(&pool->attach_mutex);
+
+ if (detach_completion)
+ complete(detach_completion);
+}
+
+/**
* create_worker - create a new workqueue worker
* @pool: pool the new worker will belong to
*
- * Create a new worker which is bound to @pool. The returned worker
- * can be started by calling start_worker() or destroyed using
- * destroy_worker().
+ * Create a new worker which is attached to @pool. The new worker must be
+ * started by start_worker().
*
* CONTEXT:
* Might sleep. Does GFP_KERNEL allocations.
*
- * RETURNS:
+ * Return:
* Pointer to the newly created worker.
*/
static struct worker *create_worker(struct worker_pool *pool)
@@ -1684,19 +1693,8 @@ static struct worker *create_worker(struct worker_pool *pool)
int id = -1;
char id_buf[16];
- lockdep_assert_held(&pool->manager_mutex);
-
- /*
- * ID is needed to determine kthread name. Allocate ID first
- * without installing the pointer.
- */
- idr_preload(GFP_KERNEL);
- spin_lock_irq(&pool->lock);
-
- id = idr_alloc(&pool->worker_idr, NULL, 0, 0, GFP_NOWAIT);
-
- spin_unlock_irq(&pool->lock);
- idr_preload_end();
+ /* ID is needed to determine kthread name */
+ id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
if (id < 0)
goto fail;
@@ -1718,37 +1716,19 @@ static struct worker *create_worker(struct worker_pool *pool)
if (IS_ERR(worker->task))
goto fail;
- /*
- * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
- * online CPUs. It'll be re-applied when any of the CPUs come up.
- */
set_user_nice(worker->task, pool->attrs->nice);
- set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
/* prevent userland from meddling with cpumask of workqueue workers */
worker->task->flags |= PF_NO_SETAFFINITY;
- /*
- * The caller is responsible for ensuring %POOL_DISASSOCIATED
- * remains stable across this function. See the comments above the
- * flag definition for details.
- */
- if (pool->flags & POOL_DISASSOCIATED)
- worker->flags |= WORKER_UNBOUND;
-
- /* successful, commit the pointer to idr */
- spin_lock_irq(&pool->lock);
- idr_replace(&pool->worker_idr, worker, worker->id);
- spin_unlock_irq(&pool->lock);
+ /* successful, attach the worker to the pool */
+ worker_attach_to_pool(worker, pool);
return worker;
fail:
- if (id >= 0) {
- spin_lock_irq(&pool->lock);
- idr_remove(&pool->worker_idr, id);
- spin_unlock_irq(&pool->lock);
- }
+ if (id >= 0)
+ ida_simple_remove(&pool->worker_ida, id);
kfree(worker);
return NULL;
}
@@ -1764,7 +1744,6 @@ fail:
*/
static void start_worker(struct worker *worker)
{
- worker->flags |= WORKER_STARTED;
worker->pool->nr_workers++;
worker_enter_idle(worker);
wake_up_process(worker->task);
@@ -1775,13 +1754,13 @@ static void start_worker(struct worker *worker)
* @pool: the target pool
*
* Grab the managership of @pool and create and start a new worker for it.
+ *
+ * Return: 0 on success. A negative error code otherwise.
*/
static int create_and_start_worker(struct worker_pool *pool)
{
struct worker *worker;
- mutex_lock(&pool->manager_mutex);
-
worker = create_worker(pool);
if (worker) {
spin_lock_irq(&pool->lock);
@@ -1789,8 +1768,6 @@ static int create_and_start_worker(struct worker_pool *pool)
spin_unlock_irq(&pool->lock);
}
- mutex_unlock(&pool->manager_mutex);
-
return worker ? 0 : -ENOMEM;
}
@@ -1798,39 +1775,30 @@ static int create_and_start_worker(struct worker_pool *pool)
* destroy_worker - destroy a workqueue worker
* @worker: worker to be destroyed
*
- * Destroy @worker and adjust @pool stats accordingly.
+ * Destroy @worker and adjust @pool stats accordingly. The worker should
+ * be idle.
*
* CONTEXT:
- * spin_lock_irq(pool->lock) which is released and regrabbed.
+ * spin_lock_irq(pool->lock).
*/
static void destroy_worker(struct worker *worker)
{
struct worker_pool *pool = worker->pool;
- lockdep_assert_held(&pool->manager_mutex);
lockdep_assert_held(&pool->lock);
/* sanity check frenzy */
if (WARN_ON(worker->current_work) ||
- WARN_ON(!list_empty(&worker->scheduled)))
+ WARN_ON(!list_empty(&worker->scheduled)) ||
+ WARN_ON(!(worker->flags & WORKER_IDLE)))
return;
- if (worker->flags & WORKER_STARTED)
- pool->nr_workers--;
- if (worker->flags & WORKER_IDLE)
- pool->nr_idle--;
+ pool->nr_workers--;
+ pool->nr_idle--;
list_del_init(&worker->entry);
worker->flags |= WORKER_DIE;
-
- idr_remove(&pool->worker_idr, worker->id);
-
- spin_unlock_irq(&pool->lock);
-
- kthread_stop(worker->task);
- kfree(worker);
-
- spin_lock_irq(&pool->lock);
+ wake_up_process(worker->task);
}
static void idle_worker_timeout(unsigned long __pool)
@@ -1839,7 +1807,7 @@ static void idle_worker_timeout(unsigned long __pool)
spin_lock_irq(&pool->lock);
- if (too_many_workers(pool)) {
+ while (too_many_workers(pool)) {
struct worker *worker;
unsigned long expires;
@@ -1847,13 +1815,12 @@ static void idle_worker_timeout(unsigned long __pool)
worker = list_entry(pool->idle_list.prev, struct worker, entry);
expires = worker->last_active + IDLE_WORKER_TIMEOUT;
- if (time_before(jiffies, expires))
+ if (time_before(jiffies, expires)) {
mod_timer(&pool->idle_timer, expires);
- else {
- /* it's been idle for too long, wake up manager */
- pool->flags |= POOL_MANAGE_WORKERS;
- wake_up_worker(pool);
+ break;
}
+
+ destroy_worker(worker);
}
spin_unlock_irq(&pool->lock);
@@ -1871,6 +1838,12 @@ static void send_mayday(struct work_struct *work)
/* mayday mayday mayday */
if (list_empty(&pwq->mayday_node)) {
+ /*
+ * If @pwq is for an unbound wq, its base ref may be put at
+ * any time due to an attribute change. Pin @pwq until the
+ * rescuer is done with it.
+ */
+ get_pwq(pwq);
list_add_tail(&pwq->mayday_node, &wq->maydays);
wake_up_process(wq->rescuer->task);
}
@@ -1919,7 +1892,7 @@ static void pool_mayday_timeout(unsigned long __pool)
* multiple times. Does GFP_KERNEL allocations. Called only from
* manager.
*
- * RETURNS:
+ * Return:
* %false if no action was taken and pool->lock stayed locked, %true
* otherwise.
*/
@@ -1966,44 +1939,6 @@ restart:
}
/**
- * maybe_destroy_worker - destroy workers which have been idle for a while
- * @pool: pool to destroy workers for
- *
- * Destroy @pool workers which have been idle for longer than
- * IDLE_WORKER_TIMEOUT.
- *
- * LOCKING:
- * spin_lock_irq(pool->lock) which may be released and regrabbed
- * multiple times. Called only from manager.
- *
- * RETURNS:
- * %false if no action was taken and pool->lock stayed locked, %true
- * otherwise.
- */
-static bool maybe_destroy_workers(struct worker_pool *pool)
-{
- bool ret = false;
-
- while (too_many_workers(pool)) {
- struct worker *worker;
- unsigned long expires;
-
- worker = list_entry(pool->idle_list.prev, struct worker, entry);
- expires = worker->last_active + IDLE_WORKER_TIMEOUT;
-
- if (time_before(jiffies, expires)) {
- mod_timer(&pool->idle_timer, expires);
- break;
- }
-
- destroy_worker(worker);
- ret = true;
- }
-
- return ret;
-}
-
-/**
* manage_workers - manage worker pool
* @worker: self
*
@@ -2019,9 +1954,12 @@ static bool maybe_destroy_workers(struct worker_pool *pool)
* spin_lock_irq(pool->lock) which may be released and regrabbed
* multiple times. Does GFP_KERNEL allocations.
*
- * RETURNS:
- * spin_lock_irq(pool->lock) which may be released and regrabbed
- * multiple times. Does GFP_KERNEL allocations.
+ * Return:
+ * %false if the pool don't need management and the caller can safely start
+ * processing works, %true indicates that the function released pool->lock
+ * and reacquired it to perform some management function and that the
+ * conditions that the caller verified while holding the lock before
+ * calling the function might no longer be true.
*/
static bool manage_workers(struct worker *worker)
{
@@ -2029,8 +1967,6 @@ static bool manage_workers(struct worker *worker)
bool ret = false;
/*
- * Managership is governed by two mutexes - manager_arb and
- * manager_mutex. manager_arb handles arbitration of manager role.
* Anyone who successfully grabs manager_arb wins the arbitration
* and becomes the manager. mutex_trylock() on pool->manager_arb
* failure while holding pool->lock reliably indicates that someone
@@ -2039,40 +1975,12 @@ static bool manage_workers(struct worker *worker)
* grabbing manager_arb is responsible for actually performing
* manager duties. If manager_arb is grabbed and released without
* actual management, the pool may stall indefinitely.
- *
- * manager_mutex is used for exclusion of actual management
- * operations. The holder of manager_mutex can be sure that none
- * of management operations, including creation and destruction of
- * workers, won't take place until the mutex is released. Because
- * manager_mutex doesn't interfere with manager role arbitration,
- * it is guaranteed that the pool's management, while may be
- * delayed, won't be disturbed by someone else grabbing
- * manager_mutex.
*/
if (!mutex_trylock(&pool->manager_arb))
return ret;
- /*
- * With manager arbitration won, manager_mutex would be free in
- * most cases. trylock first without dropping @pool->lock.
- */
- if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
- spin_unlock_irq(&pool->lock);
- mutex_lock(&pool->manager_mutex);
- spin_lock_irq(&pool->lock);
- ret = true;
- }
-
- pool->flags &= ~POOL_MANAGE_WORKERS;
-
- /*
- * Destroy and then create so that may_start_working() is true
- * on return.
- */
- ret |= maybe_destroy_workers(pool);
ret |= maybe_create_worker(pool);
- mutex_unlock(&pool->manager_mutex);
mutex_unlock(&pool->manager_arb);
return ret;
}
@@ -2188,6 +2096,15 @@ __acquires(&pool->lock)
dump_stack();
}
+ /*
+ * The following prevents a kworker from hogging CPU on !PREEMPT
+ * kernels, where a requeueing work item waiting for something to
+ * happen could deadlock with stop_machine as such work item could
+ * indefinitely requeue itself while all other CPUs are trapped in
+ * stop_machine.
+ */
+ cond_resched();
+
spin_lock_irq(&pool->lock);
/* clear cpu intensive status */
@@ -2233,6 +2150,8 @@ static void process_scheduled_works(struct worker *worker)
* work items regardless of their specific target workqueue. The only
* exception is work items which belong to workqueues with a rescuer which
* will be explained in rescuer_thread().
+ *
+ * Return: 0
*/
static int worker_thread(void *__worker)
{
@@ -2249,6 +2168,11 @@ woke_up:
spin_unlock_irq(&pool->lock);
WARN_ON_ONCE(!list_empty(&worker->entry));
worker->task->flags &= ~PF_WQ_WORKER;
+
+ set_task_comm(worker->task, "kworker/dying");
+ ida_simple_remove(&pool->worker_ida, worker->id);
+ worker_detach_from_pool(worker, pool);
+ kfree(worker);
return 0;
}
@@ -2296,9 +2220,6 @@ recheck:
worker_set_flags(worker, WORKER_PREP, false);
sleep:
- if (unlikely(need_to_manage_workers(pool)) && manage_workers(worker))
- goto recheck;
-
/*
* pool->lock is held and there's no work to process and no need to
* manage, sleep. Workers are woken up only while holding
@@ -2331,12 +2252,15 @@ sleep:
* those works so that forward progress can be guaranteed.
*
* This should happen rarely.
+ *
+ * Return: 0
*/
static int rescuer_thread(void *__rescuer)
{
struct worker *rescuer = __rescuer;
struct workqueue_struct *wq = rescuer->rescue_wq;
struct list_head *scheduled = &rescuer->scheduled;
+ bool should_stop;
set_user_nice(current, RESCUER_NICE_LEVEL);
@@ -2348,11 +2272,15 @@ static int rescuer_thread(void *__rescuer)
repeat:
set_current_state(TASK_INTERRUPTIBLE);
- if (kthread_should_stop()) {
- __set_current_state(TASK_RUNNING);
- rescuer->task->flags &= ~PF_WQ_WORKER;
- return 0;
- }
+ /*
+ * By the time the rescuer is requested to stop, the workqueue
+ * shouldn't have any work pending, but @wq->maydays may still have
+ * pwq(s) queued. This can happen by non-rescuer workers consuming
+ * all the work items before the rescuer got to them. Go through
+ * @wq->maydays processing before acting on should_stop so that the
+ * list is always empty on exit.
+ */
+ should_stop = kthread_should_stop();
/* see whether any pwq is asking for help */
spin_lock_irq(&wq_mayday_lock);
@@ -2368,8 +2296,9 @@ repeat:
spin_unlock_irq(&wq_mayday_lock);
- /* migrate to the target cpu if possible */
- worker_maybe_bind_and_lock(pool);
+ worker_attach_to_pool(rescuer, pool);
+
+ spin_lock_irq(&pool->lock);
rescuer->pool = pool;
/*
@@ -2382,6 +2311,17 @@ repeat:
move_linked_works(work, scheduled, &n);
process_scheduled_works(rescuer);
+ spin_unlock_irq(&pool->lock);
+
+ worker_detach_from_pool(rescuer, pool);
+
+ spin_lock_irq(&pool->lock);
+
+ /*
+ * Put the reference grabbed by send_mayday(). @pool won't
+ * go away while we're holding its lock.
+ */
+ put_pwq(pwq);
/*
* Leave this pool. If keep_working() is %true, notify a
@@ -2398,6 +2338,12 @@ repeat:
spin_unlock_irq(&wq_mayday_lock);
+ if (should_stop) {
+ __set_current_state(TASK_RUNNING);
+ rescuer->task->flags &= ~PF_WQ_WORKER;
+ return 0;
+ }
+
/* rescuers should never participate in concurrency management */
WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
schedule();
@@ -2503,7 +2449,7 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
* CONTEXT:
* mutex_lock(wq->mutex).
*
- * RETURNS:
+ * Return:
* %true if @flush_color >= 0 and there's something to flush. %false
* otherwise.
*/
@@ -2811,7 +2757,7 @@ already_gone:
* Wait until @work has finished execution. @work is guaranteed to be idle
* on return if it hasn't been requeued since flush started.
*
- * RETURNS:
+ * Return:
* %true if flush_work() waited for the work to finish execution,
* %false if it was already idle.
*/
@@ -2871,7 +2817,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
* The caller must ensure that the workqueue on which @work was last
* queued can't be destroyed before this function returns.
*
- * RETURNS:
+ * Return:
* %true if @work was pending, %false otherwise.
*/
bool cancel_work_sync(struct work_struct *work)
@@ -2888,7 +2834,7 @@ EXPORT_SYMBOL_GPL(cancel_work_sync);
* immediate execution. Like flush_work(), this function only
* considers the last queueing instance of @dwork.
*
- * RETURNS:
+ * Return:
* %true if flush_work() waited for the work to finish execution,
* %false if it was already idle.
*/
@@ -2906,11 +2852,15 @@ EXPORT_SYMBOL(flush_delayed_work);
* cancel_delayed_work - cancel a delayed work
* @dwork: delayed_work to cancel
*
- * Kill off a pending delayed_work. Returns %true if @dwork was pending
- * and canceled; %false if wasn't pending. Note that the work callback
- * function may still be running on return, unless it returns %true and the
- * work doesn't re-arm itself. Explicitly flush or use
- * cancel_delayed_work_sync() to wait on it.
+ * Kill off a pending delayed_work.
+ *
+ * Return: %true if @dwork was pending and canceled; %false if it wasn't
+ * pending.
+ *
+ * Note:
+ * The work callback function may still be running on return, unless
+ * it returns %true and the work doesn't re-arm itself. Explicitly flush or
+ * use cancel_delayed_work_sync() to wait on it.
*
* This function is safe to call from any context including IRQ handler.
*/
@@ -2939,7 +2889,7 @@ EXPORT_SYMBOL(cancel_delayed_work);
*
* This is cancel_work_sync() for delayed works.
*
- * RETURNS:
+ * Return:
* %true if @dwork was pending, %false otherwise.
*/
bool cancel_delayed_work_sync(struct delayed_work *dwork)
@@ -2956,7 +2906,7 @@ EXPORT_SYMBOL(cancel_delayed_work_sync);
* system workqueue and blocks until all CPUs have completed.
* schedule_on_each_cpu() is very slow.
*
- * RETURNS:
+ * Return:
* 0 on success, -errno on failure.
*/
int schedule_on_each_cpu(work_func_t func)
@@ -3024,7 +2974,7 @@ EXPORT_SYMBOL(flush_scheduled_work);
* Executes the function immediately if process context is available,
* otherwise schedules the function for delayed execution.
*
- * Returns: 0 - function was executed
+ * Return: 0 - function was executed
* 1 - function was scheduled for execution
*/
int execute_in_process_context(work_func_t fn, struct execute_work *ew)
@@ -3068,25 +3018,26 @@ static struct workqueue_struct *dev_to_wq(struct device *dev)
return wq_dev->wq;
}
-static ssize_t wq_per_cpu_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct workqueue_struct *wq = dev_to_wq(dev);
return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
}
+static DEVICE_ATTR_RO(per_cpu);
-static ssize_t wq_max_active_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t max_active_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct workqueue_struct *wq = dev_to_wq(dev);
return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
}
-static ssize_t wq_max_active_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t max_active_store(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
{
struct workqueue_struct *wq = dev_to_wq(dev);
int val;
@@ -3097,12 +3048,14 @@ static ssize_t wq_max_active_store(struct device *dev,
workqueue_set_max_active(wq, val);
return count;
}
+static DEVICE_ATTR_RW(max_active);
-static struct device_attribute wq_sysfs_attrs[] = {
- __ATTR(per_cpu, 0444, wq_per_cpu_show, NULL),
- __ATTR(max_active, 0644, wq_max_active_show, wq_max_active_store),
- __ATTR_NULL,
+static struct attribute *wq_sysfs_attrs[] = {
+ &dev_attr_per_cpu.attr,
+ &dev_attr_max_active.attr,
+ NULL,
};
+ATTRIBUTE_GROUPS(wq_sysfs);
static ssize_t wq_pool_ids_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -3164,7 +3117,7 @@ static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
return -ENOMEM;
if (sscanf(buf, "%d", &attrs->nice) == 1 &&
- attrs->nice >= -20 && attrs->nice <= 19)
+ attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
ret = apply_workqueue_attrs(wq, attrs);
else
ret = -EINVAL;
@@ -3252,7 +3205,7 @@ static struct device_attribute wq_sysfs_unbound_attrs[] = {
static struct bus_type wq_subsys = {
.name = "workqueue",
- .dev_attrs = wq_sysfs_attrs,
+ .dev_groups = wq_sysfs_groups,
};
static int __init wq_sysfs_init(void)
@@ -3281,7 +3234,7 @@ static void wq_device_release(struct device *dev)
* apply_workqueue_attrs() may race against userland updating the
* attributes.
*
- * Returns 0 on success, -errno on failure.
+ * Return: 0 on success, -errno on failure.
*/
int workqueue_sysfs_register(struct workqueue_struct *wq)
{
@@ -3331,6 +3284,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
}
}
+ dev_set_uevent_suppress(&wq_dev->dev, false);
kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
return 0;
}
@@ -3374,7 +3328,9 @@ void free_workqueue_attrs(struct workqueue_attrs *attrs)
* @gfp_mask: allocation mask to use
*
* Allocate a new workqueue_attrs, initialize with default settings and
- * return it. Returns NULL on failure.
+ * return it.
+ *
+ * Return: The allocated new workqueue_attr on success. %NULL on failure.
*/
struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask)
{
@@ -3398,6 +3354,12 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
{
to->nice = from->nice;
cpumask_copy(to->cpumask, from->cpumask);
+ /*
+ * Unlike hash and equality test, this function doesn't ignore
+ * ->no_numa as it is used for both pool and wq attrs. Instead,
+ * get_unbound_pool() explicitly clears ->no_numa after copying.
+ */
+ to->no_numa = from->no_numa;
}
/* hash value of the content of @attr */
@@ -3427,7 +3389,8 @@ static bool wqattrs_equal(const struct workqueue_attrs *a,
* @pool: worker_pool to initialize
*
* Initiailize a newly zalloc'd @pool. It also allocates @pool->attrs.
- * Returns 0 on success, -errno on failure. Even on failure, all fields
+ *
+ * Return: 0 on success, -errno on failure. Even on failure, all fields
* inside @pool proper are initialized and put_unbound_pool() can be called
* on @pool safely to release it.
*/
@@ -3450,9 +3413,10 @@ static int init_worker_pool(struct worker_pool *pool)
(unsigned long)pool);
mutex_init(&pool->manager_arb);
- mutex_init(&pool->manager_mutex);
- idr_init(&pool->worker_idr);
+ mutex_init(&pool->attach_mutex);
+ INIT_LIST_HEAD(&pool->workers);
+ ida_init(&pool->worker_ida);
INIT_HLIST_NODE(&pool->hash_node);
pool->refcnt = 1;
@@ -3467,7 +3431,7 @@ static void rcu_free_pool(struct rcu_head *rcu)
{
struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
- idr_destroy(&pool->worker_idr);
+ ida_destroy(&pool->worker_ida);
free_workqueue_attrs(pool->attrs);
kfree(pool);
}
@@ -3485,6 +3449,7 @@ static void rcu_free_pool(struct rcu_head *rcu)
*/
static void put_unbound_pool(struct worker_pool *pool)
{
+ DECLARE_COMPLETION_ONSTACK(detach_completion);
struct worker *worker;
lockdep_assert_held(&wq_pool_mutex);
@@ -3505,18 +3470,24 @@ static void put_unbound_pool(struct worker_pool *pool)
/*
* Become the manager and destroy all workers. Grabbing
* manager_arb prevents @pool's workers from blocking on
- * manager_mutex.
+ * attach_mutex.
*/
mutex_lock(&pool->manager_arb);
- mutex_lock(&pool->manager_mutex);
- spin_lock_irq(&pool->lock);
- while ((worker = first_worker(pool)))
+ spin_lock_irq(&pool->lock);
+ while ((worker = first_idle_worker(pool)))
destroy_worker(worker);
WARN_ON(pool->nr_workers || pool->nr_idle);
-
spin_unlock_irq(&pool->lock);
- mutex_unlock(&pool->manager_mutex);
+
+ mutex_lock(&pool->attach_mutex);
+ if (!list_empty(&pool->workers))
+ pool->detach_completion = &detach_completion;
+ mutex_unlock(&pool->attach_mutex);
+
+ if (pool->detach_completion)
+ wait_for_completion(pool->detach_completion);
+
mutex_unlock(&pool->manager_arb);
/* shut down the timers */
@@ -3534,9 +3505,12 @@ static void put_unbound_pool(struct worker_pool *pool)
* Obtain a worker_pool which has the same attributes as @attrs, bump the
* reference count and return it. If there already is a matching
* worker_pool, it will be used; otherwise, this function attempts to
- * create a new one. On failure, returns NULL.
+ * create a new one.
*
* Should be called with wq_pool_mutex held.
+ *
+ * Return: On success, a worker_pool with the same attributes as @attrs.
+ * On failure, %NULL.
*/
static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
{
@@ -3559,12 +3533,15 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
if (!pool || init_worker_pool(pool) < 0)
goto fail;
- if (workqueue_freezing)
- pool->flags |= POOL_FREEZING;
-
lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */
copy_workqueue_attrs(pool->attrs, attrs);
+ /*
+ * no_numa isn't a worker_pool attribute, always clear it. See
+ * 'struct workqueue_attrs' comments for detail.
+ */
+ pool->attrs->no_numa = false;
+
/* if cpumask is contained inside a NUMA node, we belong to that node */
if (wq_numa_enabled) {
for_each_node(node) {
@@ -3662,7 +3639,12 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
spin_lock_irq(&pwq->pool->lock);
- if (!freezable || !(pwq->pool->flags & POOL_FREEZING)) {
+ /*
+ * During [un]freezing, the caller is responsible for ensuring that
+ * this function is called at least once after @workqueue_freezing
+ * is updated and visible.
+ */
+ if (!freezable || !workqueue_freezing) {
pwq->max_active = wq->saved_max_active;
while (!list_empty(&pwq->delayed_works) &&
@@ -3766,9 +3748,7 @@ static void free_unbound_pwq(struct pool_workqueue *pwq)
*
* Calculate the cpumask a workqueue with @attrs should use on @node. If
* @cpu_going_down is >= 0, that cpu is considered offline during
- * calculation. The result is stored in @cpumask. This function returns
- * %true if the resulting @cpumask is different from @attrs->cpumask,
- * %false if equal.
+ * calculation. The result is stored in @cpumask.
*
* If NUMA affinity is not enabled, @attrs->cpumask is always used. If
* enabled and @node has online CPUs requested by @attrs, the returned
@@ -3777,6 +3757,9 @@ static void free_unbound_pwq(struct pool_workqueue *pwq)
*
* The caller is responsible for ensuring that the cpumask of @node stays
* stable.
+ *
+ * Return: %true if the resulting @cpumask is different from @attrs->cpumask,
+ * %false if equal.
*/
static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
int cpu_going_down, cpumask_t *cpumask)
@@ -3830,8 +3813,9 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
* items finish. Note that a work item which repeatedly requeues itself
* back-to-back will stay on its current pwq.
*
- * Performs GFP_KERNEL allocations. Returns 0 on success and -errno on
- * failure.
+ * Performs GFP_KERNEL allocations.
+ *
+ * Return: 0 on success and -errno on failure.
*/
int apply_workqueue_attrs(struct workqueue_struct *wq,
const struct workqueue_attrs *attrs)
@@ -3992,17 +3976,13 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
* Let's determine what needs to be done. If the target cpumask is
* different from wq's, we need to compare it to @pwq's and create
* a new one if they don't match. If the target cpumask equals
- * wq's, the default pwq should be used. If @pwq is already the
- * default one, nothing to do; otherwise, install the default one.
+ * wq's, the default pwq should be used.
*/
if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) {
if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
goto out_unlock;
} else {
- if (pwq == wq->dfl_pwq)
- goto out_unlock;
- else
- goto use_dfl_pwq;
+ goto use_dfl_pwq;
}
mutex_unlock(&wq->mutex);
@@ -4010,9 +3990,10 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
/* create a new pwq */
pwq = alloc_unbound_pwq(wq, target_attrs);
if (!pwq) {
- pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
- wq->name);
- goto out_unlock;
+ pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
+ wq->name);
+ mutex_lock(&wq->mutex);
+ goto use_dfl_pwq;
}
/*
@@ -4038,7 +4019,7 @@ out_unlock:
static int alloc_and_link_pwqs(struct workqueue_struct *wq)
{
bool highpri = wq->flags & WQ_HIGHPRI;
- int cpu;
+ int cpu, ret;
if (!(wq->flags & WQ_UNBOUND)) {
wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
@@ -4058,6 +4039,13 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
mutex_unlock(&wq->mutex);
}
return 0;
+ } else if (wq->flags & __WQ_ORDERED) {
+ ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
+ /* there should only be single pwq for ordering guarantee */
+ WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
+ wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
+ "ordering guarantee broken for workqueue %s\n", wq->name);
+ return ret;
} else {
return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
}
@@ -4086,6 +4074,10 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
struct workqueue_struct *wq;
struct pool_workqueue *pwq;
+ /* see the comment above the definition of WQ_POWER_EFFICIENT */
+ if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
+ flags |= WQ_UNBOUND;
+
/* allocate wq and format name */
if (flags & WQ_UNBOUND)
tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]);
@@ -4295,6 +4287,8 @@ EXPORT_SYMBOL_GPL(workqueue_set_max_active);
*
* Determine whether %current is a workqueue rescuer. Can be used from
* work functions to determine whether it's being run off the rescuer task.
+ *
+ * Return: %true if %current is a workqueue rescuer. %false otherwise.
*/
bool current_is_workqueue_rescuer(void)
{
@@ -4318,7 +4312,7 @@ bool current_is_workqueue_rescuer(void)
* workqueue being congested on one CPU doesn't mean the workqueue is also
* contested on other CPUs / NUMA nodes.
*
- * RETURNS:
+ * Return:
* %true if congested, %false otherwise.
*/
bool workqueue_congested(int cpu, struct workqueue_struct *wq)
@@ -4351,7 +4345,7 @@ EXPORT_SYMBOL_GPL(workqueue_congested);
* synchronization around this function and the test result is
* unreliable and only useful as advisory hints or for debugging.
*
- * RETURNS:
+ * Return:
* OR'd bitmask of WORK_BUSY_* bits.
*/
unsigned int work_busy(struct work_struct *work)
@@ -4474,28 +4468,27 @@ static void wq_unbind_fn(struct work_struct *work)
int cpu = smp_processor_id();
struct worker_pool *pool;
struct worker *worker;
- int wi;
for_each_cpu_worker_pool(pool, cpu) {
WARN_ON_ONCE(cpu != smp_processor_id());
- mutex_lock(&pool->manager_mutex);
+ mutex_lock(&pool->attach_mutex);
spin_lock_irq(&pool->lock);
/*
- * We've blocked all manager operations. Make all workers
+ * We've blocked all attach/detach operations. Make all workers
* unbound and set DISASSOCIATED. Before this, all workers
* except for the ones which are still executing works from
* before the last CPU down must be on the cpu. After
* this, they may become diasporas.
*/
- for_each_pool_worker(worker, wi, pool)
+ for_each_pool_worker(worker, pool)
worker->flags |= WORKER_UNBOUND;
pool->flags |= POOL_DISASSOCIATED;
spin_unlock_irq(&pool->lock);
- mutex_unlock(&pool->manager_mutex);
+ mutex_unlock(&pool->attach_mutex);
/*
* Call schedule() so that we cross rq->lock and thus can
@@ -4535,9 +4528,8 @@ static void wq_unbind_fn(struct work_struct *work)
static void rebind_workers(struct worker_pool *pool)
{
struct worker *worker;
- int wi;
- lockdep_assert_held(&pool->manager_mutex);
+ lockdep_assert_held(&pool->attach_mutex);
/*
* Restore CPU affinity of all workers. As all idle workers should
@@ -4546,13 +4538,13 @@ static void rebind_workers(struct worker_pool *pool)
* of all workers first and then clear UNBOUND. As we're called
* from CPU_ONLINE, the following shouldn't fail.
*/
- for_each_pool_worker(worker, wi, pool)
+ for_each_pool_worker(worker, pool)
WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
pool->attrs->cpumask) < 0);
spin_lock_irq(&pool->lock);
- for_each_pool_worker(worker, wi, pool) {
+ for_each_pool_worker(worker, pool) {
unsigned int worker_flags = worker->flags;
/*
@@ -4604,9 +4596,8 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
{
static cpumask_t cpumask;
struct worker *worker;
- int wi;
- lockdep_assert_held(&pool->manager_mutex);
+ lockdep_assert_held(&pool->attach_mutex);
/* is @cpu allowed for @pool? */
if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
@@ -4618,7 +4609,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
return;
/* as we're called from CPU_ONLINE, the following shouldn't fail */
- for_each_pool_worker(worker, wi, pool)
+ for_each_pool_worker(worker, pool)
WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
pool->attrs->cpumask) < 0);
}
@@ -4627,7 +4618,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
* Workqueues should be brought up before normal priority CPU notifiers.
* This will be registered high priority CPU notifier.
*/
-static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
+static int workqueue_cpu_up_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
@@ -4651,7 +4642,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
mutex_lock(&wq_pool_mutex);
for_each_pool(pool, pi) {
- mutex_lock(&pool->manager_mutex);
+ mutex_lock(&pool->attach_mutex);
if (pool->cpu == cpu) {
spin_lock_irq(&pool->lock);
@@ -4663,7 +4654,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
restore_unbound_workers_cpumask(pool, cpu);
}
- mutex_unlock(&pool->manager_mutex);
+ mutex_unlock(&pool->attach_mutex);
}
/* update NUMA affinity of unbound workqueues */
@@ -4680,7 +4671,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
* Workqueues should be brought down after normal priority CPU notifiers.
* This will be registered as low priority CPU notifier.
*/
-static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb,
+static int workqueue_cpu_down_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
@@ -4702,6 +4693,7 @@ static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb,
/* wait for per-cpu unbinding to finish */
flush_work(&unbind_work);
+ destroy_work_on_stack(&unbind_work);
break;
}
return NOTIFY_OK;
@@ -4729,9 +4721,10 @@ static void work_for_cpu_fn(struct work_struct *work)
* @fn: the function to run
* @arg: the function arg
*
- * This will return the value @fn returns.
* It is up to the caller to ensure that the cpu doesn't go offline.
* The caller must not hold any locks which would prevent @fn from completing.
+ *
+ * Return: The value @fn returns.
*/
long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
{
@@ -4740,6 +4733,7 @@ long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
schedule_work_on(cpu, &wfc.work);
flush_work(&wfc.work);
+ destroy_work_on_stack(&wfc.work);
return wfc.ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu);
@@ -4759,24 +4753,14 @@ EXPORT_SYMBOL_GPL(work_on_cpu);
*/
void freeze_workqueues_begin(void)
{
- struct worker_pool *pool;
struct workqueue_struct *wq;
struct pool_workqueue *pwq;
- int pi;
mutex_lock(&wq_pool_mutex);
WARN_ON_ONCE(workqueue_freezing);
workqueue_freezing = true;
- /* set FREEZING */
- for_each_pool(pool, pi) {
- spin_lock_irq(&pool->lock);
- WARN_ON_ONCE(pool->flags & POOL_FREEZING);
- pool->flags |= POOL_FREEZING;
- spin_unlock_irq(&pool->lock);
- }
-
list_for_each_entry(wq, &workqueues, list) {
mutex_lock(&wq->mutex);
for_each_pwq(pwq, wq)
@@ -4796,7 +4780,7 @@ void freeze_workqueues_begin(void)
* CONTEXT:
* Grabs and releases wq_pool_mutex.
*
- * RETURNS:
+ * Return:
* %true if some freezable workqueues are still busy. %false if freezing
* is complete.
*/
@@ -4846,21 +4830,13 @@ void thaw_workqueues(void)
{
struct workqueue_struct *wq;
struct pool_workqueue *pwq;
- struct worker_pool *pool;
- int pi;
mutex_lock(&wq_pool_mutex);
if (!workqueue_freezing)
goto out_unlock;
- /* clear FREEZING */
- for_each_pool(pool, pi) {
- spin_lock_irq(&pool->lock);
- WARN_ON_ONCE(!(pool->flags & POOL_FREEZING));
- pool->flags &= ~POOL_FREEZING;
- spin_unlock_irq(&pool->lock);
- }
+ workqueue_freezing = false;
/* restore max_active and repopulate worklist */
list_for_each_entry(wq, &workqueues, list) {
@@ -4870,7 +4846,6 @@ void thaw_workqueues(void)
mutex_unlock(&wq->mutex);
}
- workqueue_freezing = false;
out_unlock:
mutex_unlock(&wq_pool_mutex);
}
@@ -4905,7 +4880,7 @@ static void __init wq_numa_init(void)
BUG_ON(!tbl);
for_each_node(node)
- BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
+ BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
node_online(node) ? node : NUMA_NO_NODE));
for_each_possible_cpu(cpu) {
@@ -4927,10 +4902,6 @@ static int __init init_workqueues(void)
int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
int i, cpu;
- /* make sure we have enough bits for OFFQ pool ID */
- BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) <
- WORK_CPU_END * NR_STD_WORKER_POOLS);
-
WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
@@ -4969,13 +4940,23 @@ static int __init init_workqueues(void)
}
}
- /* create default unbound wq attrs */
+ /* create default unbound and ordered wq attrs */
for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
struct workqueue_attrs *attrs;
BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
attrs->nice = std_nice[i];
unbound_std_wq_attrs[i] = attrs;
+
+ /*
+ * An ordered wq should have only one pwq as ordering is
+ * guaranteed by max_active which is enforced by pwqs.
+ * Turn off NUMA so that dfl_pwq is used for all nodes.
+ */
+ BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
+ attrs->nice = std_nice[i];
+ attrs->no_numa = true;
+ ordered_wq_attrs[i] = attrs;
}
system_wq = alloc_workqueue("events", 0, 0);
@@ -4985,8 +4966,15 @@ static int __init init_workqueues(void)
WQ_UNBOUND_MAX_ACTIVE);
system_freezable_wq = alloc_workqueue("events_freezable",
WQ_FREEZABLE, 0);
+ system_power_efficient_wq = alloc_workqueue("events_power_efficient",
+ WQ_POWER_EFFICIENT, 0);
+ system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
+ WQ_FREEZABLE | WQ_POWER_EFFICIENT,
+ 0);
BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
- !system_unbound_wq || !system_freezable_wq);
+ !system_unbound_wq || !system_freezable_wq ||
+ !system_power_efficient_wq ||
+ !system_freezable_power_efficient_wq);
return 0;
}
early_initcall(init_workqueues);