diff options
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r-- | kernel/workqueue.c | 1530 |
1 files changed, 710 insertions, 820 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index fbc6576a83c..f4feacad381 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -41,32 +41,31 @@ #include <linux/debug_locks.h> #include <linux/lockdep.h> #include <linux/idr.h> +#include <linux/hashtable.h> -#include "workqueue_sched.h" +#include "workqueue_internal.h" enum { /* - * global_cwq flags + * worker_pool flags * - * A bound gcwq is either associated or disassociated with its CPU. + * A bound pool is either associated or disassociated with its CPU. * While associated (!DISASSOCIATED), all workers are bound to the * CPU and none has %WORKER_UNBOUND set and concurrency management * is in effect. * * While DISASSOCIATED, the cpu may be offline and all workers have * %WORKER_UNBOUND set and concurrency management disabled, and may - * be executing on any CPU. The gcwq behaves as an unbound one. + * be executing on any CPU. The pool behaves as an unbound one. * * Note that DISASSOCIATED can be flipped only while holding - * assoc_mutex of all pools on the gcwq to avoid changing binding - * state while create_worker() is in progress. + * assoc_mutex to avoid changing binding state while + * create_worker() is in progress. */ - GCWQ_DISASSOCIATED = 1 << 0, /* cpu can't serve workers */ - GCWQ_FREEZING = 1 << 1, /* freeze in progress */ - - /* pool flags */ POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */ + POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ + POOL_FREEZING = 1 << 3, /* freeze in progress */ /* worker flags */ WORKER_STARTED = 1 << 0, /* started */ @@ -79,11 +78,9 @@ enum { WORKER_NOT_RUNNING = WORKER_PREP | WORKER_UNBOUND | WORKER_CPU_INTENSIVE, - NR_WORKER_POOLS = 2, /* # worker pools per gcwq */ + NR_STD_WORKER_POOLS = 2, /* # standard pools per cpu */ BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */ - BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER, - BUSY_WORKER_HASH_MASK = BUSY_WORKER_HASH_SIZE - 1, MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */ IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */ @@ -111,48 +108,24 @@ enum { * P: Preemption protected. Disabling preemption is enough and should * only be modified and accessed from the local cpu. * - * L: gcwq->lock protected. Access with gcwq->lock held. + * L: pool->lock protected. Access with pool->lock held. * - * X: During normal operation, modification requires gcwq->lock and - * should be done only from local cpu. Either disabling preemption - * on local cpu or grabbing gcwq->lock is enough for read access. - * If GCWQ_DISASSOCIATED is set, it's identical to L. + * X: During normal operation, modification requires pool->lock and should + * be done only from local cpu. Either disabling preemption on local + * cpu or grabbing pool->lock is enough for read access. If + * POOL_DISASSOCIATED is set, it's identical to L. * * F: wq->flush_mutex protected. * * W: workqueue_lock protected. */ -struct global_cwq; -struct worker_pool; - -/* - * The poor guys doing the actual heavy lifting. All on-duty workers - * are either serving the manager role, on idle list or on busy hash. - */ -struct worker { - /* on idle list while idle, on busy hash table while busy */ - union { - struct list_head entry; /* L: while idle */ - struct hlist_node hentry; /* L: while busy */ - }; - - struct work_struct *current_work; /* L: work being processed */ - struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */ - struct list_head scheduled; /* L: scheduled works */ - struct task_struct *task; /* I: worker task */ - struct worker_pool *pool; /* I: the associated pool */ - /* 64 bytes boundary on 64bit, 32 on 32bit */ - unsigned long last_active; /* L: last active timestamp */ - unsigned int flags; /* X: flags */ - int id; /* I: worker id */ - - /* for rebinding worker to CPU */ - struct work_struct rebind_work; /* L: for busy worker */ -}; +/* struct worker is defined in workqueue_internal.h */ struct worker_pool { - struct global_cwq *gcwq; /* I: the owning gcwq */ + spinlock_t lock; /* the pool lock */ + unsigned int cpu; /* I: the associated cpu */ + int id; /* I: pool ID */ unsigned int flags; /* X: flags */ struct list_head worklist; /* L: list of pending works */ @@ -165,34 +138,28 @@ struct worker_pool { struct timer_list idle_timer; /* L: worker idle timeout */ struct timer_list mayday_timer; /* L: SOS timer for workers */ - struct mutex assoc_mutex; /* protect GCWQ_DISASSOCIATED */ - struct ida worker_ida; /* L: for worker IDs */ -}; - -/* - * Global per-cpu workqueue. There's one and only one for each cpu - * and all works are queued and processed here regardless of their - * target workqueues. - */ -struct global_cwq { - spinlock_t lock; /* the gcwq lock */ - unsigned int cpu; /* I: the associated cpu */ - unsigned int flags; /* L: GCWQ_* flags */ - - /* workers are chained either in busy_hash or pool idle_list */ - struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE]; + /* workers are chained either in busy_hash or idle_list */ + DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER); /* L: hash of busy workers */ - struct worker_pool pools[NR_WORKER_POOLS]; - /* normal and highpri pools */ + struct mutex assoc_mutex; /* protect POOL_DISASSOCIATED */ + struct ida worker_ida; /* L: for worker IDs */ + + /* + * The current concurrency level. As it's likely to be accessed + * from other CPUs during try_to_wake_up(), put it in a separate + * cacheline. + */ + atomic_t nr_running ____cacheline_aligned_in_smp; } ____cacheline_aligned_in_smp; /* - * The per-CPU workqueue. The lower WORK_STRUCT_FLAG_BITS of - * work_struct->data are used for flags and thus cwqs need to be - * aligned at two's power of the number of flag bits. + * The per-pool workqueue. While queued, the lower WORK_STRUCT_FLAG_BITS + * of work_struct->data are used for flags and the remaining high bits + * point to the pwq; thus, pwqs need to be aligned at two's power of the + * number of flag bits. */ -struct cpu_workqueue_struct { +struct pool_workqueue { struct worker_pool *pool; /* I: the associated pool */ struct workqueue_struct *wq; /* I: the owning workqueue */ int work_color; /* L: current color */ @@ -241,16 +208,16 @@ typedef unsigned long mayday_mask_t; struct workqueue_struct { unsigned int flags; /* W: WQ_* flags */ union { - struct cpu_workqueue_struct __percpu *pcpu; - struct cpu_workqueue_struct *single; + struct pool_workqueue __percpu *pcpu; + struct pool_workqueue *single; unsigned long v; - } cpu_wq; /* I: cwq's */ + } pool_wq; /* I: pwq's */ struct list_head list; /* W: list of all workqueues */ struct mutex flush_mutex; /* protects wq flushing */ int work_color; /* F: current work color */ int flush_color; /* F: current flush color */ - atomic_t nr_cwqs_to_flush; /* flush in progress */ + atomic_t nr_pwqs_to_flush; /* flush in progress */ struct wq_flusher *first_flusher; /* F: first flusher */ struct list_head flusher_queue; /* F: flush waiters */ struct list_head flusher_overflow; /* F: flush overflow list */ @@ -259,7 +226,7 @@ struct workqueue_struct { struct worker *rescuer; /* I: rescue worker */ int nr_drainers; /* W: drain in progress */ - int saved_max_active; /* W: saved cwq max_active */ + int saved_max_active; /* W: saved pwq max_active */ #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; #endif @@ -280,16 +247,15 @@ EXPORT_SYMBOL_GPL(system_freezable_wq); #define CREATE_TRACE_POINTS #include <trace/events/workqueue.h> -#define for_each_worker_pool(pool, gcwq) \ - for ((pool) = &(gcwq)->pools[0]; \ - (pool) < &(gcwq)->pools[NR_WORKER_POOLS]; (pool)++) +#define for_each_std_worker_pool(pool, cpu) \ + for ((pool) = &std_worker_pools(cpu)[0]; \ + (pool) < &std_worker_pools(cpu)[NR_STD_WORKER_POOLS]; (pool)++) -#define for_each_busy_worker(worker, i, pos, gcwq) \ - for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \ - hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry) +#define for_each_busy_worker(worker, i, pos, pool) \ + hash_for_each(pool->busy_hash, i, pos, worker, hentry) -static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask, - unsigned int sw) +static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, + unsigned int sw) { if (cpu < nr_cpu_ids) { if (sw & 1) { @@ -300,42 +266,42 @@ static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask, if (sw & 2) return WORK_CPU_UNBOUND; } - return WORK_CPU_NONE; + return WORK_CPU_END; } -static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, - struct workqueue_struct *wq) +static inline int __next_pwq_cpu(int cpu, const struct cpumask *mask, + struct workqueue_struct *wq) { - return __next_gcwq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2); + return __next_wq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2); } /* * CPU iterators * - * An extra gcwq is defined for an invalid cpu number + * An extra cpu number is defined using an invalid cpu number * (WORK_CPU_UNBOUND) to host workqueues which are not bound to any - * specific CPU. The following iterators are similar to - * for_each_*_cpu() iterators but also considers the unbound gcwq. + * specific CPU. The following iterators are similar to for_each_*_cpu() + * iterators but also considers the unbound CPU. * - * for_each_gcwq_cpu() : possible CPUs + WORK_CPU_UNBOUND - * for_each_online_gcwq_cpu() : online CPUs + WORK_CPU_UNBOUND - * for_each_cwq_cpu() : possible CPUs for bound workqueues, + * for_each_wq_cpu() : possible CPUs + WORK_CPU_UNBOUND + * for_each_online_wq_cpu() : online CPUs + WORK_CPU_UNBOUND + * for_each_pwq_cpu() : possible CPUs for bound workqueues, * WORK_CPU_UNBOUND for unbound workqueues */ -#define for_each_gcwq_cpu(cpu) \ - for ((cpu) = __next_gcwq_cpu(-1, cpu_possible_mask, 3); \ - (cpu) < WORK_CPU_NONE; \ - (cpu) = __next_gcwq_cpu((cpu), cpu_possible_mask, 3)) +#define for_each_wq_cpu(cpu) \ + for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, 3); \ + (cpu) < WORK_CPU_END; \ + (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, 3)) -#define for_each_online_gcwq_cpu(cpu) \ - for ((cpu) = __next_gcwq_cpu(-1, cpu_online_mask, 3); \ - (cpu) < WORK_CPU_NONE; \ - (cpu) = __next_gcwq_cpu((cpu), cpu_online_mask, 3)) +#define for_each_online_wq_cpu(cpu) \ + for ((cpu) = __next_wq_cpu(-1, cpu_online_mask, 3); \ + (cpu) < WORK_CPU_END; \ + (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3)) -#define for_each_cwq_cpu(cpu, wq) \ - for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, (wq)); \ - (cpu) < WORK_CPU_NONE; \ - (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq))) +#define for_each_pwq_cpu(cpu, wq) \ + for ((cpu) = __next_pwq_cpu(-1, cpu_possible_mask, (wq)); \ + (cpu) < WORK_CPU_END; \ + (cpu) = __next_pwq_cpu((cpu), cpu_possible_mask, (wq))) #ifdef CONFIG_DEBUG_OBJECTS_WORK @@ -459,57 +425,69 @@ static LIST_HEAD(workqueues); static bool workqueue_freezing; /* W: have wqs started freezing? */ /* - * The almighty global cpu workqueues. nr_running is the only field - * which is expected to be used frequently by other cpus via - * try_to_wake_up(). Put it in a separate cacheline. + * The CPU and unbound standard worker pools. The unbound ones have + * POOL_DISASSOCIATED set, and their workers have WORKER_UNBOUND set. */ -static DEFINE_PER_CPU(struct global_cwq, global_cwq); -static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, pool_nr_running[NR_WORKER_POOLS]); +static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], + cpu_std_worker_pools); +static struct worker_pool unbound_std_worker_pools[NR_STD_WORKER_POOLS]; -/* - * Global cpu workqueue and nr_running counter for unbound gcwq. The - * gcwq is always online, has GCWQ_DISASSOCIATED set, and all its - * workers have WORKER_UNBOUND set. - */ -static struct global_cwq unbound_global_cwq; -static atomic_t unbound_pool_nr_running[NR_WORKER_POOLS] = { - [0 ... NR_WORKER_POOLS - 1] = ATOMIC_INIT(0), /* always 0 */ -}; +/* idr of all pools */ +static DEFINE_MUTEX(worker_pool_idr_mutex); +static DEFINE_IDR(worker_pool_idr); static int worker_thread(void *__worker); -static int worker_pool_pri(struct worker_pool *pool) +static struct worker_pool *std_worker_pools(int cpu) { - return pool - pool->gcwq->pools; + if (cpu != WORK_CPU_UNBOUND) + return per_cpu(cpu_std_worker_pools, cpu); + else + return unbound_std_worker_pools; } -static struct global_cwq *get_gcwq(unsigned int cpu) +static int std_worker_pool_pri(struct worker_pool *pool) { - if (cpu != WORK_CPU_UNBOUND) - return &per_cpu(global_cwq, cpu); - else - return &unbound_global_cwq; + return pool - std_worker_pools(pool->cpu); } -static atomic_t *get_pool_nr_running(struct worker_pool *pool) +/* allocate ID and assign it to @pool */ +static int worker_pool_assign_id(struct worker_pool *pool) { - int cpu = pool->gcwq->cpu; - int idx = worker_pool_pri(pool); + int ret; - if (cpu != WORK_CPU_UNBOUND) - return &per_cpu(pool_nr_running, cpu)[idx]; - else - return &unbound_pool_nr_running[idx]; + mutex_lock(&worker_pool_idr_mutex); + idr_pre_get(&worker_pool_idr, GFP_KERNEL); + ret = idr_get_new(&worker_pool_idr, pool, &pool->id); + mutex_unlock(&worker_pool_idr_mutex); + + return ret; } -static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, - struct workqueue_struct *wq) +/* + * Lookup worker_pool by id. The idr currently is built during boot and + * never modified. Don't worry about locking for now. + */ +static struct worker_pool *worker_pool_by_id(int pool_id) +{ + return idr_find(&worker_pool_idr, pool_id); +} + +static struct worker_pool *get_std_worker_pool(int cpu, bool highpri) +{ + struct worker_pool *pools = std_worker_pools(cpu); + + return &pools[highpri]; +} + +static struct pool_workqueue *get_pwq(unsigned int cpu, + struct workqueue_struct *wq) { if (!(wq->flags & WQ_UNBOUND)) { if (likely(cpu < nr_cpu_ids)) - return per_cpu_ptr(wq->cpu_wq.pcpu, cpu); + return per_cpu_ptr(wq->pool_wq.pcpu, cpu); } else if (likely(cpu == WORK_CPU_UNBOUND)) - return wq->cpu_wq.single; + return wq->pool_wq.single; return NULL; } @@ -530,19 +508,19 @@ static int work_next_color(int color) } /* - * While queued, %WORK_STRUCT_CWQ is set and non flag bits of a work's data - * contain the pointer to the queued cwq. Once execution starts, the flag - * is cleared and the high bits contain OFFQ flags and CPU number. + * While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data + * contain the pointer to the queued pwq. Once execution starts, the flag + * is cleared and the high bits contain OFFQ flags and pool ID. * - * set_work_cwq(), set_work_cpu_and_clear_pending(), mark_work_canceling() - * and clear_work_data() can be used to set the cwq, cpu or clear + * set_work_pwq(), set_work_pool_and_clear_pending(), mark_work_canceling() + * and clear_work_data() can be used to set the pwq, pool or clear * work->data. These functions should only be called while the work is * owned - ie. while the PENDING bit is set. * - * get_work_[g]cwq() can be used to obtain the gcwq or cwq corresponding to - * a work. gcwq is available once the work has been queued anywhere after - * initialization until it is sync canceled. cwq is available only while - * the work item is queued. + * get_work_pool() and get_work_pwq() can be used to obtain the pool or pwq + * corresponding to a work. Pool is available once the work has been + * queued anywhere after initialization until it is sync canceled. pwq is + * available only while the work item is queued. * * %WORK_OFFQ_CANCELING is used to mark a work item which is being * canceled. While being canceled, a work item may have its PENDING set @@ -556,16 +534,22 @@ static inline void set_work_data(struct work_struct *work, unsigned long data, atomic_long_set(&work->data, data | flags | work_static(work)); } -static void set_work_cwq(struct work_struct *work, - struct cpu_workqueue_struct *cwq, +static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq, unsigned long extra_flags) { - set_work_data(work, (unsigned long)cwq, - WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags); + set_work_data(work, (unsigned long)pwq, + WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags); } -static void set_work_cpu_and_clear_pending(struct work_struct *work, - unsigned int cpu) +static void set_work_pool_and_keep_pending(struct work_struct *work, + int pool_id) +{ + set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, + WORK_STRUCT_PENDING); +} + +static void set_work_pool_and_clear_pending(struct work_struct *work, + int pool_id) { /* * The following wmb is paired with the implied mb in @@ -574,67 +558,92 @@ static void set_work_cpu_and_clear_pending(struct work_struct *work, * owner. */ smp_wmb(); - set_work_data(work, (unsigned long)cpu << WORK_OFFQ_CPU_SHIFT, 0); + set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0); } static void clear_work_data(struct work_struct *work) { - smp_wmb(); /* see set_work_cpu_and_clear_pending() */ - set_work_data(work, WORK_STRUCT_NO_CPU, 0); + smp_wmb(); /* see set_work_pool_and_clear_pending() */ + set_work_data(work, WORK_STRUCT_NO_POOL, 0); } -static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work) +static struct pool_workqueue *get_work_pwq(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); - if (data & WORK_STRUCT_CWQ) + if (data & WORK_STRUCT_PWQ) return (void *)(data & WORK_STRUCT_WQ_DATA_MASK); else return NULL; } -static struct global_cwq *get_work_gcwq(struct work_struct *work) +/** + * get_work_pool - return the worker_pool a given work was associated with + * @work: the work item of interest + * + * Return the worker_pool @work was last associated with. %NULL if none. + */ +static struct worker_pool *get_work_pool(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); - unsigned int cpu; + struct worker_pool *pool; + int pool_id; - if (data & WORK_STRUCT_CWQ) - return ((struct cpu_workqueue_struct *) - (data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq; + if (data & WORK_STRUCT_PWQ) + return ((struct pool_workqueue *) + (data & WORK_STRUCT_WQ_DATA_MASK))->pool; - cpu = data >> WORK_OFFQ_CPU_SHIFT; - if (cpu == WORK_CPU_NONE) + pool_id = data >> WORK_OFFQ_POOL_SHIFT; + if (pool_id == WORK_OFFQ_POOL_NONE) return NULL; - BUG_ON(cpu >= nr_cpu_ids && cpu != WORK_CPU_UNBOUND); - return get_gcwq(cpu); + pool = worker_pool_by_id(pool_id); + WARN_ON_ONCE(!pool); + return pool; +} + +/** + * get_work_pool_id - return the worker pool ID a given work is associated with + * @work: the work item of interest + * + * Return the worker_pool ID @work was last associated with. + * %WORK_OFFQ_POOL_NONE if none. + */ +static int get_work_pool_id(struct work_struct *work) +{ + unsigned long data = atomic_long_read(&work->data); + + if (data & WORK_STRUCT_PWQ) + return ((struct pool_workqueue *) + (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id; + + return data >> WORK_OFFQ_POOL_SHIFT; } static void mark_work_canceling(struct work_struct *work) { - struct global_cwq *gcwq = get_work_gcwq(work); - unsigned long cpu = gcwq ? gcwq->cpu : WORK_CPU_NONE; + unsigned long pool_id = get_work_pool_id(work); - set_work_data(work, (cpu << WORK_OFFQ_CPU_SHIFT) | WORK_OFFQ_CANCELING, - WORK_STRUCT_PENDING); + pool_id <<= WORK_OFFQ_POOL_SHIFT; + set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING); } static bool work_is_canceling(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); - return !(data & WORK_STRUCT_CWQ) && (data & WORK_OFFQ_CANCELING); + return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING); } /* * Policy functions. These define the policies on how the global worker * pools are managed. Unless noted otherwise, these functions assume that - * they're being called with gcwq->lock held. + * they're being called with pool->lock held. */ static bool __need_more_worker(struct worker_pool *pool) { - return !atomic_read(get_pool_nr_running(pool)); + return !atomic_read(&pool->nr_running); } /* @@ -642,7 +651,7 @@ static bool __need_more_worker(struct worker_pool *pool) * running workers. * * Note that, because unbound workers never contribute to nr_running, this - * function will always return %true for unbound gcwq as long as the + * function will always return %true for unbound pools as long as the * worklist isn't empty. */ static bool need_more_worker(struct worker_pool *pool) @@ -659,9 +668,8 @@ static bool may_start_working(struct worker_pool *pool) /* Do I need to keep working? Called from currently running workers. */ static bool keep_working(struct worker_pool *pool) { - atomic_t *nr_running = get_pool_nr_running(pool); - - return !list_empty(&pool->worklist) && atomic_read(nr_running) <= 1; + return !list_empty(&pool->worklist) && + atomic_read(&pool->nr_running) <= 1; } /* Do we need a new worker? Called from manager. */ @@ -714,7 +722,7 @@ static struct worker *first_worker(struct worker_pool *pool) * Wake up the first idle worker of @pool. * * CONTEXT: - * spin_lock_irq(gcwq->lock). + * spin_lock_irq(pool->lock). */ static void wake_up_worker(struct worker_pool *pool) { @@ -740,8 +748,8 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) struct worker *worker = kthread_data(task); if (!(worker->flags & WORKER_NOT_RUNNING)) { - WARN_ON_ONCE(worker->pool->gcwq->cpu != cpu); - atomic_inc(get_pool_nr_running(worker->pool)); + WARN_ON_ONCE(worker->pool->cpu != cpu); + atomic_inc(&worker->pool->nr_running); } } @@ -764,12 +772,18 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, unsigned int cpu) { struct worker *worker = kthread_data(task), *to_wakeup = NULL; - struct worker_pool *pool = worker->pool; - atomic_t *nr_running = get_pool_nr_running(pool); + struct worker_pool *pool; + /* + * Rescuers, which may not have all the fields set up like normal + * workers, also reach here, let's not access anything before + * checking NOT_RUNNING. + */ if (worker->flags & WORKER_NOT_RUNNING) return NULL; + pool = worker->pool; + /* this can only happen on the local cpu */ BUG_ON(cpu != raw_smp_processor_id()); @@ -781,10 +795,11 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, * NOT_RUNNING is clear. This means that we're bound to and * running on the local cpu w/ rq lock held and preemption * disabled, which in turn means that none else could be - * manipulating idle_list, so dereferencing idle_list without gcwq + * manipulating idle_list, so dereferencing idle_list without pool * lock is safe. */ - if (atomic_dec_and_test(nr_running) && !list_empty(&pool->worklist)) + if (atomic_dec_and_test(&pool->nr_running) && + !list_empty(&pool->worklist)) to_wakeup = first_worker(pool); return to_wakeup ? to_wakeup->task : NULL; } @@ -800,7 +815,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, * woken up. * * CONTEXT: - * spin_lock_irq(gcwq->lock) + * spin_lock_irq(pool->lock) */ static inline void worker_set_flags(struct worker *worker, unsigned int flags, bool wakeup) @@ -816,14 +831,12 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags, */ if ((flags & WORKER_NOT_RUNNING) && !(worker->flags & WORKER_NOT_RUNNING)) { - atomic_t *nr_running = get_pool_nr_running(pool); - if (wakeup) { - if (atomic_dec_and_test(nr_running) && + if (atomic_dec_and_test(&pool->nr_running) && !list_empty(&pool->worklist)) wake_up_worker(pool); } else - atomic_dec(nr_running); + atomic_dec(&pool->nr_running); } worker->flags |= flags; @@ -837,7 +850,7 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags, * Clear @flags in @worker->flags and adjust nr_running accordingly. * * CONTEXT: - * spin_lock_irq(gcwq->lock) + * spin_lock_irq(pool->lock) */ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) { @@ -855,87 +868,56 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) */ if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) if (!(worker->flags & WORKER_NOT_RUNNING)) - atomic_inc(get_pool_nr_running(pool)); + atomic_inc(&pool->nr_running); } /** - * busy_worker_head - return the busy hash head for a work - * @gcwq: gcwq of interest - * @work: work to be hashed - * - * Return hash head of @gcwq for @work. - * - * CONTEXT: - * spin_lock_irq(gcwq->lock). - * - * RETURNS: - * Pointer to the hash head. - */ -static struct hlist_head *busy_worker_head(struct global_cwq *gcwq, - struct work_struct *work) -{ - const int base_shift = ilog2(sizeof(struct work_struct)); - unsigned long v = (unsigned long)work; - - /* simple shift and fold hash, do we need something better? */ - v >>= base_shift; - v += v >> BUSY_WORKER_HASH_ORDER; - v &= BUSY_WORKER_HASH_MASK; - - return &gcwq->busy_hash[v]; -} - -/** - * __find_worker_executing_work - find worker which is executing a work - * @gcwq: gcwq of interest - * @bwh: hash head as returned by busy_worker_head() + * find_worker_executing_work - find worker which is executing a work + * @pool: pool of interest * @work: work to find worker for * - * Find a worker which is executing @work on @gcwq. @bwh should be - * the hash head obtained by calling busy_worker_head() with the same - * work. + * Find a worker which is executing @work on @pool by searching + * @pool->busy_hash which is keyed by the address of @work. For a worker + * to match, its current execution should match the address of @work and + * its work function. This is to avoid unwanted dependency between + * unrelated work executions through a work item being recycled while still + * being executed. + * + * This is a bit tricky. A work item may be freed once its execution + * starts and nothing prevents the freed area from being recycled for + * another work item. If the same work item address ends up being reused + * before the original execution finishes, workqueue will identify the + * recycled work item as currently executing and make it wait until the + * current execution finishes, introducing an unwanted dependency. + * + * This function checks the work item address, work function and workqueue + * to avoid false positives. Note that this isn't complete as one may + * construct a work function which can introduce dependency onto itself + * through a recycled work item. Well, if somebody wants to shoot oneself + * in the foot that badly, there's only so much we can do, and if such + * deadlock actually occurs, it should be easy to locate the culprit work + * function. * * CONTEXT: - * spin_lock_irq(gcwq->lock). + * spin_lock_irq(pool->lock). * * RETURNS: * Pointer to worker which is executing @work if found, NULL * otherwise. */ -static struct worker *__find_worker_executing_work(struct global_cwq *gcwq, - struct hlist_head *bwh, - struct work_struct *work) +static struct worker *find_worker_executing_work(struct worker_pool *pool, + struct work_struct *work) { struct worker *worker; struct hlist_node *tmp; - hlist_for_each_entry(worker, tmp, bwh, hentry) - if (worker->current_work == work) + hash_for_each_possible(pool->busy_hash, worker, tmp, hentry, + (unsigned long)work) + if (worker->current_work == work && + worker->current_func == work->func) return worker; - return NULL; -} -/** - * find_worker_executing_work - find worker which is executing a work - * @gcwq: gcwq of interest - * @work: work to find worker for - * - * Find a worker which is executing @work on @gcwq. This function is - * identical to __find_worker_executing_work() except that this - * function calculates @bwh itself. - * - * CONTEXT: - * spin_lock_irq(gcwq->lock). - * - * RETURNS: - * Pointer to worker which is executing @work if found, NULL - * otherwise. - */ -static struct worker *find_worker_executing_work(struct global_cwq *gcwq, - struct work_struct *work) -{ - return __find_worker_executing_work(gcwq, busy_worker_head(gcwq, work), - work); + return NULL; } /** @@ -953,7 +935,7 @@ static struct worker *find_worker_executing_work(struct global_cwq *gcwq, * nested inside outer list_for_each_entry_safe(). * * CONTEXT: - * spin_lock_irq(gcwq->lock). + * spin_lock_irq(pool->lock). */ static void move_linked_works(struct work_struct *work, struct list_head *head, struct work_struct **nextp) @@ -979,67 +961,67 @@ static void move_linked_works(struct work_struct *work, struct list_head *head, *nextp = n; } -static void cwq_activate_delayed_work(struct work_struct *work) +static void pwq_activate_delayed_work(struct work_struct *work) { - struct cpu_workqueue_struct *cwq = get_work_cwq(work); + struct pool_workqueue *pwq = get_work_pwq(work); trace_workqueue_activate_work(work); - move_linked_works(work, &cwq->pool->worklist, NULL); + move_linked_works(work, &pwq->pool->worklist, NULL); __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); - cwq->nr_active++; + pwq->nr_active++; } -static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) +static void pwq_activate_first_delayed(struct pool_workqueue *pwq) { - struct work_struct *work = list_first_entry(&cwq->delayed_works, + struct work_struct *work = list_first_entry(&pwq->delayed_works, struct work_struct, entry); - cwq_activate_delayed_work(work); + pwq_activate_delayed_work(work); } /** - * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight - * @cwq: cwq of interest + * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight + * @pwq: pwq of interest * @color: color of work which left the queue * * A work either has completed or is removed from pending queue, - * decrement nr_in_flight of its cwq and handle workqueue flushing. + * decrement nr_in_flight of its pwq and handle workqueue flushing. * * CONTEXT: - * spin_lock_irq(gcwq->lock). + * spin_lock_irq(pool->lock). */ -static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) +static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color) { /* ignore uncolored works */ if (color == WORK_NO_COLOR) return; - cwq->nr_in_flight[color]--; + pwq->nr_in_flight[color]--; - cwq->nr_active--; - if (!list_empty(&cwq->delayed_works)) { + pwq->nr_active--; + if (!list_empty(&pwq->delayed_works)) { /* one down, submit a delayed one */ - if (cwq->nr_active < cwq->max_active) - cwq_activate_first_delayed(cwq); + if (pwq->nr_active < pwq->max_active) + pwq_activate_first_delayed(pwq); } /* is flush in progress and are we at the flushing tip? */ - if (likely(cwq->flush_color != color)) + if (likely(pwq->flush_color != color)) return; /* are there still in-flight works? */ - if (cwq->nr_in_flight[color]) + if (pwq->nr_in_flight[color]) return; - /* this cwq is done, clear flush_color */ - cwq->flush_color = -1; + /* this pwq is done, clear flush_color */ + pwq->flush_color = -1; /* - * If this was the last cwq, wake up the first flusher. It + * If this was the last pwq, wake up the first flusher. It * will handle the rest. */ - if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush)) - complete(&cwq->wq->first_flusher->done); + if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush)) + complete(&pwq->wq->first_flusher->done); } /** @@ -1070,7 +1052,8 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) static int try_to_grab_pending(struct work_struct *work, bool is_dwork, unsigned long *flags) { - struct global_cwq *gcwq; + struct worker_pool *pool; + struct pool_workqueue *pwq; local_irq_save(*flags); @@ -1095,41 +1078,43 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, * The queueing is in progress, or it is already queued. Try to * steal it from ->worklist without clearing WORK_STRUCT_PENDING. */ - gcwq = get_work_gcwq(work); - if (!gcwq) + pool = get_work_pool(work); + if (!pool) goto fail; - spin_lock(&gcwq->lock); - if (!list_empty(&work->entry)) { + spin_lock(&pool->lock); + /* + * work->data is guaranteed to point to pwq only while the work + * item is queued on pwq->wq, and both updating work->data to point + * to pwq on queueing and to pool on dequeueing are done under + * pwq->pool->lock. This in turn guarantees that, if work->data + * points to pwq which is associated with a locked pool, the work + * item is currently queued on that pool. + */ + pwq = get_work_pwq(work); + if (pwq && pwq->pool == pool) { + debug_work_deactivate(work); + /* - * This work is queued, but perhaps we locked the wrong gcwq. - * In that case we must see the new value after rmb(), see - * insert_work()->wmb(). + * A delayed work item cannot be grabbed directly because + * it might have linked NO_COLOR work items which, if left + * on the delayed_list, will confuse pwq->nr_active + * management later on and cause stall. Make sure the work + * item is activated before grabbing. */ - smp_rmb(); - if (gcwq == get_work_gcwq(work)) { - debug_work_deactivate(work); + if (*work_data_bits(work) & WORK_STRUCT_DELAYED) + pwq_activate_delayed_work(work); - /* - * A delayed work item cannot be grabbed directly - * because it might have linked NO_COLOR work items - * which, if left on the delayed_list, will confuse - * cwq->nr_active management later on and cause - * stall. Make sure the work item is activated - * before grabbing. - */ - if (*work_data_bits(work) & WORK_STRUCT_DELAYED) - cwq_activate_delayed_work(work); + list_del_init(&work->entry); + pwq_dec_nr_in_flight(get_work_pwq(work), get_work_color(work)); - list_del_init(&work->entry); - cwq_dec_nr_in_flight(get_work_cwq(work), - get_work_color(work)); + /* work->data points to pwq iff queued, point to pool */ + set_work_pool_and_keep_pending(work, pool->id); - spin_unlock(&gcwq->lock); - return 1; - } + spin_unlock(&pool->lock); + return 1; } - spin_unlock(&gcwq->lock); + spin_unlock(&pool->lock); fail: local_irq_restore(*flags); if (work_is_canceling(work)) @@ -1139,33 +1124,25 @@ fail: } /** - * insert_work - insert a work into gcwq - * @cwq: cwq @work belongs to + * insert_work - insert a work into a pool + * @pwq: pwq @work belongs to * @work: work to insert * @head: insertion point * @extra_flags: extra WORK_STRUCT_* flags to set * - * Insert @work which belongs to @cwq into @gcwq after @head. - * @extra_flags is or'd to work_struct flags. + * Insert @work which belongs to @pwq after @head. @extra_flags is or'd to + * work_struct flags. * * CONTEXT: - * spin_lock_irq(gcwq->lock). + * spin_lock_irq(pool->lock). */ -static void insert_work(struct cpu_workqueue_struct *cwq, - struct work_struct *work, struct list_head *head, - unsigned int extra_flags) +static void insert_work(struct pool_workqueue *pwq, struct work_struct *work, + struct list_head *head, unsigned int extra_flags) { - struct worker_pool *pool = cwq->pool; + struct worker_pool *pool = pwq->pool; /* we own @work, set data and link */ - set_work_cwq(work, cwq, extra_flags); - - /* - * Ensure that we get the right work->data if we see the - * result of list_add() below, see try_to_grab_pending(). - */ - smp_wmb(); - + set_work_pwq(work, pwq, extra_flags); list_add_tail(&work->entry, head); /* @@ -1181,41 +1158,24 @@ static void insert_work(struct cpu_workqueue_struct *cwq, /* * Test whether @work is being queued from another work executing on the - * |