diff options
-rw-r--r-- | include/linux/async.h | 1 | ||||
-rw-r--r-- | include/linux/workqueue.h | 35 | ||||
-rw-r--r-- | include/trace/events/workqueue.h | 10 | ||||
-rw-r--r-- | kernel/async.c | 14 | ||||
-rw-r--r-- | kernel/sched/core.c | 2 | ||||
-rw-r--r-- | kernel/workqueue.c | 1530 | ||||
-rw-r--r-- | kernel/workqueue_internal.h | 65 | ||||
-rw-r--r-- | kernel/workqueue_sched.h | 9 |
8 files changed, 818 insertions, 848 deletions
diff --git a/include/linux/async.h b/include/linux/async.h index 7a24fe9b44b..345169cfa30 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -52,4 +52,5 @@ extern void async_synchronize_full_domain(struct async_domain *domain); extern void async_synchronize_cookie(async_cookie_t cookie); extern void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain); +extern bool current_is_async(void); #endif diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 2b58905d350..8afab27cdbc 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -27,7 +27,7 @@ void delayed_work_timer_fn(unsigned long __data); enum { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */ - WORK_STRUCT_CWQ_BIT = 2, /* data points to cwq */ + WORK_STRUCT_PWQ_BIT = 2, /* data points to pwq */ WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */ @@ -40,7 +40,7 @@ enum { WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT, - WORK_STRUCT_CWQ = 1 << WORK_STRUCT_CWQ_BIT, + WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT, WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT, #ifdef CONFIG_DEBUG_OBJECTS_WORK WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT, @@ -57,29 +57,36 @@ enum { /* special cpu IDs */ WORK_CPU_UNBOUND = NR_CPUS, - WORK_CPU_NONE = NR_CPUS + 1, - WORK_CPU_LAST = WORK_CPU_NONE, + WORK_CPU_END = NR_CPUS + 1, /* - * Reserve 7 bits off of cwq pointer w/ debugobjects turned - * off. This makes cwqs aligned to 256 bytes and allows 15 - * workqueue flush colors. + * Reserve 7 bits off of pwq pointer w/ debugobjects turned off. + * This makes pwqs aligned to 256 bytes and allows 15 workqueue + * flush colors. */ WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS, - /* data contains off-queue information when !WORK_STRUCT_CWQ */ + /* data contains off-queue information when !WORK_STRUCT_PWQ */ WORK_OFFQ_FLAG_BASE = WORK_STRUCT_FLAG_BITS, WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE), + /* + * When a work item is off queue, its high bits point to the last + * pool it was on. Cap at 31 bits and use the highest number to + * indicate that no pool is associated. + */ WORK_OFFQ_FLAG_BITS = 1, - WORK_OFFQ_CPU_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS, + WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS, + WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, + WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, + WORK_OFFQ_POOL_NONE = (1LU << WORK_OFFQ_POOL_BITS) - 1, /* convenience constants */ WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1, WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK, - WORK_STRUCT_NO_CPU = (unsigned long)WORK_CPU_NONE << WORK_OFFQ_CPU_SHIFT, + WORK_STRUCT_NO_POOL = (unsigned long)WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT, /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, @@ -95,13 +102,16 @@ struct work_struct { #endif }; -#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU) +#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL) #define WORK_DATA_STATIC_INIT() \ - ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU | WORK_STRUCT_STATIC) + ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC) struct delayed_work { struct work_struct work; struct timer_list timer; + + /* target workqueue and CPU ->timer uses to queue ->work */ + struct workqueue_struct *wq; int cpu; }; @@ -426,7 +436,6 @@ extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); extern bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq); -extern unsigned int work_cpu(struct work_struct *work); extern unsigned int work_busy(struct work_struct *work); /* diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index f28d1b65f17..bf0e18ba6cf 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -27,7 +27,7 @@ DECLARE_EVENT_CLASS(workqueue_work, /** * workqueue_queue_work - called when a work gets queued * @req_cpu: the requested cpu - * @cwq: pointer to struct cpu_workqueue_struct + * @pwq: pointer to struct pool_workqueue * @work: pointer to struct work_struct * * This event occurs when a work is queued immediately or once a @@ -36,10 +36,10 @@ DECLARE_EVENT_CLASS(workqueue_work, */ TRACE_EVENT(workqueue_queue_work, - TP_PROTO(unsigned int req_cpu, struct cpu_workqueue_struct *cwq, + TP_PROTO(unsigned int req_cpu, struct pool_workqueue *pwq, struct work_struct *work), - TP_ARGS(req_cpu, cwq, work), + TP_ARGS(req_cpu, pwq, work), TP_STRUCT__entry( __field( void *, work ) @@ -52,9 +52,9 @@ TRACE_EVENT(workqueue_queue_work, TP_fast_assign( __entry->work = work; __entry->function = work->func; - __entry->workqueue = cwq->wq; + __entry->workqueue = pwq->wq; __entry->req_cpu = req_cpu; - __entry->cpu = cwq->pool->gcwq->cpu; + __entry->cpu = pwq->pool->cpu; ), TP_printk("work struct=%p function=%pf workqueue=%p req_cpu=%u cpu=%u", diff --git a/kernel/async.c b/kernel/async.c index 6f34904a0b5..6c68fc3fae7 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -57,6 +57,8 @@ asynchronous and synchronous parts of the kernel. #include <linux/slab.h> #include <linux/workqueue.h> +#include "workqueue_internal.h" + static async_cookie_t next_cookie = 1; #define MAX_WORK 32768 @@ -353,3 +355,15 @@ void async_synchronize_cookie(async_cookie_t cookie) async_synchronize_cookie_domain(cookie, &async_running); } EXPORT_SYMBOL_GPL(async_synchronize_cookie); + +/** + * current_is_async - is %current an async worker task? + * + * Returns %true if %current is an async worker task. + */ +bool current_is_async(void) +{ + struct worker *worker = current_wq_worker(); + + return worker && worker->current_func == async_run_entry_fn; +} diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4a88f1d5156..03d7784b7bd 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -83,7 +83,7 @@ #endif #include "sched.h" -#include "../workqueue_sched.h" +#include "../workqueue_internal.h" #include "../smpboot.h" #define CREATE_TRACE_POINTS diff --git a/kernel/workqueue.c b/kernel/workqueue.c index fbc6576a83c..f4feacad381 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -41,32 +41,31 @@ #include <linux/debug_locks.h> #include <linux/lockdep.h> #include <linux/idr.h> +#include <linux/hashtable.h> -#include "workqueue_sched.h" +#include "workqueue_internal.h" enum { /* - * global_cwq flags + * worker_pool flags * - * A bound gcwq is either associated or disassociated with its CPU. + * A bound pool is either associated or disassociated with its CPU. * While associated (!DISASSOCIATED), all workers are bound to the * CPU and none has %WORKER_UNBOUND set and concurrency management * is in effect. * * While DISASSOCIATED, the cpu may be offline and all workers have * %WORKER_UNBOUND set and concurrency management disabled, and may - * be executing on any CPU. The gcwq behaves as an unbound one. + * be executing on any CPU. The pool behaves as an unbound one. * * Note that DISASSOCIATED can be flipped only while holding - * assoc_mutex of all pools on the gcwq to avoid changing binding - * state while create_worker() is in progress. + * assoc_mutex to avoid changing binding state while + * create_worker() is in progress. */ - GCWQ_DISASSOCIATED = 1 << 0, /* cpu can't serve workers */ - GCWQ_FREEZING = 1 << 1, /* freeze in progress */ - - /* pool flags */ POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */ + POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ + POOL_FREEZING = 1 << 3, /* freeze in progress */ /* worker flags */ WORKER_STARTED = 1 << 0, /* started */ @@ -79,11 +78,9 @@ enum { WORKER_NOT_RUNNING = WORKER_PREP | WORKER_UNBOUND | WORKER_CPU_INTENSIVE, - NR_WORKER_POOLS = 2, /* # worker pools per gcwq */ + NR_STD_WORKER_POOLS = 2, /* # standard pools per cpu */ BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */ - BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER, - BUSY_WORKER_HASH_MASK = BUSY_WORKER_HASH_SIZE - 1, MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */ IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */ @@ -111,48 +108,24 @@ enum { * P: Preemption protected. Disabling preemption is enough and should * only be modified and accessed from the local cpu. * - * L: gcwq->lock protected. Access with gcwq->lock held. + * L: pool->lock protected. Access with pool->lock held. * - * X: During normal operation, modification requires gcwq->lock and - * should be done only from local cpu. Either disabling preemption - * on local cpu or grabbing gcwq->lock is enough for read access. - * If GCWQ_DISASSOCIATED is set, it's identical to L. + * X: During normal operation, modification requires pool->lock and should + * be done only from local cpu. Either disabling preemption on local + * cpu or grabbing pool->lock is enough for read access. If + * POOL_DISASSOCIATED is set, it's identical to L. * * F: wq->flush_mutex protected. * * W: workqueue_lock protected. */ -struct global_cwq; -struct worker_pool; - -/* - * The poor guys doing the actual heavy lifting. All on-duty workers - * are either serving the manager role, on idle list or on busy hash. - */ -struct worker { - /* on idle list while idle, on busy hash table while busy */ - union { - struct list_head entry; /* L: while idle */ - struct hlist_node hentry; /* L: while busy */ - }; - - struct work_struct *current_work; /* L: work being processed */ - struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */ - struct list_head scheduled; /* L: scheduled works */ - struct task_struct *task; /* I: worker task */ - struct worker_pool *pool; /* I: the associated pool */ - /* 64 bytes boundary on 64bit, 32 on 32bit */ - unsigned long last_active; /* L: last active timestamp */ - unsigned int flags; /* X: flags */ - int id; /* I: worker id */ - - /* for rebinding worker to CPU */ - struct work_struct rebind_work; /* L: for busy worker */ -}; +/* struct worker is defined in workqueue_internal.h */ struct worker_pool { - struct global_cwq *gcwq; /* I: the owning gcwq */ + spinlock_t lock; /* the pool lock */ + unsigned int cpu; /* I: the associated cpu */ + int id; /* I: pool ID */ unsigned int flags; /* X: flags */ struct list_head worklist; /* L: list of pending works */ @@ -165,34 +138,28 @@ struct worker_pool { struct timer_list idle_timer; /* L: worker idle timeout */ struct timer_list mayday_timer; /* L: SOS timer for workers */ - struct mutex assoc_mutex; /* protect GCWQ_DISASSOCIATED */ - struct ida worker_ida; /* L: for worker IDs */ -}; - -/* - * Global per-cpu workqueue. There's one and only one for each cpu - * and all works are queued and processed here regardless of their - * target workqueues. - */ -struct global_cwq { - spinlock_t lock; /* the gcwq lock */ - unsigned int cpu; /* I: the associated cpu */ - unsigned int flags; /* L: GCWQ_* flags */ - - /* workers are chained either in busy_hash or pool idle_list */ - struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE]; + /* workers are chained either in busy_hash or idle_list */ + DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER); /* L: hash of busy workers */ - struct worker_pool pools[NR_WORKER_POOLS]; - /* normal and highpri pools */ + struct mutex assoc_mutex; /* protect POOL_DISASSOCIATED */ + struct ida worker_ida; /* L: for worker IDs */ + + /* + * The current concurrency level. As it's likely to be accessed + * from other CPUs during try_to_wake_up(), put it in a separate + * cacheline. + */ + atomic_t nr_running ____cacheline_aligned_in_smp; } ____cacheline_aligned_in_smp; /* - * The per-CPU workqueue. The lower WORK_STRUCT_FLAG_BITS of - * work_struct->data are used for flags and thus cwqs need to be - * aligned at two's power of the number of flag bits. + * The per-pool workqueue. While queued, the lower WORK_STRUCT_FLAG_BITS + * of work_struct->data are used for flags and the remaining high bits + * point to the pwq; thus, pwqs need to be aligned at two's power of the + * number of flag bits. */ -struct cpu_workqueue_struct { +struct pool_workqueue { struct worker_pool *pool; /* I: the associated pool */ struct workqueue_struct *wq; /* I: the owning workqueue */ int work_color; /* L: current color */ @@ -241,16 +208,16 @@ typedef unsigned long mayday_mask_t; struct workqueue_struct { unsigned int flags; /* W: WQ_* flags */ union { - struct cpu_workqueue_struct __percpu *pcpu; - struct cpu_workqueue_struct *single; + struct pool_workqueue __percpu *pcpu; + struct pool_workqueue *single; unsigned long v; - } cpu_wq; /* I: cwq's */ + } pool_wq; /* I: pwq's */ struct list_head list; /* W: list of all workqueues */ struct mutex flush_mutex; /* protects wq flushing */ int work_color; /* F: current work color */ int flush_color; /* F: current flush color */ - atomic_t nr_cwqs_to_flush; /* flush in progress */ + atomic_t nr_pwqs_to_flush; /* flush in progress */ struct wq_flusher *first_flusher; /* F: first flusher */ struct list_head flusher_queue; /* F: flush waiters */ struct list_head flusher_overflow; /* F: flush overflow list */ @@ -259,7 +226,7 @@ struct workqueue_struct { struct worker *rescuer; /* I: rescue worker */ int nr_drainers; /* W: drain in progress */ - int saved_max_active; /* W: saved cwq max_active */ + int saved_max_active; /* W: saved pwq max_active */ #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; #endif @@ -280,16 +247,15 @@ EXPORT_SYMBOL_GPL(system_freezable_wq); #define CREATE_TRACE_POINTS #include <trace/events/workqueue.h> -#define for_each_worker_pool(pool, gcwq) \ - for ((pool) = &(gcwq)->pools[0]; \ - (pool) < &(gcwq)->pools[NR_WORKER_POOLS]; (pool)++) +#define for_each_std_worker_pool(pool, cpu) \ + for ((pool) = &std_worker_pools(cpu)[0]; \ + (pool) < &std_worker_pools(cpu)[NR_STD_WORKER_POOLS]; (pool)++) -#define for_each_busy_worker(worker, i, pos, gcwq) \ - for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \ - hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry) +#define for_each_busy_worker(worker, i, pos, pool) \ + hash_for_each(pool->busy_hash, i, pos, worker, hentry) -static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask, - unsigned int sw) +static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, + unsigned int sw) { if (cpu < nr_cpu_ids) { if (sw & 1) { @@ -300,42 +266,42 @@ static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask, if (sw & 2) return WORK_CPU_UNBOUND; } - return WORK_CPU_NONE; + return WORK_CPU_END; } -static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, - struct workqueue_struct *wq) +static inline int __next_pwq_cpu(int cpu, const struct cpumask *mask, + struct workqueue_struct *wq) { - return __next_gcwq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2); + return __next_wq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2); } /* * CPU iterators * - * An extra gcwq is defined for an invalid cpu number + * An extra cpu number is defined using an invalid cpu number * (WORK_CPU_UNBOUND) to host workqueues which are not bound to any - * specific CPU. The following iterators are similar to - * for_each_*_cpu() iterators but also considers the unbound gcwq. + * specific CPU. The following iterators are similar to for_each_*_cpu() + * iterators but also considers the unbound CPU. * - * for_each_gcwq_cpu() : possible CPUs + WORK_CPU_UNBOUND - * for_each_online_gcwq_cpu() : online CPUs + WORK_CPU_UNBOUND - * for_each_cwq_cpu() : possible CPUs for bound workqueues, + * for_each_wq_cpu() : possible CPUs + WORK_CPU_UNBOUND + * for_each_online_wq_cpu() : online CPUs + WORK_CPU_UNBOUND + * for_each_pwq_cpu() : possible CPUs for bound workqueues, * WORK_CPU_UNBOUND for unbound workqueues */ -#define for_each_gcwq_cpu(cpu) \ - for ((cpu) = __next_gcwq_cpu(-1, cpu_possible_mask, 3); \ - (cpu) < WORK_CPU_NONE; \ - (cpu) = __next_gcwq_cpu((cpu), cpu_possible_mask, 3)) +#define for_each_wq_cpu(cpu) \ + for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, 3); \ + (cpu) < WORK_CPU_END; \ + (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, 3)) -#define for_each_online_gcwq_cpu(cpu) \ - for ((cpu) = __next_gcwq_cpu(-1, cpu_online_mask, 3); \ - (cpu) < WORK_CPU_NONE; \ - (cpu) = __next_gcwq_cpu((cpu), cpu_online_mask, 3)) +#define for_each_online_wq_cpu(cpu) \ + for ((cpu) = __next_wq_cpu(-1, cpu_online_mask, 3); \ + (cpu) < WORK_CPU_END; \ + (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3)) -#define for_each_cwq_cpu(cpu, wq) \ - for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, (wq)); \ - (cpu) < WORK_CPU_NONE; \ - (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq))) +#define for_each_pwq_cpu(cpu, wq) \ + for ((cpu) = __next_pwq_cpu(-1, cpu_possible_mask, (wq)); \ + (cpu) < WORK_CPU_END; \ + (cpu) = __next_pwq_cpu((cpu), cpu_possible_mask, (wq))) #ifdef CONFIG_DEBUG_OBJECTS_WORK @@ -459,57 +425,69 @@ static LIST_HEAD(workqueues); static bool workqueue_freezing; /* W: have wqs started freezing? */ /* - * The almighty global cpu workqueues. nr_running is the only field - * which is expected to be used frequently by other cpus via - * try_to_wake_up(). Put it in a separate cacheline. + * The CPU and unbound standard worker pools. The unbound ones have + * POOL_DISASSOCIATED set, and their workers have WORKER_UNBOUND set. */ -static DEFINE_PER_CPU(struct global_cwq, global_cwq); -static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, pool_nr_running[NR_WORKER_POOLS]); +static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], + cpu_std_worker_pools); +static struct worker_pool unbound_std_worker_pools[NR_STD_WORKER_POOLS]; -/* - * Global cpu workqueue and nr_running counter for unbound gcwq. The - * gcwq is always online, has GCWQ_DISASSOCIATED set, and all its - * workers have WORKER_UNBOUND set. - */ -static struct global_cwq unbound_global_cwq; -static atomic_t unbound_pool_nr_running[NR_WORKER_POOLS] = { - [0 ... NR_WORKER_POOLS - 1] = ATOMIC_INIT(0), /* always 0 */ -}; +/* idr of all pools */ +static DEFINE_MUTEX(worker_pool_idr_mutex); +static DEFINE_IDR(worker_pool_idr); static int worker_thread(void *__worker); -static int worker_pool_pri(struct worker_pool *pool) +static struct worker_pool *std_worker_pools(int cpu) { - return pool - pool->gcwq->pools; + if (cpu != WORK_CPU_UNBOUND) + return per_cpu(cpu_std_worker_pools, cpu); + else + return unbound_std_worker_pools; } -static struct global_cwq *get_gcwq(unsigned int cpu) +static int std_worker_pool_pri(struct worker_pool *pool) { - if (cpu != WORK_CPU_UNBOUND) - return &per_cpu(global_cwq, cpu); - else - return &unbound_global_cwq; + return pool - std_worker_pools(pool->cpu); } -static atomic_t *get_pool_nr_running(struct worker_pool *pool) +/* allocate ID and assign it to @pool */ +static int worker_pool_assign_id(struct worker_pool *pool) { - int cpu = pool->gcwq->cpu; - int idx = worker_pool_pri(pool); + int ret; - if (cpu != WORK_CPU_UNBOUND) - return &per_cpu(pool_nr_running, cpu)[idx]; - else - return &unbound_pool_nr_running[idx]; + mutex_lock(&worker_pool_idr_mutex); + idr_pre_get(&worker_pool_idr, GFP_KERNEL); + ret = idr_get_new(&worker_pool_idr, pool, &pool->id); + mutex_unlock(&worker_pool_idr_mutex); + + return ret; } -static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, - struct workqueue_struct *wq) +/* + * Lookup worker_pool by id. The idr currently is built during boot and + * never modified. Don't worry about locking for now. + */ +static struct worker_pool *worker_pool_by_id(int pool_id) +{ + return idr_find(&worker_pool_idr, pool_id); +} + +static struct worker_pool *get_std_worker_pool(int cpu, bool highpri) +{ + struct worker_pool *pools = std_worker_pools(cpu); + + return &pools[highpri]; +} + +static struct pool_workqueue *get_pwq(unsigned int cpu, + struct workqueue_struct *wq) { if (!(wq->flags & WQ_UNBOUND)) { if (likely(cpu < nr_cpu_ids)) - return per_cpu_ptr(wq->cpu_wq.pcpu, cpu); + return per_cpu_ptr(wq->pool_wq.pcpu, cpu); } else if (likely(cpu == WORK_CPU_UNBOUND)) - return wq->cpu_wq.single; + return wq->pool_wq.single; return NULL; } @@ -530,19 +508,19 @@ static int work_next_color(int color) } /* - * While queued, %WORK_STRUCT_CWQ is set and non flag bits of a work's data - * contain the pointer to the queued cwq. Once execution starts, the flag - * is cleared and the high bits contain OFFQ flags and CPU number. + * While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data + * contain the pointer to the queued pwq. Once execution starts, the flag + * is cleared and the high bits contain OFFQ flags and pool ID. * - * set_work_cwq(), set_work_cpu_and_clear_pending(), mark_work_canceling() - * and clear_work_data() can be used to set the cwq, cpu or clear + * set_work_pwq(), set_work_pool_and_clear_pending(), mark_work_canceling() + * and clear_work_data() can be used to set the pwq, pool or clear * work->data. These functions should only be called while the work is * owned - ie. while the PENDING bit is set. * - * get_work_[g]cwq() can be used to obtain the gcwq or cwq corresponding to - * a work. gcwq is available once the work has been queued anywhere after - * initialization until it is sync canceled. cwq is available only while - * the work item is queued. + * get_work_pool() and get_work_pwq() can be used to obtain the pool or pwq + * corresponding to a work. Pool is available once the work has been + * queued anywhere after initialization until it is sync canceled. pwq is + * available only while the work item is queued. * * %WORK_OFFQ_CANCELING is used to mark a work item which is being * canceled. While being canceled, a work item may have its PENDING set @@ -556,16 +534,22 @@ static inline void set_work_data(struct work_struct *work, unsigned long data, atomic_long_set(&work->data, data | flags | work_static(work)); } -static void set_work_cwq(struct work_struct *work, - struct cpu_workqueue_struct *cwq, +static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq, unsigned long extra_flags) { - set_work_data(work, (unsigned long)cwq, - WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags); + set_work_data(work, (unsigned long)pwq, + WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags); } -static void set_work_cpu_and_clear_pending(struct work_struct *work, - unsigned int cpu) +static void set_work_pool_and_keep_pending(struct work_struct *work, + int pool_id) +{ + set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, + WORK_STRUCT_PENDING); +} + +static void set_work_pool_and_clear_pending(struct work_struct *work, + int pool_id) { /* * The following wmb is paired with the implied mb in @@ -574,67 +558,92 @@ static void set_work_cpu_and_clear_pending(struct work_struct *work, * owner. */ smp_wmb(); - set_work_data(work, (unsigned long)cpu << WORK_OFFQ_CPU_SHIFT, 0); + set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0); } static void clear_work_data(struct work_struct *work) { - smp_wmb(); /* see set_work_cpu_and_clear_pending() */ - set_work_data(work, WORK_STRUCT_NO_CPU, 0); + smp_wmb(); /* see set_work_pool_and_clear_pending() */ + set_work_data(work, WORK_STRUCT_NO_POOL, 0); } -static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work) +static struct pool_workqueue *get_work_pwq(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); - if (data & WORK_STRUCT_CWQ) + if (data & WORK_STRUCT_PWQ) return (void *)(data & WORK_STRUCT_WQ_DATA_MASK); else return NULL; } -static struct global_cwq *get_work_gcwq(struct work_struct *work) +/** + * get_work_pool - return the worker_pool a given work was associated with + * @work: the work item of interest + * + * Return the worker_pool @work was last associated with. %NULL if none. + */ +static struct worker_pool *get_work_pool(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); - unsigned int cpu; + struct worker_pool *pool; + int pool_id; - if (data & WORK_STRUCT_CWQ) - return ((struct cpu_workqueue_struct *) - (data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq; + if (data & WORK_STRUCT_PWQ) + return ((struct pool_workqueue *) + (data & WORK_STRUCT_WQ_DATA_MASK))->pool; - cpu = data >> WORK_OFFQ_CPU_SHIFT; - if (cpu == WORK_CPU_NONE) + pool_id = data >> WORK_OFFQ_POOL_SHIFT; + if (pool_id == WORK_OFFQ_POOL_NONE) return NULL; - BUG_ON(cpu >= nr_cpu_ids && cpu != WORK_CPU_UNBOUND); - return get_gcwq(cpu); + pool = worker_pool_by_id(pool_id); + WARN_ON_ONCE(!pool); + return pool; +} + +/** + * get_work_pool_id - return the worker pool ID a given work is associated with + * @work: the work item of interest + * + * Return the worker_pool ID @work was last associated with. + * %WORK_OFFQ_POOL_NONE if none. + */ +static int get_work_pool_id(struct work_struct *work) +{ + unsigned long data = atomic_long_read(&work->data); + + if (data & WORK_STRUCT_PWQ) + return ((struct pool_workqueue *) + (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id; + + return data >> WORK_OFFQ_POOL_SHIFT; } static void mark_work_canceling(struct work_struct *work) { - struct global_cwq *gcwq = get_work_gcwq(work); - unsigned long cpu = gcwq ? gcwq->cpu : WORK_CPU_NONE; + unsigned long pool_id = get_work_pool_id(work); - set_work_data(work, (cpu << WORK_OFFQ_CPU_SHIFT) | WORK_OFFQ_CANCELING, - WORK_STRUCT_PENDING); + pool_id <<= WORK_OFFQ_POOL_SHIFT; + set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING); } static bool work_is_canceling(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); - return !(data & WORK_STRUCT_CWQ) && (data & WORK_OFFQ_CANCELING); + return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING); } /* * Policy functions. These define the policies on how the global worker * pools are managed. Unless noted otherwise, these functions assume that - * they're being called with gcwq->lock held. + * they're being called with pool->lock held. */ static bool __need_more_worker(struct worker_pool *pool) { - return !atomic_read(get_pool_nr_running(pool)); + return !atomic_read(&pool->nr_running); } /* @@ -642,7 +651,7 @@ static bool __need_more_worker(struct worker_pool *pool) * running workers. * * Note that, because unbound workers never contribute to nr_running, this - * function will always return %true for unbound gcwq as long as the + * function will always return %true for unbound pools as long as the * worklist isn't empty. */ static bool need_more_worker(struct worker_pool *pool) @@ -659,9 +668,8 @@ static bool may_start_working(struct worker_pool *pool) /* Do I need to keep working? Called from currently running workers. */ static bool keep_working(struct worker_pool *pool) { - atomic_t *nr_running = get_pool_nr_running(pool); - - return !list_empty(&pool->worklist) && atomic_read(nr_running) <= 1; + return !list_empty(&pool->worklist) && + atomic_read(&pool->nr_running) <= 1; } /* Do we need a new worker? Called from manager. */ @@ -714,7 +722,7 @@ static struct worker *first_worker(struct worker_pool *pool) * Wake up the first idle worker of @pool. * * CONTEXT: - * spin_lock_irq(gcwq->lock). + * spin_lock_irq(pool->lock). */ static void wake_up_worker(struct worker_pool *pool) { @@ -740,8 +748,8 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) struct worker *worker = kthread_data(task); if (!(worker->flags & WORKER_NOT_RUNNING)) { - WARN_ON_ONCE(worker->pool->gcwq->cpu != cpu); - atomic_inc(get_pool_nr_running(worker->pool)); + WARN_ON_ONCE(worker->pool->cpu != cpu); + atomic_inc(&worker->pool->nr_running); } } @@ -764,12 +772,18 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, unsigned int cpu) { struct worker *worker = kthread_data(task), *to_wakeup = NULL; - struct worker_pool *pool = worker->pool; - atomic_t *nr_running = get_pool_nr_running(pool); + struct worker_pool *pool; + /* + * Rescuers, which may not have all the fields set up like normal + * workers, also reach here, let's not access anything before + * checking NOT_RUNNING. + */ if (worker->flags & WORKER_NOT_RUNNING) return NULL; + pool = worker->pool; + /* this can only happen on the local cpu */ BUG_ON(cpu != raw_smp_processor_id()); @@ -781,10 +795,11 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, * NOT_RUNNING is clear. This means that we're bound to and * running on the local cpu w/ rq lock held and preemption * disabled, which in turn means that none else could be - * manipulating idle_list, so dereferencing idle_list without gcwq + * manipulating idle_list, so dereferencing idle_list without pool * lock is safe. */ - if (atomic_dec_and_test(nr_running) && !list_empty(&pool->worklist)) + if (atomic_dec_and_test(&pool->nr_running) && + !list_empty(&pool->worklist)) to_wakeup = first_worker(pool); return to_wakeup ? to_wakeup->task : NULL; } @@ -800,7 +815,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, * woken up. * * CONTEXT: - * spin_lock_irq(gcwq->lock) + * spin_lock_irq(pool->lock) */ static inline void worker_set_flags(struct worker *worker, unsigned int flags, bool wakeup) @@ -816,14 +831,12 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags, */ if ((flags & WORKER_NOT_RUNNING) && !(worker->flags & WORKER_NOT_RUNNING)) { - atomic_t *nr_running = get_pool_nr_running(pool); - if (wakeup) { - if (atomic_dec_and_test(nr_running) && + if (atomic_dec_and_test(&pool->nr_running) && !list_empty(&pool->worklist)) wake_up_worker(pool); } else - atomic_dec(nr_running); + atomic_dec(&pool->nr_running); } worker->flags |= flags; @@ -837,7 +850,7 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags, * Clear @flags in @worker->flags and adjust nr_running accordingly. * * CONTEXT: - * spin_lock_irq(gcwq->lock) + * spin_lock_irq(pool->lock) */ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) { @@ -855,87 +868,56 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) */ if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) if (!(worker->flags & WORKER_NOT_RUNNING)) - atomic_inc(get_pool_nr_running(pool)); + atomic_inc(&pool->nr_running); } /** - * busy_worker_head - return the busy hash head for a work - * @gcwq: gcwq of interest - * @work: work to be hashed - * - * Return hash head of @gcwq for @work. - * - * CONTEXT: - * spin_lock_irq(gcwq->lock). - * - * RETURNS: - * Pointer to the hash head. - */ -static struct hlist_head *busy_worker_head(struct global_cwq *gcwq, - struct work_struct *work) -{ - const int base_shift = ilog2(sizeof(struct work_struct)); - unsigned long v = (unsigned long)work; - - /* simple shift and fold hash, do we need something better? */ - v >>= base_shift; - v += v >> BUSY_WORKER_HASH_ORDER; - v &= BUSY_WORKER_HASH_MASK; - - return &gcwq->busy_hash[v]; -} - -/** - * __find_worker_executing_work - find worker which is executing a work - * @gcwq: gcwq of interest - * @bwh: hash head as returned by busy_worker_head() + * find_worker_executing_work - find worker which is executing a work + * @pool: pool of interest * @work: work to find worker for * - * Find a worker which is executing @work on @gcwq. @bwh should be - * the hash head obtained by calling busy_worker_head() with the same - * work. + * Find a worker which is executing @work on @pool by searching + * @pool->busy_hash which is keyed by the address of @work. For a worker + * to match, its current execution should match the address of @work and + * its work function. This is to avoid unwanted dependency between + * unrelated work executions through a work item being recycled while still + * being executed. + * + * This is a bit tricky. A work item may be freed once its execution + * starts and nothing prevents the freed area from being recycled for + * another work item. If the same work item address ends up being reused + * before the original execution finishes, workqueue will identify the + * recycled work item as currently executing and make it wait until the + * current execution finishes, introducing an unwanted dependency. + * + * This function checks the work item address, work function and workqueue + * to avoid false positives. Note that this isn't complete as one may + * construct a work function which can introduce dependency onto itself + * through a recycled work item. Well, if somebody wants to shoot oneself + * in the foot that badly, there's only so much we can do, and if such + * deadlock actually occurs, it should be easy to locate the culprit work + * function. * * CONTEXT: - * spin_lock_irq(gcwq->lock). + * spin_lock_irq(pool->lock). * * RETURNS: * Pointer to worker which is executing @work if found, NULL * otherwise. */ -static struct worker *__find_worker_executing_work(struct global_cwq *gcwq, - struct hlist_head *bwh, - struct work_struct *work) +static struct worker *find_worker_executing_work(struct worker_pool *pool, + struct work_struct *work) { struct worker *worker; struct hlist_node *tmp; - hlist_for_each_entry(worker, tmp, bwh, hentry) - if (worker->current_work == work) + hash_for_each_possible(pool->busy_hash, worker, tmp, hentry, + (unsigned long)work) + if (worker->current_work == work && + worker->current_func == work->func) return worker; - return NULL; -} -/** - * find_worker_executing_work - find worker which is executing a work - * @gcwq: gcwq of interest - * @work: work to find worker for - * - * Find a worker which is executing @work on @gcwq. This function is - * identical to __find_worker_executing_work() except tha |