1 /*
2 * kernel/workqueue.c - generic async execution with shared worker pool
3 *
4 * Copyright (C) 2002 Ingo Molnar
5 *
6 * Derived from the taskqueue/keventd code by:
7 * David Woodhouse <dwmw2@infradead.org>
8 * Andrew Morton
9 * Kai Petzke <wpp@marie.physik.tu-berlin.de>
10 * Theodore Ts'o <tytso@mit.edu>
11 *
12 * Made to use alloc_percpu by Christoph Lameter.
13 *
14 * Copyright (C) 2010 SUSE Linux Products GmbH
15 * Copyright (C) 2010 Tejun Heo <tj@kernel.org>
16 *
17 * This is the generic async execution mechanism. Work items are
18 * executed in process context. The worker pool is shared and
19 * automatically managed. There is one worker pool for each CPU and
20 * one extra for works which are better served by workers which are
21 * not bound to any specific CPU.
22 *
23 * Please read Documentation/workqueue.txt for details.
24 */
25
26 #include <linux/export.h>
27 #include <linux/kernel.h>
28 #include <linux/sched.h>
29 #include <linux/init.h>
30 #include <linux/signal.h>
31 #include <linux/completion.h>
32 #include <linux/workqueue.h>
33 #include <linux/slab.h>
34 #include <linux/cpu.h>
35 #include <linux/notifier.h>
36 #include <linux/kthread.h>
37 #include <linux/hardirq.h>
38 #include <linux/mempolicy.h>
39 #include <linux/freezer.h>
40 #include <linux/kallsyms.h>
41 #include <linux/debug_locks.h>
42 #include <linux/lockdep.h>
43 #include <linux/idr.h>
44
45 #include "workqueue_sched.h"
46
47 enum {
48 /* global_cwq flags */
49 GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
50 GCWQ_MANAGING_WORKERS = 1 << 1, /* managing workers */
51 GCWQ_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
52 GCWQ_FREEZING = 1 << 3, /* freeze in progress */
53 GCWQ_HIGHPRI_PENDING = 1 << 4, /* highpri works on queue */
54
55 /* worker flags */
56 WORKER_STARTED = 1 << 0, /* started */
57 WORKER_DIE = 1 << 1, /* die die die */
58 WORKER_IDLE = 1 << 2, /* is idle */
59 WORKER_PREP = 1 << 3, /* preparing to run works */
60 WORKER_ROGUE = 1 << 4, /* not bound to any cpu */
61 WORKER_REBIND = 1 << 5, /* mom is home, come back */
62 WORKER_CPU_INTENSIVE = 1 << 6, /* cpu intensive */
63 WORKER_UNBOUND = 1 << 7, /* worker is unbound */
64
65 WORKER_NOT_RUNNING = WORKER_PREP | WORKER_ROGUE | WORKER_REBIND |
66 WORKER_CPU_INTENSIVE | WORKER_UNBOUND,
67
68 /* gcwq->trustee_state */
69 TRUSTEE_START = 0, /* start */
70 TRUSTEE_IN_CHARGE = 1, /* trustee in charge of gcwq */
71 TRUSTEE_BUTCHER = 2, /* butcher workers */
72 TRUSTEE_RELEASE = 3, /* release workers */
73 TRUSTEE_DONE = 4, /* trustee is done */
74
75 BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */
76 BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER,
77 BUSY_WORKER_HASH_MASK = BUSY_WORKER_HASH_SIZE - 1,
78
79 MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */
80 IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */
81
82 MAYDAY_INITIAL_TIMEOUT = HZ / 100 >= 2 ? HZ / 100 : 2,
83 /* call for help after 10ms
84 (min two ticks) */
85 MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */
86 CREATE_COOLDOWN = HZ, /* time to breathe after fail */
87 TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */
88
89 /*
90 * Rescue workers are used only in emergencies and are shared by
91 * all cpus. Give them nice level -20.
92 */
93 RESCUER_NICE_LEVEL = -20,
94 };
95
96 /*
97 * Structure fields follow one of the following exclusion rules.
98 *
99 * I: Modifiable by initialization/destruction paths and read-only for
100 * everyone else.
101 *
102 * P: Preemption protected. Disabling preemption is enough and should
103 * only be modified and accessed from the local cpu.
104 *
105 * L: gcwq->lock protected. Access with gcwq->lock held.
106 *
107 * X: During normal operation, modification requires gcwq->lock and
108 * should be done only from local cpu. Either disabling preemption
109 * on local cpu or grabbing gcwq->lock is enough for read access.
110 * If GCWQ_DISASSOCIATED is set, it's identical to L.
111 *
112 * F: wq->flush_mutex protected.
113 *
114 * W: workqueue_lock protected.
115 */
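/*
 * A minimal sketch (not from the original file) of how the "L:" rule
 * above is honored throughout this code: an L: field is only touched
 * with gcwq->lock held, e.g.
 *
 *	spin_lock_irq(&gcwq->lock);
 *	gcwq->nr_workers++;		(an L: field)
 *	spin_unlock_irq(&gcwq->lock);
 */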
116
117 struct global_cwq;
118
119 /*
120 * The poor guys doing the actual heavy lifting. All on-duty workers
121 * are either serving the manager role, on idle list or on busy hash.
122 */
123 struct worker {
124 /* on idle list while idle, on busy hash table while busy */
125 union {
126 struct list_head entry; /* L: while idle */
127 struct hlist_node hentry; /* L: while busy */
128 };
129
130 struct work_struct *current_work; /* L: work being processed */
131 work_func_t current_func; /* L: current_work's fn */
132 struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
133 struct list_head scheduled; /* L: scheduled works */
134 struct task_struct *task; /* I: worker task */
135 struct global_cwq *gcwq; /* I: the associated gcwq */
136 /* 64 bytes boundary on 64bit, 32 on 32bit */
137 unsigned long last_active; /* L: last active timestamp */
138 unsigned int flags; /* X: flags */
139 int id; /* I: worker id */
140 struct work_struct rebind_work; /* L: rebind worker to cpu */
141 };
142
143 /*
144 * Global per-cpu workqueue. There's one and only one for each cpu
145 * and all works are queued and processed here regardless of their
146 * target workqueues.
147 */
148 struct global_cwq {
149 spinlock_t lock; /* the gcwq lock */
150 struct list_head worklist; /* L: list of pending works */
151 unsigned int cpu; /* I: the associated cpu */
152 unsigned int flags; /* L: GCWQ_* flags */
153
154 int nr_workers; /* L: total number of workers */
155 int nr_idle; /* L: currently idle ones */
156
157 /* workers are chained either in the idle_list or busy_hash */
158 struct list_head idle_list; /* X: list of idle workers */
159 struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE];
160 /* L: hash of busy workers */
161
162 struct timer_list idle_timer; /* L: worker idle timeout */
163 struct timer_list mayday_timer; /* L: SOS timer for workers */
164
165 struct ida worker_ida; /* L: for worker IDs */
166
167 struct task_struct *trustee; /* L: for gcwq shutdown */
168 unsigned int trustee_state; /* L: trustee state */
169 wait_queue_head_t trustee_wait; /* trustee wait */
170 struct worker *first_idle; /* L: first idle worker */
171 } ____cacheline_aligned_in_smp;
172
173 /*
174 * The per-CPU workqueue. The lower WORK_STRUCT_FLAG_BITS of
175 * work_struct->data are used for flags and thus cwqs need to be
176 * aligned to at least 1 << WORK_STRUCT_FLAG_BITS.
177 */
178 struct cpu_workqueue_struct {
179 struct global_cwq *gcwq; /* I: the associated gcwq */
180 struct workqueue_struct *wq; /* I: the owning workqueue */
181 int work_color; /* L: current color */
182 int flush_color; /* L: flushing color */
183 int nr_in_flight[WORK_NR_COLORS];
184 /* L: nr of in_flight works */
185 int nr_active; /* L: nr of active works */
186 int max_active; /* L: max active works */
187 struct list_head delayed_works; /* L: delayed works */
188 };
189
190 /*
191 * Structure used to wait for workqueue flush.
192 */
193 struct wq_flusher {
194 struct list_head list; /* F: list of flushers */
195 int flush_color; /* F: flush color waiting for */
196 struct completion done; /* flush completion */
197 };
198
199 /*
200 * All cpumasks are assumed to be always set on UP and thus can't be
201 * used to determine whether there's something to be done.
202 */
203 #ifdef CONFIG_SMP
204 typedef cpumask_var_t mayday_mask_t;
205 #define mayday_test_and_set_cpu(cpu, mask) \
206 cpumask_test_and_set_cpu((cpu), (mask))
207 #define mayday_clear_cpu(cpu, mask) cpumask_clear_cpu((cpu), (mask))
208 #define for_each_mayday_cpu(cpu, mask) for_each_cpu((cpu), (mask))
209 #define alloc_mayday_mask(maskp, gfp) zalloc_cpumask_var((maskp), (gfp))
210 #define free_mayday_mask(mask) free_cpumask_var((mask))
211 #else
212 typedef unsigned long mayday_mask_t;
213 #define mayday_test_and_set_cpu(cpu, mask) test_and_set_bit(0, &(mask))
214 #define mayday_clear_cpu(cpu, mask) clear_bit(0, &(mask))
215 #define for_each_mayday_cpu(cpu, mask) if ((cpu) = 0, (mask))
216 #define alloc_mayday_mask(maskp, gfp) true
217 #define free_mayday_mask(mask) do { } while (0)
218 #endif
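/*
 * A minimal usage sketch of the mayday mask helpers, mirroring how
 * send_mayday() and the rescuer use them (cpu and wq are only
 * illustrative names here):
 *
 *	if (!mayday_test_and_set_cpu(cpu, wq->mayday_mask))
 *		wake_up_process(wq->rescuer->task);
 *	...
 *	for_each_mayday_cpu(cpu, wq->mayday_mask)
 *		mayday_clear_cpu(cpu, wq->mayday_mask);
 */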
219
220 /*
221 * The externally visible workqueue abstraction is an array of
222 * per-CPU workqueues:
223 */
224 struct workqueue_struct {
225 unsigned int flags; /* W: WQ_* flags */
226 union {
227 struct cpu_workqueue_struct __percpu *pcpu;
228 struct cpu_workqueue_struct *single;
229 unsigned long v;
230 } cpu_wq; /* I: cwq's */
231 struct list_head list; /* W: list of all workqueues */
232
233 struct mutex flush_mutex; /* protects wq flushing */
234 int work_color; /* F: current work color */
235 int flush_color; /* F: current flush color */
236 atomic_t nr_cwqs_to_flush; /* flush in progress */
237 struct wq_flusher *first_flusher; /* F: first flusher */
238 struct list_head flusher_queue; /* F: flush waiters */
239 struct list_head flusher_overflow; /* F: flush overflow list */
240
241 mayday_mask_t mayday_mask; /* cpus requesting rescue */
242 struct worker *rescuer; /* I: rescue worker */
243
244 int nr_drainers; /* W: drain in progress */
245 int saved_max_active; /* W: saved cwq max_active */
246 #ifdef CONFIG_LOCKDEP
247 struct lockdep_map lockdep_map;
248 #endif
249 char name[]; /* I: workqueue name */
250 };
251
252 struct workqueue_struct *system_wq __read_mostly;
253 struct workqueue_struct *system_long_wq __read_mostly;
254 struct workqueue_struct *system_nrt_wq __read_mostly;
255 struct workqueue_struct *system_unbound_wq __read_mostly;
256 struct workqueue_struct *system_freezable_wq __read_mostly;
257 struct workqueue_struct *system_nrt_freezable_wq __read_mostly;
258 EXPORT_SYMBOL_GPL(system_wq);
259 EXPORT_SYMBOL_GPL(system_long_wq);
260 EXPORT_SYMBOL_GPL(system_nrt_wq);
261 EXPORT_SYMBOL_GPL(system_unbound_wq);
262 EXPORT_SYMBOL_GPL(system_freezable_wq);
263 EXPORT_SYMBOL_GPL(system_nrt_freezable_wq);
264
265 #define CREATE_TRACE_POINTS
266 #include <trace/events/workqueue.h>
267
268 #define for_each_busy_worker(worker, i, pos, gcwq) \
269 for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \
270 hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry)
271
272 static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask,
273 unsigned int sw)
274 {
275 if (cpu < nr_cpu_ids) {
276 if (sw & 1) {
277 cpu = cpumask_next(cpu, mask);
278 if (cpu < nr_cpu_ids)
279 return cpu;
280 }
281 if (sw & 2)
282 return WORK_CPU_UNBOUND;
283 }
284 return WORK_CPU_NONE;
285 }
286
287 static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
288 struct workqueue_struct *wq)
289 {
290 return __next_gcwq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2);
291 }
292
293 /*
294 * CPU iterators
295 *
296 * An extra gcwq is defined for an invalid cpu number
297 * (WORK_CPU_UNBOUND) to host workqueues which are not bound to any
298 * specific CPU. The following iterators are similar to the
299 * for_each_*_cpu() iterators but also consider the unbound gcwq.
300 *
301 * for_each_gcwq_cpu() : possible CPUs + WORK_CPU_UNBOUND
302 * for_each_online_gcwq_cpu() : online CPUs + WORK_CPU_UNBOUND
303 * for_each_cwq_cpu() : possible CPUs for bound workqueues,
304 * WORK_CPU_UNBOUND for unbound workqueues
305 */
306 #define for_each_gcwq_cpu(cpu) \
307 for ((cpu) = __next_gcwq_cpu(-1, cpu_possible_mask, 3); \
308 (cpu) < WORK_CPU_NONE; \
309 (cpu) = __next_gcwq_cpu((cpu), cpu_possible_mask, 3))
310
311 #define for_each_online_gcwq_cpu(cpu) \
312 for ((cpu) = __next_gcwq_cpu(-1, cpu_online_mask, 3); \
313 (cpu) < WORK_CPU_NONE; \
314 (cpu) = __next_gcwq_cpu((cpu), cpu_online_mask, 3))
315
316 #define for_each_cwq_cpu(cpu, wq) \
317 for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, (wq)); \
318 (cpu) < WORK_CPU_NONE; \
319 (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq)))
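/*
 * For example, a sketch of walking every gcwq including the unbound
 * one, as is_chained_work() does further below:
 *
 *	for_each_gcwq_cpu(cpu) {
 *		struct global_cwq *gcwq = get_gcwq(cpu);
 *
 *		spin_lock_irq(&gcwq->lock);
 *		...
 *		spin_unlock_irq(&gcwq->lock);
 *	}
 */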
320
321 #ifdef CONFIG_DEBUG_OBJECTS_WORK
322
323 static struct debug_obj_descr work_debug_descr;
324
325 static void *work_debug_hint(void *addr)
326 {
327 return ((struct work_struct *) addr)->func;
328 }
329
330 /*
331 * fixup_init is called when:
332 * - an active object is initialized
333 */
334 static int work_fixup_init(void *addr, enum debug_obj_state state)
335 {
336 struct work_struct *work = addr;
337
338 switch (state) {
339 case ODEBUG_STATE_ACTIVE:
340 cancel_work_sync(work);
341 debug_object_init(work, &work_debug_descr);
342 return 1;
343 default:
344 return 0;
345 }
346 }
347
348 /*
349 * fixup_activate is called when:
350 * - an active object is activated
351 * - an unknown object is activated (might be a statically initialized object)
352 */
353 static int work_fixup_activate(void *addr, enum debug_obj_state state)
354 {
355 struct work_struct *work = addr;
356
357 switch (state) {
358
359 case ODEBUG_STATE_NOTAVAILABLE:
360 /*
361 * This is not really a fixup. The work struct was
362 * statically initialized. We just make sure that it
363 * is tracked in the object tracker.
364 */
365 if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) {
366 debug_object_init(work, &work_debug_descr);
367 debug_object_activate(work, &work_debug_descr);
368 return 0;
369 }
370 WARN_ON_ONCE(1);
371 return 0;
372
373 case ODEBUG_STATE_ACTIVE:
374 WARN_ON(1);
375
376 default:
377 return 0;
378 }
379 }
380
381 /*
382 * fixup_free is called when:
383 * - an active object is freed
384 */
385 static int work_fixup_free(void *addr, enum debug_obj_state state)
386 {
387 struct work_struct *work = addr;
388
389 switch (state) {
390 case ODEBUG_STATE_ACTIVE:
391 cancel_work_sync(work);
392 debug_object_free(work, &work_debug_descr);
393 return 1;
394 default:
395 return 0;
396 }
397 }
398
399 static struct debug_obj_descr work_debug_descr = {
400 .name = "work_struct",
401 .debug_hint = work_debug_hint,
402 .fixup_init = work_fixup_init,
403 .fixup_activate = work_fixup_activate,
404 .fixup_free = work_fixup_free,
405 };
406
407 static inline void debug_work_activate(struct work_struct *work)
408 {
409 debug_object_activate(work, &work_debug_descr);
410 }
411
412 static inline void debug_work_deactivate(struct work_struct *work)
413 {
414 debug_object_deactivate(work, &work_debug_descr);
415 }
416
417 void __init_work(struct work_struct *work, int onstack)
418 {
419 if (onstack)
420 debug_object_init_on_stack(work, &work_debug_descr);
421 else
422 debug_object_init(work, &work_debug_descr);
423 }
424 EXPORT_SYMBOL_GPL(__init_work);
425
426 void destroy_work_on_stack(struct work_struct *work)
427 {
428 debug_object_free(work, &work_debug_descr);
429 }
430 EXPORT_SYMBOL_GPL(destroy_work_on_stack);
431
432 #else
433 static inline void debug_work_activate(struct work_struct *work) { }
434 static inline void debug_work_deactivate(struct work_struct *work) { }
435 #endif
436
437 /* Serializes the accesses to the list of workqueues. */
438 static DEFINE_SPINLOCK(workqueue_lock);
439 static LIST_HEAD(workqueues);
440 static bool workqueue_freezing; /* W: have wqs started freezing? */
441
442 /*
443 * The almighty global cpu workqueues. nr_running is the only field
444 * which is expected to be used frequently by other cpus via
445 * try_to_wake_up(). Put it in a separate cacheline.
446 */
447 static DEFINE_PER_CPU(struct global_cwq, global_cwq);
448 static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, gcwq_nr_running);
449
450 /*
451 * Global cpu workqueue and nr_running counter for unbound gcwq. The
452 * gcwq is always online, has GCWQ_DISASSOCIATED set, and all its
453 * workers have WORKER_UNBOUND set.
454 */
455 static struct global_cwq unbound_global_cwq;
456 static atomic_t unbound_gcwq_nr_running = ATOMIC_INIT(0); /* always 0 */
457
458 static int worker_thread(void *__worker);
459
460 static struct global_cwq *get_gcwq(unsigned int cpu)
461 {
462 if (cpu != WORK_CPU_UNBOUND)
463 return &per_cpu(global_cwq, cpu);
464 else
465 return &unbound_global_cwq;
466 }
467
468 static atomic_t *get_gcwq_nr_running(unsigned int cpu)
469 {
470 if (cpu != WORK_CPU_UNBOUND)
471 return &per_cpu(gcwq_nr_running, cpu);
472 else
473 return &unbound_gcwq_nr_running;
474 }
475
476 static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
477 struct workqueue_struct *wq)
478 {
479 if (!(wq->flags & WQ_UNBOUND)) {
480 if (likely(cpu < nr_cpu_ids))
481 return per_cpu_ptr(wq->cpu_wq.pcpu, cpu);
482 } else if (likely(cpu == WORK_CPU_UNBOUND))
483 return wq->cpu_wq.single;
484 return NULL;
485 }
486
487 static unsigned int work_color_to_flags(int color)
488 {
489 return color << WORK_STRUCT_COLOR_SHIFT;
490 }
491
492 static int get_work_color(struct work_struct *work)
493 {
494 return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
495 ((1 << WORK_STRUCT_COLOR_BITS) - 1);
496 }
497
498 static int work_next_color(int color)
499 {
500 return (color + 1) % WORK_NR_COLORS;
501 }
502
503 /*
504 * A work's data points to the cwq with WORK_STRUCT_CWQ set while the
505 * work is on queue. Once execution starts, WORK_STRUCT_CWQ is
506 * cleared and the work data contains the cpu number it was last on.
507 *
508 * set_work_{cwq|cpu}() and clear_work_data() can be used to set the
509 * cwq, cpu or clear work->data. These functions should only be
510 * called while the work is owned - ie. while the PENDING bit is set.
511 *
512 * get_work_[g]cwq() can be used to obtain the gcwq or cwq
513 * corresponding to a work. gcwq is available once the work has been
514 * queued anywhere after initialization. cwq is available only from
515 * queueing until execution starts.
516 */
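/*
 * A rough sketch of the two encodings described above (exact bit
 * positions come from the WORK_STRUCT_* constants in
 * include/linux/workqueue.h):
 *
 *   queued:  work->data == (unsigned long)cwq | WORK_STRUCT_CWQ |
 *			     WORK_STRUCT_PENDING | color/flag bits
 *   running: work->data == cpu << WORK_STRUCT_FLAG_BITS | flag bits,
 *			     with WORK_STRUCT_CWQ clear
 */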
517 static inline void set_work_data(struct work_struct *work, unsigned long data,
518 unsigned long flags)
519 {
520 BUG_ON(!work_pending(work));
521 atomic_long_set(&work->data, data | flags | work_static(work));
522 }
523
524 static void set_work_cwq(struct work_struct *work,
525 struct cpu_workqueue_struct *cwq,
526 unsigned long extra_flags)
527 {
528 set_work_data(work, (unsigned long)cwq,
529 WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags);
530 }
531
532 static void set_work_cpu(struct work_struct *work, unsigned int cpu)
533 {
534 set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, WORK_STRUCT_PENDING);
535 }
536
537 static void clear_work_data(struct work_struct *work)
538 {
539 set_work_data(work, WORK_STRUCT_NO_CPU, 0);
540 }
541
542 static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work)
543 {
544 unsigned long data = atomic_long_read(&work->data);
545
546 if (data & WORK_STRUCT_CWQ)
547 return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
548 else
549 return NULL;
550 }
551
552 static struct global_cwq *get_work_gcwq(struct work_struct *work)
553 {
554 unsigned long data = atomic_long_read(&work->data);
555 unsigned int cpu;
556
557 if (data & WORK_STRUCT_CWQ)
558 return ((struct cpu_workqueue_struct *)
559 (data & WORK_STRUCT_WQ_DATA_MASK))->gcwq;
560
561 cpu = data >> WORK_STRUCT_FLAG_BITS;
562 if (cpu == WORK_CPU_NONE)
563 return NULL;
564
565 BUG_ON(cpu >= nr_cpu_ids && cpu != WORK_CPU_UNBOUND);
566 return get_gcwq(cpu);
567 }
568
569 /*
570 * Policy functions. These define the policies on how the global
571 * worker pool is managed. Unless noted otherwise, these functions
572 * assume that they're being called with gcwq->lock held.
573 */
574
575 static bool __need_more_worker(struct global_cwq *gcwq)
576 {
577 return !atomic_read(get_gcwq_nr_running(gcwq->cpu)) ||
578 gcwq->flags & GCWQ_HIGHPRI_PENDING;
579 }
580
581 /*
582 * Need to wake up a worker? Called from anything but currently
583 * running workers.
584 */
585 static bool need_more_worker(struct global_cwq *gcwq)
586 {
587 return !list_empty(&gcwq->worklist) && __need_more_worker(gcwq);
588 }
589
590 /* Can I start working? Called from busy but !running workers. */
591 static bool may_start_working(struct global_cwq *gcwq)
592 {
593 return gcwq->nr_idle;
594 }
595
596 /* Do I need to keep working? Called from currently running workers. */
597 static bool keep_working(struct global_cwq *gcwq)
598 {
599 atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu);
600
601 return !list_empty(&gcwq->worklist) &&
602 (atomic_read(nr_running) <= 1 ||
603 gcwq->flags & GCWQ_HIGHPRI_PENDING);
604 }
605
606 /* Do we need a new worker? Called from manager. */
607 static bool need_to_create_worker(struct global_cwq *gcwq)
608 {
609 return need_more_worker(gcwq) && !may_start_working(gcwq);
610 }
611
612 /* Do I need to be the manager? */
613 static bool need_to_manage_workers(struct global_cwq *gcwq)
614 {
615 return need_to_create_worker(gcwq) || gcwq->flags & GCWQ_MANAGE_WORKERS;
616 }
617
618 /* Do we have too many workers and should some go away? */
619 static bool too_many_workers(struct global_cwq *gcwq)
620 {
621 bool managing = gcwq->flags & GCWQ_MANAGING_WORKERS;
622 int nr_idle = gcwq->nr_idle + managing; /* manager is considered idle */
623 int nr_busy = gcwq->nr_workers - nr_idle;
624
625 return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
626 }
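/*
 * Worked example of the check above: with 5 idle workers (2 always
 * kept in reserve) and 12 busy ones, (5 - 2) * MAX_IDLE_WORKERS_RATIO
 * == 12 >= 12, so too_many_workers() returns true and the manager
 * will start trimming idle workers via the idle timer.
 */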
627
628 /*
629 * Wake up functions.
630 */
631
632 /* Return the first worker. Safe with preemption disabled */
633 static struct worker *first_worker(struct global_cwq *gcwq)
634 {
635 if (unlikely(list_empty(&gcwq->idle_list)))
636 return NULL;
637
638 return list_first_entry(&gcwq->idle_list, struct worker, entry);
639 }
640
641 /**
642 * wake_up_worker - wake up an idle worker
643 * @gcwq: gcwq to wake worker for
644 *
645 * Wake up the first idle worker of @gcwq.
646 *
647 * CONTEXT:
648 * spin_lock_irq(gcwq->lock).
649 */
650 static void wake_up_worker(struct global_cwq *gcwq)
651 {
652 struct worker *worker = first_worker(gcwq);
653
654 if (likely(worker))
655 wake_up_process(worker->task);
656 }
657
658 /**
659 * wq_worker_waking_up - a worker is waking up
660 * @task: task waking up
661 * @cpu: CPU @task is waking up to
662 *
663 * This function is called during try_to_wake_up() when a worker is
664 * being awoken.
665 *
666 * CONTEXT:
667 * spin_lock_irq(rq->lock)
668 */
669 void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
670 {
671 struct worker *worker = kthread_data(task);
672
673 if (!(worker->flags & WORKER_NOT_RUNNING))
674 atomic_inc(get_gcwq_nr_running(cpu));
675 }
676
677 /**
678 * wq_worker_sleeping - a worker is going to sleep
679 * @task: task going to sleep
680 * @cpu: CPU in question, must be the current CPU number
681 *
682 * This function is called during schedule() when a busy worker is
683 * going to sleep. A worker on the same cpu can be woken up by
684 * returning a pointer to its task.
685 *
686 * CONTEXT:
687 * spin_lock_irq(rq->lock)
688 *
689 * RETURNS:
690 * Worker task on @cpu to wake up, %NULL if none.
691 */
692 struct task_struct *wq_worker_sleeping(struct task_struct *task,
693 unsigned int cpu)
694 {
695 struct worker *worker = kthread_data(task), *to_wakeup = NULL;
696 struct global_cwq *gcwq = get_gcwq(cpu);
697 atomic_t *nr_running = get_gcwq_nr_running(cpu);
698
699 if (worker->flags & WORKER_NOT_RUNNING)
700 return NULL;
701
702 /* this can only happen on the local cpu */
703 BUG_ON(cpu != raw_smp_processor_id());
704
705 /*
706 * The counterpart of the following dec_and_test, implied mb,
707 * worklist not empty test sequence is in insert_work().
708 * Please read comment there.
709 *
710 * NOT_RUNNING is clear. This means that trustee is not in
711 * charge and we're running on the local cpu w/ rq lock held
712 * and preemption disabled, which in turn means that no one else
713 * could be manipulating idle_list, so dereferencing idle_list
714 * without gcwq lock is safe.
715 */
716 if (atomic_dec_and_test(nr_running) && !list_empty(&gcwq->worklist))
717 to_wakeup = first_worker(gcwq);
718 return to_wakeup ? to_wakeup->task : NULL;
719 }
720
721 /**
722 * worker_set_flags - set worker flags and adjust nr_running accordingly
723 * @worker: self
724 * @flags: flags to set
725 * @wakeup: wakeup an idle worker if necessary
726 *
727 * Set @flags in @worker->flags and adjust nr_running accordingly. If
728 * nr_running becomes zero and @wakeup is %true, an idle worker is
729 * woken up.
730 *
731 * CONTEXT:
732 * spin_lock_irq(gcwq->lock)
733 */
734 static inline void worker_set_flags(struct worker *worker, unsigned int flags,
735 bool wakeup)
736 {
737 struct global_cwq *gcwq = worker->gcwq;
738
739 WARN_ON_ONCE(worker->task != current);
740
741 /*
742 * If transitioning into NOT_RUNNING, adjust nr_running and
743 * wake up an idle worker as necessary if requested by
744 * @wakeup.
745 */
746 if ((flags & WORKER_NOT_RUNNING) &&
747 !(worker->flags & WORKER_NOT_RUNNING)) {
748 atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu);
749
750 if (wakeup) {
751 if (atomic_dec_and_test(nr_running) &&
752 !list_empty(&gcwq->worklist))
753 wake_up_worker(gcwq);
754 } else
755 atomic_dec(nr_running);
756 }
757
758 worker->flags |= flags;
759 }
760
761 /**
762 * worker_clr_flags - clear worker flags and adjust nr_running accordingly
763 * @worker: self
764 * @flags: flags to clear
765 *
766 * Clear @flags in @worker->flags and adjust nr_running accordingly.
767 *
768 * CONTEXT:
769 * spin_lock_irq(gcwq->lock)
770 */
771 static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
772 {
773 struct global_cwq *gcwq = worker->gcwq;
774 unsigned int oflags = worker->flags;
775
776 WARN_ON_ONCE(worker->task != current);
777
778 worker->flags &= ~flags;
779
780 /*
781 * If transitioning out of NOT_RUNNING, increment nr_running. Note
782 * that the nested NOT_RUNNING is not a noop. NOT_RUNNING is a mask
783 * of multiple flags, not a single flag.
784 */
785 if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
786 if (!(worker->flags & WORKER_NOT_RUNNING))
787 atomic_inc(get_gcwq_nr_running(gcwq->cpu));
788 }
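/*
 * Example of the "nested" case above: a worker with both WORKER_PREP
 * and WORKER_CPU_INTENSIVE set is NOT_RUNNING. Clearing only
 * WORKER_PREP leaves WORKER_CPU_INTENSIVE set, so the worker is still
 * NOT_RUNNING and nr_running must not be incremented yet; the inner
 * check above handles exactly that.
 */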
789
790 /**
791 * busy_worker_head - return the busy hash head for a work
792 * @gcwq: gcwq of interest
793 * @work: work to be hashed
794 *
795 * Return hash head of @gcwq for @work.
796 *
797 * CONTEXT:
798 * spin_lock_irq(gcwq->lock).
799 *
800 * RETURNS:
801 * Pointer to the hash head.
802 */
803 static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
804 struct work_struct *work)
805 {
806 const int base_shift = ilog2(sizeof(struct work_struct));
807 unsigned long v = (unsigned long)work;
808
809 /* simple shift and fold hash, do we need something better? */
810 v >>= base_shift;
811 v += v >> BUSY_WORKER_HASH_ORDER;
812 v &= BUSY_WORKER_HASH_MASK;
813
814 return &gcwq->busy_hash[v];
815 }
816
817 /**
818 * __find_worker_executing_work - find worker which is executing a work
819 * @gcwq: gcwq of interest
820 * @bwh: hash head as returned by busy_worker_head()
821 * @work: work to find worker for
822 *
823 * Find a worker which is executing @work on @gcwq. @bwh should be
824 * the hash head obtained by calling busy_worker_head() with the same
825 * work.
826 *
827 * CONTEXT:
828 * spin_lock_irq(gcwq->lock).
829 *
830 * RETURNS:
831 * Pointer to worker which is executing @work if found, NULL
832 * otherwise.
833 */
834 static struct worker *__find_worker_executing_work(struct global_cwq *gcwq,
835 struct hlist_head *bwh,
836 struct work_struct *work)
837 {
838 struct worker *worker;
839 struct hlist_node *tmp;
840
841 hlist_for_each_entry(worker, tmp, bwh, hentry)
842 if (worker->current_work == work &&
843 worker->current_func == work->func)
844 return worker;
845 return NULL;
846 }
847
848 /**
849 * find_worker_executing_work - find worker which is executing a work
850 * @gcwq: gcwq of interest
851 * @work: work to find worker for
852 *
853 * Find a worker which is executing @work on @gcwq by searching
854 * @gcwq->busy_hash which is keyed by the address of @work. For a worker
855 * to match, its current execution should match the address of @work and
856 * its work function. This is to avoid unwanted dependency between
857 * unrelated work executions through a work item being recycled while still
858 * being executed.
859 *
860 * This is a bit tricky. A work item may be freed once its execution
861 * starts and nothing prevents the freed area from being recycled for
862 * another work item. If the same work item address ends up being reused
863 * before the original execution finishes, workqueue will identify the
864 * recycled work item as currently executing and make it wait until the
865 * current execution finishes, introducing an unwanted dependency.
866 *
867 * This function checks the work item address, work function and workqueue
868 * to avoid false positives. Note that this isn't complete as one may
869 * construct a work function which can introduce dependency onto itself
870 * through a recycled work item. Well, if somebody wants to shoot oneself
871 * in the foot that badly, there's only so much we can do, and if such
872 * deadlock actually occurs, it should be easy to locate the culprit work
873 * function.
874 *
875 * CONTEXT:
876 * spin_lock_irq(gcwq->lock).
877 *
878 * RETURNS:
879 * Pointer to worker which is executing @work if found, NULL
880 * otherwise.
881 */
882 static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
883 struct work_struct *work)
884 {
885 return __find_worker_executing_work(gcwq, busy_worker_head(gcwq, work),
886 work);
887 }
888
889 /**
890 * gcwq_determine_ins_pos - find insertion position
891 * @gcwq: gcwq of interest
892 * @cwq: cwq a work is being queued for
893 *
894 * A work for @cwq is about to be queued on @gcwq, determine insertion
895 * position for the work. If @cwq is for HIGHPRI wq, the work is
896 * queued at the head of the queue but in FIFO order with respect to
897 * other HIGHPRI works; otherwise, at the end of the queue. This
898 * function also sets GCWQ_HIGHPRI_PENDING flag to hint @gcwq that
899 * there are HIGHPRI works pending.
900 *
901 * CONTEXT:
902 * spin_lock_irq(gcwq->lock).
903 *
904 * RETURNS:
905 * Pointer to insertion position.
906 */
907 static inline struct list_head *gcwq_determine_ins_pos(struct global_cwq *gcwq,
908 struct cpu_workqueue_struct *cwq)
909 {
910 struct work_struct *twork;
911
912 if (likely(!(cwq->wq->flags & WQ_HIGHPRI)))
913 return &gcwq->worklist;
914
915 list_for_each_entry(twork, &gcwq->worklist, entry) {
916 struct cpu_workqueue_struct *tcwq = get_work_cwq(twork);
917
918 if (!(tcwq->wq->flags & WQ_HIGHPRI))
919 break;
920 }
921
922 gcwq->flags |= GCWQ_HIGHPRI_PENDING;
923 return &twork->entry;
924 }
925
926 /**
927 * insert_work - insert a work into gcwq
928 * @cwq: cwq @work belongs to
929 * @work: work to insert
930 * @head: insertion point
931 * @extra_flags: extra WORK_STRUCT_* flags to set
932 *
933 * Insert @work which belongs to @cwq into @gcwq after @head.
934 * @extra_flags is or'd to work_struct flags.
935 *
936 * CONTEXT:
937 * spin_lock_irq(gcwq->lock).
938 */
939 static void insert_work(struct cpu_workqueue_struct *cwq,
940 struct work_struct *work, struct list_head *head,
941 unsigned int extra_flags)
942 {
943 struct global_cwq *gcwq = cwq->gcwq;
944
945 /* we own @work, set data and link */
946 set_work_cwq(work, cwq, extra_flags);
947
948 /*
949 * Ensure that we get the right work->data if we see the
950 * result of list_add() below, see try_to_grab_pending().
951 */
952 smp_wmb();
953
954 list_add_tail(&work->entry, head);
955
956 /*
957 * Ensure either wq_worker_sleeping() sees the above
958 * list_add_tail() or we see zero nr_running to avoid workers
959 * lying around lazily while there are works to be processed.
960 */
961 smp_mb();
962
963 if (__need_more_worker(gcwq))
964 wake_up_worker(gcwq);
965 }
966
967 /*
968 * Test whether @work is being queued from another work executing on the
969 * same workqueue. This is rather expensive and should only be used from
970 * cold paths.
971 */
972 static bool is_chained_work(struct workqueue_struct *wq)
973 {
974 unsigned long flags;
975 unsigned int cpu;
976
977 for_each_gcwq_cpu(cpu) {
978 struct global_cwq *gcwq = get_gcwq(cpu);
979 struct worker *worker;
980 struct hlist_node *pos;
981 int i;
982
983 spin_lock_irqsave(&gcwq->lock, flags);
984 for_each_busy_worker(worker, i, pos, gcwq) {
985 if (worker->task != current)
986 continue;
987 spin_unlock_irqrestore(&gcwq->lock, flags);
988 /*
989 * I'm @worker, no locking necessary. See if @work
990 * is headed to the same workqueue.
991 */
992 return worker->current_cwq->wq == wq;
993 }
994 spin_unlock_irqrestore(&gcwq->lock, flags);
995 }
996 return false;
997 }
998
999 static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
1000 struct work_struct *work)
1001 {
1002 struct global_cwq *gcwq;
1003 struct cpu_workqueue_struct *cwq;
1004 struct list_head *worklist;
1005 unsigned int work_flags;
1006 unsigned long flags;
1007
1008 debug_work_activate(work);
1009
1010 /* if dying, only works from the same workqueue are allowed */
1011 if (unlikely(wq->flags & WQ_DRAINING) &&
1012 WARN_ON_ONCE(!is_chained_work(wq)))
1013 return;
1014
1015 /* determine gcwq to use */
1016 if (!(wq->flags & WQ_UNBOUND)) {
1017 struct global_cwq *last_gcwq;
1018
1019 if (unlikely(cpu == WORK_CPU_UNBOUND))
1020 cpu = raw_smp_processor_id();
1021
1022 /*
1023 * It's multi cpu. If @wq is non-reentrant and @work
1024 * was previously on a different cpu, it might still
1025 * be running there, in which case the work needs to
1026 * be queued on that cpu to guarantee non-reentrance.
1027 */
1028 gcwq = get_gcwq(cpu);
1029 if (wq->flags & WQ_NON_REENTRANT &&
1030 (last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) {
1031 struct worker *worker;
1032
1033 spin_lock_irqsave(&last_gcwq->lock, flags);
1034
1035 worker = find_worker_executing_work(last_gcwq, work);
1036
1037 if (worker && worker->current_cwq->wq == wq)
1038 gcwq = last_gcwq;
1039 else {
1040 /* meh... not running there, queue here */
1041 spin_unlock_irqrestore(&last_gcwq->lock, flags);
1042 spin_lock_irqsave(&gcwq->lock, flags);
1043 }
1044 } else
1045 spin_lock_irqsave(&gcwq->lock, flags);
1046 } else {
1047 gcwq = get_gcwq(WORK_CPU_UNBOUND);
1048 spin_lock_irqsave(&gcwq->lock, flags);
1049 }
1050
1051 /* gcwq determined, get cwq and queue */
1052 cwq = get_cwq(gcwq->cpu, wq);
1053 trace_workqueue_queue_work(cpu, cwq, work);
1054
1055 BUG_ON(!list_empty(&work->entry));
1056
1057 cwq->nr_in_flight[cwq->work_color]++;
1058 work_flags = work_color_to_flags(cwq->work_color);
1059
1060 if (likely(cwq->nr_active < cwq->max_active)) {
1061 trace_workqueue_activate_work(work);
1062 cwq->nr_active++;
1063 worklist = gcwq_determine_ins_pos(gcwq, cwq);
1064 } else {
1065 work_flags |= WORK_STRUCT_DELAYED;
1066 worklist = &cwq->delayed_works;
1067 }
1068
1069 insert_work(cwq, work, worklist, work_flags);
1070
1071 spin_unlock_irqrestore(&gcwq->lock, flags);
1072 }
1073
1074 /**
1075 * queue_work - queue work on a workqueue
1076 * @wq: workqueue to use
1077 * @work: work to queue
1078 *
1079 * Returns 0 if @work was already on a queue, non-zero otherwise.
1080 *
1081 * We queue the work to the CPU on which it was submitted, but if the CPU dies
1082 * it can be processed by another CPU.
1083 */
1084 int queue_work(struct workqueue_struct *wq, struct work_struct *work)
1085 {
1086 int ret;
1087
1088 ret = queue_work_on(get_cpu(), wq, work);
1089 put_cpu();
1090
1091 return ret;
1092 }
1093 EXPORT_SYMBOL_GPL(queue_work);
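/*
 * Typical caller-side usage, a minimal sketch (my_work_fn and my_work
 * are hypothetical names on the caller's side):
 *
 *	static void my_work_fn(struct work_struct *work)
 *	{
 *		...
 *	}
 *	static DECLARE_WORK(my_work, my_work_fn);
 *
 *	queue_work(system_wq, &my_work);
 */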
1094
1095 /**
1096 * queue_work_on - queue work on specific cpu
1097 * @cpu: CPU number to execute work on
1098 * @wq: workqueue to use
1099 * @work: work to queue
1100 *
1101 * Returns 0 if @work was already on a queue, non-zero otherwise.
1102 *
1103 * We queue the work to a specific CPU; the caller must ensure it
1104 * can't go away.
1105 */
1106 int
1107 queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
1108 {
1109 int ret = 0;
1110
1111 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1112 __queue_work(cpu, wq, work);
1113 ret = 1;
1114 }
1115 return ret;
1116 }
1117 EXPORT_SYMBOL_GPL(queue_work_on);
1118
1119 static void delayed_work_timer_fn(unsigned long __data)
1120 {
1121 struct delayed_work *dwork = (struct delayed_work *)__data;
1122 struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
1123
1124 __queue_work(smp_processor_id(), cwq->wq, &dwork->work);
1125 }
1126
1127 /**
1128 * queue_delayed_work - queue work on a workqueue after delay
1129 * @wq: workqueue to use
1130 * @dwork: delayable work to queue
1131 * @delay: number of jiffies to wait before queueing
1132 *
1133 * Returns 0 if @work was already on a queue, non-zero otherwise.
1134 */
1135 int queue_delayed_work(struct workqueue_struct *wq,
1136 struct delayed_work *dwork, unsigned long delay)
1137 {
1138 if (delay == 0)
1139 return queue_work(wq, &dwork->work);
1140
1141 return queue_delayed_work_on(-1, wq, dwork, delay);
1142 }
1143 EXPORT_SYMBOL_GPL(queue_delayed_work);
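/*
 * A minimal sketch of delayed queueing (my_dwork and my_work_fn are
 * hypothetical caller-side names):
 *
 *	static DECLARE_DELAYED_WORK(my_dwork, my_work_fn);
 *
 *	queue_delayed_work(system_wq, &my_dwork, msecs_to_jiffies(100));
 */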
1144
1145 /**
1146 * queue_delayed_work_on - queue work on specific CPU after delay
1147 * @cpu: CPU number to execute work on
1148 * @wq: workqueue to use
1149 * @dwork: work to queue
1150 * @delay: number of jiffies to wait before queueing
1151 *
1152 * Returns 0 if @work was already on a queue, non-zero otherwise.
1153 */
1154 int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
1155 struct delayed_work *dwork, unsigned long delay)
1156 {
1157 int ret = 0;
1158 struct timer_list *timer = &dwork->timer;
1159 struct work_struct *work = &dwork->work;
1160
1161 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1162 unsigned int lcpu;
1163
1164 WARN_ON_ONCE(timer_pending(timer));
1165 WARN_ON_ONCE(!list_empty(&work->entry));
1166
1167 timer_stats_timer_set_start_info(&dwork->timer);
1168
1169 /*
1170 * This stores cwq for the moment, for the timer_fn.
1171 * Note that the work's gcwq is preserved to allow
1172 * reentrance detection for delayed works.
1173 */
1174 if (!(wq->flags & WQ_UNBOUND)) {
1175 struct global_cwq *gcwq = get_work_gcwq(work);
1176
1177 if (gcwq && gcwq->cpu != WORK_CPU_UNBOUND)
1178 lcpu = gcwq->cpu;
1179 else
1180 lcpu = raw_smp_processor_id();
1181 } else
1182 lcpu = WORK_CPU_UNBOUND;
1183
1184 set_work_cwq(work, get_cwq(lcpu, wq), 0);
1185
1186 timer->expires = jiffies + delay;
1187 timer->data = (unsigned long)dwork;
1188 timer->function = delayed_work_timer_fn;
1189
1190 if (unlikely(cpu >= 0))
1191 add_timer_on(timer, cpu);
1192 else
1193 add_timer(timer);
1194 ret = 1;
1195 }
1196 return ret;
1197 }
1198 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
1199
1200 /**
1201 * worker_enter_idle - enter idle state
1202 * @worker: worker which is entering idle state
1203 *
1204 * @worker is entering idle state. Update stats and idle timer if
1205 * necessary.
1206 *
1207 * LOCKING:
1208 * spin_lock_irq(gcwq->lock).
1209 */
1210 static void worker_enter_idle(struct worker *worker)
1211 {
1212 struct global_cwq *gcwq = worker->gcwq;
1213
1214 BUG_ON(worker->flags & WORKER_IDLE);
1215 BUG_ON(!list_empty(&worker->entry) &&
1216 (worker->hentry.next || worker->hentry.pprev));
1217
1218 /* can't use worker_set_flags(), also called from start_worker() */
1219 worker->flags |= WORKER_IDLE;
1220 gcwq->nr_idle++;
1221 worker->last_active = jiffies;
1222
1223 /* idle_list is LIFO */
1224 list_add(&worker->entry, &gcwq->idle_list);
1225
1226 if (likely(!(worker->flags & WORKER_ROGUE))) {
1227 if (too_many_workers(gcwq) && !timer_pending(&gcwq->idle_timer))
1228 mod_timer(&gcwq->idle_timer,
1229 jiffies + IDLE_WORKER_TIMEOUT);
1230 } else
1231 wake_up_all(&gcwq->trustee_wait);
1232
1233 /*
1234 * Sanity check nr_running. Because trustee releases gcwq->lock
1235 * between setting %WORKER_ROGUE and zapping nr_running, the
1236 * warning may trigger spuriously. Check iff trustee is idle.
1237 */
1238 WARN_ON_ONCE(gcwq->trustee_state == TRUSTEE_DONE &&
1239 gcwq->nr_workers == gcwq->nr_idle &&
1240 atomic_read(get_gcwq_nr_running(gcwq->cpu)));
1241 }
1242
1243 /**
1244 * worker_leave_idle - leave idle state
1245 * @worker: worker which is leaving idle state
1246 *
1247 * @worker is leaving idle state. Update stats.
1248 *
1249 * LOCKING:
1250 * spin_lock_irq(gcwq->lock).
1251 */
1252 static void worker_leave_idle(struct worker *worker)
1253 {
1254 struct global_cwq *gcwq = worker->gcwq;
1255
1256 BUG_ON(!(worker->flags & WORKER_IDLE));
1257 worker_clr_flags(worker, WORKER_IDLE);
1258 gcwq->nr_idle--;
1259 list_del_init(&worker->entry);
1260 }
1261
1262 /**
1263 * worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock gcwq
1264 * @worker: self
1265 *
1266 * Works which are scheduled while the cpu is online must at least be
1267 * scheduled to a worker which is bound to the cpu so that if they are
1268 * flushed from cpu callbacks while cpu is going down, they are
1269 * guaranteed to execute on the cpu.
1270 *
1271 * This function is to be used by rogue workers and rescuers to bind
1272 * themselves to the target cpu and may race with cpu going down or
1273 * coming online. kthread_bind() can't be used because it may put the
1274 * worker on an already dead cpu and set_cpus_allowed_ptr() can't be used
1275 * verbatim as it's best effort and blocking and gcwq may be
1276 * [dis]associated in the meantime.
1277 *
1278 * This function tries set_cpus_allowed() and locks gcwq and verifies
1279 * the binding against GCWQ_DISASSOCIATED which is set during
1280 * CPU_DYING and cleared during CPU_ONLINE, so if the worker enters
1281 * idle state or fetches works without dropping lock, it can guarantee
1282 * the scheduling requirement described in the first paragraph.
1283 *
1284 * CONTEXT:
1285 * Might sleep. Called without any lock but returns with gcwq->lock
1286 * held.
1287 *
1288 * RETURNS:
1289 * %true if the associated gcwq is online (@worker is successfully
1290 * bound), %false if offline.
1291 */
1292 static bool worker_maybe_bind_and_lock(struct worker *worker)
1293 __acquires(&gcwq->lock)
1294 {
1295 struct global_cwq *gcwq = worker->gcwq;
1296 struct task_struct *task = worker->task;
1297
1298 while (true) {
1299 /*
1300 * The following call may fail, succeed or succeed
1301 * without actually migrating the task to the cpu if
1302 * it races with cpu hotunplug operation. Verify
1303 * against GCWQ_DISASSOCIATED.
1304 */
1305 if (!(gcwq->flags & GCWQ_DISASSOCIATED))
1306 set_cpus_allowed_ptr(task, get_cpu_mask(gcwq->cpu));
1307
1308 spin_lock_irq(&gcwq->lock);
1309 if (gcwq->flags & GCWQ_DISASSOCIATED)
1310 return false;
1311 if (task_cpu(task) == gcwq->cpu &&
1312 cpumask_equal(&current->cpus_allowed,
1313 get_cpu_mask(gcwq->cpu)))
1314 return true;
1315 spin_unlock_irq(&gcwq->lock);
1316
1317 /*
1318 * We've raced with CPU hot[un]plug. Give it a breather
1319 * and retry migration. cond_resched() is required here;
1320 * otherwise, we might deadlock against cpu_stop trying to
1321 * bring down the CPU on non-preemptive kernel.
1322 */
1323 cpu_relax();
1324 cond_resched();
1325 }
1326 }
1327
1328 /*
1329 * Function for worker->rebind_work used to rebind rogue busy workers
1330 * to the associated cpu which is coming back online. This is
1331 * scheduled by cpu up but can race with other cpu hotplug operations
1332 * and may be executed twice without intervening cpu down.
1333 */
1334 static void worker_rebind_fn(struct work_struct *work)
1335 {
1336 struct worker *worker = container_of(work, struct worker, rebind_work);
1337 struct global_cwq *gcwq = worker->gcwq;
1338
1339 if (worker_maybe_bind_and_lock(worker))
1340 worker_clr_flags(worker, WORKER_REBIND);
1341
1342 spin_unlock_irq(&gcwq->lock);
1343 }
1344
1345 static struct worker *alloc_worker(void)
1346 {
1347 struct worker *worker;
1348
1349 worker = kzalloc(sizeof(*worker), GFP_KERNEL);
1350 if (worker) {
1351 INIT_LIST_HEAD(&worker->entry);
1352 INIT_LIST_HEAD(&worker->scheduled);
1353 INIT_WORK(&worker->rebind_work, worker_rebind_fn);
1354 /* on creation a worker is in !idle && prep state */
1355 worker->flags = WORKER_PREP;
1356 }
1357 return worker;
1358 }
1359
1360 /**
1361 * create_worker - create a new workqueue worker
1362 * @gcwq: gcwq the new worker will belong to
1363 * @bind: whether to bind the new worker to @gcwq's cpu or not
1364 *
1365 * Create a new worker which is bound to @gcwq. The returned worker
1366 * can be started by calling start_worker() or destroyed using
1367 * destroy_worker().
1368 *
1369 * CONTEXT:
1370 * Might sleep. Does GFP_KERNEL allocations.
1371 *
1372 * RETURNS:
1373 * Pointer to the newly created worker, %NULL on failure.
1374 */
1375 static struct worker *create_worker(struct global_cwq *gcwq, bool bind)
1376 {
1377 bool on_unbound_cpu = gcwq->cpu == WORK_CPU_UNBOUND;
1378 struct worker *worker = NULL;
1379 int id = -1;
1380
1381 spin_lock_irq(&gcwq->lock);
1382 while (ida_get_new(&gcwq->worker_ida, &id)) {
1383 spin_unlock_irq(&gcwq->lock);
1384 if (!ida_pre_get(&gcwq->worker_ida, GFP_KERNEL))
1385 goto fail;
1386 spin_lock_irq(&gcwq->lock);
1387 }
1388 spin_unlock_irq(&gcwq->lock);
1389
1390 worker = alloc_worker();
1391 if (!worker)
1392 goto fail;
1393
1394 worker->gcwq = gcwq;
1395 worker->id = id;
1396
1397 if (!on_unbound_cpu)
1398 worker->task = kthread_create_on_node(worker_thread,
1399 worker,
1400 cpu_to_node(gcwq->cpu),
1401 "kworker/%u:%d", gcwq->cpu, id);
1402 else
1403 worker->task = kthread_create(worker_thread, worker,
1404 "kworker/u:%d", id);
1405 if (IS_ERR(worker->task))
1406 goto fail;
1407
1408 /*
1409 * A rogue worker will become a regular one if CPU comes
1410 * online later on. Make sure every worker has
1411 * PF_THREAD_BOUND set.
1412 */
1413 if (bind && !on_unbound_cpu)
1414 kthread_bind(worker->task, gcwq->cpu);
1415 else {
1416 worker->task->flags |= PF_THREAD_BOUND;
1417 if (on_unbound_cpu)
1418 worker->flags |= WORKER_UNBOUND;
1419 }
1420
1421 return worker;
1422 fail:
1423 if (id >= 0) {
1424 spin_lock_irq(&gcwq->lock);
1425 ida_remove(&gcwq->worker_ida, id);
1426 spin_unlock_irq(&gcwq->lock);
1427 }
1428 kfree(worker);
1429 return NULL;
1430 }
1431
1432 /**
1433 * start_worker - start a newly created worker
1434 * @worker: worker to start
1435 *
1436 * Make the gcwq aware of @worker and start it.
1437 *
1438 * CONTEXT:
1439 * spin_lock_irq(gcwq->lock).
1440 */
1441 static void start_worker(struct worker *worker)
1442 {
1443 worker->flags |= WORKER_STARTED;
1444 worker->gcwq->nr_workers++;
1445 worker_enter_idle(worker);
1446 wake_up_process(worker->task);
1447 }
1448
1449 /**
1450 * destroy_worker - destroy a workqueue worker
1451 * @worker: worker to be destroyed
1452 *
1453 * Destroy @worker and adjust @gcwq stats accordingly.
1454 *
1455 * CONTEXT:
1456 * spin_lock_irq(gcwq->lock) which is released and regrabbed.
1457 */
1458 static void destroy_worker(struct worker *worker)
1459 {
1460 struct global_cwq *gcwq = worker->gcwq;
1461 int id = worker->id;
1462
1463 /* sanity check frenzy */
1464 BUG_ON(worker->current_work);
1465 BUG_ON(!list_empty(&worker->scheduled));
1466
1467 if (worker->flags & WORKER_STARTED)
1468 gcwq->nr_workers--;
1469 if (worker->flags & WORKER_IDLE)
1470 gcwq->nr_idle--;
1471
1472 /*
1473 * Once WORKER_DIE is set, the kworker may destroy itself at any
1474 * point. Pin to ensure the task stays until we're done with it.
1475 */
1476 get_task_struct(worker->task);
1477
1478 list_del_init(&worker->entry);
1479 worker->flags |= WORKER_DIE;
1480
1481 spin_unlock_irq(&gcwq->lock);
1482
1483 kthread_stop(worker->task);
1484 put_task_struct(worker->task);
1485 kfree(worker);
1486
1487 spin_lock_irq(&gcwq->lock);
1488 ida_remove(&gcwq->worker_ida, id);
1489 }
1490
1491 static void idle_worker_timeout(unsigned long __gcwq)
1492 {
1493 struct global_cwq *gcwq = (void *)__gcwq;
1494
1495 spin_lock_irq(&gcwq->lock);
1496
1497 if (too_many_workers(gcwq)) {
1498 struct worker *worker;
1499 unsigned long expires;
1500
1501 /* idle_list is kept in LIFO order, check the last one */
1502 worker = list_entry(gcwq->idle_list.prev, struct worker, entry);
1503 expires = worker->last_active + IDLE_WORKER_TIMEOUT;
1504
1505 if (time_before(jiffies, expires))
1506 mod_timer(&gcwq->idle_timer, expires);
1507 else {
1508 /* it's been idle for too long, wake up manager */
1509 gcwq->flags |= GCWQ_MANAGE_WORKERS;
1510 wake_up_worker(gcwq);
1511 }
1512 }
1513
1514 spin_unlock_irq(&gcwq->lock);
1515 }
1516
1517 static bool send_mayday(struct work_struct *work)
1518 {
1519 struct cpu_workqueue_struct *cwq = get_work_cwq(work);
1520 struct workqueue_struct *wq = cwq->wq;
1521 unsigned int cpu;
1522
1523 if (!(wq->flags & WQ_RESCUER))
1524 return false;
1525
1526 /* mayday mayday mayday */
1527 cpu = cwq->gcwq->cpu;
1528 /* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */
1529 if (cpu == WORK_CPU_UNBOUND)
1530 cpu = 0;
1531 if (!mayday_test_and_set_cpu(cpu, wq->mayday_mask))
1532 wake_up_process(wq->rescuer->task);
1533 return true;
1534 }
1535
1536 static void gcwq_mayday_timeout(unsigned long __gcwq)
1537 {
1538 struct global_cwq *gcwq = (void *)__gcwq;
1539 struct work_struct *work;
1540
1541 spin_lock_irq(&gcwq->lock);
1542
1543 if (need_to_create_worker(gcwq)) {
1544 /*
1545 * We've been trying to create a new worker but
1546 * haven't been successful. We might be hitting an
1547 * allocation deadlock. Send distress signals to
1548 * rescuers.
1549 */
1550 list_for_each_entry(work, &gcwq->worklist, entry)
1551 send_mayday(work);
1552 }
1553
1554 spin_unlock_irq(&gcwq->lock);
1555
1556 mod_timer(&gcwq->mayday_timer, jiffies + MAYDAY_INTERVAL);
1557 }
1558
1559 /**
1560 * maybe_create_worker - create a new worker if necessary
1561 * @gcwq: gcwq to create a new worker for
1562 *
1563 * Create a new worker for @gcwq if necessary. @gcwq is guaranteed to
1564 * have at least one idle worker on return from this function. If
1565 * creating a new worker takes longer than MAYDAY_INTERVAL, mayday is
1566 * sent to all rescuers with works scheduled on @gcwq to resolve
1567 * possible allocation deadlock.
1568 *
1569 * On return, need_to_create_worker() is guaranteed to be false and
1570 * may_start_working() true.
1571 *
1572 * LOCKING:
1573 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
1574 * multiple times. Does GFP_KERNEL allocations. Called only from
1575 * manager.
1576 *
1577 * RETURNS:
1578 * false if no action was taken and gcwq->lock stayed locked, true
1579 * otherwise.
1580 */
1581 static bool maybe_create_worker(struct global_cwq *gcwq)
1582 __releases(&gcwq->lock)
1583 __acquires(&gcwq->lock)
1584 {
1585 if (!need_to_create_worker(gcwq))
1586 return false;
1587 restart:
1588 spin_unlock_irq(&gcwq->lock);
1589
1590 /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
1591 mod_timer(&gcwq->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
1592
1593 while (true) {
1594 struct worker *worker;
1595
1596 worker = create_worker(gcwq, true);
1597 if (worker) {
1598 del_timer_sync(&gcwq->mayday_timer);
1599 spin_lock_irq(&gcwq->lock);
1600 start_worker(worker);
1601 BUG_ON(need_to_create_worker(gcwq));
1602 return true;
1603 }
1604
1605 if (!need_to_create_worker(gcwq))
1606 break;
1607
1608 __set_current_state(TASK_INTERRUPTIBLE);
1609 schedule_timeout(CREATE_COOLDOWN);
1610
1611 if (!need_to_create_worker(gcwq))
1612 break;
1613 }
1614
1615 del_timer_sync(&gcwq->mayday_timer);
1616 spin_lock_irq(&gcwq->lock);
1617 if (need_to_create_worker(gcwq))
1618 goto restart;
1619 return true;
1620 }
1621
1622 /**
1623 * maybe_destroy_workers - destroy workers which have been idle for a while
1624 * @gcwq: gcwq to destroy workers for
1625 *
1626 * Destroy @gcwq workers which have been idle for longer than
1627 * IDLE_WORKER_TIMEOUT.
1628 *
1629 * LOCKING:
1630 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
1631 * multiple times. Called only from manager.
1632 *
1633 * RETURNS:
1634 * false if no action was taken and gcwq->lock stayed locked, true
1635 * otherwise.
1636 */
1637 static bool maybe_destroy_workers(struct global_cwq *gcwq)
1638 {
1639 bool ret = false;
1640
1641 while (too_many_workers(gcwq)) {
1642 struct worker *worker;
1643 unsigned long expires;
1644
1645 worker = list_entry(gcwq->idle_list.prev, struct worker, entry);
1646 expires = worker->last_active + IDLE_WORKER_TIMEOUT;
1647
1648 if (time_before(jiffies, expires)) {
1649 mod_timer(&gcwq->idle_timer, expires);
1650 break;
1651 }
1652
1653 destroy_worker(worker);
1654 ret = true;
1655 }
1656
1657 return ret;
1658 }
1659
1660 /**
1661 * manage_workers - manage worker pool
1662 * @worker: self
1663 *
1664 * Assume the manager role and manage gcwq worker pool @worker belongs
1665 * to. At any given time, there can be only zero or one manager per
1666 * gcwq. The exclusion is handled automatically by this function.
1667 *
1668 * The caller can safely start processing works on false return. On
1669 * true return, it's guaranteed that need_to_create_worker() is false
1670 * and may_start_working() is true.
1671 *
1672 * CONTEXT:
1673 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
1674 * multiple times. Does GFP_KERNEL allocations.
1675 *
1676 * RETURNS:
1677 * false if no action was taken and gcwq->lock stayed locked, true if
1678 * some action was taken.
1679 */
1680 static bool manage_workers(struct worker *worker)
1681 {
1682 struct global_cwq *gcwq = worker->gcwq;
1683 bool ret = false;
1684
1685 if (gcwq->flags & GCWQ_MANAGING_WORKERS)
1686 return ret;
1687
1688 gcwq->flags &= ~GCWQ_MANAGE_WORKERS;
1689 gcwq->flags |= GCWQ_MANAGING_WORKERS;
1690
1691 /*
1692 * Destroy and then create so that may_start_working() is true
1693 * on return.
1694 */
1695 ret |= maybe_destroy_workers(gcwq);
1696 ret |= maybe_create_worker(gcwq);
1697
1698 gcwq->flags &= ~GCWQ_MANAGING_WORKERS;
1699
1700 /*
1701 * The trustee might be waiting to take over the manager
1702 * position, tell it we're done.
1703 */
1704 if (unlikely(gcwq->trustee))
1705 wake_up_all(&gcwq->trustee_wait);
1706
1707 return ret;
1708 }
1709
1710 /**
1711 * move_linked_works - move linked works to a list
1712 * @work: start of series of works to be scheduled
1713 * @head: target list to append @work to
1714 * @nextp: out parameter for nested worklist walking
1715 *
1716 * Schedule linked works starting from @work to @head. Work series to
1717 * be scheduled starts at @work and includes any consecutive work with
1718 * WORK_STRUCT_LINKED set in its predecessor.
1719 *
1720 * If @nextp is not NULL, it's updated to point to the next work of
1721 * the last scheduled work. This allows move_linked_works() to be
1722 * nested inside outer list_for_each_entry_safe().
1723 *
1724 * CONTEXT:
1725 * spin_lock_irq(gcwq->lock).
1726 */
1727 static void move_linked_works(struct work_struct *work, struct list_head *head,
1728 struct work_struct **nextp)
1729 {
1730 struct work_struct *n;
1731
1732 /*
1733 * Linked worklist will always end before the end of the list,
1734 * use NULL for list head.
1735 */
1736 list_for_each_entry_safe_from(work, n, NULL, entry) {
1737 list_move_tail(&work->entry, head);
1738 if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
1739 break;
1740 }
1741
1742 /*
1743 * If we're already inside safe list traversal and have moved
1744 * multiple works to the scheduled queue, the next position
1745 * needs to be updated.
1746 */
1747 if (nextp)
1748 *nextp = n;
1749 }
1750
1751 static void cwq_activate_delayed_work(struct work_struct *work)
1752 {
1753 struct cpu_workqueue_struct *cwq = get_work_cwq(work);
1754 struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq);
1755
1756 trace_workqueue_activate_work(work);
1757 move_linked_works(work, pos, NULL);
1758 __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
1759 cwq->nr_active++;
1760 }
1761
1762 static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
1763 {
1764 struct work_struct *work = list_first_entry(&cwq->delayed_works,
1765 struct work_struct, entry);
1766
1767 cwq_activate_delayed_work(work);
1768 }
1769
1770 /**
1771 * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
1772 * @cwq: cwq of interest
1773 * @color: color of work which left the queue
1774 * @delayed: for a delayed work
1775 *
1776 * A work either has completed or is removed from pending queue,
1777 * decrement nr_in_flight of its cwq and handle workqueue flushing.
1778 *
1779 * CONTEXT:
1780 * spin_lock_irq(gcwq->lock).
1781 */
1782 static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
1783 bool delayed)
1784 {
1785 /* ignore uncolored works */
1786 if (color == WORK_NO_COLOR)
1787 return;
1788
1789 cwq->nr_in_flight[color]--;
1790
1791 if (!delayed) {
1792 cwq->nr_active--;
1793 if (!list_empty(&cwq->delayed_works)) {
1794 /* one down, submit a delayed one */
1795 if (cwq->nr_active < cwq->max_active)
1796 cwq_activate_first_delayed(cwq);
1797 }
1798 }
1799
1800 /* is flush in progress and are we at the flushing tip? */
1801 if (likely(cwq->flush_color != color))
1802 return;
1803
1804 /* are there still in-flight works? */
1805 if (cwq->nr_in_flight[color])
1806 return;
1807
1808 /* this cwq is done, clear flush_color */
1809 cwq->flush_color = -1;
1810
1811 /*
1812 * If this was the last cwq, wake up the first flusher. It
1813 * will handle the rest.
1814 */
1815 if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush))
1816 complete(&cwq->wq->first_flusher->done);
1817 }
1818
1819 /**
1820 * process_one_work - process single work
1821 * @worker: self
1822 * @work: work to process
1823 *
1824 * Process @work. This function contains all the logic necessary to
1825 * process a single work including synchronization against and
1826 * interaction with other workers on the same cpu, queueing and
1827 * flushing. As long as context requirement is met, any worker can
1828 * call this function to process a work.
1829 *
1830 * CONTEXT:
1831 * spin_lock_irq(gcwq->lock) which is released and regrabbed.
1832 */
1833 static void process_one_work(struct worker *worker, struct work_struct *work)
1834 __releases(&gcwq->lock)
1835 __acquires(&gcwq->lock)
1836 {
1837 struct cpu_workqueue_struct *cwq = get_work_cwq(work);
1838 struct global_cwq *gcwq = cwq->gcwq;
1839 struct hlist_head *bwh = busy_worker_head(gcwq, work);
1840 bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE;
1841 int work_color;
1842 struct worker *collision;
1843 #ifdef CONFIG_LOCKDEP
1844 /*
1845 * It is permissible to free the struct work_struct from
1846 * inside the function that is called from it, this we need to
1847 * take into account for lockdep too. To avoid bogus "held
1848 * lock freed" warnings as well as problems when looking into
1849 * work->lockdep_map, make a copy and use that here.
1850 */
1851 struct lockdep_map lockdep_map = work->lockdep_map;
1852 #endif
1853 /*
1854 * A single work shouldn't be executed concurrently by
1855 * multiple workers on a single cpu. Check whether anyone is
1856 * already processing the work. If so, defer the work to the
1857 * currently executing one.
1858 */
1859 collision = __find_worker_executing_work(gcwq, bwh, work);
1860 if (unlikely(collision)) {
1861 move_linked_works(work, &collision->scheduled, NULL);
1862 return;
1863 }
1864
1865 /* claim and process */
1866 debug_work_deactivate(work);
1867 hlist_add_head(&worker->hentry, bwh);
1868 worker->current_work = work;
1869 worker->current_func = work->func;
1870 worker->current_cwq = cwq;
1871 work_color = get_work_color(work);
1872
1873 /* record the current cpu number in the work data and dequeue */
1874 set_work_cpu(work, gcwq->cpu);
1875 list_del_init(&work->entry);
1876
1877 /*
1878 * If HIGHPRI_PENDING, check the next work, and, if HIGHPRI,
1879 * wake up another worker; otherwise, clear HIGHPRI_PENDING.
1880 */
1881 if (unlikely(gcwq->flags & GCWQ_HIGHPRI_PENDING)) {
1882 struct work_struct *nwork = list_first_entry(&gcwq->worklist,
1883 struct work_struct, entry);
1884
1885 if (!list_empty(&gcwq->worklist) &&
1886 get_work_cwq(nwork)->wq->flags & WQ_HIGHPRI)
1887 wake_up_worker(gcwq);
1888 else
1889 gcwq->flags &= ~GCWQ_HIGHPRI_PENDING;
1890 }
1891
1892 /*
1893 * CPU intensive works don't participate in concurrency
1894 * management. They're the scheduler's responsibility.
1895 */
1896 if (unlikely(cpu_intensive))
1897 worker_set_flags(worker, WORKER_CPU_INTENSIVE, true);
1898
1899 spin_unlock_irq(&gcwq->lock);
1900
1901 smp_wmb(); /* paired with test_and_set_bit(PENDING) */
1902 work_clear_pending(work);
1903
1904 lock_map_acquire_read(&cwq->wq->lockdep_map);
1905 lock_map_acquire(&lockdep_map);
1906 trace_workqueue_execute_start(work);
1907 worker->current_func(work);
1908 /*
1909 * While we must be careful to not use "work" after this, the trace
1910 * point will only record its address.
1911 */
1912 trace_workqueue_execute_end(work);
1913 lock_map_release(&lockdep_map);
1914 lock_map_release(&cwq->wq->lockdep_map);
1915
1916 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
1917 pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
1918 " last function: %pf\n",
1919 current->comm, preempt_count(), task_pid_nr(current),
1920 worker->current_func);
1921 debug_show_held_locks(current);
1922 dump_stack();
1923 }
1924
1925 /*
1926 * The following prevents a kworker from hogging CPU on !PREEMPT
1927 * kernels, where a requeueing work item waiting for something to
1928 * happen could deadlock with stop_machine as such work item could
1929 * indefinitely requeue itself while all other CPUs are trapped in
1930 * stop_machine.
1931 */
1932 cond_resched();
1933
1934 spin_lock_irq(&gcwq->lock);
1935
1936 /* clear cpu intensive status */
1937 if (unlikely(cpu_intensive))
1938 worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
1939
1940 /* we're done with it, release */
1941 hlist_del_init(&worker->hentry);
1942 worker->current_work = NULL;
1943 worker->current_func = NULL;
1944 worker->current_cwq = NULL;
1945 cwq_dec_nr_in_flight(cwq, work_color, false);
1946 }
1947
1948 /**
1949 * process_scheduled_works - process scheduled works
1950 * @worker: self
1951 *
1952 * Process all scheduled works. Please note that the scheduled list
1953 * may change while processing a work, so this function repeatedly
1954 * fetches a work from the top and executes it.
1955 *
1956 * CONTEXT:
1957 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
1958 * multiple times.
1959 */
1960 static void process_scheduled_works(struct worker *worker)
1961 {
1962 while (!list_empty(&worker->scheduled)) {
1963 struct work_struct *work = list_first_entry(&worker->scheduled,
1964 struct work_struct, entry);
1965 process_one_work(worker, work);
1966 }
1967 }
1968
1969 /**
1970 * worker_thread - the worker thread function
1971 * @__worker: self
1972 *
1973 * The gcwq worker thread function. There's a single dynamic pool of
1974 * these per each cpu. These workers process all works regardless of
1975 * their specific target workqueue. The only exception is works which
1976 * belong to workqueues with a rescuer which will be explained in
1977 * rescuer_thread().
1978 */
1979 static int worker_thread(void *__worker)
1980 {
1981 struct worker *worker = __worker;
1982 struct global_cwq *gcwq = worker->gcwq;
1983
1984 /* tell the scheduler that this is a workqueue worker */
1985 worker->task->flags |= PF_WQ_WORKER;
1986 woke_up:
1987 spin_lock_irq(&gcwq->lock);
1988
1989 /* DIE can be set only while we're idle, checking here is enough */
1990 if (worker->flags & WORKER_DIE) {
1991 spin_unlock_irq(&gcwq->lock);
1992 worker->task->flags &= ~PF_WQ_WORKER;
1993 return 0;
1994 }
1995
1996 worker_leave_idle(worker);
1997 recheck:
1998 /* no more worker necessary? */
1999 if (!need_more_worker(gcwq))
2000 goto sleep;
2001
2002 /* do we need to manage? */
2003 if (unlikely(!may_start_working(gcwq)) && manage_workers(worker))
2004 goto recheck;
2005
2006 /*
2007 * ->scheduled list can only be filled while a worker is
2008 * preparing to process a work or actually processing it.
2009 * Make sure nobody diddled with it while I was sleeping.
2010 */
2011 BUG_ON(!list_empty(&worker->scheduled));
2012
2013 /*
2014 * When control reaches this point, we're guaranteed to have
2015 * at least one idle worker or that someone else has already
2016 * assumed the manager role.
2017 */
2018 worker_clr_flags(worker, WORKER_PREP);
2019
2020 do {
2021 struct work_struct *work =
2022 list_first_entry(&gcwq->worklist,
2023 struct work_struct, entry);
2024
2025 if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
2026 /* optimization path, not strictly necessary */
2027 process_one_work(worker, work);
2028 if (unlikely(!list_empty(&worker->scheduled)))
2029 process_scheduled_works(worker);
2030 } else {
2031 move_linked_works(work, &worker->scheduled, NULL);
2032 process_scheduled_works(worker);
2033 }
2034 } while (keep_working(gcwq));
2035
2036 worker_set_flags(worker, WORKER_PREP, false);
2037 sleep:
2038 if (unlikely(need_to_manage_workers(gcwq)) && manage_workers(worker))
2039 goto recheck;
2040
2041 /*
2042 * gcwq->lock is held and there's no work to process and no
2043 * need to manage, sleep. Workers are woken up only while
2044 * holding gcwq->lock or from local cpu, so setting the
2045 * current state before releasing gcwq->lock is enough to
2046 * prevent losing any event.
2047 */
2048 worker_enter_idle(worker);
2049 __set_current_state(TASK_INTERRUPTIBLE);
2050 spin_unlock_irq(&gcwq->lock);
2051 schedule();
2052 goto woke_up;
2053 }
2054
2055 /**
2056 * rescuer_thread - the rescuer thread function
2057 * @__wq: the associated workqueue
2058 *
2059 * Workqueue rescuer thread function. There's one rescuer for each
2060 * workqueue which has WQ_RESCUER set.
2061 *
2062 * Regular work processing on a gcwq may block trying to create a new
2063 * worker, which uses a GFP_KERNEL allocation that has a slight chance of
2064 * developing into a deadlock if some works currently on the same queue
2065 * need to be processed to satisfy the GFP_KERNEL allocation. This is
2066 * the problem rescuer solves.
2067 *
2068 * When such a condition is possible, the gcwq summons rescuers of all
2069 * workqueues which have works queued on the gcwq and lets them process
2070 * those works so that forward progress can be guaranteed.
2071 *
2072 * This should happen rarely.
2073 */
2074 static int rescuer_thread(void *__wq)
2075 {
2076 struct workqueue_struct *wq = __wq;
2077 struct worker *rescuer = wq->rescuer;
2078 struct list_head *scheduled = &rescuer->scheduled;
2079 bool is_unbound = wq->flags & WQ_UNBOUND;
2080 unsigned int cpu;
2081
2082 set_user_nice(current, RESCUER_NICE_LEVEL);
2083 repeat:
2084 set_current_state(TASK_INTERRUPTIBLE);
2085
2086 if (kthread_should_stop()) {
2087 __set_current_state(TASK_RUNNING);
2088 return 0;
2089 }
2090
2091 /*
2092 * See whether any cpu is asking for help. Unbounded
2093 * workqueues use cpu 0 in mayday_mask for CPU_UNBOUND.
2094 */
2095 for_each_mayday_cpu(cpu, wq->mayday_mask) {
2096 unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu;
2097 struct cpu_workqueue_struct *cwq = get_cwq(tcpu, wq);
2098 struct global_cwq *gcwq = cwq->gcwq;
2099 struct work_struct *work, *n;
2100
2101 __set_current_state(TASK_RUNNING);
2102 mayday_clear_cpu(cpu, wq->mayday_mask);
2103
2104 /* migrate to the target cpu if possible */
2105 rescuer->gcwq = gcwq;
2106 worker_maybe_bind_and_lock(rescuer);
2107
2108 /*
2109 * Slurp in all works issued via this workqueue and
2110 * process'em.
2111 */
2112 BUG_ON(!list_empty(&rescuer->scheduled));
2113 list_for_each_entry_safe(work, n, &gcwq->worklist, entry)
2114 if (get_work_cwq(work) == cwq)
2115 move_linked_works(work, scheduled, &n);
2116
2117 process_scheduled_works(rescuer);
2118
2119 /*
2120 * Leave this gcwq. If keep_working() is %true, notify a
2121 * regular worker; otherwise, we end up with 0 concurrency
2122 * and stalling the execution.
2123 */
2124 if (keep_working(gcwq))
2125 wake_up_worker(gcwq);
2126
2127 spin_unlock_irq(&gcwq->lock);
2128 }
2129
2130 schedule();
2131 goto repeat;
2132 }
2133
2134 struct wq_barrier {
2135 struct work_struct work;
2136 struct completion done;
2137 };
2138
2139 static void wq_barrier_func(struct work_struct *work)
2140 {
2141 struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
2142 complete(&barr->done);
2143 }
2144
2145 /**
2146 * insert_wq_barrier - insert a barrier work
2147 * @cwq: cwq to insert barrier into
2148 * @barr: wq_barrier to insert
2149 * @target: target work to attach @barr to
2150 * @worker: worker currently executing @target, NULL if @target is not executing
2151 *
2152 * @barr is linked to @target such that @barr is completed only after
2153 * @target finishes execution. Please note that the ordering
2154 * guarantee is observed only with respect to @target and on the local
2155 * cpu.
2156 *
2157 * Currently, a queued barrier can't be canceled. This is because
2158 * try_to_grab_pending() can't determine whether the work to be
2159 * grabbed is at the head of the queue and thus can't clear LINKED
2160 * flag of the previous work while there must be a valid next work
2161 * after a work with LINKED flag set.
2162 *
2163 * Note that when @worker is non-NULL, @target may be modified
2164 * underneath us, so we can't reliably determine cwq from @target.
2165 *
2166 * CONTEXT:
2167 * spin_lock_irq(gcwq->lock).
2168 */
2169 static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
2170 struct wq_barrier *barr,
2171 struct work_struct *target, struct worker *worker)
2172 {
2173 struct list_head *head;
2174 unsigned int linked = 0;
2175
2176 /*
2177 * debugobject calls are safe here even with gcwq->lock locked
2178 * as we know for sure that this will not trigger any of the
2179 * checks and call back into the fixup functions where we
2180 * might deadlock.
2181 */
2182 INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
2183 __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
2184 init_completion(&barr->done);
2185
2186 /*
2187 * If @target is currently being executed, schedule the
2188 * barrier to the worker; otherwise, put it after @target.
2189 */
2190 if (worker)
2191 head = worker->scheduled.next;
2192 else {
2193 unsigned long *bits = work_data_bits(target);
2194
2195 head = target->entry.next;
2196 /* there can already be other linked works, inherit and set */
2197 linked = *bits & WORK_STRUCT_LINKED;
2198 __set_bit(WORK_STRUCT_LINKED_BIT, bits);
2199 }
2200
2201 debug_work_activate(&barr->work);
2202 insert_work(cwq, &barr->work, head,
2203 work_color_to_flags(WORK_NO_COLOR) | linked);
2204 }
2205
2206 /**
2207 * flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing
2208 * @wq: workqueue being flushed
2209 * @flush_color: new flush color, < 0 for no-op
2210 * @work_color: new work color, < 0 for no-op
2211 *
2212 * Prepare cwqs for workqueue flushing.
2213 *
2214 * If @flush_color is non-negative, flush_color on all cwqs should be
2215 * -1. If no cwq has in-flight commands at the specified color, all
2216 * cwq->flush_color's stay at -1 and %false is returned. If any cwq
2217 * has in flight commands, its cwq->flush_color is set to
2218 * @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq
2219 * wakeup logic is armed and %true is returned.
2220 *
2221 * The caller should have initialized @wq->first_flusher prior to
2222 * calling this function with non-negative @flush_color. If
2223 * @flush_color is negative, no flush color update is done and %false
2224 * is returned.
2225 *
2226 * If @work_color is non-negative, all cwqs should have the same
2227 * work_color which is previous to @work_color and all will be
2228 * advanced to @work_color.
2229 *
2230 * CONTEXT:
2231 * mutex_lock(wq->flush_mutex).
2232 *
2233 * RETURNS:
2234 * %true if @flush_color >= 0 and there's something to flush. %false
2235 * otherwise.
2236 */
2237 static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq,
2238 int flush_color, int work_color)
2239 {
2240 bool wait = false;
2241 unsigned int cpu;
2242
2243 if (flush_color >= 0) {
2244 BUG_ON(atomic_read(&wq->nr_cwqs_to_flush));
2245 atomic_set(&wq->nr_cwqs_to_flush, 1);
2246 }
2247
2248 for_each_cwq_cpu(cpu, wq) {
2249 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
2250 struct global_cwq *gcwq = cwq->gcwq;
2251
2252 spin_lock_irq(&gcwq->lock);
2253
2254 if (flush_color >= 0) {
2255 BUG_ON(cwq->flush_color != -1);
2256
2257 if (cwq->nr_in_flight[flush_color]) {
2258 cwq->flush_color = flush_color;
2259 atomic_inc(&wq->nr_cwqs_to_flush);
2260 wait = true;
2261 }
2262 }
2263
2264 if (work_color >= 0) {
2265 BUG_ON(work_color != work_next_color(cwq->work_color));
2266 cwq->work_color = work_color;
2267 }
2268
2269 spin_unlock_irq(&gcwq->lock);
2270 }
2271
2272 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush))
2273 complete(&wq->first_flusher->done);
2274
2275 return wait;
2276 }
2277
2278 /**
2279 * flush_workqueue - ensure that any scheduled work has run to completion.
2280 * @wq: workqueue to flush
2281 *
2282 * Forces execution of the workqueue and blocks until its completion.
2283 * This is typically used in driver shutdown handlers.
2284 *
2285 * We sleep until all works which were queued on entry have been handled,
2286 * but we are not livelocked by new incoming ones.
2287 */
2288 void flush_workqueue(struct workqueue_struct *wq)
2289 {
2290 struct wq_flusher this_flusher = {
2291 .list = LIST_HEAD_INIT(this_flusher.list),
2292 .flush_color = -1,
2293 .done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
2294 };
2295 int next_color;
2296
2297 lock_map_acquire(&wq->lockdep_map);
2298 lock_map_release(&wq->lockdep_map);
2299
2300 mutex_lock(&wq->flush_mutex);
2301
2302 /*
2303 * Start-to-wait phase
2304 */
2305 next_color = work_next_color(wq->work_color);
2306
2307 if (next_color != wq->flush_color) {
2308 /*
2309 * Color space is not full. The current work_color
2310 * becomes our flush_color and work_color is advanced
2311 * by one.
2312 */
2313 BUG_ON(!list_empty(&wq->flusher_overflow));
2314 this_flusher.flush_color = wq->work_color;
2315 wq->work_color = next_color;
2316
2317 if (!wq->first_flusher) {
2318 /* no flush in progress, become the first flusher */
2319 BUG_ON(wq->flush_color != this_flusher.flush_color);
2320
2321 wq->first_flusher = &this_flusher;
2322
2323 if (!flush_workqueue_prep_cwqs(wq, wq->flush_color,
2324 wq->work_color)) {
2325 /* nothing to flush, done */
2326 wq->flush_color = next_color;
2327 wq->first_flusher = NULL;
2328 goto out_unlock;
2329 }
2330 } else {
2331 /* wait in queue */
2332 BUG_ON(wq->flush_color == this_flusher.flush_color);
2333 list_add_tail(&this_flusher.list, &wq->flusher_queue);
2334 flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
2335 }
2336 } else {
2337 /*
2338 * Oops, color space is full, wait on overflow queue.
2339 * The next flush completion will assign us
2340 * flush_color and transfer to flusher_queue.
2341 */
2342 list_add_tail(&this_flusher.list, &wq->flusher_overflow);
2343 }
2344
2345 mutex_unlock(&wq->flush_mutex);
2346
2347 wait_for_completion(&this_flusher.done);
2348
2349 /*
2350 * Wake-up-and-cascade phase
2351 *
2352 * First flushers are responsible for cascading flushes and
2353 * handling overflow. Non-first flushers can simply return.
2354 */
2355 if (wq->first_flusher != &this_flusher)
2356 return;
2357
2358 mutex_lock(&wq->flush_mutex);
2359
2360 /* we might have raced, check again with mutex held */
2361 if (wq->first_flusher != &this_flusher)
2362 goto out_unlock;
2363
2364 wq->first_flusher = NULL;
2365
2366 BUG_ON(!list_empty(&this_flusher.list));
2367 BUG_ON(wq->flush_color != this_flusher.flush_color);
2368
2369 while (true) {
2370 struct wq_flusher *next, *tmp;
2371
2372 /* complete all the flushers sharing the current flush color */
2373 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
2374 if (next->flush_color != wq->flush_color)
2375 break;
2376 list_del_init(&next->list);
2377 complete(&next->done);
2378 }
2379
2380 BUG_ON(!list_empty(&wq->flusher_overflow) &&
2381 wq->flush_color != work_next_color(wq->work_color));
2382
2383 /* this flush_color is finished, advance by one */
2384 wq->flush_color = work_next_color(wq->flush_color);
2385
2386 /* one color has been freed, handle overflow queue */
2387 if (!list_empty(&wq->flusher_overflow)) {
2388 /*
2389 * Assign the same color to all overflowed
2390 * flushers, advance work_color and append to
2391 * flusher_queue. This is the start-to-wait
2392 * phase for these overflowed flushers.
2393 */
2394 list_for_each_entry(tmp, &wq->flusher_overflow, list)
2395 tmp->flush_color = wq->work_color;
2396
2397 wq->work_color = work_next_color(wq->work_color);
2398
2399 list_splice_tail_init(&wq->flusher_overflow,
2400 &wq->flusher_queue);
2401 flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
2402 }
2403
2404 if (list_empty(&wq->flusher_queue)) {
2405 BUG_ON(wq->flush_color != wq->work_color);
2406 break;
2407 }
2408
2409 /*
2410 * Need to flush more colors. Make the next flusher
2411 * the new first flusher and arm cwqs.
2412 */
2413 BUG_ON(wq->flush_color == wq->work_color);
2414 BUG_ON(wq->flush_color != next->flush_color);
2415
2416 list_del_init(&next->list);
2417 wq->first_flusher = next;
2418
2419 if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1))
2420 break;
2421
2422 /*
2423 * Meh... this color is already done, clear first
2424 * flusher and repeat cascading.
2425 */
2426 wq->first_flusher = NULL;
2427 }
2428
2429 out_unlock:
2430 mutex_unlock(&wq->flush_mutex);
2431 }
2432 EXPORT_SYMBOL_GPL(flush_workqueue);
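
/*
 * Illustrative sketch, not part of the workqueue implementation: the
 * typical driver-shutdown usage mentioned above.  ex1_shutdown() and its
 * workqueue are hypothetical; the workqueue is assumed to have been
 * created earlier with alloc_workqueue().
 */
static void __maybe_unused ex1_shutdown(struct workqueue_struct *ex1_wq)
{
	/* wait for every work item queued on ex1_wq so far to finish */
	flush_workqueue(ex1_wq);
	/* now it is safe to release the resources those work items use */
}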
2433
2434 /**
2435 * drain_workqueue - drain a workqueue
2436 * @wq: workqueue to drain
2437 *
2438 * Wait until the workqueue becomes empty. While draining is in progress,
2439 * only chain queueing is allowed. IOW, only currently pending or running
2440 * work items on @wq can queue further work items on it. @wq is flushed
2441 * repeatedly until it becomes empty. The number of flushes is determined
2442 * by the depth of chaining and should be relatively small. Whine if it
2443 * takes too long.
2444 */
2445 void drain_workqueue(struct workqueue_struct *wq)
2446 {
2447 unsigned int flush_cnt = 0;
2448 unsigned int cpu;
2449
2450 /*
2451 * __queue_work() needs to test whether there are drainers, is much
2452 * hotter than drain_workqueue() and already looks at @wq->flags.
2453 * Use WQ_DRAINING so that queue doesn't have to check nr_drainers.
2454 */
2455 spin_lock(&workqueue_lock);
2456 if (!wq->nr_drainers++)
2457 wq->flags |= WQ_DRAINING;
2458 spin_unlock(&workqueue_lock);
2459 reflush:
2460 flush_workqueue(wq);
2461
2462 for_each_cwq_cpu(cpu, wq) {
2463 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
2464 bool drained;
2465
2466 spin_lock_irq(&cwq->gcwq->lock);
2467 drained = !cwq->nr_active && list_empty(&cwq->delayed_works);
2468 spin_unlock_irq(&cwq->gcwq->lock);
2469
2470 if (drained)
2471 continue;
2472
2473 if (++flush_cnt == 10 ||
2474 (flush_cnt % 100 == 0 && flush_cnt <= 1000))
2475 pr_warning("workqueue %s: flush on destruction isn't complete after %u tries\n",
2476 wq->name, flush_cnt);
2477 goto reflush;
2478 }
2479
2480 spin_lock(&workqueue_lock);
2481 if (!--wq->nr_drainers)
2482 wq->flags &= ~WQ_DRAINING;
2483 spin_unlock(&workqueue_lock);
2484 }
2485 EXPORT_SYMBOL_GPL(drain_workqueue);
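
/*
 * Illustrative sketch, not part of the workqueue implementation: draining
 * a workqueue whose work item requeues itself a bounded number of times.
 * The requeue from ex2_fn() is chain queueing and is still accepted while
 * the drain is in progress; queueing from anywhere else is refused, so
 * ex2_wq eventually becomes empty.  All ex2_* names are hypothetical and
 * ex2_wq is assumed to have been created with alloc_workqueue().
 */
static struct workqueue_struct *ex2_wq;
static int ex2_remaining;
static void ex2_fn(struct work_struct *work);
static DECLARE_WORK(ex2_work, ex2_fn);

static void ex2_fn(struct work_struct *work)
{
	if (--ex2_remaining)
		queue_work(ex2_wq, &ex2_work);	/* chain queueing */
}

static void __maybe_unused ex2_teardown(void)
{
	drain_workqueue(ex2_wq);	/* returns once ex2_wq is empty */
}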
2486
2487 static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
2488 bool wait_executing)
2489 {
2490 struct worker *worker = NULL;
2491 struct global_cwq *gcwq;
2492 struct cpu_workqueue_struct *cwq;
2493
2494 might_sleep();
2495 gcwq = get_work_gcwq(work);
2496 if (!gcwq)
2497 return false;
2498
2499 spin_lock_irq(&gcwq->lock);
2500 if (!list_empty(&work->entry)) {
2501 /*
2502 * See the comment near try_to_grab_pending()->smp_rmb().
2503 * If it was re-queued to a different gcwq under us, we
2504 * are not going to wait.
2505 */
2506 smp_rmb();
2507 cwq = get_work_cwq(work);
2508 if (unlikely(!cwq || gcwq != cwq->gcwq))
2509 goto already_gone;
2510 } else if (wait_executing) {
2511 worker = find_worker_executing_work(gcwq, work);
2512 if (!worker)
2513 goto already_gone;
2514 cwq = worker->current_cwq;
2515 } else
2516 goto already_gone;
2517
2518 insert_wq_barrier(cwq, barr, work, worker);
2519 spin_unlock_irq(&gcwq->lock);
2520
2521 /*
2522 * If @max_active is 1 or rescuer is in use, flushing another work
2523 * item on the same workqueue may lead to deadlock. Make sure the
2524 * flusher is not running on the same workqueue by verifying write
2525 * access.
2526 */
2527 if (cwq->wq->saved_max_active == 1 || cwq->wq->flags & WQ_RESCUER)
2528 lock_map_acquire(&cwq->wq->lockdep_map);
2529 else
2530 lock_map_acquire_read(&cwq->wq->lockdep_map);
2531 lock_map_release(&cwq->wq->lockdep_map);
2532
2533 return true;
2534 already_gone:
2535 spin_unlock_irq(&gcwq->lock);
2536 return false;
2537 }
2538
2539 /**
2540 * flush_work - wait for a work to finish executing the last queueing instance
2541 * @work: the work to flush
2542 *
2543 * Wait until @work has finished execution. This function considers
2544 * only the last queueing instance of @work. If @work has been
2545 * enqueued across different CPUs on a non-reentrant workqueue or on
2546 * multiple workqueues, @work might still be executing on return on
2547 * some of the CPUs from earlier queueing.
2548 *
2549 * If @work was queued only on a non-reentrant, ordered or unbound
2550 * workqueue, @work is guaranteed to be idle on return if it hasn't
2551 * been requeued since flush started.
2552 *
2553 * RETURNS:
2554 * %true if flush_work() waited for the work to finish execution,
2555 * %false if it was already idle.
2556 */
2557 bool flush_work(struct work_struct *work)
2558 {
2559 struct wq_barrier barr;
2560
2561 if (start_flush_work(work, &barr, true)) {
2562 wait_for_completion(&barr.done);
2563 destroy_work_on_stack(&barr.work);
2564 return true;
2565 } else
2566 return false;
2567 }
2568 EXPORT_SYMBOL_GPL(flush_work);
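
/*
 * Illustrative sketch, not part of the workqueue implementation: waiting
 * for the last queueing instance of a single work item.  ex3_* names are
 * hypothetical.
 */
static void ex3_fn(struct work_struct *work)
{
}
static DECLARE_WORK(ex3_work, ex3_fn);

static void __maybe_unused ex3_kick_and_wait(void)
{
	schedule_work(&ex3_work);
	if (flush_work(&ex3_work))
		pr_debug("waited for ex3_work to finish\n");
}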
2569
2570 static bool wait_on_cpu_work(struct global_cwq *gcwq, struct work_struct *work)
2571 {
2572 struct wq_barrier barr;
2573 struct worker *worker;
2574
2575 spin_lock_irq(&gcwq->lock);
2576
2577 worker = find_worker_executing_work(gcwq, work);
2578 if (unlikely(worker))
2579 insert_wq_barrier(worker->current_cwq, &barr, work, worker);
2580
2581 spin_unlock_irq(&gcwq->lock);
2582
2583 if (unlikely(worker)) {
2584 wait_for_completion(&barr.done);
2585 destroy_work_on_stack(&barr.work);
2586 return true;
2587 } else
2588 return false;
2589 }
2590
2591 static bool wait_on_work(struct work_struct *work)
2592 {
2593 bool ret = false;
2594 int cpu;
2595
2596 might_sleep();
2597
2598 lock_map_acquire(&work->lockdep_map);
2599 lock_map_release(&work->lockdep_map);
2600
2601 for_each_gcwq_cpu(cpu)
2602 ret |= wait_on_cpu_work(get_gcwq(cpu), work);
2603 return ret;
2604 }
2605
2606 /**
2607 * flush_work_sync - wait until a work has finished execution
2608 * @work: the work to flush
2609 *
2610 * Wait until @work has finished execution. On return, it's
2611 * guaranteed that all queueing instances of @work which happened
2612 * before this function is called are finished. In other words, if
2613 * @work hasn't been requeued since this function was called, @work is
2614 * guaranteed to be idle on return.
2615 *
2616 * RETURNS:
2617 * %true if flush_work_sync() waited for the work to finish execution,
2618 * %false if it was already idle.
2619 */
2620 bool flush_work_sync(struct work_struct *work)
2621 {
2622 struct wq_barrier barr;
2623 bool pending, waited;
2624
2625 /* we'll wait for executions separately, queue barr only if pending */
2626 pending = start_flush_work(work, &barr, false);
2627
2628 /* wait for executions to finish */
2629 waited = wait_on_work(work);
2630
2631 /* wait for the pending one */
2632 if (pending) {
2633 wait_for_completion(&barr.done);
2634 destroy_work_on_stack(&barr.work);
2635 }
2636
2637 return pending || waited;
2638 }
2639 EXPORT_SYMBOL_GPL(flush_work_sync);
2640
2641 /*
2642 * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
2643 * so this work can't be re-armed in any way.
2644 */
2645 static int try_to_grab_pending(struct work_struct *work)
2646 {
2647 struct global_cwq *gcwq;
2648 int ret = -1;
2649
2650 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
2651 return 0;
2652
2653 /*
2654 * The queueing is in progress, or it is already queued. Try to
2655 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
2656 */
2657 gcwq = get_work_gcwq(work);
2658 if (!gcwq)
2659 return ret;
2660
2661 spin_lock_irq(&gcwq->lock);
2662 if (!list_empty(&work->entry)) {
2663 /*
2664 * This work is queued, but perhaps we locked the wrong gcwq.
2665 * In that case we must see the new value after rmb(), see
2666 * insert_work()->wmb().
2667 */
2668 smp_rmb();
2669 if (gcwq == get_work_gcwq(work)) {
2670 debug_work_deactivate(work);
2671
2672 /*
2673 * A delayed work item cannot be grabbed directly
2674 * because it might have linked NO_COLOR work items
2675 * which, if left on the delayed_list, will confuse
2676 * cwq->nr_active management later on and cause
2677 * stall. Make sure the work item is activated
2678 * before grabbing.
2679 */
2680 if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
2681 cwq_activate_delayed_work(work);
2682
2683 list_del_init(&work->entry);
2684 cwq_dec_nr_in_flight(get_work_cwq(work),
2685 get_work_color(work),
2686 *work_data_bits(work) & WORK_STRUCT_DELAYED);
2687 ret = 1;
2688 }
2689 }
2690 spin_unlock_irq(&gcwq->lock);
2691
2692 return ret;
2693 }
2694
2695 static bool __cancel_work_timer(struct work_struct *work,
2696 struct timer_list *timer)
2697 {
2698 int ret;
2699
2700 do {
2701 ret = (timer && likely(del_timer(timer)));
2702 if (!ret)
2703 ret = try_to_grab_pending(work);
2704 wait_on_work(work);
2705 } while (unlikely(ret < 0));
2706
2707 clear_work_data(work);
2708 return ret;
2709 }
2710
2711 /**
2712 * cancel_work_sync - cancel a work and wait for it to finish
2713 * @work: the work to cancel
2714 *
2715 * Cancel @work and wait for its execution to finish. This function
2716 * can be used even if the work re-queues itself or migrates to
2717 * another workqueue. On return from this function, @work is
2718 * guaranteed to be not pending or executing on any CPU.
2719 *
2720 * cancel_work_sync(&delayed_work->work) must not be used for
2721 * delayed_work's. Use cancel_delayed_work_sync() instead.
2722 *
2723 * The caller must ensure that the workqueue on which @work was last
2724 * queued can't be destroyed before this function returns.
2725 *
2726 * RETURNS:
2727 * %true if @work was pending, %false otherwise.
2728 */
2729 bool cancel_work_sync(struct work_struct *work)
2730 {
2731 return __cancel_work_timer(work, NULL);
2732 }
2733 EXPORT_SYMBOL_GPL(cancel_work_sync);
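
/*
 * Illustrative sketch, not part of the workqueue implementation: making
 * sure a work item is neither pending nor running before freeing the
 * object it is embedded in.  struct ex4_dev is hypothetical.
 */
struct ex4_dev {
	struct work_struct work;
	/* ... resources the work function touches ... */
};

static void __maybe_unused ex4_release(struct ex4_dev *dev)
{
	/* after this returns, dev->work can no longer touch *dev */
	cancel_work_sync(&dev->work);
	kfree(dev);
}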
2734
2735 /**
2736 * flush_delayed_work - wait for a dwork to finish executing the last queueing
2737 * @dwork: the delayed work to flush
2738 *
2739 * Delayed timer is cancelled and the pending work is queued for
2740 * immediate execution. Like flush_work(), this function only
2741 * considers the last queueing instance of @dwork.
2742 *
2743 * RETURNS:
2744 * %true if flush_work() waited for the work to finish execution,
2745 * %false if it was already idle.
2746 */
2747 bool flush_delayed_work(struct delayed_work *dwork)
2748 {
2749 if (del_timer_sync(&dwork->timer))
2750 __queue_work(raw_smp_processor_id(),
2751 get_work_cwq(&dwork->work)->wq, &dwork->work);
2752 return flush_work(&dwork->work);
2753 }
2754 EXPORT_SYMBOL(flush_delayed_work);
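
/*
 * Illustrative sketch, not part of the workqueue implementation: pushing
 * out deferred state before e.g. a suspend.  If ex5_dwork is pending, its
 * timer is cancelled, the work is queued immediately and we wait for it.
 * ex5_* names are hypothetical.
 */
static void ex5_writeback(struct work_struct *work)
{
}
static DECLARE_DELAYED_WORK(ex5_dwork, ex5_writeback);

static void __maybe_unused ex5_suspend(void)
{
	flush_delayed_work(&ex5_dwork);
}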
2755
2756 /**
2757 * flush_delayed_work_sync - wait for a dwork to finish
2758 * @dwork: the delayed work to flush
2759 *
2760 * Delayed timer is cancelled and the pending work is queued for
2761 * execution immediately. Other than timer handling, its behavior
2762 * is identical to flush_work_sync().
2763 *
2764 * RETURNS:
2765 * %true if flush_work_sync() waited for the work to finish execution,
2766 * %false if it was already idle.
2767 */
2768 bool flush_delayed_work_sync(struct delayed_work *dwork)
2769 {
2770 if (del_timer_sync(&dwork->timer))
2771 __queue_work(raw_smp_processor_id(),
2772 get_work_cwq(&dwork->work)->wq, &dwork->work);
2773 return flush_work_sync(&dwork->work);
2774 }
2775 EXPORT_SYMBOL(flush_delayed_work_sync);
2776
2777 /**
2778 * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
2779 * @dwork: the delayed work to cancel
2780 *
2781 * This is cancel_work_sync() for delayed works.
2782 *
2783 * RETURNS:
2784 * %true if @dwork was pending, %false otherwise.
2785 */
2786 bool cancel_delayed_work_sync(struct delayed_work *dwork)
2787 {
2788 return __cancel_work_timer(&dwork->work, &dwork->timer);
2789 }
2790 EXPORT_SYMBOL(cancel_delayed_work_sync);
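
/*
 * Illustrative sketch, not part of the workqueue implementation: stopping
 * a self-rearming polling loop built on a delayed work.  ex6_* names are
 * hypothetical.
 */
static void ex6_poll(struct work_struct *work);
static DECLARE_DELAYED_WORK(ex6_dwork, ex6_poll);

static void ex6_poll(struct work_struct *work)
{
	/* ... sample hardware ..., then rearm in roughly one second */
	schedule_delayed_work(&ex6_dwork, HZ);
}

static void __maybe_unused ex6_stop(void)
{
	/* kills the timer and any executing instance despite the rearm */
	cancel_delayed_work_sync(&ex6_dwork);
}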
2791
2792 /**
2793 * schedule_work - put work task in global workqueue
2794 * @work: job to be done
2795 *
2796 * Returns zero if @work was already on the kernel-global workqueue and
2797 * non-zero otherwise.
2798 *
2799 * This puts a job in the kernel-global workqueue if it was not already
2800 * queued and leaves it in the same position on the kernel-global
2801 * workqueue otherwise.
2802 */
2803 int schedule_work(struct work_struct *work)
2804 {
2805 return queue_work(system_wq, work);
2806 }
2807 EXPORT_SYMBOL(schedule_work);
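
/*
 * Illustrative sketch, not part of the workqueue implementation: deferring
 * a sleepable action to the kernel-global workqueue.  ex7_* names are
 * hypothetical.
 */
static void ex7_fn(struct work_struct *work)
{
	/* runs later in process context and may sleep */
}
static DECLARE_WORK(ex7_work, ex7_fn);

static void __maybe_unused ex7_kick(void)
{
	if (!schedule_work(&ex7_work))
		pr_debug("ex7_work was already pending\n");
}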
2808
2809 /*
2810 * schedule_work_on - put work task on a specific cpu
2811 * @cpu: cpu to put the work task on
2812 * @work: job to be done
2813 *
2814 * This puts a job on a specific cpu
2815 */
2816 int schedule_work_on(int cpu, struct work_struct *work)
2817 {
2818 return queue_work_on(cpu, system_wq, work);
2819 }
2820 EXPORT_SYMBOL(schedule_work_on);
2821
2822 /**
2823 * schedule_delayed_work - put work task in global workqueue after delay
2824 * @dwork: job to be done
2825 * @delay: number of jiffies to wait or 0 for immediate execution
2826 *
2827 * After waiting for a given time this puts a job in the kernel-global
2828 * workqueue.
2829 */
2830 int schedule_delayed_work(struct delayed_work *dwork,
2831 unsigned long delay)
2832 {
2833 return queue_delayed_work(system_wq, dwork, delay);
2834 }
2835 EXPORT_SYMBOL(schedule_delayed_work);
2836
2837 /**
2838 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
2839 * @cpu: cpu to use
2840 * @dwork: job to be done
2841 * @delay: number of jiffies to wait
2842 *
2843 * After waiting for a given time this puts a job in the kernel-global
2844 * workqueue on the specified CPU.
2845 */
2846 int schedule_delayed_work_on(int cpu,
2847 struct delayed_work *dwork, unsigned long delay)
2848 {
2849 return queue_delayed_work_on(cpu, system_wq, dwork, delay);
2850 }
2851 EXPORT_SYMBOL(schedule_delayed_work_on);
2852
2853 /**
2854 * schedule_on_each_cpu - execute a function synchronously on each online CPU
2855 * @func: the function to call
2856 *
2857 * schedule_on_each_cpu() executes @func on each online CPU using the
2858 * system workqueue and blocks until all CPUs have completed.
2859 * schedule_on_each_cpu() is very slow.
2860 *
2861 * RETURNS:
2862 * 0 on success, -errno on failure.
2863 */
2864 int schedule_on_each_cpu(work_func_t func)
2865 {
2866 int cpu;
2867 struct work_struct __percpu *works;
2868
2869 works = alloc_percpu(struct work_struct);
2870 if (!works)
2871 return -ENOMEM;
2872
2873 get_online_cpus();
2874
2875 for_each_online_cpu(cpu) {
2876 struct work_struct *work = per_cpu_ptr(works, cpu);
2877
2878 INIT_WORK(work, func);
2879 schedule_work_on(cpu, work);
2880 }
2881
2882 for_each_online_cpu(cpu)
2883 flush_work(per_cpu_ptr(works, cpu));
2884
2885 put_online_cpus();
2886 free_percpu(works);
2887 return 0;
2888 }
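
/*
 * Illustrative sketch, not part of the workqueue implementation: running a
 * short function once on every online CPU and waiting for all of them.
 * ex8_* names are hypothetical.
 */
static void ex8_fn(struct work_struct *work)
{
	pr_info("ex8 ran on cpu %d\n", raw_smp_processor_id());
}

static int __maybe_unused ex8_run_everywhere(void)
{
	/* 0 on success, -ENOMEM if the per-cpu works couldn't be allocated */
	return schedule_on_each_cpu(ex8_fn);
}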
2889
2890 /**
2891 * flush_scheduled_work - ensure that any scheduled work has run to completion.
2892 *
2893 * Forces execution of the kernel-global workqueue and blocks until its
2894 * completion.
2895 *
2896 * Think twice before calling this function! It's very easy to get into
2897 * trouble if you don't take great care. Either of the following situations
2898 * will lead to deadlock:
2899 *
2900 * One of the work items currently on the workqueue needs to acquire
2901 * a lock held by your code or its caller.
2902 *
2903 * Your code is running in the context of a work routine.
2904 *
2905 * They will be detected by lockdep when they occur, but the first might not
2906 * occur very often. It depends on what work items are on the workqueue and
2907 * what locks they need, which you have no control over.
2908 *
2909 * In most situations flushing the entire workqueue is overkill; you merely
2910 * need to know that a particular work item isn't queued and isn't running.
2911 * In such cases you should use cancel_delayed_work_sync() or
2912 * cancel_work_sync() instead.
2913 */
2914 void flush_scheduled_work(void)
2915 {
2916 flush_workqueue(system_wq);
2917 }
2918 EXPORT_SYMBOL(flush_scheduled_work);
2919
2920 /**
2921 * execute_in_process_context - reliably execute the routine with user context
2922 * @fn: the function to execute
2923 * @ew: guaranteed storage for the execute work structure (must
2924 * be available when the work executes)
2925 *
2926 * Executes the function immediately if process context is available,
2927 * otherwise schedules the function for delayed execution.
2928 *
2929 * Returns: 0 - function was executed
2930 * 1 - function was scheduled for execution
2931 */
2932 int execute_in_process_context(work_func_t fn, struct execute_work *ew)
2933 {
2934 if (!in_interrupt()) {
2935 fn(&ew->work);
2936 return 0;
2937 }
2938
2939 INIT_WORK(&ew->work, fn);
2940 schedule_work(&ew->work);
2941
2942 return 1;
2943 }
2944 EXPORT_SYMBOL_GPL(execute_in_process_context);
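
/*
 * Illustrative sketch, not part of the workqueue implementation: releasing
 * an object either immediately (process context) or from a work item
 * (interrupt context).  @ew must stay valid until the work runs, so it is
 * embedded in the object being released.  ex9_* names are hypothetical.
 */
struct ex9_dev {
	struct execute_work ew;
};

static void ex9_cleanup(struct work_struct *work)
{
	struct ex9_dev *dev = container_of(work, struct ex9_dev, ew.work);

	kfree(dev);
}

static void __maybe_unused ex9_put(struct ex9_dev *dev)
{
	/* returns 0 if ex9_cleanup() ran here, 1 if it was scheduled */
	execute_in_process_context(ex9_cleanup, &dev->ew);
}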
2945
2946 int keventd_up(void)
2947 {
2948 return system_wq != NULL;
2949 }
2950
2951 static int alloc_cwqs(struct workqueue_struct *wq)
2952 {
2953 /*
2954 * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
2955 * Make sure that the alignment isn't lower than that of
2956 * unsigned long long.
2957 */
2958 const size_t size = sizeof(struct cpu_workqueue_struct);
2959 const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
2960 __alignof__(unsigned long long));
2961
2962 if (!(wq->flags & WQ_UNBOUND))
2963 wq->cpu_wq.pcpu = __alloc_percpu(size, align);
2964 else {
2965 void *ptr;
2966
2967 /*
2968 * Allocate enough room to align cwq and put an extra
2969 * pointer at the end pointing back to the originally
2970 * allocated pointer which will be used for free.
2971 */
2972 ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL);
2973 if (ptr) {
2974 wq->cpu_wq.single = PTR_ALIGN(ptr, align);
2975 *(void **)(wq->cpu_wq.single + 1) = ptr;
2976 }
2977 }
2978
2979 /* just in case, make sure it's actually aligned */
2980 BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align));
2981 return wq->cpu_wq.v ? 0 : -ENOMEM;
2982 }
2983
2984 static void free_cwqs(struct workqueue_struct *wq)
2985 {
2986 if (!(wq->flags & WQ_UNBOUND))
2987 free_percpu(wq->cpu_wq.pcpu);
2988 else if (wq->cpu_wq.single) {
2989 /* the pointer to free is stored right after the cwq */
2990 kfree(*(void **)(wq->cpu_wq.single + 1));
2991 }
2992 }
2993
2994 static int wq_clamp_max_active(int max_active, unsigned int flags,
2995 const char *name)
2996 {
2997 int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
2998
2999 if (max_active < 1 || max_active > lim)
3000 printk(KERN_WARNING "workqueue: max_active %d requested for %s "
3001 "is out of range, clamping between %d and %d\n",
3002 max_active, name, 1, lim);
3003
3004 return clamp_val(max_active, 1, lim);
3005 }
3006
3007 struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
3008 unsigned int flags,
3009 int max_active,
3010 struct lock_class_key *key,
3011 const char *lock_name, ...)
3012 {
3013 va_list args, args1;
3014 struct workqueue_struct *wq;
3015 unsigned int cpu;
3016 size_t namelen;
3017
3018 /* determine namelen, allocate wq and format name */
3019 va_start(args, lock_name);
3020 va_copy(args1, args);
3021 namelen = vsnprintf(NULL, 0, fmt, args) + 1;
3022
3023 wq = kzalloc(sizeof(*wq) + namelen, GFP_KERNEL);
3024 if (!wq)
3025 goto err;
3026
3027 vsnprintf(wq->name, namelen, fmt, args1);
3028 va_end(args);
3029 va_end(args1);
3030
3031 /*
3032 * Workqueues which may be used during memory reclaim should
3033 * have a rescuer to guarantee forward progress.
3034 */
3035 if (flags & WQ_MEM_RECLAIM)
3036 flags |= WQ_RESCUER;
3037
3038 /*
3039 * Unbound workqueues aren't concurrency managed and should be
3040 * dispatched to workers immediately.
3041 */
3042 if (flags & WQ_UNBOUND)
3043 flags |= WQ_HIGHPRI;
3044
3045 max_active = max_active ?: WQ_DFL_ACTIVE;
3046 max_active = wq_clamp_max_active(max_active, flags, wq->name);
3047
3048 /* init wq */
3049 wq->flags = flags;
3050 wq->saved_max_active = max_active;
3051 mutex_init(&wq->flush_mutex);
3052 atomic_set(&wq->nr_cwqs_to_flush, 0);
3053 INIT_LIST_HEAD(&wq->flusher_queue);
3054 INIT_LIST_HEAD(&wq->flusher_overflow);
3055
3056 lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
3057 INIT_LIST_HEAD(&wq->list);
3058
3059 if (alloc_cwqs(wq) < 0)
3060 goto err;
3061
3062 for_each_cwq_cpu(cpu, wq) {
3063 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
3064 struct global_cwq *gcwq = get_gcwq(cpu);
3065
3066 BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK);
3067 cwq->gcwq = gcwq;
3068 cwq->wq = wq;
3069 cwq->flush_color = -1;
3070 cwq->max_active = max_active;
3071 INIT_LIST_HEAD(&cwq->delayed_works);
3072 }
3073
3074 if (flags & WQ_RESCUER) {
3075 struct worker *rescuer;
3076
3077 if (!alloc_mayday_mask(&wq->mayday_mask, GFP_KERNEL))
3078 goto err;
3079
3080 wq->rescuer = rescuer = alloc_worker();
3081 if (!rescuer)
3082 goto err;
3083
3084 rescuer->task = kthread_create(rescuer_thread, wq, "%s",
3085 wq->name);
3086 if (IS_ERR(rescuer->task))
3087 goto err;
3088
3089 rescuer->task->flags |= PF_THREAD_BOUND;
3090 wake_up_process(rescuer->task);
3091 }
3092
3093 /*
3094 * workqueue_lock protects global freeze state and workqueues
3095 * list. Grab it, set max_active accordingly and add the new
3096 * workqueue to workqueues list.
3097 */
3098 spin_lock(&workqueue_lock);
3099
3100 if (workqueue_freezing && wq->flags & WQ_FREEZABLE)
3101 for_each_cwq_cpu(cpu, wq)
3102 get_cwq(cpu, wq)->max_active = 0;
3103
3104 list_add(&wq->list, &workqueues);
3105
3106 spin_unlock(&workqueue_lock);
3107
3108 return wq;
3109 err:
3110 if (wq) {
3111 free_cwqs(wq);
3112 free_mayday_mask(wq->mayday_mask);
3113 kfree(wq->rescuer);
3114 kfree(wq);
3115 }
3116 return NULL;
3117 }
3118 EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
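
/*
 * Illustrative sketch, not part of the workqueue implementation: creating
 * and tearing down a workqueue via the alloc_workqueue() wrapper around
 * __alloc_workqueue_key().  WQ_MEM_RECLAIM gives it a rescuer as described
 * above; a max_active of 0 means WQ_DFL_ACTIVE.  ex10_* names are
 * hypothetical.
 */
static struct workqueue_struct *ex10_wq;
static void ex10_fn(struct work_struct *work)
{
}
static DECLARE_WORK(ex10_work, ex10_fn);

static int __maybe_unused ex10_init(void)
{
	ex10_wq = alloc_workqueue("ex10_wq", WQ_MEM_RECLAIM, 0);
	if (!ex10_wq)
		return -ENOMEM;
	queue_work(ex10_wq, &ex10_work);
	return 0;
}

static void __maybe_unused ex10_exit(void)
{
	destroy_workqueue(ex10_wq);	/* drains ex10_wq first */
}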
3119
3120 /**
3121 * destroy_workqueue - safely terminate a workqueue
3122 * @wq: target workqueue
3123 *
3124 * Safely destroy a workqueue. All work currently pending will be done first.
3125 */
3126 void destroy_workqueue(struct workqueue_struct *wq)
3127 {
3128 unsigned int cpu;
3129
3130 /* drain it before proceeding with destruction */
3131 drain_workqueue(wq);
3132
3133 /*
3134 * wq list is used to freeze wq, remove from list after
3135 * flushing is complete in case freeze races us.
3136 */
3137 spin_lock(&workqueue_lock);
3138 list_del(&wq->list);
3139 spin_unlock(&workqueue_lock);
3140
3141 /* sanity check */
3142 for_each_cwq_cpu(cpu, wq) {
3143 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
3144 int i;
3145
3146 for (i = 0; i < WORK_NR_COLORS; i++)
3147 BUG_ON(cwq->nr_in_flight[i]);
3148 BUG_ON(cwq->nr_active);
3149 BUG_ON(!list_empty(&cwq->delayed_works));
3150 }
3151
3152 if (wq->flags & WQ_RESCUER) {
3153 kthread_stop(wq->rescuer->task);
3154 free_mayday_mask(wq->mayday_mask);
3155 kfree(wq->rescuer);
3156 }
3157
3158 free_cwqs(wq);
3159 kfree(wq);
3160 }
3161 EXPORT_SYMBOL_GPL(destroy_workqueue);
3162
3163 /**
3164 * workqueue_set_max_active - adjust max_active of a workqueue
3165 * @wq: target workqueue
3166 * @max_active: new max_active value.
3167 *
3168 * Set max_active of @wq to @max_active.
3169 *
3170 * CONTEXT:
3171 * Don't call from IRQ context.
3172 */
3173 void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
3174 {
3175 unsigned int cpu;
3176
3177 max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
3178
3179 spin_lock(&workqueue_lock);
3180
3181 wq->saved_max_active = max_active;
3182
3183 for_each_cwq_cpu(cpu, wq) {
3184 struct global_cwq *gcwq = get_gcwq(cpu);
3185
3186 spin_lock_irq(&gcwq->lock);
3187
3188 if (!(wq->flags & WQ_FREEZABLE) ||
3189 !(gcwq->flags & GCWQ_FREEZING))
3190 get_cwq(gcwq->cpu, wq)->max_active = max_active;
3191
3192 spin_unlock_irq(&gcwq->lock);
3193 }
3194
3195 spin_unlock(&workqueue_lock);
3196 }
3197 EXPORT_SYMBOL_GPL(workqueue_set_max_active);
3198
3199 /**
3200 * workqueue_congested - test whether a workqueue is congested
3201 * @cpu: CPU in question
3202 * @wq: target workqueue
3203 *
3204 * Test whether @wq's cpu workqueue for @cpu is congested. There is
3205 * no synchronization around this function and the test result is
3206 * unreliable and only useful as advisory hints or for debugging.
3207 *
3208 * RETURNS:
3209 * %true if congested, %false otherwise.
3210 */
3211 bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq)
3212 {
3213 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
3214
3215 return !list_empty(&cwq->delayed_works);
3216 }
3217 EXPORT_SYMBOL_GPL(workqueue_congested);
3218
3219 /**
3220 * work_cpu - return the last known associated cpu for @work
3221 * @work: the work of interest
3222 *
3223 * RETURNS:
3224 * CPU number if @work was ever queued. WORK_CPU_NONE otherwise.
3225 */
3226 unsigned int work_cpu(struct work_struct *work)
3227 {
3228 struct global_cwq *gcwq = get_work_gcwq(work);
3229
3230 return gcwq ? gcwq->cpu : WORK_CPU_NONE;
3231 }
3232 EXPORT_SYMBOL_GPL(work_cpu);
3233
3234 /**
3235 * work_busy - test whether a work is currently pending or running
3236 * @work: the work to be tested
3237 *
3238 * Test whether @work is currently pending or running. There is no
3239 * synchronization around this function and the test result is
3240 * unreliable and only useful as advisory hints or for debugging.
3241 * Especially for reentrant wqs, the pending state might hide the
3242 * running state.
3243 *
3244 * RETURNS:
3245 * OR'd bitmask of WORK_BUSY_* bits.
3246 */
3247 unsigned int work_busy(struct work_struct *work)
3248 {
3249 struct global_cwq *gcwq = get_work_gcwq(work);
3250 unsigned long flags;
3251 unsigned int ret = 0;
3252
3253 if (!gcwq)
3254 return false;
3255
3256 spin_lock_irqsave(&gcwq->lock, flags);
3257
3258 if (work_pending(work))
3259 ret |= WORK_BUSY_PENDING;
3260 if (find_worker_executing_work(gcwq, work))
3261 ret |= WORK_BUSY_RUNNING;
3262
3263 spin_unlock_irqrestore(&gcwq->lock, flags);
3264
3265 return ret;
3266 }
3267 EXPORT_SYMBOL_GPL(work_busy);
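
/*
 * Illustrative sketch, not part of the workqueue implementation: using
 * work_busy() purely as a debugging hint -- the answer may already be
 * stale by the time it is printed.  ex11_report() is hypothetical.
 */
static void __maybe_unused ex11_report(struct work_struct *work)
{
	unsigned int busy = work_busy(work);

	pr_debug("work %p:%s%s\n", work,
		 busy & WORK_BUSY_PENDING ? " pending" : "",
		 busy & WORK_BUSY_RUNNING ? " running" : "");
}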
3268
3269 /*
3270 * CPU hotplug.
3271 *
3272 * There are two challenges in supporting CPU hotplug. Firstly, there
3273 * are a lot of assumptions on strong associations among work, cwq and
3274 * gcwq which make migrating pending and scheduled works very
3275 * difficult to implement without impacting hot paths. Secondly,
3276 * gcwqs serve a mix of short, long and very long running works making
3277 * blocked draining impractical.
3278 *
3279 * This is solved by allowing a gcwq to be detached from CPU, running
3280 * it with unbound (rogue) workers and allowing it to be reattached
3281 * later if the cpu comes back online. A separate thread is created
3282 * to govern a gcwq in such state and is called the trustee of the
3283 * gcwq.
3284 *
3285 * Trustee states and their descriptions.
3286 *
3287 * START Command state used on startup. On CPU_DOWN_PREPARE, a
3288 * new trustee is started with this state.
3289 *
3290 * IN_CHARGE Once started, trustee will enter this state after
3291 * assuming the manager role and making all existing
3292 * workers rogue. DOWN_PREPARE waits for trustee to
3293 * enter this state. After reaching IN_CHARGE, trustee
3294 * tries to execute the pending worklist until it's empty
3295 * and the state is set to BUTCHER, or the state is set
3296 * to RELEASE.
3297 *
3298 * BUTCHER Command state which is set by the cpu callback after
3299 * the cpu has gone down. Once this state is set, the trustee
3300 * knows that there will be no new works on the worklist
3301 * and once the worklist is empty it can proceed to
3302 * killing idle workers.
3303 *
3304 * RELEASE Command state which is set by the cpu callback if the
3305 * cpu down has been canceled or it has come online
3306 * again. After recognizing this state, trustee stops
3307 * trying to drain or butcher and clears ROGUE, rebinds
3308 * all remaining workers back to the cpu and releases
3309 * manager role.
3310 *
3311 * DONE Trustee will enter this state after BUTCHER or RELEASE
3312 * is complete.
3313 *
3314 * trustee CPU draining
3315 * took over down complete
3316 * START -----------> IN_CHARGE -----------> BUTCHER -----------> DONE
3317 * | | ^
3318 * | CPU is back online v return workers |
3319 * ----------------> RELEASE --------------
3320 */
3321
3322 /**
3323 * trustee_wait_event_timeout - timed event wait for trustee
3324 * @cond: condition to wait for
3325 * @timeout: timeout in jiffies
3326 *
3327 * wait_event_timeout() for trustee to use. Handles locking and
3328 * checks for RELEASE request.
3329 *
3330 * CONTEXT:
3331 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
3332 * multiple times. To be used by trustee.
3333 *
3334 * RETURNS:
3335 * Positive indicating left time if @cond is satisfied, 0 if timed
3336 * out, -1 if canceled.
3337 */
3338 #define trustee_wait_event_timeout(cond, timeout) ({ \
3339 long __ret = (timeout); \
3340 while (!((cond) || (gcwq->trustee_state == TRUSTEE_RELEASE)) && \
3341 __ret) { \
3342 spin_unlock_irq(&gcwq->lock); \
3343 __wait_event_timeout(gcwq->trustee_wait, (cond) || \
3344 (gcwq->trustee_state == TRUSTEE_RELEASE), \
3345 __ret); \
3346 spin_lock_irq(&gcwq->lock); \
3347 } \
3348 gcwq->trustee_state == TRUSTEE_RELEASE ? -1 : (__ret); \
3349 })
3350
3351 /**
3352 * trustee_wait_event - event wait for trustee
3353 * @cond: condition to wait for
3354 *
3355 * wait_event() for trustee to use. Automatically handles locking and
3356 * checks for CANCEL request.
3357 *
3358 * CONTEXT:
3359 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
3360 * multiple times. To be used by trustee.
3361 *
3362 * RETURNS:
3363 * 0 if @cond is satisfied, -1 if canceled.
3364 */
3365 #define trustee_wait_event(cond) ({ \
3366 long __ret1; \
3367 __ret1 = trustee_wait_event_timeout(cond, MAX_SCHEDULE_TIMEOUT);\
3368 __ret1 < 0 ? -1 : 0; \
3369 })
3370
3371 static int __cpuinit trustee_thread(void *__gcwq)
3372 {
3373 struct global_cwq *gcwq = __gcwq;
3374 struct worker *worker;
3375 struct work_struct *work;
3376 struct hlist_node *pos;
3377 long rc;
3378 int i;
3379
3380 BUG_ON(gcwq->cpu != smp_processor_id());
3381
3382 spin_lock_irq(&gcwq->lock);
3383 /*
3384 * Claim the manager position and make all workers rogue.
3385 * Trustee must be bound to the target cpu and can't be
3386 * cancelled.
3387 */
3388 BUG_ON(gcwq->cpu != smp_processor_id());
3389 rc = trustee_wait_event(!(gcwq->flags & GCWQ_MANAGING_WORKERS));
3390 BUG_ON(rc < 0);
3391
3392 gcwq->flags |= GCWQ_MANAGING_WORKERS;
3393
3394 list_for_each_entry(worker, &gcwq->idle_list, entry)
3395 worker->flags |= WORKER_ROGUE;
3396
3397 for_each_busy_worker(worker, i, pos, gcwq)
3398 worker->flags |= WORKER_ROGUE;
3399
3400 /*
3401 * Call schedule() so that we cross rq->lock and thus can
3402 * guarantee sched callbacks see the rogue flag. This is
3403 * necessary as scheduler callbacks may be invoked from other
3404 * cpus.
3405 */
3406 spin_unlock_irq(&gcwq->lock);
3407 schedule();
3408 spin_lock_irq(&gcwq->lock);
3409
3410 /*
3411 * Sched callbacks are disabled now. Zap nr_running. After
3412 * this, nr_running stays zero and need_more_worker() and
3413 * keep_working() are always true as long as the worklist is
3414 * not empty.
3415 */
3416 atomic_set(get_gcwq_nr_running(gcwq->cpu), 0);
3417
3418 spin_unlock_irq(&gcwq->lock);
3419 del_timer_sync(&gcwq->idle_timer);
3420 spin_lock_irq(&gcwq->lock);
3421
3422 /*
3423 * We're now in charge. Notify and proceed to drain. We need
3424 * to keep the gcwq running during the whole CPU down
3425 * procedure as other cpu hotunplug callbacks may need to
3426 * flush currently running tasks.
3427 */
3428 gcwq->trustee_state = TRUSTEE_IN_CHARGE;
3429 wake_up_all(&gcwq->trustee_wait);
3430
3431 /*
3432 * The original cpu is in the process of dying and may go away
3433 * anytime now. When that happens, we and all workers would
3434 * be migrated to other cpus. Try draining any remaining work. We
3435 * want to get it over with ASAP - spam rescuers, wake up as
3436 * many idlers as necessary and create new ones till the
3437 * worklist is empty. Note that if the gcwq is frozen, there
3438 * may be frozen works in freezable cwqs. Don't declare
3439 * completion while frozen.
3440 */
3441 while (gcwq->nr_workers != gcwq->nr_idle ||
3442 gcwq->flags & GCWQ_FREEZING ||
3443 gcwq->trustee_state == TRUSTEE_IN_CHARGE) {
3444 int nr_works = 0;
3445
3446 list_for_each_entry(work, &gcwq->worklist, entry) {
3447 send_mayday(work);
3448 nr_works++;
3449 }
3450
3451 list_for_each_entry(worker, &gcwq->idle_list, entry) {
3452 if (!nr_works--)
3453 break;
3454 wake_up_process(worker->task);
3455 }
3456
3457 if (need_to_create_worker(gcwq)) {
3458 spin_unlock_irq(&gcwq->lock);
3459 worker = create_worker(gcwq, false);
3460 spin_lock_irq(&gcwq->lock);
3461 if (worker) {
3462 worker->flags |= WORKER_ROGUE;
3463 start_worker(worker);
3464 }
3465 }
3466
3467 /* give a breather */
3468 if (trustee_wait_event_timeout(false, TRUSTEE_COOLDOWN) < 0)
3469 break;
3470 }
3471
3472 /*
3473 * Either all works have been scheduled and cpu is down, or
3474 * cpu down has already been canceled. Wait for and butcher
3475 * all workers till we're canceled.
3476 */
3477 do {
3478 rc = trustee_wait_event(!list_empty(&gcwq->idle_list));
3479 while (!list_empty(&gcwq->idle_list))
3480 destroy_worker(list_first_entry(&gcwq->idle_list,
3481 struct worker, entry));
3482 } while (gcwq->nr_workers && rc >= 0);
3483
3484 /*
3485 * At this point, either draining has completed and no worker
3486 * is left, or cpu down has been canceled or the cpu is being
3487 * brought back up. There shouldn't be any idle one left.
3488 * Tell the remaining busy ones to rebind once they finish their
3489 * currently scheduled works by scheduling the rebind_work.
3490 */
3491 WARN_ON(!list_empty(&gcwq->idle_list));
3492
3493 for_each_busy_worker(worker, i, pos, gcwq) {
3494 struct work_struct *rebind_work = &worker->rebind_work;
3495 unsigned long worker_flags = worker->flags;
3496
3497 /*
3498 * Rebind_work may race with future cpu hotplug
3499 * operations. Use a separate flag to mark that
3500 * rebinding is scheduled. The morphing should
3501 * be atomic.
3502 */
3503 worker_flags |= WORKER_REBIND;
3504 worker_flags &= ~WORKER_ROGUE;
3505 ACCESS_ONCE(worker->flags) = worker_flags;
3506
3507 /* queue rebind_work, wq doesn't matter, use the default one */
3508 if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
3509 work_data_bits(rebind_work)))
3510 continue;
3511
3512 debug_work_activate(rebind_work);
3513 insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
3514 worker->scheduled.next,
3515 work_color_to_flags(WORK_NO_COLOR));
3516 }
3517
3518 /* relinquish manager role */
3519 gcwq->flags &= ~GCWQ_MANAGING_WORKERS;
3520
3521 /* notify completion */
3522 gcwq->trustee = NULL;
3523 gcwq->trustee_state = TRUSTEE_DONE;
3524 wake_up_all(&gcwq->trustee_wait);
3525 spin_unlock_irq(&gcwq->lock);
3526 return 0;
3527 }
3528
3529 /**
3530 * wait_trustee_state - wait for trustee to enter the specified state
3531 * @gcwq: gcwq the trustee of interest belongs to
3532 * @state: target state to wait for
3533 *
3534 * Wait for the trustee to reach @state. DONE is already matched.
3535 *
3536 * CONTEXT:
3537 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
3538 * multiple times. To be used by cpu_callback.
3539 */
3540 static void __cpuinit wait_trustee_state(struct global_cwq *gcwq, int state)
3541 __releases(&gcwq->lock)
3542 __acquires(&gcwq->lock)
3543 {
3544 if (!(gcwq->trustee_state == state ||
3545 gcwq->trustee_state == TRUSTEE_DONE)) {
3546 spin_unlock_irq(&gcwq->lock);
3547 __wait_event(gcwq->trustee_wait,
3548 gcwq->trustee_state == state ||
3549 gcwq->trustee_state == TRUSTEE_DONE);
3550 spin_lock_irq(&gcwq->lock);
3551 }
3552 }
3553
3554 static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
3555 unsigned long action,
3556 void *hcpu)
3557 {
3558 unsigned int cpu = (unsigned long)hcpu;
3559 struct global_cwq *gcwq = get_gcwq(cpu);
3560 struct task_struct *new_trustee = NULL;
3561 struct worker *uninitialized_var(new_worker);
3562 unsigned long flags;
3563
3564 action &= ~CPU_TASKS_FROZEN;
3565
3566 switch (action) {
3567 case CPU_DOWN_PREPARE:
3568 new_trustee = kthread_create(trustee_thread, gcwq,
3569 "workqueue_trustee/%d\n", cpu);
3570 if (IS_ERR(new_trustee))
3571 return notifier_from_errno(PTR_ERR(new_trustee));
3572 kthread_bind(new_trustee, cpu);
3573 /* fall through */
3574 case CPU_UP_PREPARE:
3575 BUG_ON(gcwq->first_idle);
3576 new_worker = create_worker(gcwq, false);
3577 if (!new_worker) {
3578 if (new_trustee)
3579 kthread_stop(new_trustee);
3580 return NOTIFY_BAD;
3581 }
3582 }
3583
3584 /* some are called w/ irq disabled, don't disturb irq status */
3585 spin_lock_irqsave(&gcwq->lock, flags);
3586
3587 switch (action) {
3588 case CPU_DOWN_PREPARE:
3589 /* initialize trustee and tell it to acquire the gcwq */
3590 BUG_ON(gcwq->trustee || gcwq->trustee_state != TRUSTEE_DONE);
3591 gcwq->trustee = new_trustee;
3592 gcwq->trustee_state = TRUSTEE_START;
3593 wake_up_process(gcwq->trustee);
3594 wait_trustee_state(gcwq, TRUSTEE_IN_CHARGE);
3595 /* fall through */
3596 case CPU_UP_PREPARE:
3597 BUG_ON(gcwq->first_idle);
3598 gcwq->first_idle = new_worker;
3599 break;
3600
3601 case CPU_DYING:
3602 /*
3603 * Before this, the trustee and all workers except for
3604 * the ones which are still executing works from
3605 * before the last CPU down must be on the cpu. After
3606 * this, they'll all be diasporas.
3607 */
3608 gcwq->flags |= GCWQ_DISASSOCIATED;
3609 break;
3610
3611 case CPU_POST_DEAD:
3612 gcwq->trustee_state = TRUSTEE_BUTCHER;
3613 /* fall through */
3614 case CPU_UP_CANCELED:
3615 destroy_worker(gcwq->first_idle);
3616 gcwq->first_idle = NULL;
3617 break;
3618
3619 case CPU_DOWN_FAILED:
3620 case CPU_ONLINE:
3621 gcwq->flags &= ~GCWQ_DISASSOCIATED;
3622 if (gcwq->trustee_state != TRUSTEE_DONE) {
3623 gcwq->trustee_state = TRUSTEE_RELEASE;
3624 wake_up_process(gcwq->trustee);
3625 wait_trustee_state(gcwq, TRUSTEE_DONE);
3626 }
3627
3628 /*
3629 * Trustee is done and there might be no worker left.
3630 * Put the first_idle in and request a real manager to
3631 * take a look.
3632 */
3633 spin_unlock_irq(&gcwq->lock);
3634 kthread_bind(gcwq->first_idle->task, cpu);
3635 spin_lock_irq(&gcwq->lock);
3636 gcwq->flags |= GCWQ_MANAGE_WORKERS;
3637 start_worker(gcwq->first_idle);
3638 gcwq->first_idle = NULL;
3639 break;
3640 }
3641
3642 spin_unlock_irqrestore(&gcwq->lock, flags);
3643
3644 return notifier_from_errno(0);
3645 }
3646
3647 /*
3648 * Workqueues should be brought up before normal priority CPU notifiers.
3649 * This will be registered as a high priority CPU notifier.
3650 */
3651 static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb,
3652 unsigned long action,
3653 void *hcpu)
3654 {
3655 switch (action & ~CPU_TASKS_FROZEN) {
3656 case CPU_UP_PREPARE:
3657 case CPU_UP_CANCELED:
3658 case CPU_DOWN_FAILED:
3659 case CPU_ONLINE:
3660 return workqueue_cpu_callback(nfb, action, hcpu);
3661 }
3662 return NOTIFY_OK;
3663 }
3664
3665 /*
3666 * Workqueues should be brought down after normal priority CPU notifiers.
3667 * This will be registered as a low priority CPU notifier.
3668 */
3669 static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb,
3670 unsigned long action,
3671 void *hcpu)
3672 {
3673 switch (action & ~CPU_TASKS_FROZEN) {
3674 case CPU_DOWN_PREPARE:
3675 case CPU_DYING:
3676 case CPU_POST_DEAD:
3677 return workqueue_cpu_callback(nfb, action, hcpu);
3678 }
3679 return NOTIFY_OK;
3680 }
3681
3682 #ifdef CONFIG_SMP
3683
3684 struct work_for_cpu {
3685 struct work_struct work;
3686 long (*fn)(void *);
3687 void *arg;
3688 long ret;
3689 };
3690
3691 static void work_for_cpu_fn(struct work_struct *work)
3692 {
3693 struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
3694
3695 wfc->ret = wfc->fn(wfc->arg);
3696 }
3697
3698 /**
3699 * work_on_cpu - run a function in process context on a particular cpu
3700 * @cpu: the cpu to run on
3701 * @fn: the function to run
3702 * @arg: the function arg
3703 *
3704 * This will return the value @fn returns.
3705 * It is up to the caller to ensure that the cpu doesn't go offline.
3706 * The caller must not hold any locks which would prevent @fn from completing.
3707 */
3708 long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
3709 {
3710 struct work_for_cpu wfc = { .fn = fn, .arg = arg };
3711
3712 INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
3713 schedule_work_on(cpu, &wfc.work);
3714 flush_work(&wfc.work);
3715 return wfc.ret;
3716 }
3717 EXPORT_SYMBOL_GPL(work_on_cpu);
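/*
 * Illustrative sketch, not part of the original file: a minimal caller of
 * work_on_cpu().  cpu_of_worker() and example_query() are hypothetical
 * helpers; the point is only that @fn runs in process context on the
 * requested cpu and its return value is handed back to the caller.
 */
#if 0
static long cpu_of_worker(void *unused)
{
	/* executes in process context on the cpu passed to work_on_cpu() */
	return raw_smp_processor_id();
}

static long example_query(unsigned int cpu)
{
	/*
	 * Sleeps until the work item has run on @cpu and returns its
	 * result.  The caller must keep @cpu online and must not hold
	 * locks that would prevent cpu_of_worker() from completing.
	 */
	return work_on_cpu(cpu, cpu_of_worker, NULL);
}
#endif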
3718 #endif /* CONFIG_SMP */
3719
3720 #ifdef CONFIG_FREEZER
3721
3722 /**
3723 * freeze_workqueues_begin - begin freezing workqueues
3724 *
3725 * Start freezing workqueues. After this function returns, all freezable
3726 * workqueues will queue new works to their frozen_works list instead of
3727 * gcwq->worklist.
3728 *
3729 * CONTEXT:
3730 * Grabs and releases workqueue_lock and gcwq->lock's.
3731 */
3732 void freeze_workqueues_begin(void)
3733 {
3734 unsigned int cpu;
3735
3736 spin_lock(&workqueue_lock);
3737
3738 BUG_ON(workqueue_freezing);
3739 workqueue_freezing = true;
3740
3741 for_each_gcwq_cpu(cpu) {
3742 struct global_cwq *gcwq = get_gcwq(cpu);
3743 struct workqueue_struct *wq;
3744
3745 spin_lock_irq(&gcwq->lock);
3746
3747 BUG_ON(gcwq->flags & GCWQ_FREEZING);
3748 gcwq->flags |= GCWQ_FREEZING;
3749
3750 list_for_each_entry(wq, &workqueues, list) {
3751 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
3752
3753 if (cwq && wq->flags & WQ_FREEZABLE)
3754 cwq->max_active = 0;
3755 }
3756
3757 spin_unlock_irq(&gcwq->lock);
3758 }
3759
3760 spin_unlock(&workqueue_lock);
3761 }
3762
3763 /**
3764 * freeze_workqueues_busy - are freezable workqueues still busy?
3765 *
3766 * Check whether freezing is complete. This function must be called
3767 * between freeze_workqueues_begin() and thaw_workqueues().
3768 *
3769 * CONTEXT:
3770 * Grabs and releases workqueue_lock.
3771 *
3772 * RETURNS:
3773 * %true if some freezable workqueues are still busy. %false if freezing
3774 * is complete.
3775 */
3776 bool freeze_workqueues_busy(void)
3777 {
3778 unsigned int cpu;
3779 bool busy = false;
3780
3781 spin_lock(&workqueue_lock);
3782
3783 BUG_ON(!workqueue_freezing);
3784
3785 for_each_gcwq_cpu(cpu) {
3786 struct workqueue_struct *wq;
3787 /*
3788 * nr_active is monotonically decreasing. It's safe
3789 * to peek without lock.
3790 */
3791 list_for_each_entry(wq, &workqueues, list) {
3792 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
3793
3794 if (!cwq || !(wq->flags & WQ_FREEZABLE))
3795 continue;
3796
3797 BUG_ON(cwq->nr_active < 0);
3798 if (cwq->nr_active) {
3799 busy = true;
3800 goto out_unlock;
3801 }
3802 }
3803 }
3804 out_unlock:
3805 spin_unlock(&workqueue_lock);
3806 return busy;
3807 }
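/*
 * Illustrative sketch, not part of the original file: the intended
 * freeze/thaw protocol as seen from a freezer-like caller.
 * example_freeze_workqueues(), its timeout and the polling interval are
 * made up; the real user is the system suspend/hibernation freezer.
 */
#if 0
static int example_freeze_workqueues(unsigned long timeout_jiffies)
{
	unsigned long deadline = jiffies + timeout_jiffies;

	freeze_workqueues_begin();

	while (freeze_workqueues_busy()) {
		if (time_after(jiffies, deadline)) {
			/* give up and restore normal queueing */
			thaw_workqueues();
			return -EBUSY;
		}
		msleep(10);
	}

	/*
	 * All freezable workqueues are now quiescent.  Call
	 * thaw_workqueues() when normal operation should resume.
	 */
	return 0;
}
#endif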
3808
3809 /**
3810 * thaw_workqueues - thaw workqueues
3811 *
3812 * Thaw workqueues. Normal queueing is restored and all collected
3813 * frozen works are transferred to their respective gcwq worklists.
3814 *
3815 * CONTEXT:
3816 * Grabs and releases workqueue_lock and gcwq->lock's.
3817 */
3818 void thaw_workqueues(void)
3819 {
3820 unsigned int cpu;
3821
3822 spin_lock(&workqueue_lock);
3823
3824 if (!workqueue_freezing)
3825 goto out_unlock;
3826
3827 for_each_gcwq_cpu(cpu) {
3828 struct global_cwq *gcwq = get_gcwq(cpu);
3829 struct workqueue_struct *wq;
3830
3831 spin_lock_irq(&gcwq->lock);
3832
3833 BUG_ON(!(gcwq->flags & GCWQ_FREEZING));
3834 gcwq->flags &= ~GCWQ_FREEZING;
3835
3836 list_for_each_entry(wq, &workqueues, list) {
3837 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
3838
3839 if (!cwq || !(wq->flags & WQ_FREEZABLE))
3840 continue;
3841
3842 /* restore max_active and repopulate worklist */
3843 cwq->max_active = wq->saved_max_active;
3844
3845 while (!list_empty(&cwq->delayed_works) &&
3846 cwq->nr_active < cwq->max_active)
3847 cwq_activate_first_delayed(cwq);
3848 }
3849
3850 wake_up_worker(gcwq);
3851
3852 spin_unlock_irq(&gcwq->lock);
3853 }
3854
3855 workqueue_freezing = false;
3856 out_unlock:
3857 spin_unlock(&workqueue_lock);
3858 }
3859 #endif /* CONFIG_FREEZER */
3860
3861 static int __init init_workqueues(void)
3862 {
3863 unsigned int cpu;
3864 int i;
3865
3866 cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
3867 cpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
3868
3869 /* initialize gcwqs */
3870 for_each_gcwq_cpu(cpu) {
3871 struct global_cwq *gcwq = get_gcwq(cpu);
3872
3873 spin_lock_init(&gcwq->lock);
3874 INIT_LIST_HEAD(&gcwq->worklist);
3875 gcwq->cpu = cpu;
3876 gcwq->flags |= GCWQ_DISASSOCIATED;
3877
3878 INIT_LIST_HEAD(&gcwq->idle_list);
3879 for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
3880 INIT_HLIST_HEAD(&gcwq->busy_hash[i]);
3881
3882 init_timer_deferrable(&gcwq->idle_timer);
3883 gcwq->idle_timer.function = idle_worker_timeout;
3884 gcwq->idle_timer.data = (unsigned long)gcwq;
3885
3886 setup_timer(&gcwq->mayday_timer, gcwq_mayday_timeout,
3887 (unsigned long)gcwq);
3888
3889 ida_init(&gcwq->worker_ida);
3890
3891 gcwq->trustee_state = TRUSTEE_DONE;
3892 init_waitqueue_head(&gcwq->trustee_wait);
3893 }
3894
3895 /* create the initial worker */
3896 for_each_online_gcwq_cpu(cpu) {
3897 struct global_cwq *gcwq = get_gcwq(cpu);
3898 struct worker *worker;
3899
3900 if (cpu != WORK_CPU_UNBOUND)
3901 gcwq->flags &= ~GCWQ_DISASSOCIATED;
3902 worker = create_worker(gcwq, true);
3903 BUG_ON(!worker);
3904 spin_lock_irq(&gcwq->lock);
3905 start_worker(worker);
3906 spin_unlock_irq(&gcwq->lock);
3907 }
3908
3909 system_wq = alloc_workqueue("events", 0, 0);
3910 system_long_wq = alloc_workqueue("events_long", 0, 0);
3911 system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0);
3912 system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
3913 WQ_UNBOUND_MAX_ACTIVE);
3914 system_freezable_wq = alloc_workqueue("events_freezable",
3915 WQ_FREEZABLE, 0);
3916 system_nrt_freezable_wq = alloc_workqueue("events_nrt_freezable",
3917 WQ_NON_REENTRANT | WQ_FREEZABLE, 0);
3918 BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq ||
3919 !system_unbound_wq || !system_freezable_wq ||
3920 !system_nrt_freezable_wq);
3921 return 0;
3922 }
3923 early_initcall(init_workqueues);
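/*
 * Illustrative sketch, not part of the original file: queueing a work item
 * on one of the system workqueues created above.  my_work_fn() and
 * kick_example() are hypothetical.
 */
#if 0
static void my_work_fn(struct work_struct *work)
{
	pr_info("ran in process context on the shared worker pool\n");
}

static DECLARE_WORK(my_work, my_work_fn);

static void kick_example(void)
{
	/*
	 * Short-running items belong on system_wq; long-running ones on
	 * system_long_wq, cpu-agnostic ones on system_unbound_wq.
	 */
	queue_work(system_wq, &my_work);
}
#endif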
3924