/*
 * kernel/sched_debug.c
 *
 * Print the CFS rbtree
 *
 * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/kallsyms.h>
#include <linux/utsname.h>

static DEFINE_SPINLOCK(sched_debug_lock);

/*
 * This allows printing both to /proc/sched_debug and
 * to the console
 */
#define SEQ_printf(m, x...)			\
 do {						\
	if (m)					\
		seq_printf(m, x);		\
	else					\
		printk(x);			\
 } while (0)
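
/*
 * Illustrative use: SEQ_printf(m, "cpu#%d\n", cpu) lands in the seq_file
 * on the /proc/sched_debug path (m != NULL) and goes to the kernel log
 * via printk() on the sysrq path (m == NULL).
 */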

/*
 * Ease the printing of nsec fields:
 */
static long long nsec_high(unsigned long long nsec)
{
	if ((long long)nsec < 0) {
		nsec = -nsec;
		do_div(nsec, 1000000);
		return -nsec;
	}
	do_div(nsec, 1000000);

	return nsec;
}

static unsigned long nsec_low(unsigned long long nsec)
{
	if ((long long)nsec < 0)
		nsec = -nsec;

	return do_div(nsec, 1000000);
}

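/*
 * SPLIT_NS() expands to the two arguments consumed by a "%Ld.%06ld"
 * format: the millisecond quotient and the nanosecond remainder.
 */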
#define SPLIT_NS(x) nsec_high(x), nsec_low(x)

#ifdef CONFIG_FAIR_GROUP_SCHED
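/*
 * Dump a task group's per-CPU scheduling entity: clocks, vruntime and
 * (with CONFIG_SCHEDSTATS) the accumulated wait/sleep/block statistics.
 */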
static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
{
	struct sched_entity *se = tg->se[cpu];
	if (!se)
		return;

#define P(F) \
	SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)F)
#define PN(F) \
	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))

	PN(se->exec_start);
	PN(se->vruntime);
	PN(se->sum_exec_runtime);
#ifdef CONFIG_SCHEDSTATS
	PN(se->statistics.wait_start);
	PN(se->statistics.sleep_start);
	PN(se->statistics.block_start);
	PN(se->statistics.sleep_max);
	PN(se->statistics.block_max);
	PN(se->statistics.exec_max);
	PN(se->statistics.slice_max);
	PN(se->statistics.wait_max);
	PN(se->statistics.wait_sum);
	P(se->statistics.wait_count);
#endif
	P(se->load.weight);
#undef PN
#undef P
}
#endif

#ifdef CONFIG_CGROUP_SCHED
static char group_path[PATH_MAX];

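/*
 * Return the cgroup (or autogroup) path of a task group. Uses the
 * shared group_path buffer; callers are serialized by sched_debug_lock
 * in print_cpu().
 */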
static char *task_group_path(struct task_group *tg)
{
	if (autogroup_path(tg, group_path, PATH_MAX))
		return group_path;

	/*
	 * May be NULL if the underlying cgroup isn't fully-created yet
	 */
	if (!tg->css.cgroup) {
		group_path[0] = '\0';
		return group_path;
	}
	cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
	return group_path;
}
#endif

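/*
 * Print one row of the runnable-tasks table: an "R" marker for the task
 * currently running on this runqueue, then name, PID, vruntime,
 * context-switch count, priority and (optionally) schedstats fields.
 */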
static void
print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
{
	if (rq->curr == p)
		SEQ_printf(m, "R");
	else
		SEQ_printf(m, " ");

	SEQ_printf(m, "%15s %5d %9Ld.%06ld %9Ld %5d ",
		p->comm, p->pid,
		SPLIT_NS(p->se.vruntime),
		(long long)(p->nvcsw + p->nivcsw),
		p->prio);
#ifdef CONFIG_SCHEDSTATS
	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
		SPLIT_NS(p->se.vruntime),
		SPLIT_NS(p->se.sum_exec_runtime),
		SPLIT_NS(p->se.statistics.sum_sleep_runtime));
#else
	SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld",
		0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
#endif
#ifdef CONFIG_CGROUP_SCHED
	SEQ_printf(m, " %s", task_group_path(task_group(p)));
#endif

	SEQ_printf(m, "\n");
}

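/*
 * Walk every thread in the system under tasklist_lock and print the
 * ones that are queued on this CPU's runqueue.
 */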
static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
{
	struct task_struct *g, *p;
	unsigned long flags;

	SEQ_printf(m,
	"\nrunnable tasks:\n"
	"            task   PID         tree-key  switches  prio"
	"     exec-runtime         sum-exec        sum-sleep\n"
	"------------------------------------------------------"
	"----------------------------------------------------\n");

	read_lock_irqsave(&tasklist_lock, flags);

	do_each_thread(g, p) {
		if (!p->se.on_rq || task_cpu(p) != rq_cpu)
			continue;

		print_task(m, rq, p);
	} while_each_thread(g, p);

	read_unlock_irqrestore(&tasklist_lock, flags);
}

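/*
 * Print the state of one CFS runqueue: the leftmost (MIN_vruntime) and
 * rightmost (max_vruntime) entity keys, min_vruntime, the spread between
 * them, the spread against CPU0 (spread0), and load/nr_running counters.
 */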
void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
{
	s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
		spread, rq0_min_vruntime, spread0;
	struct rq *rq = cpu_rq(cpu);
	struct sched_entity *last;
	unsigned long flags;

#ifdef CONFIG_FAIR_GROUP_SCHED
	SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg));
#else
	SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
#endif
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
			SPLIT_NS(cfs_rq->exec_clock));

	raw_spin_lock_irqsave(&rq->lock, flags);
	if (cfs_rq->rb_leftmost)
		MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
	last = __pick_last_entity(cfs_rq);
	if (last)
		max_vruntime = last->vruntime;
	min_vruntime = cfs_rq->min_vruntime;
	rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
	raw_spin_unlock_irqrestore(&rq->lock, flags);
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime",
			SPLIT_NS(MIN_vruntime));
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
			SPLIT_NS(min_vruntime));
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "max_vruntime",
			SPLIT_NS(max_vruntime));
	spread = max_vruntime - MIN_vruntime;
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread",
			SPLIT_NS(spread));
	spread0 = min_vruntime - rq0_min_vruntime;
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
			SPLIT_NS(spread0));
	SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
			cfs_rq->nr_spread_over);
	SEQ_printf(m, "  .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
	SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
#ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_SMP
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "load_avg",
			SPLIT_NS(cfs_rq->load_avg));
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "load_period",
			SPLIT_NS(cfs_rq->load_period));
	SEQ_printf(m, "  .%-30s: %ld\n", "load_contrib",
			cfs_rq->load_contribution);
	SEQ_printf(m, "  .%-30s: %d\n", "load_tg",
			atomic_read(&cfs_rq->tg->load_weight));
#endif

	print_cfs_group_stats(m, cpu, cfs_rq->tg);
#endif
}

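/*
 * Print the state of one RT runqueue: queued RT tasks, throttling
 * status and the consumed (rt_time) vs. allowed (rt_runtime) budget.
 */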
void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
{
#ifdef CONFIG_RT_GROUP_SCHED
	SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg));
#else
	SEQ_printf(m, "\nrt_rq[%d]:\n", cpu);
#endif

#define P(x) \
	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
#define PN(x) \
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))

	P(rt_nr_running);
	P(rt_throttled);
	PN(rt_time);
	PN(rt_runtime);

#undef PN
#undef P
}

extern __read_mostly int sched_clock_running;

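/*
 * Print everything known about one CPU: runqueue counters, the cpu_load[]
 * samples, schedstats, then the CFS/RT runqueues and the runnable tasks.
 * sched_debug_lock serializes access to the shared group_path buffer
 * used by task_group_path().
 */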
static void print_cpu(struct seq_file *m, int cpu)
{
	struct rq *rq = cpu_rq(cpu);
	unsigned long flags;

#ifdef CONFIG_X86
	{
		unsigned int freq = cpu_khz ? : 1;

		SEQ_printf(m, "\ncpu#%d, %u.%03u MHz\n",
			   cpu, freq / 1000, (freq % 1000));
	}
#else
	SEQ_printf(m, "\ncpu#%d\n", cpu);
#endif

#define P(x) \
	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rq->x))
#define PN(x) \
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))

	P(nr_running);
	SEQ_printf(m, "  .%-30s: %lu\n", "load",
		   rq->load.weight);
	P(nr_switches);
	P(nr_load_updates);
	P(nr_uninterruptible);
	PN(next_balance);
	P(curr->pid);
	PN(clock);
	P(cpu_load[0]);
	P(cpu_load[1]);
	P(cpu_load[2]);
	P(cpu_load[3]);
	P(cpu_load[4]);
#undef P
#undef PN

#ifdef CONFIG_SCHEDSTATS
#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
#define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);

	P(yld_count);

	P(sched_switch);
	P(sched_count);
	P(sched_goidle);
#ifdef CONFIG_SMP
	P64(avg_idle);
#endif

	P(ttwu_count);
	P(ttwu_local);

	SEQ_printf(m, "  .%-30s: %d\n", "bkl_count",
				rq->rq_sched_info.bkl_count);

#undef P
#undef P64
#endif
	spin_lock_irqsave(&sched_debug_lock, flags);
	print_cfs_stats(m, cpu);
	print_rt_stats(m, cpu);

	rcu_read_lock();
	print_rq(m, rq, cpu);
	rcu_read_unlock();
	spin_unlock_irqrestore(&sched_debug_lock, flags);
}

static const char *sched_tunable_scaling_names[] = {
	"none",
	"logarithmic",
	"linear"
};

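/*
 * Top-level seq_file show routine: print the version/clock header, the
 * scheduler sysctls and then every online CPU. Also reached with a NULL
 * seq_file via sysrq_sched_debug_show(), in which case SEQ_printf()
 * routes the output to printk().
 */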
static int sched_debug_show(struct seq_file *m, void *v)
{
	u64 ktime, sched_clk, cpu_clk;
	unsigned long flags;
	int cpu;

	local_irq_save(flags);
	ktime = ktime_to_ns(ktime_get());
	sched_clk = sched_clock();
	cpu_clk = local_clock();
	local_irq_restore(flags);

	SEQ_printf(m, "Sched Debug Version: v0.10, %s %.*s\n",
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);

#define P(x) \
	SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
#define PN(x) \
	SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
	PN(ktime);
	PN(sched_clk);
	PN(cpu_clk);
	P(jiffies);
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
	P(sched_clock_stable);
#endif
#undef PN
#undef P

	SEQ_printf(m, "\n");
	SEQ_printf(m, "sysctl_sched\n");

#define P(x) \
	SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
#define PN(x) \
	SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
	PN(sysctl_sched_latency);
	PN(sysctl_sched_min_granularity);
	PN(sysctl_sched_wakeup_granularity);
	P(sysctl_sched_child_runs_first);
	P(sysctl_sched_features);
#undef PN
#undef P

	SEQ_printf(m, "  .%-40s: %d (%s)\n", "sysctl_sched_tunable_scaling",
		sysctl_sched_tunable_scaling,
		sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);

	for_each_online_cpu(cpu)
		print_cpu(m, cpu);

	SEQ_printf(m, "\n");

	return 0;
}

static void sysrq_sched_debug_show(void)
{
	sched_debug_show(NULL, NULL);
}

static int sched_debug_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_debug_show, NULL);
}

static const struct file_operations sched_debug_fops = {
	.open		= sched_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init init_sched_debug_procfs(void)
{
	struct proc_dir_entry *pe;

	pe = proc_create("sched_debug", 0444, NULL, &sched_debug_fops);
	if (!pe)
		return -ENOMEM;
	return 0;
}

__initcall(init_sched_debug_procfs);

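/*
 * Back end of /proc/<pid>/sched: print the scheduling state of a single
 * task, including per-entity schedstats and the derived avg_atom (runtime
 * per context switch) and avg_per_cpu (runtime per migration) values.
 */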
void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
{
	unsigned long nr_switches;

	SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid,
						get_nr_threads(p));
	SEQ_printf(m,
		"---------------------------------------------------------\n");
#define __P(F) \
	SEQ_printf(m, "%-35s:%21Ld\n", #F, (long long)F)
#define P(F) \
	SEQ_printf(m, "%-35s:%21Ld\n", #F, (long long)p->F)
#define __PN(F) \
	SEQ_printf(m, "%-35s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
#define PN(F) \
	SEQ_printf(m, "%-35s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))

	PN(se.exec_start);
	PN(se.vruntime);
	PN(se.sum_exec_runtime);

	nr_switches = p->nvcsw + p->nivcsw;

#ifdef CONFIG_SCHEDSTATS
	PN(se.statistics.wait_start);
	PN(se.statistics.sleep_start);
	PN(se.statistics.block_start);
	PN(se.statistics.sleep_max);
	PN(se.statistics.block_max);
	PN(se.statistics.exec_max);
	PN(se.statistics.slice_max);
	PN(se.statistics.wait_max);
	PN(se.statistics.wait_sum);
	P(se.statistics.wait_count);
	PN(se.statistics.iowait_sum);
	P(se.statistics.iowait_count);
	P(sched_info.bkl_count);
	P(se.nr_migrations);
	P(se.statistics.nr_migrations_cold);
	P(se.statistics.nr_failed_migrations_affine);
	P(se.statistics.nr_failed_migrations_running);
	P(se.statistics.nr_failed_migrations_hot);
	P(se.statistics.nr_forced_migrations);
	P(se.statistics.nr_wakeups);
	P(se.statistics.nr_wakeups_sync);
	P(se.statistics.nr_wakeups_migrate);
	P(se.statistics.nr_wakeups_local);
	P(se.statistics.nr_wakeups_remote);
	P(se.statistics.nr_wakeups_affine);
	P(se.statistics.nr_wakeups_affine_attempts);
	P(se.statistics.nr_wakeups_passive);
	P(se.statistics.nr_wakeups_idle);

	{
		u64 avg_atom, avg_per_cpu;

		avg_atom = p->se.sum_exec_runtime;
		if (nr_switches)
			do_div(avg_atom, nr_switches);
		else
			avg_atom = -1LL;

		avg_per_cpu = p->se.sum_exec_runtime;
		if (p->se.nr_migrations) {
			avg_per_cpu = div64_u64(avg_per_cpu,
						p->se.nr_migrations);
		} else {
			avg_per_cpu = -1LL;
		}

		__PN(avg_atom);
		__PN(avg_per_cpu);
	}
#endif
	__P(nr_switches);
	SEQ_printf(m, "%-35s:%21Ld\n",
		   "nr_voluntary_switches", (long long)p->nvcsw);
	SEQ_printf(m, "%-35s:%21Ld\n",
		   "nr_involuntary_switches", (long long)p->nivcsw);

	P(se.load.weight);
	P(policy);
	P(prio);
#undef PN
#undef __PN
#undef P
#undef __P

	{
		unsigned int this_cpu = raw_smp_processor_id();
		u64 t0, t1;

		t0 = cpu_clock(this_cpu);
		t1 = cpu_clock(this_cpu);
		SEQ_printf(m, "%-35s:%21Ld\n",
			   "clock-delta", (long long)(t1-t0));
	}
}

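/*
 * Called on a write to /proc/<pid>/sched: reset the task's accumulated
 * schedstats so a fresh measurement interval can begin.
 */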
void proc_sched_set_task(struct task_struct *p)
{
#ifdef CONFIG_SCHEDSTATS
	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
#endif
}