/*
 *  linux/kernel/exit.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/config.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/personality.h>
#include <linux/tty.h>
#include <linux/namespace.h>
#ifdef CONFIG_BSD_PROCESS_ACCT
#include <linux/acct.h>
#endif

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>

extern void sem_exit (void);
extern struct task_struct *child_reaper;

int getrusage(struct task_struct *, int, struct rusage *);

static void release_task(struct task_struct * p)
{
	if (p != current) {
#ifdef CONFIG_SMP
		/*
		 * Wait to make sure the process isn't on the
		 * runqueue (active on some other CPU still)
		 */
		for (;;) {
			task_lock(p);
			if (!task_has_cpu(p))
				break;
			task_unlock(p);
			do {
				cpu_relax();
				barrier();
			} while (task_has_cpu(p));
		}
		task_unlock(p);
#endif
		atomic_dec(&p->user->processes);
		free_uid(p->user);
		unhash_process(p);

		release_thread(p);
		current->cmin_flt += p->min_flt + p->cmin_flt;
		current->cmaj_flt += p->maj_flt + p->cmaj_flt;
		current->cnswap += p->nswap + p->cnswap;
		/*
		 * Potentially available timeslices are retrieved
		 * here - this way the parent does not get penalized
		 * for creating too many processes.
		 *
		 * (this cannot be used to artificially 'generate'
		 * timeslices, because any timeslice recovered here
		 * was given away by the parent in the first place.)
		 */
		current->counter += p->counter;
		if (current->counter >= MAX_COUNTER)
			current->counter = MAX_COUNTER;
		p->pid = 0;
		free_task_struct(p);
	} else {
		printk("task releasing itself\n");
	}
}
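/*
 * Illustrative note (not in the original source): if the parent is left
 * with counter = 3 and the reaped child still held counter = 5, the
 * parent continues with counter = 8; the MAX_COUNTER clamp above only
 * matters when the sum would exceed the scheduler's cap.  The numbers
 * here are an assumed example, not values taken from the kernel.
 */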

/*
 * This checks not only the pgrp, but falls back on the pid if no
 * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
 * without this...
 */
int session_of_pgrp(int pgrp)
{
	struct task_struct *p;
	int fallback;

	fallback = -1;
	read_lock(&tasklist_lock);
	for_each_task(p) {
		if (p->session <= 0)
			continue;
		if (p->pgrp == pgrp) {
			fallback = p->session;
			break;
		}
		if (p->pid == pgrp)
			fallback = p->session;
	}
	read_unlock(&tasklist_lock);
	return fallback;
}
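/*
 * Illustrative note (not in the original source): calling
 * session_of_pgrp(42) when no task has pgrp 42, but a task with pid 42
 * exists, returns that task's session as the fallback; if neither
 * matches, the function returns -1.
 */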

/*
 * Determine if a process group is "orphaned", according to the POSIX
 * definition in 2.2.2.52.  Orphaned process groups are not to be affected
 * by terminal-generated stop signals.  Newly orphaned process groups are
 * to receive a SIGHUP and a SIGCONT.
 *
 * "I ask you, have you ever known what it is to be an orphan?"
 */
static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
{
	struct task_struct *p;

	read_lock(&tasklist_lock);
	for_each_task(p) {
		if ((p == ignored_task) || (p->pgrp != pgrp) ||
		    (p->state == TASK_ZOMBIE) ||
		    (p->p_pptr->pid == 1))
			continue;
		if ((p->p_pptr->pgrp != pgrp) &&
		    (p->p_pptr->session == p->session)) {
			read_unlock(&tasklist_lock);
			return 0;
		}
	}
	read_unlock(&tasklist_lock);
	return 1;	/* (sighing) "Often!" */
}
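/*
 * Illustrative scenario (not in the original source): a shell in process
 * group A starts a pipeline in process group B of the same session.  As
 * long as the shell is alive and parents a member of B, the loop above
 * finds a member whose parent is outside B but inside the session, so B
 * is not orphaned.  Once that last "outside" parent exits, every
 * remaining member of B is parented within B, by another session, or by
 * init, and the group counts as orphaned.
 */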

int is_orphaned_pgrp(int pgrp)
{
	return will_become_orphaned_pgrp(pgrp, 0);
}

static inline int has_stopped_jobs(int pgrp)
{
	int retval = 0;
	struct task_struct * p;

	read_lock(&tasklist_lock);
	for_each_task(p) {
		if (p->pgrp != pgrp)
			continue;
		if (p->state != TASK_STOPPED)
			continue;
		retval = 1;
		break;
	}
	read_unlock(&tasklist_lock);
	return retval;
}

/*
 * When we die, we re-parent all our children.
 * Try to give them to another thread in our thread
 * group, and if no such member exists, give it to
 * the global child reaper process (i.e. "init")
 */
static inline void forget_original_parent(struct task_struct * father)
{
	struct task_struct * p;

	read_lock(&tasklist_lock);

	for_each_task(p) {
		if (p->p_opptr == father) {
			/* We don't want people slaying init */
			p->exit_signal = SIGCHLD;
			p->self_exec_id++;

			/* Make sure we're not reparenting to ourselves */
			p->p_opptr = child_reaper;

			if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0);
		}
	}
	read_unlock(&tasklist_lock);
}

static inline void close_files(struct files_struct * files)
{
	int i, j;

	j = 0;
	for (;;) {
		unsigned long set;
		i = j * __NFDBITS;
		if (i >= files->max_fdset || i >= files->max_fds)
			break;
		set = files->open_fds->fds_bits[j++];
		while (set) {
			if (set & 1) {
				struct file * file = xchg(&files->fd[i], NULL);
				if (file)
					filp_close(file, files);
			}
			i++;
			set >>= 1;
		}
	}
}
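/*
 * Illustrative note (not in the original source): the open_fds bitmap is
 * walked one word at a time, so word j covers descriptors
 * j * __NFDBITS .. j * __NFDBITS + __NFDBITS - 1.  With 32-bit words,
 * bit 5 of word 1 corresponds to fd 37; if that bit is set, fd 37 is
 * atomically swapped out of files->fd[] and closed via filp_close().
 */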

void put_files_struct(struct files_struct *files)
{
	if (atomic_dec_and_test(&files->count)) {
		close_files(files);
		/*
		 * Free the fd and fdset arrays if we expanded them.
		 */
		if (files->fd != &files->fd_array[0])
			free_fd_array(files->fd, files->max_fds);
		if (files->max_fdset > __FD_SETSIZE) {
			free_fdset(files->open_fds, files->max_fdset);
			free_fdset(files->close_on_exec, files->max_fdset);
		}
		kmem_cache_free(files_cachep, files);
	}
}

static inline void __exit_files(struct task_struct *tsk)
{
	struct files_struct * files = tsk->files;

	if (files) {
		task_lock(tsk);
		tsk->files = NULL;
		task_unlock(tsk);
		put_files_struct(files);
	}
}

void exit_files(struct task_struct *tsk)
{
	__exit_files(tsk);
}

static inline void __put_fs_struct(struct fs_struct *fs)
{
	/* No need to hold fs->lock if we are killing it */
	if (atomic_dec_and_test(&fs->count)) {
		dput(fs->root);
		mntput(fs->rootmnt);
		dput(fs->pwd);
		mntput(fs->pwdmnt);
		if (fs->altroot) {
			dput(fs->altroot);
			mntput(fs->altrootmnt);
		}
		kmem_cache_free(fs_cachep, fs);
	}
}

void put_fs_struct(struct fs_struct *fs)
{
	__put_fs_struct(fs);
}

static inline void __exit_fs(struct task_struct *tsk)
{
	struct fs_struct * fs = tsk->fs;

	if (fs) {
		task_lock(tsk);
		tsk->fs = NULL;
		task_unlock(tsk);
		__put_fs_struct(fs);
	}
}

void exit_fs(struct task_struct *tsk)
{
	__exit_fs(tsk);
}

/*
 * We can use these to temporarily drop into
 * "lazy TLB" mode and back.
 */
struct mm_struct * start_lazy_tlb(void)
{
	struct mm_struct *mm = current->mm;
	current->mm = NULL;
	/* active_mm is still 'mm' */
	atomic_inc(&mm->mm_count);
	enter_lazy_tlb(mm, current, smp_processor_id());
	return mm;
}

void end_lazy_tlb(struct mm_struct *mm)
{
	struct mm_struct *active_mm = current->active_mm;

	current->mm = mm;
	if (mm != active_mm) {
		current->active_mm = mm;
		activate_mm(active_mm, mm);
	}
	mmdrop(active_mm);
}
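/*
 * Illustrative usage sketch (not in the original source): a caller that
 * wants to drop its mm for a while and later restore it would pair the
 * two helpers, e.g.
 *
 *	struct mm_struct *mm = start_lazy_tlb();
 *	... run without a user address space ...
 *	end_lazy_tlb(mm);
 *
 * start_lazy_tlb() takes an extra mm_count reference on the mm, and
 * end_lazy_tlb() drops the reference on whatever active_mm was in use
 * in between via mmdrop().
 */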

/*
 * Turn us into a lazy TLB process if we
 * aren't already..
 */
static inline void __exit_mm(struct task_struct * tsk)
{
	struct mm_struct * mm = tsk->mm;

	mm_release();
	if (mm) {
		atomic_inc(&mm->mm_count);
		BUG_ON(mm != tsk->active_mm);
		/* more a memory barrier than a real lock */
		task_lock(tsk);
		tsk->mm = NULL;
		task_unlock(tsk);
		enter_lazy_tlb(mm, current, smp_processor_id());
		mmput(mm);
	}
}

void exit_mm(struct task_struct *tsk)
{
	__exit_mm(tsk);
}

/*
 * Send signals to all our closest relatives so that they know
 * to properly mourn us..
 */
static void exit_notify(void)
{
	struct task_struct * p, *t;

	forget_original_parent(current);
	/*
	 * Check to see if any process groups have become orphaned
	 * as a result of our exiting, and if they have any stopped
	 * jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
	 *
	 * Case i: Our father is in a different pgrp than we are
	 * and we were the only connection outside, so our pgrp
	 * is about to become orphaned.
	 */

	t = current->p_pptr;

	if ((t->pgrp != current->pgrp) &&
	    (t->session == current->session) &&
	    will_become_orphaned_pgrp(current->pgrp, current) &&
	    has_stopped_jobs(current->pgrp)) {
		kill_pg(current->pgrp,SIGHUP,1);
		kill_pg(current->pgrp,SIGCONT,1);
	}

	/* Let father know we died
	 *
	 * Thread signals are configurable, but you aren't going to use
	 * that to send signals to arbitrary processes.
	 * That stops right now.
	 *
	 * If the parent exec id doesn't match the exec id we saved
	 * when we started then we know the parent has changed security
	 * domain.
	 *
	 * If our self_exec id doesn't match our parent_exec_id then
	 * we have changed execution domain as these two values started
	 * the same after a fork.
	 *
	 */

	if (current->exit_signal && current->exit_signal != SIGCHLD &&
	    ( current->parent_exec_id != t->self_exec_id  ||
	      current->self_exec_id != current->parent_exec_id))
		current->exit_signal = SIGCHLD;
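	/*
	 * Illustrative note (not in the original source): parent_exec_id
	 * and self_exec_id start out equal at fork().  If the parent has
	 * since exec'd (its self_exec_id no longer matches the value we
	 * recorded), or if we ourselves have exec'd, the non-SIGCHLD exit
	 * signal chosen at clone() time is downgraded to plain SIGCHLD,
	 * so it cannot be used to signal across a security-domain change.
	 */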


	/*
	 * This loop does two things:
	 *
	 * A.  Make init inherit all the child processes
	 * B.  Check to see if any process groups have become orphaned
	 *	as a result of our exiting, and if they have any stopped
	 *	jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
	 */

	write_lock_irq(&tasklist_lock);
	current->state = TASK_ZOMBIE;
	do_notify_parent(current, current->exit_signal);
	while (current->p_cptr != NULL) {
		p = current->p_cptr;
		current->p_cptr = p->p_osptr;
		p->p_ysptr = NULL;
		p->ptrace = 0;

		p->p_pptr = p->p_opptr;
		p->p_osptr = p->p_pptr->p_cptr;
		if (p->p_osptr)
			p->p_osptr->p_ysptr = p;
		p->p_pptr->p_cptr = p;
		if (p->state == TASK_ZOMBIE)
			do_notify_parent(p, p->exit_signal);
		/*
		 * process group orphan check
		 * Case ii: Our child is in a different pgrp
		 * than we are, and it was the only connection
		 * outside, so the child pgrp is now orphaned.
		 */
		if ((p->pgrp != current->pgrp) &&
		    (p->session == current->session)) {
			int pgrp = p->pgrp;

			write_unlock_irq(&tasklist_lock);
			if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) {
				kill_pg(pgrp,SIGHUP,1);
				kill_pg(pgrp,SIGCONT,1);
			}
			write_lock_irq(&tasklist_lock);
		}
	}
	write_unlock_irq(&tasklist_lock);
}

NORET_TYPE void do_exit(long code)
{
	struct task_struct *tsk = current;

	if (in_interrupt())
		panic("Aiee, killing interrupt handler!");
	if (!tsk->pid)
		panic("Attempted to kill the idle task!");
	if (tsk->pid == 1)
		panic("Attempted to kill init!");
	tsk->flags |= PF_EXITING;
	del_timer_sync(&tsk->real_timer);

fake_volatile:
#ifdef CONFIG_BSD_PROCESS_ACCT
	acct_process(code);
#endif
	__exit_mm(tsk);

	lock_kernel();
	sem_exit();
	__exit_files(tsk);
	__exit_fs(tsk);
	exit_namespace(tsk);
	exit_sighand(tsk);
	exit_thread();

	if (current->leader)
		disassociate_ctty(1);

	put_exec_domain(tsk->exec_domain);
	if (tsk->binfmt && tsk->binfmt->module)
		__MOD_DEC_USE_COUNT(tsk->binfmt->module);

	tsk->exit_code = code;
	exit_notify();
	schedule();
	BUG();
/*
 * In order to get rid of the "volatile function does return" message
 * I did this little loop that confuses gcc to think do_exit really
 * is volatile. In fact it's schedule() that is volatile in some
 * circumstances: when current->state = ZOMBIE, schedule() never
 * returns.
 *
 * In fact the natural way to do all this is to have the label and the
 * goto right after each other, but I put the fake_volatile label at
 * the start of the function just in case something /really/ bad
 * happens, and the schedule returns. This way we can try again. I'm
 * not paranoid: it's just that everybody is out to get me.
 */
	goto fake_volatile;
}

NORET_TYPE void complete_and_exit(struct completion *comp, long code)
{
	if (comp)
		complete(comp);

	do_exit(code);
}

asmlinkage long sys_exit(int error_code)
{
	do_exit((error_code&0xff)<<8);
}
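/*
 * Illustrative note (not in the original source): only the low byte of
 * the exit code is kept and it is shifted into bits 8-15 of the status
 * word, which is the layout the wait*() family reports.  For example,
 * exit(3) stores 0x0300, from which WEXITSTATUS() recovers 3; a stopped
 * child is instead reported as (signal << 8) | 0x7f, as seen in
 * sys_wait4() below.
 */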

asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
{
	int flag, retval;
	DECLARE_WAITQUEUE(wait, current);
	struct task_struct *tsk;

	if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
		return -EINVAL;

	add_wait_queue(&current->wait_chldexit,&wait);
repeat:
	flag = 0;
	current->state = TASK_INTERRUPTIBLE;
	read_lock(&tasklist_lock);
	tsk = current;
	do {
		struct task_struct *p;
		for (p = tsk->p_cptr ; p ; p = p->p_osptr) {
			if (pid>0) {
				if (p->pid != pid)
					continue;
			} else if (!pid) {
				if (p->pgrp != current->pgrp)
					continue;
			} else if (pid != -1) {
				if (p->pgrp != -pid)
					continue;
			}
			/* Wait for all children (clone and not) if __WALL is set;
			 * otherwise, wait for clone children *only* if __WCLONE is
			 * set; otherwise, wait for non-clone children *only*.  (Note:
			 * A "clone" child here is one that reports to its parent
			 * using a signal other than SIGCHLD.) */
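			/*
			 * Illustrative note (not in the original source): a
			 * child created with clone(..., SIGUSR1) reports via
			 * SIGUSR1 rather than SIGCHLD, so a plain wait4() call
			 * skips it; the caller must pass __WCLONE to reap it,
			 * or __WALL to reap both kinds of children at once.
			 */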
			if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
			    && !(options & __WALL))
				continue;
			flag = 1;
			switch (p->state) {
			case TASK_STOPPED:
				if (!p->exit_code)
					continue;
				if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED))
					continue;
				read_unlock(&tasklist_lock);
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				if (!retval && stat_addr)
					retval = put_user((p->exit_code << 8) | 0x7f, stat_addr);
				if (!retval) {
					p->exit_code = 0;
					retval = p->pid;
				}
				goto end_wait4;
			case TASK_ZOMBIE:
				current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime;
				current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime;
				read_unlock(&tasklist_lock);
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				if (!retval && stat_addr)
					retval = put_user(p->exit_code, stat_addr);
				if (retval)
					goto end_wait4;
				retval = p->pid;
				if (p->p_opptr != p->p_pptr) {
					write_lock_irq(&tasklist_lock);
					REMOVE_LINKS(p);
					p->p_pptr = p->p_opptr;
					SET_LINKS(p);
					do_notify_parent(p, SIGCHLD);
					write_unlock_irq(&tasklist_lock);
				} else
					release_task(p);
				goto end_wait4;
			default:
				continue;
			}
		}
		if (options & __WNOTHREAD)
			break;
		tsk = next_thread(tsk);
	} while (tsk != current);
	read_unlock(&tasklist_lock);
	if (flag) {
		retval = 0;
		if (options & WNOHANG)
			goto end_wait4;
		retval = -ERESTARTSYS;
		if (signal_pending(current))
			goto end_wait4;
		schedule();
		goto repeat;
	}
	retval = -ECHILD;
end_wait4:
	current->state = TASK_RUNNING;
	remove_wait_queue(&current->wait_chldexit,&wait);
	return retval;
}

#if !defined(__alpha__) && !defined(__ia64__)

/*
 * sys_waitpid() remains for compatibility. waitpid() should be
 * implemented by calling sys_wait4() from libc.a.
 */
asmlinkage long sys_waitpid(pid_t pid,unsigned int * stat_addr, int options)
{
	return sys_wait4(pid, stat_addr, options, NULL);
}
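/*
 * Illustrative note (not in the original source): waitpid(pid, &status,
 * options) is equivalent to wait4(pid, &status, options, NULL), which is
 * why C libraries can implement the former in terms of the latter and
 * this syscall is only compiled on architectures that still expect it.
 */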

#endif