/*
 *  linux/arch/x86-64/kernel/process.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *  $Id: process.c,v 1.77 2004/03/22 00:37:29 ak Exp $
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#define __KERNEL_SYSCALLS__
#include <stdarg.h>

#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/user.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/config.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/init.h>
#include <linux/ctype.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/kdebug.h>
#include <asm/proto.h>
#include <asm/apic.h>

#include <linux/irq.h>

asmlinkage extern void ret_from_fork(void);

int hlt_counter;

/*
 * Power management idle function, if any..
 */
void (*pm_idle)(void);

/*
 * Power off function, if any
 */
void (*pm_power_off)(void);

void disable_hlt(void)
{
	hlt_counter++;
}

void enable_hlt(void)
{
	hlt_counter--;
}

/*
 * We use this if we don't have any better
 * idle routine..
 */
static void default_idle(void)
{
	if (!hlt_counter) {
		__cli();
		if (!current->need_resched)
			safe_halt();
		else
			__sti();
	}
}
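
/*
 * Note: safe_halt() expands to "sti; hlt".  STI takes effect only after
 * the following instruction, so no wakeup interrupt can be lost between
 * the need_resched check above and the halt.
 */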

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle (void)
{
	int oldval;

	__sti();

	/*
	 * Deal with another CPU just having chosen a thread to
	 * run here:
	 */
	oldval = xchg(&current->need_resched, -1);

	if (!oldval)
		asm volatile(
			"2:"
			"cmpl $-1, %0;"
			"rep; nop;"
			"je 2b;"
				: :"m" (current->need_resched));
}
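
/*
 * Note: the xchg above publishes -1 in need_resched.  As the comment in
 * mwait_idle below spells out, a need_resched of -1 makes the wakeup path
 * skip the resched IPI; the "rep; nop" (pause) loop then simply spins
 * until the scheduler overwrites the value.
 */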

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle (void)
{
	/* endless idle loop with no priority at all */
	init_idle();
	current->nice = 20;
	current->counter = -100;

	while (1) {
		void (*idle)(void) = pm_idle;
		if (!idle)
			idle = default_idle;
		while (!current->need_resched)
			idle();
		schedule();
		check_pgt_cache();
	}
}

/*
 * This is a kind of hybrid between the poll and halt idle routines. It
 * uses the new Monitor/Mwait instructions on P4 processors with PNI. We
 * monitor need_resched and go to an optimized wait state through Mwait.
 * Whenever someone changes need_resched, we are woken up from Mwait
 * (without an IPI).
 */
static void mwait_idle (void)
{
	int oldval;

	__sti();
	/* Setting need_resched to -1 skips sending IPI during idle resched */
	oldval = xchg(&current->need_resched, -1);
	if (!oldval) {
		do {
			__monitor((void *)&current->need_resched, 0, 0);
			if (current->need_resched != -1)
				break;
			__mwait(0, 0);
		} while (current->need_resched == -1);
	}
}

int __init select_idle_routine(struct cpuinfo_x86 *c)
{
	if (cpu_has(c, X86_FEATURE_MWAIT)) {
		printk("Monitor/Mwait feature present.\n");
		/*
		 * Take care of systems with asymmetric CPUs.
		 * Use mwait_idle only if all CPUs support it.
		 * If not, we fall back to default_idle().
		 */
		if (!pm_idle) {
			pm_idle = mwait_idle;
		}
		return 1;
	}
	return 1;
}


static int __init idle_setup (char *str)
{
	if (!strncmp(str, "poll", 4)) {
		printk("using polling idle threads.\n");
		pm_idle = poll_idle;
	} else if (!strncmp(str, "halt", 4)) {
		printk("using halt in idle threads.\n");
		pm_idle = default_idle;
	}

	return 1;
}

__setup("idle=", idle_setup);
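
/* Chosen at boot time, e.g. "idle=poll" or "idle=halt" on the kernel
   command line. */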

static struct { long x; } no_idt[3];
static enum {
	BOOT_BIOS = 'b',
	BOOT_TRIPLE = 't',
	BOOT_KBD = 'k',
} reboot_type = BOOT_KBD;
static int reboot_mode = 0;

/* reboot=b[ios] | t[riple] | k[bd] [, [w]arm | [c]old]
   bios	  Use the CPU reboot vector for warm reset
   warm   Don't set the cold reboot flag
   cold   Set the cold reboot flag
   triple Force a triple fault (init)
   kbd    Use the keyboard controller. cold reset (default)
 */
static int __init reboot_setup(char *str)
{
	for (;;) {
		switch (*str) {
		case 'w':
			reboot_mode = 0x1234;
			break;

		case 'c':
			reboot_mode = 0;
			break;

		case 't':
		case 'b':
		case 'k':
			reboot_type = *str;
			break;
		}
		if ((str = strchr(str, ',')) != NULL)
			str++;
		else
			break;
	}
	return 1;
}
__setup("reboot=", reboot_setup);
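
/* Examples: "reboot=bios" requests a warm BIOS reset; "reboot=triple,cold"
   forces a triple fault with the cold-boot flag set. */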

/* overwrites random kernel memory. Should not be kernel .text */
#define WARMBOOT_TRAMP 0x1000UL

static void reboot_warm(void)
{
	extern unsigned char warm_reboot[], warm_reboot_end[];
	printk("warm reboot\n");

	__cli();

	/* restore identity mapping */
	init_level4_pgt[0] = __pml4(__pa(level3_ident_pgt) | 7);
	__flush_tlb_all();

	memcpy(__va(WARMBOOT_TRAMP), warm_reboot, warm_reboot_end - warm_reboot);

	asm volatile( "   pushq $0\n" 		/* ss */
		     "   pushq $0x2000\n" 	/* rsp */
	             "   pushfq\n"		/* eflags */
		     "   pushq %[cs]\n"
		     "   pushq %[target]\n"
		     "   iretq" ::
		      [cs] "i" (__KERNEL_COMPAT32_CS),
		      [target] "b" (WARMBOOT_TRAMP));
}
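
/*
 * Note: reboot_warm above hand-builds an interrupt-return frame (SS, RSP,
 * RFLAGS, CS, RIP) and executes IRETQ.  This is a convenient way to make
 * a far transfer into the 32bit compatibility code segment that runs the
 * low-memory trampoline.
 */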

static void kb_wait(void)
{
	int i;

	for (i=0; i<0x10000; i++)
		if ((inb_p(0x64) & 0x02) == 0)
			break;
}
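
/*
 * Note: kb_wait polls bit 1 (input buffer full) of the keyboard
 * controller status port 0x64 until the controller can accept another
 * command byte, giving up after 0x10000 iterations.
 */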


#ifdef CONFIG_SMP
static void smp_halt(void)
{
	int cpuid = safe_smp_processor_id();
	static int first_entry = 1;

	if (first_entry) {
		first_entry = 0;
		smp_call_function((void *)machine_restart, NULL, 1, 0);
	}

	smp_stop_cpu();

	/* AP calling this. Just halt */
	if (cpuid != boot_cpu_id) {
		printk("CPU %d SMP halt\n", cpuid);
		for (;;)
			asm("hlt");
	}

	/* Wait for all other CPUs to have run smp_stop_cpu */
	while (cpu_online_map)
		rep_nop();
}
#endif

void machine_restart(char * __unused)
{
	int i;

#ifdef CONFIG_SMP
	smp_halt();
#endif
	__cli();

#ifndef CONFIG_SMP
	disable_local_APIC();
#endif
	disable_IO_APIC();

	__sti();

	/* Tell the BIOS if we want cold or warm reboot */
	*((unsigned short *)__va(0x472)) = reboot_mode;

	for (;;) {
		/* Could also try the reset bit in the Hammer NB */
		switch (reboot_type) {
		case BOOT_BIOS:
			reboot_warm();
			/* fall through if the warm reboot returns */

		case BOOT_KBD:
			/* force cold reboot to reinit all hardware */
			for (i=0; i<100; i++) {
				kb_wait();
				udelay(50);
				outb(0xfe,0x64);         /* pulse reset low */
				udelay(50);
			}
			/* fall through if the keyboard reset fails */

		case BOOT_TRIPLE:
			/* force cold reboot to reinit all hardware */
			*((unsigned short *)__va(0x472)) = 0;

			/* load an empty IDT and trap: the resulting fault
			   cannot be delivered, which triple faults the CPU */
			__asm__ __volatile__("lidt (%0)": :"r" (no_idt));
			__asm__ __volatile__("int3");

			reboot_type = BOOT_KBD;
			break;
		}
	}
}

void machine_halt(void)
{
}

void machine_power_off(void)
{
	if (pm_power_off)
		pm_power_off();
}

extern int printk_address(unsigned long);

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned int fsindex,gsindex;
	unsigned int ds,cs,es;

	printk("\n");
	printk("Pid: %d, comm: %.20s %s\n", current->pid, current->comm, print_tainted());
	printk("RIP: %04lx:", regs->cs & 0xffff);
	printk_address(regs->rip);
	printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->rax, regs->rbx, regs->rcx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->rdx, regs->rsi, regs->rdi);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->rbp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	asm("movq %%cr0, %0": "=r" (cr0));
	asm("movq %%cr2, %0": "=r" (cr2));
	asm("movq %%cr3, %0": "=r" (cr3));
	asm("movq %%cr4, %0": "=r" (cr4));

	printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs,fsindex,gs,gsindex,shadowgs);
	printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
}

void show_regs(struct pt_regs * regs)
{
	__show_regs(regs);
	show_trace(&regs->rsp);
}

/*
 * No need to lock the MM as we are the last user
 */
void release_segments(struct mm_struct *mm)
{
	void * ldt = mm->context.segments;

	/*
	 * free the LDT
	 */
	if (ldt) {
		mm->context.segments = NULL;
		clear_LDT();
		vfree(ldt);
	}
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	if (me->thread.io_bitmap_ptr) {
		(init_tss + smp_processor_id())->io_map_base =
			INVALID_IO_BITMAP_OFFSET;
		kfree(me->thread.io_bitmap_ptr);
		me->thread.io_bitmap_ptr = NULL;
	}
}
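
/*
 * Note: parking io_map_base outside the TSS limit above disables the
 * per-task IO bitmap, so port accesses from user space fault until a
 * later sys_ioperm() call sets the bitmap up again (see the IO bitmap
 * handling in __switch_to below).
 */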

void flush_thread(void)
{
	struct task_struct *tsk = current;

	memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
	/*
	 * Forget coprocessor state..
	 */
	clear_fpu(tsk);
	tsk->used_math = 0;
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		void * ldt = dead_task->mm->context.segments;

		// temporary debugging check
		if (ldt) {
			printk("WARNING: dead process %8s still has LDT? <%p>\n",
					dead_task->comm, ldt);
			BUG();
		}
	}
}

/*
 * we do not have to muck with descriptors here, that is
 * done in switch_mm() as needed.
 */
void copy_segments(struct task_struct *p, struct mm_struct *new_mm)
{
	struct mm_struct * old_mm;
	void *old_ldt, *ldt;

	ldt = NULL;
	old_mm = current->mm;
	if (old_mm && (old_ldt = old_mm->context.segments) != NULL) {
		/*
		 * Completely new LDT, we initialize it from the parent:
		 */
		ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
		if (!ldt)
			printk(KERN_WARNING "ldt allocation failed\n");
		else
			memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
	}
	new_mm->context.segments = ldt;
	new_mm->context.cpuvalid = 0UL;
	return;
}
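
/*
 * Set up the kernel stack and thread state of a newly forked child.
 * The child gets a copy of the parent's pt_regs at the top of its
 * kernel stack, with rax forced to 0 so that it sees a zero return
 * value from fork().
 */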
int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
		unsigned long unused,
	struct task_struct * p, struct pt_regs * regs)
{
	struct pt_regs * childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1;

	*childregs = *regs;

	childregs->rax = 0;
	childregs->rsp = rsp;
	if (rsp == ~0) {
		childregs->rsp = (unsigned long)childregs;
	}

	p->thread.rsp = (unsigned long) childregs;
	p->thread.rsp0 = (unsigned long) (childregs+1);
	p->thread.userrsp = current->thread.userrsp;

	p->thread.rip = (unsigned long) ret_from_fork;

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
	asm("mov %%es,%0" : "=m" (p->thread.es));
	asm("mov %%ds,%0" : "=m" (p->thread.ds));

	unlazy_fpu(current);
	p->thread.i387 = current->thread.i387;

	if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
		p->thread.io_bitmap_ptr = kmalloc((IO_BITMAP_SIZE+1)*4, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr)
			return -ENOMEM;
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
		       (IO_BITMAP_SIZE+1)*4);
	}

	return 0;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread,register) \
		set_debug(thread->debugreg[register], register)

/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 */
struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	struct tss_struct *tss = init_tss + smp_processor_id();

	/*
	 * Reload rsp0, LDT and the page table pointer:
	 */
	tss->rsp0 = next->rsp0;

	/*
	 * Switch DS and ES.
	 */
	asm volatile("mov %%es,%0" : "=m" (prev->es));
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	/*
	 * Must be after DS reload for AMD workaround.
	 */
	unlazy_fpu(prev_p);

	/*
	 * Switch FS and GS.
	 */
	{
		unsigned fsindex;
		asm volatile("movl %%fs,%0" : "=r" (fsindex));
		/* A segment register != 0 always requires a reload.
		   Also reload when it has changed.
		   When the previous process used a 64bit base, always
		   reload to avoid an information leak. */
		if (unlikely((fsindex | next->fsindex) || prev->fs)) {
			loadsegment(fs, next->fsindex);
			/* Check if the user used a selector != 0;
			 * if yes clear the 64bit base, since an overloaded
			 * base is always mapped to the null selector.
			 */
			if (fsindex)
				prev->fs = 0;
		}
		/* when the next process has a 64bit base use it */
		if (next->fs)
			wrmsrl(MSR_FS_BASE, next->fs);
		prev->fsindex = fsindex;
	}
	{
		unsigned gsindex;
		asm volatile("movl %%gs,%0" : "=r" (gsindex));
		if (unlikely((gsindex | next->gsindex) || prev->gs)) {
			load_gs_index(next->gsindex);
			if (gsindex)
				prev->gs = 0;
		}
		if (next->gs)
			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
		prev->gsindex = gsindex;
	}

	/*
	 * Switch the PDA context: the per-CPU PDA caches the current
	 * task, its kernel stack and the saved user RSP.
	 */
	prev->userrsp = read_pda(oldrsp);
	write_pda(oldrsp, next->userrsp);
	write_pda(pcurrent, next_p);
	write_pda(kernelstack, (unsigned long)next_p + THREAD_SIZE - PDA_STACKOFFSET);

	/*
	 * Now maybe reload the debug registers
	 */
	if (unlikely(next->debugreg[7])) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}


	/*
	 * Handle the IO bitmap
	 */
	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
		if (next->io_bitmap_ptr) {
			/*
			 * 4 cachelines copy ... not good, but not that
			 * bad either. Anyone got something better?
			 * This only affects processes which use ioperm().
			 * [Putting the TSSs into 4k-tlb mapped regions
			 * and playing VM tricks to switch the IO bitmap
			 * is not really acceptable.]
			 */
			memcpy(tss->io_bitmap, next->io_bitmap_ptr,
				 IO_BITMAP_SIZE*sizeof(u32));
			tss->io_map_base = IO_BITMAP_OFFSET;
		} else {
			/*
			 * a bitmap offset pointing outside of the TSS limit
			 * causes a nicely controllable SIGSEGV if a process
			 * tries to use a port IO instruction. The first
			 * sys_ioperm() call sets up the bitmap properly.
			 */
			tss->io_map_base = INVALID_IO_BITMAP_OFFSET;
		}
	}


	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
{
	long error;
	char * filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, &regs);
	if (error == 0)
		current->ptrace &= ~PT_DTRACE;
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	current->thread.flags = 0;
}

asmlinkage long sys_fork(struct pt_regs regs)
{
	return do_fork(SIGCHLD, regs.rsp, &regs, 0);
}

asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, struct pt_regs regs)
{
	if (!newsp)
		newsp = regs.rsp;
	return do_fork(clone_flags, newsp, &regs, 0);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.rsp, &regs, 0);
}

/*
 * These bracket the sleeping functions..
 */
extern void scheduling_functions_start_here(void);
extern void scheduling_functions_end_here(void);
#define first_sched	((unsigned long) scheduling_functions_start_here)
#define last_sched	((unsigned long) scheduling_functions_end_here)

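/*
 * get_wchan walks the sleeping task's frame-pointer chain: the saved
 * rbp is at thread.rsp, the return address at fp+8 and the caller's
 * frame at *fp.  The first return address outside the scheduler
 * functions is reported as the wait channel.
 */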
unsigned long get_wchan(struct task_struct *p)
{
	u64 fp,rip;
	int count = 0;

	if (!p || p == current || p->state==TASK_RUNNING)
		return 0;
	if (p->thread.rsp < (u64)p || p->thread.rsp > (u64)p + THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.rsp);
	do {
		if (fp < (unsigned long)p || fp > (unsigned long)p+THREAD_SIZE)
			return 0;
		rip = *(u64 *)(fp+8);
		if (rip < first_sched || rip >= last_sched)
			return rip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}
#undef last_sched
#undef first_sched

asmlinkage long sys_arch_prctl(int code, unsigned long addr)
{
	int ret = 0;
	unsigned long tmp;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE)
			return -EPERM;
		asm volatile("movl %0,%%gs" :: "r" (0));
		current->thread.gsindex = 0;
		current->thread.gs = addr;
		ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs. */
		if (addr >= TASK_SIZE)
			return -EPERM;
		asm volatile("movl %0,%%fs" :: "r" (0));
		current->thread.fsindex = 0;
		current->thread.fs = addr;
		ret = checking_wrmsrl(MSR_FS_BASE, addr);
		break;

		/* Returned value may not be correct when the user changed fs/gs */
	case ARCH_GET_FS:
		rdmsrl(MSR_FS_BASE, tmp);
		ret = put_user(tmp, (unsigned long *)addr);
		break;

	case ARCH_GET_GS:
		rdmsrl(MSR_KERNEL_GS_BASE, tmp);
		ret = put_user(tmp, (unsigned long *)addr);
		break;

	default:
		ret = -EINVAL;
		break;
	}
	return ret;
}
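
/*
 * Example (userspace; "tls_base" is just an illustrative pointer):
 *
 *	arch_prctl(ARCH_SET_FS, (unsigned long)tls_base);
 *
 * sets the FS base of the calling thread, which is how 64bit
 * thread-local storage is typically addressed.
 */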