/*
 *  linux/kernel/timer.c
 *
 *  Kernel internal timers, kernel timekeeping, basic process system calls
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
 *
 *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
 *              "A Kernel Model for Precision Timekeeping" by Dave Mills
 *  1998-12-24  Fixed an xtime SMP race (we need the xtime_lock rw spinlock to
 *              serialize accesses to xtime/lost_ticks).
 *                              Copyright (C) 1998  Andrea Arcangeli
 *  1999-03-10  Improved NTP compatibility by Ulrich Windl
 */

#include <linux/config.h>
#include <linux/mm.h>
#include <linux/timex.h>
#include <linux/delay.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>

#include <asm/uaccess.h>

/*
 * Timekeeping variables
 */

long tick = (1000000 + HZ/2) / HZ;	/* timer interrupt period */

/* The current time */
struct timeval xtime __attribute__ ((aligned (16)));

/* Don't completely fail for HZ > 500.  */
int tickadj = 500/HZ ? : 1;		/* microsecs */

DECLARE_TASK_QUEUE(tq_timer);
DECLARE_TASK_QUEUE(tq_immediate);

/*
 * phase-lock loop variables
 */
/* TIME_ERROR prevents overwriting the CMOS clock */
int time_state = TIME_OK;		/* clock synchronization status	*/
int time_status = STA_UNSYNC;		/* clock status bits		*/
long time_offset;			/* time adjustment (us)		*/
long time_constant = 2;			/* pll time constant		*/
long time_tolerance = MAXFREQ;		/* frequency tolerance (ppm)	*/
long time_precision = 1;		/* clock precision (us)		*/
long time_maxerror = NTP_PHASE_LIMIT;	/* maximum error (us)		*/
long time_esterror = NTP_PHASE_LIMIT;	/* estimated error (us)		*/
long time_phase;			/* phase offset (scaled us)	*/
long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
					/* frequency offset (scaled ppm)*/
long time_adj;				/* tick adjust (scaled 1 / HZ)	*/
long time_reftime;			/* time at last adjustment (s)	*/

long time_adjust;
long time_adjust_step;

unsigned long event;

extern int do_setitimer(int, struct itimerval *, struct itimerval *);

unsigned long volatile jiffies;

unsigned int * prof_buffer;
unsigned long prof_len;
unsigned long prof_shift;

/*
 * Event timer code
 */
#define TVN_BITS 6
#define TVR_BITS 8
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

struct timer_vec {
	int index;
	struct list_head vec[TVN_SIZE];
};

struct timer_vec_root {
	int index;
	struct list_head vec[TVR_SIZE];
};

static struct timer_vec tv5;
static struct timer_vec tv4;
static struct timer_vec tv3;
static struct timer_vec tv2;
static struct timer_vec_root tv1;

static struct timer_vec * const tvecs[] = {
	(struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
};

static struct list_head * run_timer_list_running;

#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))
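
/*
 * Note on the layout above: pending timers live in a hashed "timer
 * wheel".  tv1 has TVR_SIZE (256) buckets, one per jiffy for the next
 * 256 jiffies; tv2..tv5 each have TVN_SIZE (64) coarser buckets, with
 * every bucket covering 2^8, 2^14, 2^20 and 2^26 jiffies respectively.
 * As timer_jiffies advances, timers migrate ("cascade") from the coarse
 * vectors down into tv1, so expiry only ever has to scan a single tv1
 * bucket per jiffy.
 */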

void init_timervecs (void)
{
	int i;

	for (i = 0; i < TVN_SIZE; i++) {
		INIT_LIST_HEAD(tv5.vec + i);
		INIT_LIST_HEAD(tv4.vec + i);
		INIT_LIST_HEAD(tv3.vec + i);
		INIT_LIST_HEAD(tv2.vec + i);
	}
	for (i = 0; i < TVR_SIZE; i++)
		INIT_LIST_HEAD(tv1.vec + i);
}

static unsigned long timer_jiffies;

static inline void internal_add_timer(struct timer_list *timer)
{
	/*
	 * must be cli-ed when calling this
	 */
	unsigned long expires = timer->expires;
	unsigned long idx = expires - timer_jiffies;
	struct list_head * vec;

	if (run_timer_list_running)
		vec = run_timer_list_running;
	else if (idx < TVR_SIZE) {
		int i = expires & TVR_MASK;
		vec = tv1.vec + i;
	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
		int i = (expires >> TVR_BITS) & TVN_MASK;
		vec = tv2.vec + i;
	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
		vec = tv3.vec + i;
	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
		vec = tv4.vec + i;
	} else if ((signed long) idx < 0) {
		/* can happen if you add a timer with expires == jiffies,
		 * or you set a timer to go off in the past
		 */
		vec = tv1.vec + tv1.index;
	} else if (idx <= 0xffffffffUL) {
		int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
		vec = tv5.vec + i;
	} else {
		/* Can only get here on architectures with 64-bit jiffies */
		INIT_LIST_HEAD(&timer->list);
		return;
	}
	/*
	 * Timers are FIFO!
	 */
	list_add(&timer->list, vec->prev);
}
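
/*
 * Illustrative example of the bucket selection above: with
 * timer_jiffies == 1000 and timer->expires == 1300, idx is 300, which
 * is >= TVR_SIZE but < 1 << (TVR_BITS + TVN_BITS), so the timer is
 * hashed into tv2.vec[(1300 >> 8) & 63] == tv2.vec[5].  Note that the
 * bucket index is always derived from the absolute expiry value, not
 * from idx; idx only selects which vector is coarse enough to hold the
 * timer.
 */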

/* Initialize both explicitly - let's try to have them in the same cache line */
spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;

#ifdef CONFIG_SMP
volatile struct timer_list * volatile running_timer;
#define timer_enter(t) do { running_timer = t; mb(); } while (0)
#define timer_exit() do { running_timer = NULL; } while (0)
#define timer_is_running(t) (running_timer == t)
#define timer_synchronize(t) while (timer_is_running(t)) barrier()
#else
#define timer_enter(t)		do { } while (0)
#define timer_exit()		do { } while (0)
#endif

void add_timer(struct timer_list *timer)
{
	unsigned long flags;

	spin_lock_irqsave(&timerlist_lock, flags);
	if (timer_pending(timer))
		goto bug;
	internal_add_timer(timer);
	spin_unlock_irqrestore(&timerlist_lock, flags);
	return;
bug:
	spin_unlock_irqrestore(&timerlist_lock, flags);
	printk("bug: kernel timer added twice at %p.\n",
			__builtin_return_address(0));
}

static inline int detach_timer (struct timer_list *timer)
{
	if (!timer_pending(timer))
		return 0;
	list_del(&timer->list);
	return 1;
}

int mod_timer(struct timer_list *timer, unsigned long expires)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&timerlist_lock, flags);
	timer->expires = expires;
	ret = detach_timer(timer);
	internal_add_timer(timer);
	spin_unlock_irqrestore(&timerlist_lock, flags);
	return ret;
}

int del_timer(struct timer_list * timer)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&timerlist_lock, flags);
	ret = detach_timer(timer);
	timer->list.next = timer->list.prev = NULL;
	spin_unlock_irqrestore(&timerlist_lock, flags);
	return ret;
}
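
/*
 * Illustrative usage sketch (not part of this file): a driver would
 * typically embed a struct timer_list, point it at a handler and arm
 * it.  The names my_timer, my_dev, my_timeout and my_start below are
 * hypothetical.
 *
 *	static struct timer_list my_timer;
 *
 *	static void my_timeout(unsigned long data)
 *	{
 *		struct my_dev *dev = (struct my_dev *) data;
 *		... handle the timeout; optionally re-arm with
 *		mod_timer(&my_timer, jiffies + HZ) ...
 *	}
 *
 *	static void my_start(struct my_dev *dev)
 *	{
 *		init_timer(&my_timer);
 *		my_timer.function = my_timeout;
 *		my_timer.data = (unsigned long) dev;
 *		my_timer.expires = jiffies + HZ;	(one second from now)
 *		add_timer(&my_timer);
 *	}
 *
 * del_timer() removes a pending timer again; mod_timer() re-arms it
 * whether or not it is currently pending.
 */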

#ifdef CONFIG_SMP
void sync_timers(void)
{
	spin_unlock_wait(&global_bh_lock);
}

/*
 * SMP-specific function to delete a periodic timer.  The caller must
 * prevent, by some means, the timer from being restarted while this
 * runs.  Upon exit the timer is not queued and its handler is not
 * running on any CPU.  The return value is the number of times the
 * timer was deleted (for reference counting).
 */

int del_timer_sync(struct timer_list * timer)
{
	int ret = 0;

	for (;;) {
		unsigned long flags;
		int running;

		spin_lock_irqsave(&timerlist_lock, flags);
		ret += detach_timer(timer);
		timer->list.next = timer->list.prev = 0;
		running = timer_is_running(timer);
		spin_unlock_irqrestore(&timerlist_lock, flags);

		if (!running)
			break;

		timer_synchronize(timer);
	}

	return ret;
}
#endif
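
/*
 * Why a separate del_timer_sync(): on SMP, del_timer() only removes the
 * timer from the wheel; its handler may still be executing on another
 * CPU, because run_timer_list() drops timerlist_lock around the
 * callback.  del_timer_sync() therefore re-checks timer_is_running()
 * and spins via timer_synchronize() until the handler is seen to have
 * finished.  On uniprocessor builds del_timer_sync() is normally just
 * defined to del_timer() in <linux/timer.h>.
 */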


static inline void cascade_timers(struct timer_vec *tv)
{
	/* cascade all the timers from tv up one level */
	struct list_head *head, *curr, *next;

	head = tv->vec + tv->index;
	curr = head->next;
	/*
	 * We are removing _all_ timers from the list, so we don't have to
	 * detach them individually, just clear the list afterwards.
	 */
	while (curr != head) {
		struct timer_list *tmp;

		tmp = list_entry(curr, struct timer_list, list);
		next = curr->next;
		list_del(curr); // not needed
		internal_add_timer(tmp);
		curr = next;
	}
	INIT_LIST_HEAD(head);
	tv->index = (tv->index + 1) & TVN_MASK;
}
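
/*
 * Cascading example: every 256 jiffies tv1.index wraps back to 0 and
 * the timers in tv2.vec[tv2.index] are re-hashed (via
 * internal_add_timer()) into tv1; every 256*64 jiffies tv2 wraps and
 * tv3 cascades into tv2, and so on up to tv5.  run_timer_list() below
 * detects a wrap by the index being 1 right after cascade_timers() has
 * incremented it.
 */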

static inline void run_timer_list(void)
{
	spin_lock_irq(&timerlist_lock);
	while ((long)(jiffies - timer_jiffies) >= 0) {
		LIST_HEAD(queued);
		struct list_head *head, *curr;
		if (!tv1.index) {
			int n = 1;
			do {
				cascade_timers(tvecs[n]);
			} while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
		}
		run_timer_list_running = &queued;
repeat:
		head = tv1.vec + tv1.index;
		curr = head->next;
		if (curr != head) {
			struct timer_list *timer;
			void (*fn)(unsigned long);
			unsigned long data;

			timer = list_entry(curr, struct timer_list, list);
			fn = timer->function;
			data = timer->data;

			detach_timer(timer);
			timer->list.next = timer->list.prev = NULL;
			timer_enter(timer);
			spin_unlock_irq(&timerlist_lock);
			fn(data);
			spin_lock_irq(&timerlist_lock);
			timer_exit();
			goto repeat;
		}
		run_timer_list_running = NULL;
		++timer_jiffies;
		tv1.index = (tv1.index + 1) & TVR_MASK;

		curr = queued.next;
		while (curr != &queued) {
			struct timer_list *timer;

			timer = list_entry(curr, struct timer_list, list);
			curr = curr->next;
			internal_add_timer(timer);
		}
	}
	spin_unlock_irq(&timerlist_lock);
}
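
/*
 * The "queued" list above: while the current tv1 bucket is being
 * drained, run_timer_list_running is non-NULL, so any timer added while
 * this is in progress (typically re-armed from inside a handler) is
 * parked on that local list instead of being hashed against the stale
 * timer_jiffies value.  Once the bucket is empty and timer_jiffies has
 * been advanced, the parked timers are inserted properly; this also
 * ensures that a timer re-armed from its own handler cannot land back
 * in the bucket currently being processed and run again in the same
 * pass.
 */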

spinlock_t tqueue_lock = SPIN_LOCK_UNLOCKED;

void tqueue_bh(void)
{
	run_task_queue(&tq_timer);
}

void immediate_bh(void)
{
	run_task_queue(&tq_immediate);
}

/*
 * this routine handles the overflow of the microsecond field
 *
 * The tricky bits of code to handle the accurate clock support
 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 * They were originally developed for SUN and DEC kernels.
 * All the kudos should go to Dave for this stuff.
 *
 */
static void second_overflow(void)
{
    long ltemp;

    /* Bump the maxerror field */
    time_maxerror += time_tolerance >> SHIFT_USEC;
    if ( time_maxerror > NTP_PHASE_LIMIT ) {
	time_maxerror = NTP_PHASE_LIMIT;
	time_status |= STA_UNSYNC;
    }

    /*
     * Leap second processing. If in leap-insert state at
     * the end of the day, the system clock is set back one
     * second; if in leap-delete state, the system clock is
     * set ahead one second. The microtime() routine or
     * external clock driver will ensure that reported time
     * is always monotonic. The ugly divides should be
     * replaced.
     */
    switch (time_state) {

    case TIME_OK:
	if (time_status & STA_INS)
	    time_state = TIME_INS;
	else if (time_status & STA_DEL)
	    time_state = TIME_DEL;
	break;

    case TIME_INS:
	if (xtime.tv_sec % 86400 == 0) {
	    xtime.tv_sec--;
	    time_state = TIME_OOP;
	    printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
	}
	break;

    case TIME_DEL:
	if ((xtime.tv_sec + 1) % 86400 == 0) {
	    xtime.tv_sec++;
	    time_state = TIME_WAIT;
	    printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
	}
	break;

    case TIME_OOP:
	time_state = TIME_WAIT;
	break;

    case TIME_WAIT:
	if (!(time_status & (STA_INS | STA_DEL)))
	    time_state = TIME_OK;
    }

    /*
     * Compute the phase adjustment for the next second. In
     * PLL mode, the offset is reduced by a fixed factor
     * times the time constant. In FLL mode the offset is
     * used directly. In either mode, the maximum phase
     * adjustment for each second is clamped so as to spread
     * the adjustment over not more than the number of
     * seconds between updates.
     */
    if (time_offset < 0) {
	ltemp = -time_offset;
	if (!(time_status & STA_FLL))
	    ltemp >>= SHIFT_KG + time_constant;
	if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
	    ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
	time_offset += ltemp;
	time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
    } else {
	ltemp = time_offset;
	if (!(time_status & STA_FLL))
	    ltemp >>= SHIFT_KG + time_constant;
	if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
	    ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
	time_offset -= ltemp;
	time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
    }

    /*
     * Compute the frequency estimate and additional phase
     * adjustment due to frequency error for the next
     * second. When the PPS signal is engaged, gnaw on the
     * watchdog counter and update the frequency computed by
     * the pll and the PPS signal.
     */
    pps_valid++;
    if (pps_valid == PPS_VALID) {	/* PPS signal lost */
	pps_jitter = MAXTIME;
	pps_stabil = MAXFREQ;
	time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
			 STA_PPSWANDER | STA_PPSERROR);
    }
    ltemp = time_freq + pps_freq;
    if (ltemp < 0)
	time_adj -= -ltemp >>
	    (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
    else
	time_adj += ltemp >>
	    (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);

#if HZ == 100
    /* Compensate for (HZ==100) != (1 << SHIFT_HZ).
     * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
     */
    if (time_adj < 0)
	time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
    else
	time_adj += (time_adj >> 2) + (time_adj >> 5);
#endif
}
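
/*
 * Net effect of the above: time_adj ends up holding the per-tick phase
 * correction, expressed in microseconds scaled up by 2^SHIFT_SCALE (the
 * same units as time_phase).  update_wall_time_one_tick() below
 * accumulates it into time_phase every tick and transfers whole
 * microseconds into xtime.tv_usec, which is roughly how adjtimex()/NTP
 * slews the clock without ever stepping it.
 */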

/* in the NTP reference this is called "hardclock()" */
static void update_wall_time_one_tick(void)
{
	if ( (time_adjust_step = time_adjust) != 0 ) {
	    /* We are doing an adjtime thing.
	     *
	     * Prepare time_adjust_step to be within bounds.
	     * Note that a positive time_adjust means we want the clock
	     * to run faster.
	     *
	     * Limit the amount of the step to be in the range
	     * -tickadj .. +tickadj
	     */
	     if (time_adjust > tickadj)
		time_adjust_step = tickadj;
	     else if (time_adjust < -tickadj)
		time_adjust_step = -tickadj;

	    /* Reduce the amount of time left by this step */
	    time_adjust -= time_adjust_step;
	}
	xtime.tv_usec += tick + time_adjust_step;
	/*
	 * Advance the phase; once it accumulates to a whole microsecond,
	 * move that microsecond into the tick as well.
	 */
	time_phase += time_adj;
	if (time_phase <= -FINEUSEC) {
		long ltemp = -time_phase >> SHIFT_SCALE;
		time_phase += ltemp << SHIFT_SCALE;
		xtime.tv_usec -= ltemp;
	}
	else if (time_phase >= FINEUSEC) {
		long ltemp = time_phase >> SHIFT_SCALE;
		time_phase -= ltemp << SHIFT_SCALE;
		xtime.tv_usec += ltemp;
	}
}

/*
 * Using a loop looks inefficient, but "ticks" is
 * usually just one (we shouldn't be losing ticks;
 * we're doing it this way mainly for interrupt
 * latency reasons, not because we think we'll
 * have lots of lost timer ticks).
 */
static void update_wall_time(unsigned long ticks)
{
	do {
		ticks--;
		update_wall_time_one_tick();
	} while (ticks);

	if (xtime.tv_usec >= 1000000) {
	    xtime.tv_usec -= 1000000;
	    xtime.tv_sec++;
	    second_overflow();
	}
}

static inline void do_process_times(struct task_struct *p,
	unsigned long user, unsigned long system)
{
	unsigned long psecs;

	psecs = (p->times.tms_utime += user);
	psecs += (p->times.tms_stime += system);
	if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) {
		/* Send SIGXCPU every second.. */
		if (!(psecs % HZ))
			send_sig(SIGXCPU, p, 1);
		/* and SIGKILL when we go over max.. */
		if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max)
			send_sig(SIGKILL, p, 1);
	}
}
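
/*
 * Units in do_process_times(): tms_utime/tms_stime, and hence psecs,
 * are in ticks, while the RLIMIT_CPU limits are in seconds, hence the
 * divisions by HZ.  Once the soft limit is exceeded, SIGXCPU is
 * delivered roughly once per further second of CPU used; crossing the
 * hard limit gets SIGKILL.
 */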

static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
{
	unsigned long it_virt = p->it_virt_value;

	if (it_virt) {
		it_virt -= ticks;
		if (!it_virt) {
			it_virt = p->it_virt_incr;
			send_sig(SIGVTALRM, p, 1);
		}
		p->it_virt_value = it_virt;
	}
}

static inline void do_it_prof(struct task_struct *p)
{
	unsigned long it_prof = p->it_prof_value;

	if (it_prof) {
		if (--it_prof == 0) {
			it_prof = p->it_prof_incr;
			send_sig(SIGPROF, p, 1);
		}
		p->it_prof_value = it_prof;
	}
}

void update_one_process(struct task_struct *p, unsigned long user,
			unsigned long system, int cpu)
{
	p->per_cpu_utime[cpu] += user;
	p->per_cpu_stime[cpu] += system;
	do_process_times(p, user, system);
	do_it_virt(p, user);
	do_it_prof(p);
}

/*
 * Called from the timer interrupt handler to charge one tick to the current
 * process.  user_tick is 1 if the tick is user time, 0 for system.
 */
void update_process_times(int user_tick)
{
	struct task_struct *p = current;
	int cpu = smp_processor_id(), system = user_tick ^ 1;

	update_one_process(p, user_tick, system, cpu);
	if (p->pid) {
		if (--p->counter <= 0) {
			p->counter = 0;
			/*
			 * SCHED_FIFO is priority preemption, so this is
			 * not the place to decide whether to reschedule a
			 * SCHED_FIFO task or not - Bhavesh Davda
			 */
			if (p->policy != SCHED_FIFO) {
				p->need_resched = 1;
			}
		}
		if (p->nice > 0)
			kstat.per_cpu_nice[cpu] += user_tick;
		else
			kstat.per_cpu_user[cpu] += user_tick;
		kstat.per_cpu_system[cpu] += system;
	} else if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
		kstat.per_cpu_system[cpu] += system;
}
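
/*
 * Accounting note: each tick is charged either entirely to user or
 * entirely to system time.  pid 0 is the idle task; its ticks only
 * count as system time when the tick interrupted a bottom half or a
 * nested interrupt, otherwise they are simply idle time.  Exhausting
 * p->counter marks the task for rescheduling, except for SCHED_FIFO
 * tasks, which only yield to higher-priority tasks.
 */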

/*
 * Nr of active tasks - counted in fixed-point numbers
 */
static unsigned long count_active_tasks(void)
{
	struct task_struct *p;
	unsigned long nr = 0;

	read_lock(&tasklist_lock);
	for_each_task(p) {
		if ((p->state == TASK_RUNNING ||
		     (p->state & TASK_UNINTERRUPTIBLE)))
			nr += FIXED_1;
	}
	read_unlock(&tasklist_lock);
	return nr;
}

/*
 * Hmm.. Changed this, as the GNU make sources (load.c) seem to
 * imply that avenrun[] is the standard name for this kind of thing.
 * Nothing else seems to be standardized: the fractional size etc
 * all seem to differ on different machines.
 */
unsigned long avenrun[3];

static inline void calc_load(unsigned long ticks)
{
	unsigned long active_tasks; /* fixed-point */
	static int count = LOAD_FREQ;

	count -= ticks;
	if (count < 0) {
		count += LOAD_FREQ;
		active_tasks = count_active_tasks();
		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
	}
}
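
/*
 * How the load average works (the macros live in <linux/sched.h>): the
 * averages are fixed-point numbers with FIXED_1 representing 1.0, and
 * every LOAD_FREQ (five seconds' worth of ticks) each entry is decayed
 * exponentially towards the current count of runnable plus
 * uninterruptible tasks, roughly
 *
 *	avenrun[i] = (avenrun[i] * EXP_i + active * (FIXED_1 - EXP_i))
 *			>> FSHIFT
 *
 * where EXP_1/EXP_5/EXP_15 are e^(-5s/1min), e^(-5s/5min) and
 * e^(-5s/15min) in the same fixed-point format.  /proc/loadavg prints
 * these values with the fixed point converted back to decimal.
 */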

/* jiffies at the most recent update of wall time */
unsigned long wall_jiffies;

/*
 * This rwlock protects us from SMP races while playing with xtime. -arca
 */
rwlock_t xtime_lock = RW_LOCK_UNLOCKED;

static inline void update_times(void)
{
	unsigned long ticks;

	/*
	 * update_times() is run from the raw timer_bh handler so we
	 * just know that the irqs are locally enabled and so we don't
	 * need to save/restore the flags of the local CPU here. -arca
	 */
	write_lock_irq(&xtime_lock);
	vxtime_lock();

	ticks = jiffies - wall_jiffies;
	if (ticks) {
		wall_jiffies += ticks;
		update_wall_time(ticks);
	}
	vxtime_unlock();
	write_unlock_irq(&xtime_lock);
	calc_load(ticks);
}

void timer_bh(void)
{
	update_times();
	run_timer_list();
}

void do_timer(struct pt_regs *regs)
{
	(*(unsigned long *)&jiffies)++;
#ifndef CONFIG_SMP
	/* SMP process accounting uses the local APIC timer */

	update_process_times(user_mode(regs));
#endif
	mark_bh(TIMER_BH);
	if (TQ_ACTIVE(tq_timer))
		mark_bh(TQUEUE_BH);
}
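
/*
 * do_timer() is what the architecture's timer interrupt calls once per
 * tick: it bumps jiffies (the cast strips the volatile qualifier so the
 * increment can be done as an ordinary memory operation), charges the
 * tick to the current task on uniprocessor kernels, and marks TIMER_BH
 * so that timer_bh() above (wall-clock update plus expired-timer
 * dispatch) runs at bottom-half time, outside the interrupt handler
 * proper.
 */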

#if !defined(__alpha__) && !defined(__ia64__)

/*
 * For backwards compatibility?  This can be done in libc so Alpha
 * and all newer ports shouldn't need it.
 */
asmlinkage unsigned long sys_alarm(unsigned int seconds)
{
	struct itimerval it_new, it_old;
	unsigned int oldalarm;

	it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
	it_new.it_value.tv_sec = seconds;
	it_new.it_value.tv_usec = 0;
	do_setitimer(ITIMER_REAL, &it_new, &it_old);
	oldalarm = it_old.it_value.tv_sec;
	/* We can't return 0 if we have an alarm pending.. */
	/* And we'd better return too much than too little anyway */
	if (it_old.it_value.tv_usec)
		oldalarm++;
	return oldalarm;
}

#endif

#ifndef __alpha__

/*
 * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
 * should be moved into arch/i386 instead?
 */

/**
 * sys_getpid - return the thread group id of the current process
 *
 * Note, despite the name, this returns the tgid not the pid.  The tgid and
 * the pid are identical unless CLONE_THREAD was specified on clone() in
 * which case the tgid is the same in all threads of the same group.
 *
 * This is SMP safe as current->tgid does not change.
 */
asmlinkage long sys_getpid(void)
{
	return current->tgid;
}

/*
 * This is not strictly SMP safe: p_opptr could change
 * from under us. However, rather than getting any lock
 * we can use an optimistic algorithm: get the parent
 * pid, and go back and check that the parent is still
 * the same. If it has changed (which is extremely unlikely
 * indeed), we just try again..
 *
 * NOTE! This depends on the fact that even if we _do_
 * get an old value of "parent", we can happily dereference
 * the pointer: we just can't necessarily trust the result
 * until we know that the parent pointer is valid.
 *
 * The "mb()" macro is a memory barrier - a synchronizing
 * event. It also makes sure that gcc doesn't optimize
 * away the necessary memory references.. The barrier doesn't
 * have to have all that strong semantics: on x86 we don't
 * really require a synchronizing instruction, for example.
 * The barrier is more important for code generation than
 * for any real memory ordering semantics (even if there is
 * a small window for a race, using the old pointer is
 * harmless for a while).
 */
asmlinkage long sys_getppid(void)
{
	int pid;
	struct task_struct * me = current;
	struct task_struct * parent;

	parent = me->p_opptr;
	for (;;) {
		pid = parent->pid;
#if CONFIG_SMP
{
		struct task_struct *old = parent;
		mb();
		parent = me->p_opptr;
		if (old != parent)
			continue;
}
#endif
		break;
	}
	return pid;
}

asmlinkage long sys_getuid(void)
{
	/* Only we change this, so it is SMP safe */
	return current->uid;
}

asmlinkage long sys_geteuid(void)
{
	/* Only we change this, so it is SMP safe */
	return current->euid;
}

asmlinkage long sys_getgid(void)
{
	/* Only we change this, so it is SMP safe */
	return current->gid;
}

asmlinkage long sys_getegid(void)
{
	/* Only we change this, so it is SMP safe */
	return current->egid;
}

#endif

/* Thread ID - the internal kernel "pid" */
asmlinkage long sys_gettid(void)
{
	return current->pid;
}

asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
{
	struct timespec t;
	unsigned long expire;

	if (copy_from_user(&t, rqtp, sizeof(struct timespec)))
		return -EFAULT;

	if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
		return -EINVAL;

	if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
	    current->policy != SCHED_OTHER)
	{
		/*
		 * Short delay requests up to 2 ms will be handled with
		 * high precision by a busy wait for all real-time processes.
		 *
		 * It's important on SMP not to do this while holding locks.
		 */
		udelay((t.tv_nsec + 999) / 1000);
		return 0;
	}

	expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);

	current->state = TASK_INTERRUPTIBLE;
	expire = schedule_timeout(expire);

	if (expire) {
		if (rmtp) {
			jiffies_to_timespec(expire, &t);
			if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
				return -EFAULT;
		}
		return -EINTR;
	}
	return 0;
}
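
/*
 * Note on the expiry computation in sys_nanosleep(): one extra jiffy is
 * added for any non-zero request (the "(t.tv_sec || t.tv_nsec)" term)
 * so that, given jiffy granularity, the task sleeps for at least the
 * requested time rather than up to one tick less.  If the sleep is
 * interrupted by a signal, the remaining time is converted back to a
 * timespec and reported through rmtp before returning -EINTR.
 */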