1 /*
2  * Smp support for ppc.
3  *
4  * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
5  * deal of code from the sparc and intel versions.
6  *
7  * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
8  *
9  */
10 
11 #include <linux/config.h>
12 #include <linux/kernel.h>
13 #include <linux/sched.h>
14 #include <linux/smp.h>
15 #include <linux/smp_lock.h>
16 #include <linux/interrupt.h>
17 #include <linux/kernel_stat.h>
18 #include <linux/delay.h>
19 #define __KERNEL_SYSCALLS__
20 #include <linux/unistd.h>
21 #include <linux/init.h>
22 #include <linux/spinlock.h>
23 #include <linux/cache.h>
24 
25 #include <asm/ptrace.h>
26 #include <asm/atomic.h>
27 #include <asm/irq.h>
28 #include <asm/page.h>
29 #include <asm/pgtable.h>
30 #include <asm/hardirq.h>
31 #include <asm/softirq.h>
32 #include <asm/io.h>
33 #include <asm/prom.h>
34 #include <asm/smp.h>
35 #include <asm/residual.h>
36 #include <asm/time.h>
37 
38 int smp_threads_ready;
39 volatile int smp_commenced;
40 int smp_num_cpus = 1;
41 int smp_tb_synchronized;
42 struct cpuinfo_PPC cpu_data[NR_CPUS];
43 struct klock_info_struct klock_info = { KLOCK_CLEAR, 0 };
44 atomic_t ipi_recv;
45 atomic_t ipi_sent;
46 spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
47 unsigned int prof_multiplier[NR_CPUS];
48 unsigned int prof_counter[NR_CPUS];
49 cycles_t cacheflush_time;
50 static int max_cpus __initdata = NR_CPUS;
51 unsigned long cpu_online_map;
52 int smp_hw_index[NR_CPUS];
53 static struct smp_ops_t *smp_ops;
54 
55 /* all cpu mappings are 1-1 -- Cort */
56 volatile unsigned long cpu_callin_map[NR_CPUS];
57 
58 #define TB_SYNC_PASSES 4
59 volatile unsigned long __initdata tb_sync_flag = 0;
60 volatile unsigned long __initdata tb_offset = 0;
61 
/* Forward declarations for routines defined further down or elsewhere. */
int start_secondary(void *unused);
extern int cpu_idle(void *unused);
void smp_call_function_interrupt(void);

/* Low level assembly function used to back up CPU 0 state */
extern void __save_cpu_setup(void);
68 
/*
 * IPI message numbers.
 *
 * OpenPIC has only 4 IPIs, so we use slightly different message numbers.
 * These must match openpic_request_IPIs in open_pic.c, or what shows up
 * in /proc/interrupts will be wrong!!! --Troy
 */
#define PPC_MSG_CALL_FUNCTION	0
#define PPC_MSG_RESCHEDULE	1
#define PPC_MSG_INVALIDATE_TLB	2
#define PPC_MSG_XMON_BREAK	3
77 
78 static inline void
smp_message_pass(int target,int msg,unsigned long data,int wait)79 smp_message_pass(int target, int msg, unsigned long data, int wait)
80 {
81 	if (smp_ops){
82 		atomic_inc(&ipi_sent);
83 		smp_ops->message_pass(target,msg,data,wait);
84 	}
85 }
86 
87 /*
88  * Common functions
89  */
smp_local_timer_interrupt(struct pt_regs * regs)90 void smp_local_timer_interrupt(struct pt_regs * regs)
91 {
92 	int cpu = smp_processor_id();
93 
94 	if (!--prof_counter[cpu]) {
95 		update_process_times(user_mode(regs));
96 		prof_counter[cpu]=prof_multiplier[cpu];
97 	}
98 }
99 
smp_message_recv(int msg,struct pt_regs * regs)100 void smp_message_recv(int msg, struct pt_regs *regs)
101 {
102 	atomic_inc(&ipi_recv);
103 
104 	switch( msg ) {
105 	case PPC_MSG_CALL_FUNCTION:
106 		smp_call_function_interrupt();
107 		break;
108 	case PPC_MSG_RESCHEDULE:
109 		current->need_resched = 1;
110 		break;
111 	case PPC_MSG_INVALIDATE_TLB:
112 		_tlbia();
113 		break;
114 #ifdef CONFIG_XMON
115 	case PPC_MSG_XMON_BREAK:
116 		xmon(regs);
117 		break;
118 #endif /* CONFIG_XMON */
119 	default:
120 		printk("SMP %d: smp_message_recv(): unknown msg %d\n",
121 		       smp_processor_id(), msg);
122 		break;
123 	}
124 }
125 
#ifdef CONFIG_750_SMP
/*
 * 750's don't broadcast tlb invalidates so we have to emulate that
 * behavior by asking every other cpu to flush its TLB.
 *   -- Cort
 *
 * The `cpu' argument is not used; the message always goes to all cpus
 * but the caller, and only when running on a PVR version 8 part.
 */
void smp_ppc750_send_tlb_invalidate(int cpu)
{
	if (PVR_VER(mfspr(PVR)) != 8)
		return;

	smp_message_pass(MSG_ALL_BUT_SELF, PPC_MSG_INVALIDATE_TLB, 0, 0);
}
#endif /* CONFIG_750_SMP */
138 
smp_send_reschedule(int cpu)139 void smp_send_reschedule(int cpu)
140 {
141 	/*
142 	 * This is only used if `cpu' is running an idle task,
143 	 * so it will reschedule itself anyway...
144 	 *
145 	 * This isn't the case anymore since the other CPU could be
146 	 * sleeping and won't reschedule until the next interrupt (such
147 	 * as the timer).
148 	 *  -- Cort
149 	 */
150 	/* This is only used if `cpu' is running an idle task,
151 	   so it will reschedule itself anyway... */
152 	smp_message_pass(cpu, PPC_MSG_RESCHEDULE, 0, 0);
153 }
154 
#ifdef CONFIG_XMON
/* Send the xmon-break message to `cpu'; the receiver calls xmon(). */
void smp_send_xmon_break(int cpu)
{
	smp_message_pass(cpu, PPC_MSG_XMON_BREAK, 0, 0);
}
#endif /* CONFIG_XMON */
161 
/*
 * smp_call_function() callback used by smp_send_stop(): disable interrupts
 * on this cpu and spin forever.  `dummy' is ignored.
 */
static void stop_this_cpu(void *dummy)
{
	__cli();
	for (;;)
		;
}
168 
smp_send_stop(void)169 void smp_send_stop(void)
170 {
171 	smp_call_function(stop_this_cpu, NULL, 1, 0);
172 	smp_num_cpus = 1;
173 }
174 
175 /*
176  * Structure and data for smp_call_function(). This is designed to minimise
177  * static memory requirements. It also looks cleaner.
178  * Stolen from the i386 version.
179  */
180 static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
181 
182 static struct call_data_struct {
183 	void (*func) (void *info);
184 	void *info;
185 	atomic_t started;
186 	atomic_t finished;
187 	int wait;
188 } *call_data;
189 
190 /*
191  * this function sends a 'generic call function' IPI to all other CPUs
192  * in the system.
193  */
194 
smp_call_function(void (* func)(void * info),void * info,int nonatomic,int wait)195 int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
196 			int wait)
197 /*
198  * [SUMMARY] Run a function on all other CPUs.
199  * <func> The function to run. This must be fast and non-blocking.
200  * <info> An arbitrary pointer to pass to the function.
201  * <nonatomic> currently unused.
202  * <wait> If true, wait (atomically) until function has completed on other CPUs.
203  * [RETURNS] 0 on success, else a negative status code. Does not return until
204  * remote CPUs are nearly ready to execute <<func>> or are or have executed.
205  *
206  * You must not call this function with disabled interrupts or from a
207  * hardware interrupt handler, you may call it from a bottom half handler.
208  */
209 {
210 	struct call_data_struct data;
211 	int ret = -1, cpus = smp_num_cpus-1;
212 	int timeout;
213 
214 	if (!cpus)
215 		return 0;
216 
217 	data.func = func;
218 	data.info = info;
219 	atomic_set(&data.started, 0);
220 	data.wait = wait;
221 	if (wait)
222 		atomic_set(&data.finished, 0);
223 
224 	spin_lock_bh(&call_lock);
225 	call_data = &data;
226 	/* Send a message to all other CPUs and wait for them to respond */
227 	smp_message_pass(MSG_ALL_BUT_SELF, PPC_MSG_CALL_FUNCTION, 0, 0);
228 
229 	/* Wait for response */
230 	timeout = 1000000;
231 	while (atomic_read(&data.started) != cpus) {
232 		if (--timeout == 0) {
233 			printk("smp_call_function on cpu %d: other cpus not responding (%d)\n",
234 			       smp_processor_id(), atomic_read(&data.started));
235 			goto out;
236 		}
237 		barrier();
238 		udelay(1);
239 	}
240 
241 	if (wait) {
242 		timeout = 1000000;
243 		while (atomic_read(&data.finished) != cpus) {
244 			if (--timeout == 0) {
245 				printk("smp_call_function on cpu %d: other cpus not finishing (%d/%d)\n",
246 				       smp_processor_id(), atomic_read(&data.finished), atomic_read(&data.started));
247 				goto out;
248 			}
249 			barrier();
250 			udelay(1);
251 		}
252 	}
253 	ret = 0;
254 
255  out:
256 	spin_unlock_bh(&call_lock);
257 	return ret;
258 }
259 
smp_call_function_interrupt(void)260 void smp_call_function_interrupt(void)
261 {
262 	void (*func) (void *info) = call_data->func;
263 	void *info = call_data->info;
264 	int wait = call_data->wait;
265 
266 	/*
267 	 * Notify initiating CPU that I've grabbed the data and am
268 	 * about to execute the function
269 	 */
270 	atomic_inc(&call_data->started);
271 	/*
272 	 * At this point the info structure may be out of scope unless wait==1
273 	 */
274 	(*func)(info);
275 	if (wait)
276 		atomic_inc(&call_data->finished);
277 }
278 
smp_boot_cpus(void)279 void __init smp_boot_cpus(void)
280 {
281 	extern struct task_struct *current_set[NR_CPUS];
282 	int i, cpu_nr;
283 	struct task_struct *p;
284 
285 	printk("Entering SMP Mode...\n");
286 	smp_num_cpus = 1;
287         smp_store_cpu_info(0);
288 	cpu_online_map = 1UL;
289 
290 	/*
291 	 * assume for now that the first cpu booted is
292 	 * cpu 0, the master -- Cort
293 	 */
294 	cpu_callin_map[0] = 1;
295 	current->processor = 0;
296 
297 	init_idle();
298 
299 	for (i = 0; i < NR_CPUS; i++) {
300 		prof_counter[i] = 1;
301 		prof_multiplier[i] = 1;
302 	}
303 
304 	/*
305 	 * XXX very rough, assumes 20 bus cycles to read a cache line,
306 	 * timebase increments every 4 bus cycles, 32kB L1 data cache.
307 	 */
308 	cacheflush_time = 5 * 1024;
309 
310 	smp_ops = ppc_md.smp_ops;
311 	if (smp_ops == NULL) {
312 		printk("SMP not supported on this machine.\n");
313 		return;
314 	}
315 
316 #ifndef CONFIG_750_SMP
317 	/* check for 750's, they just don't work with linux SMP.
318 	 * If you actually have 750 SMP hardware and want to try to get
319 	 * it to work, send me a patch to make it work and
320 	 * I'll make CONFIG_750_SMP a config option.  -- Troy (hozer@drgw.net)
321 	 */
322 	if ( PVR_VER(mfspr(PVR)) == 8 ){
323 		printk("SMP not supported on 750 cpus. %s line %d\n",
324 				__FILE__, __LINE__);
325 		return;
326 	}
327 #endif
328 
329 
330 	/* Probe arch for CPUs */
331 	cpu_nr = smp_ops->probe();
332 
333 	/* Backup CPU 0 state */
334 	__save_cpu_setup();
335 
336 	/*
337 	 * only check for cpus we know exist.  We keep the callin map
338 	 * with cpus at the bottom -- Cort
339 	 */
340 	if (cpu_nr > max_cpus)
341 		cpu_nr = max_cpus;
342 	for (i = 1; i < cpu_nr; i++) {
343 		int c;
344 		struct pt_regs regs;
345 
346 		/* create a process for the processor */
347 		/* only regs.msr is actually used, and 0 is OK for it */
348 		memset(&regs, 0, sizeof(struct pt_regs));
349 		if (do_fork(CLONE_VM|CLONE_PID, 0, &regs, 0) < 0)
350 			panic("failed fork for CPU %d", i);
351 		p = init_task.prev_task;
352 		if (!p)
353 			panic("No idle task for CPU %d", i);
354 		del_from_runqueue(p);
355 		unhash_process(p);
356 		init_tasks[i] = p;
357 
358 		p->processor = i;
359 		p->cpus_runnable = 1 << i; /* we schedule the first task manually */
360 		current_set[i] = p;
361 
362 		/*
363 		 * There was a cache flush loop here to flush the cache
364 		 * to memory for the first 8MB of RAM.  The cache flush
365 		 * has been pushed into the kick_cpu function for those
366 		 * platforms that need it.
367 		 */
368 
369 		/* wake up cpus */
370 		smp_ops->kick_cpu(i);
371 
372 		/*
373 		 * wait to see if the cpu made a callin (is actually up).
374 		 * use this value that I found through experimentation.
375 		 * -- Cort
376 		 */
377 		for ( c = 10000; c && !cpu_callin_map[i] ; c-- )
378 			udelay(100);
379 
380 		if ( cpu_callin_map[i] )
381 		{
382 			char buf[32];
383 			sprintf(buf, "found cpu %d", i);
384 			if (ppc_md.progress) ppc_md.progress(buf, 0x350+i);
385 			printk("Processor %d found.\n", i);
386 			smp_num_cpus++;
387 		} else {
388 			char buf[32];
389 			sprintf(buf, "didn't find cpu %d", i);
390 			if (ppc_md.progress) ppc_md.progress(buf, 0x360+i);
391 			printk("Processor %d is stuck.\n", i);
392 		}
393 	}
394 
395 	/* Setup CPU 0 last (important) */
396 	smp_ops->setup_cpu(0);
397 
398 	if (smp_num_cpus < 2)
399 		smp_tb_synchronized = 1;
400 }
401 
smp_software_tb_sync(int cpu)402 void __init smp_software_tb_sync(int cpu)
403 {
404 #define PASSES 4	/* 4 passes.. */
405 	int pass;
406 	int i, j;
407 
408 	/* stop - start will be the number of timebase ticks it takes for cpu0
409 	 * to send a message to all others and the first reponse to show up.
410 	 *
411 	 * ASSUMPTION: this time is similiar for all cpus
412 	 * ASSUMPTION: the time to send a one-way message is ping/2
413 	 */
414 	register unsigned long start = 0;
415 	register unsigned long stop = 0;
416 	register unsigned long temp = 0;
417 
418 	set_tb(0, 0);
419 
420 	/* multiple passes to get in l1 cache.. */
421 	for (pass = 2; pass < 2+PASSES; pass++){
422 		if (cpu == 0){
423 			mb();
424 			for (i = j = 1; i < smp_num_cpus; i++, j++){
425 				/* skip stuck cpus */
426 				while (!cpu_callin_map[j])
427 					++j;
428 				while (cpu_callin_map[j] != pass)
429 					barrier();
430 			}
431 			mb();
432 			tb_sync_flag = pass;
433 			start = get_tbl();	/* start timing */
434 			while (tb_sync_flag)
435 				mb();
436 			stop = get_tbl();	/* end timing */
437 			/* theoretically, the divisor should be 2, but
438 			 * I get better results on my dual mtx. someone
439 			 * please report results on other smp machines..
440 			 */
441 			tb_offset = (stop-start)/4;
442 			mb();
443 			tb_sync_flag = pass;
444 			udelay(10);
445 			mb();
446 			tb_sync_flag = 0;
447 			mb();
448 			set_tb(0,0);
449 			mb();
450 		} else {
451 			cpu_callin_map[cpu] = pass;
452 			mb();
453 			while (!tb_sync_flag)
454 				mb();		/* wait for cpu0 */
455 			mb();
456 			tb_sync_flag = 0;	/* send response for timing */
457 			mb();
458 			while (!tb_sync_flag)
459 				mb();
460 			temp = tb_offset;	/* make sure offset is loaded */
461 			while (tb_sync_flag)
462 				mb();
463 			set_tb(0,temp);		/* now, set the timebase */
464 			mb();
465 		}
466 	}
467 	if (cpu == 0) {
468 		smp_tb_synchronized = 1;
469 		printk("smp_software_tb_sync: %d passes, final offset: %ld\n",
470 			PASSES, tb_offset);
471 	}
472 	/* so time.c doesn't get confused */
473 	set_dec(tb_ticks_per_jiffy);
474 	last_jiffy_stamp(cpu) = 0;
475 }
476 
smp_commence(void)477 void __init smp_commence(void)
478 {
479 	/*
480 	 *	Lets the callin's below out of their loop.
481 	 */
482 	if (ppc_md.progress) ppc_md.progress("smp_commence", 0x370);
483 	wmb();
484 	smp_commenced = 1;
485 
486 	/* if the smp_ops->setup_cpu function has not already synched the
487 	 * timebases with a nicer hardware-based method, do so now
488 	 *
489 	 * I am open to suggestions for improvements to this method
490 	 * -- Troy <hozer@drgw.net>
491 	 *
492 	 * NOTE: if you are debugging, set smp_tb_synchronized for now
493 	 * since if this code runs pretty early and needs all cpus that
494 	 * reported in in smp_callin_map to be working
495 	 *
496 	 * NOTE2: this code doesn't seem to work on > 2 cpus. -- paulus/BenH
497 	 */
498 	if (!smp_tb_synchronized && smp_num_cpus == 2) {
499 		unsigned long flags;
500 		__save_and_cli(flags);
501 		smp_software_tb_sync(0);
502 		__restore_flags(flags);
503 	}
504 }
505 
smp_callin(void)506 void __init smp_callin(void)
507 {
508 	int cpu = current->processor;
509 
510         smp_store_cpu_info(cpu);
511 	smp_ops->setup_cpu(cpu);
512 	set_dec(tb_ticks_per_jiffy);
513 	cpu_online_map |= 1UL << cpu;
514 	mb();
515 	cpu_callin_map[cpu] = 1;
516 
517 	while(!smp_commenced)
518 		barrier();
519 
520 	/* see smp_commence for more info */
521 	if (!smp_tb_synchronized && smp_num_cpus == 2) {
522 		smp_software_tb_sync(cpu);
523 	}
524 	__sti();
525 }
526 
527 /* intel needs this */
initialize_secondary(void)528 void __init initialize_secondary(void)
529 {
530 }
531 
532 /* Activate a secondary processor. */
start_secondary(void * unused)533 int __init start_secondary(void *unused)
534 {
535 	atomic_inc(&init_mm.mm_count);
536 	current->active_mm = &init_mm;
537 	smp_callin();
538 	return cpu_idle(NULL);
539 }
540 
smp_setup(char * str,int * ints)541 void __init smp_setup(char *str, int *ints)
542 {
543 }
544 
setup_profiling_timer(unsigned int multiplier)545 int __init setup_profiling_timer(unsigned int multiplier)
546 {
547 	return 0;
548 }
549 
smp_store_cpu_info(int id)550 void __init smp_store_cpu_info(int id)
551 {
552         struct cpuinfo_PPC *c = &cpu_data[id];
553 
554 	/* assume bogomips are same for everything */
555         c->loops_per_jiffy = loops_per_jiffy;
556         c->pvr = mfspr(PVR);
557 }
558 
maxcpus(char * str)559 static int __init maxcpus(char *str)
560 {
561 	get_option(&str, &max_cpus);
562 	return 1;
563 }
564 
565 __setup("maxcpus=", maxcpus);
566