/* sun4m_smp.c: Sparc SUN4M SMP support.
 *
 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 */

#include <asm/head.h>

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/threads.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/mm.h>

#include <asm/ptrace.h>
#include <asm/atomic.h>

#include <asm/delay.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/hardirq.h>
#include <asm/softirq.h>

#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>

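/* Soft interrupt levels used as inter-processor interrupts (IPIs)
 * on sun4m; they are raised on a target CPU with set_cpu_int() below.
 */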
#define IRQ_RESCHEDULE		13
#define IRQ_STOP_CPU		14
#define IRQ_CROSS_CALL		15

extern ctxd_t *srmmu_ctx_table_phys;
extern int linux_num_cpus;

extern void calibrate_delay(void);

extern struct task_struct *current_set[NR_CPUS];
extern volatile int smp_processors_ready;
extern unsigned long cpu_present_map;
extern int smp_num_cpus;
extern int smp_threads_ready;
extern unsigned char mid_xlate[NR_CPUS];
extern volatile unsigned long cpu_callin_map[NR_CPUS];
extern unsigned long smp_proc_in_lock[NR_CPUS];
extern struct cpuinfo_sparc cpu_data[NR_CPUS];
extern unsigned long cpu_offset[NR_CPUS];
extern unsigned char boot_cpu_id;
extern int smp_activated;
extern volatile int __cpu_number_map[NR_CPUS];
extern volatile int __cpu_logical_map[NR_CPUS];
extern volatile unsigned long ipi_count;
extern volatile int smp_process_available;
extern volatile int smp_commenced;
extern int __smp4m_processor_id(void);

extern unsigned long totalram_pages;

/*#define SMP_DEBUG*/

#ifdef SMP_DEBUG
#define SMP_PRINTK(x)	printk x
#else
#define SMP_PRINTK(x)
#endif

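/* Atomically exchange *ptr with val using the SPARC V8 "swap"
 * instruction; used for the callin handshake below so the store
 * is a single atomic operation.
 */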
static inline unsigned long swap(volatile unsigned long *ptr, unsigned long val)
{
	__asm__ __volatile__("swap [%1], %0\n\t" :
			     "=&r" (val), "=&r" (ptr) :
			     "0" (val), "1" (ptr));
	return val;
}

static void smp_setup_percpu_timer(void);
extern void cpu_probe(void);

void __init smp4m_callin(void)
{
	int cpuid = hard_smp_processor_id();

	local_flush_cache_all();
	local_flush_tlb_all();

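	/* Route undirected interrupts back at the boot CPU while this
	 * processor is coming up.
	 */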
	set_irq_udt(mid_xlate[boot_cpu_id]);

	/* Get our local ticker going. */
	smp_setup_percpu_timer();

	calibrate_delay();
	smp_store_cpu_info(cpuid);

	local_flush_cache_all();
	local_flush_tlb_all();

	/*
	 * Unblock the master CPU _only_ when the scheduler state of all
	 * secondary CPUs is up-to-date, so that after SMP initialization
	 * the master can safely enter the scheduler code.
	 */
	init_idle();

	/* Allow master to continue. */
	swap((unsigned long *)&cpu_callin_map[cpuid], 1);

	local_flush_cache_all();
	local_flush_tlb_all();

	cpu_probe();

	/* Fix idle thread fields. */
	__asm__ __volatile__("ld [%0], %%g6\n\t"
			     : : "r" (&current_set[cpuid])
			     : "memory" /* paranoid */);

	/* Attach to the address space of init_task. */
	atomic_inc(&init_mm.mm_count);
	current->active_mm = &init_mm;

	while(!smp_commenced)
		barrier();

	local_flush_cache_all();
	local_flush_tlb_all();

	__sti();
}

extern int cpu_idle(void *unused);
extern void init_IRQ(void);
extern void cpu_panic(void);
extern int start_secondary(void *unused);

/*
 *	Cycle through the processors asking the PROM to start each one.
 */

extern struct prom_cpuinfo linux_cpus[NR_CPUS];
extern struct linux_prom_registers smp_penguin_ctable;
extern unsigned long trapbase_cpu1[];
extern unsigned long trapbase_cpu2[];
extern unsigned long trapbase_cpu3[];

void __init smp4m_boot_cpus(void)
{
	int cpucount = 0;
	int i = 0;
	int first, prev;

	printk("Entering SMP Mode...\n");

	__sti();
	cpu_present_map = 0;

	for(i = 0; i < linux_num_cpus; i++)
		cpu_present_map |= (1 << i);

	for(i = 0; i < NR_CPUS; i++) {
		cpu_offset[i] = (char *)&cpu_data[i] - (char *)&cpu_data;
		__cpu_number_map[i] = -1;
		__cpu_logical_map[i] = -1;
	}

	mid_xlate[boot_cpu_id] = (linux_cpus[boot_cpu_id].mid & ~8);
	__cpu_number_map[boot_cpu_id] = 0;
	__cpu_logical_map[0] = boot_cpu_id;
	current->processor = boot_cpu_id;

	smp_store_cpu_info(boot_cpu_id);
	set_irq_udt(mid_xlate[boot_cpu_id]);
	smp_setup_percpu_timer();
	init_idle();
	local_flush_cache_all();
	if(linux_num_cpus == 1)
		return;  /* Not an MP box. */
	for(i = 0; i < NR_CPUS; i++) {
		if(i == boot_cpu_id)
			continue;

		if(cpu_present_map & (1 << i)) {
			extern unsigned long sun4m_cpu_startup;
			unsigned long *entry = &sun4m_cpu_startup;
			struct task_struct *p;
			int timeout;

			/* Cook up an idler for this guy. */
			kernel_thread(start_secondary, NULL, CLONE_PID);

			cpucount++;

			/* The new thread is the youngest task on the list. */
			p = init_task.prev_task;
			init_tasks[i] = p;

			p->processor = i;
			p->cpus_runnable = 1 << i; /* we schedule the first task manually */

			current_set[i] = p;

			del_from_runqueue(p);
			unhash_process(p);

			/* See trampoline.S for details... */
			entry += ((i - 1) * 3);
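			/* Each secondary CPU has a three-word startup
			 * slot in the trampoline, indexed by (cpu - 1);
			 * the boot CPU needs none.
			 */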

			/*
			 * Initialize the contexts table.
			 * Since the call to prom_startcpu() trashes the structure,
			 * we need to re-initialize it for each cpu.
			 */
			smp_penguin_ctable.which_io = 0;
			smp_penguin_ctable.phys_addr = (unsigned int) srmmu_ctx_table_phys;
			smp_penguin_ctable.reg_size = 0;

			/* whirrr, whirrr, whirrrrrrrrr... */
			printk("Starting CPU %d at %p\n", i, entry);
			mid_xlate[i] = (linux_cpus[i].mid & ~8);
			local_flush_cache_all();
			prom_startcpu(linux_cpus[i].prom_node,
				      &smp_penguin_ctable, 0, (char *)entry);

			/* wheee... it's going... give it up to two
			 * seconds (10000 * 200us) to call in.
			 */
			for(timeout = 0; timeout < 10000; timeout++) {
				if(cpu_callin_map[i])
					break;
				udelay(200);
			}
			if(cpu_callin_map[i]) {
				/* Another "Red Snapper". */
				__cpu_number_map[i] = i;
				__cpu_logical_map[i] = i;
			} else {
				cpucount--;
				printk("Processor %d is stuck.\n", i);
			}
		}
		if(!(cpu_callin_map[i])) {
			cpu_present_map &= ~(1 << i);
			__cpu_number_map[i] = -1;
		}
	}
	local_flush_cache_all();
	if(cpucount == 0) {
		printk("Error: only one Processor found.\n");
		cpu_present_map = (1 << smp_processor_id());
	} else {
		unsigned long bogosum = 0;
		for(i = 0; i < NR_CPUS; i++) {
			if(cpu_present_map & (1 << i))
				bogosum += cpu_data[i].udelay_val;
		}
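		/* udelay_val is in loops per jiffy, so BogoMIPS =
		 * udelay_val * HZ / 500000; the two divides below print
		 * the integer part and two decimal places.
		 */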
		printk("Total of %d Processors activated (%lu.%02lu BogoMIPS).\n",
		       cpucount + 1,
		       bogosum/(500000/HZ),
		       (bogosum/(5000/HZ))%100);
		smp_activated = 1;
		smp_num_cpus = cpucount + 1;
	}

	/* Setup CPU list for IRQ distribution scheme. */
	first = prev = -1;
	for(i = 0; i < NR_CPUS; i++) {
		if(cpu_present_map & (1 << i)) {
			if(first == -1)
				first = i;
			if(prev != -1)
				cpu_data[prev].next = i;
			cpu_data[i].mid = mid_xlate[i];
			prev = i;
		}
	}
	cpu_data[prev].next = first;
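	/* The .next links now form a ring over all present CPUs;
	 * smp4m_irq_rotate() walks it to hand each IRQ to the next
	 * processor in turn.
	 */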

	/* Free unneeded trap tables. */
	if (!(cpu_present_map & (1 << 1))) {
		ClearPageReserved(virt_to_page(trapbase_cpu1));
		set_page_count(virt_to_page(trapbase_cpu1), 1);
		free_page((unsigned long)trapbase_cpu1);
		totalram_pages++;
		num_physpages++;
	}
	if (!(cpu_present_map & (1 << 2))) {
		ClearPageReserved(virt_to_page(trapbase_cpu2));
		set_page_count(virt_to_page(trapbase_cpu2), 1);
		free_page((unsigned long)trapbase_cpu2);
		totalram_pages++;
		num_physpages++;
	}
	if (!(cpu_present_map & (1 << 3))) {
		ClearPageReserved(virt_to_page(trapbase_cpu3));
		set_page_count(virt_to_page(trapbase_cpu3), 1);
		free_page((unsigned long)trapbase_cpu3);
		totalram_pages++;
		num_physpages++;
	}

	/* Ok, they are spinning and ready to go. */
	smp_processors_ready = 1;
}

/* At each hardware IRQ, we get this called to forward IRQ reception
 * to the next processor.  The caller must disable the IRQ level being
 * serviced globally so that no double interrupts are received.
 */
void smp4m_irq_rotate(int cpu)
{
	if(smp_processors_ready)
		set_irq_udt(cpu_data[cpu_data[cpu].next].mid);
}

/* Cross calls, in order to work efficiently and atomically, do all
 * the message passing work themselves; only stop-CPU and reschedule
 * messages come through here.
 */
void smp4m_message_pass(int target, int msg, unsigned long data, int wait)
{
	static unsigned long smp_cpu_in_msg[NR_CPUS];
	unsigned long mask;
	int me = smp_processor_id();
	int irq, i;

	if(msg == MSG_RESCHEDULE) {
		irq = IRQ_RESCHEDULE;

		/* A reschedule IPI is already being sent from this CPU;
		 * don't bother queueing another.
		 */
		if(smp_cpu_in_msg[me])
			return;
	} else if(msg == MSG_STOP_CPU) {
		irq = IRQ_STOP_CPU;
	} else {
		goto barf;
	}

	smp_cpu_in_msg[me]++;
	if(target == MSG_ALL_BUT_SELF || target == MSG_ALL) {
		mask = cpu_present_map;
		if(target == MSG_ALL_BUT_SELF)
			mask &= ~(1 << me);
		/* sun4m boxes have at most four processors. */
		for(i = 0; i < 4; i++) {
			if(mask & (1 << i))
				set_cpu_int(mid_xlate[i], irq);
		}
	} else {
		set_cpu_int(mid_xlate[target], irq);
	}
	smp_cpu_in_msg[me]--;

	return;
barf:
	printk("Yeeee, trying to send SMP msg(%d) on cpu %d\n", msg, me);
	panic("Bogon SMP message pass.");
}

static struct smp_funcall {
	smpfunc_t func;
	unsigned long arg1;
	unsigned long arg2;
	unsigned long arg3;
	unsigned long arg4;
	unsigned long arg5;
	unsigned long processors_in[NR_CPUS];  /* Set when ipi entered. */
	unsigned long processors_out[NR_CPUS]; /* Set when ipi exited. */
} ccall_info;
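/* Cross-call handshake: the initiator fills in ccall_info, sends
 * IRQ_CROSS_CALL to every other present CPU, then spins first until
 * all targets have set processors_in (entered the handler) and then
 * until they have set processors_out (finished running func).
 */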

static spinlock_t cross_call_lock = SPIN_LOCK_UNLOCKED;

/* Cross calls must be serialized, at least currently. */
void smp4m_cross_call(smpfunc_t func, unsigned long arg1, unsigned long arg2,
		    unsigned long arg3, unsigned long arg4, unsigned long arg5)
{
	if(smp_processors_ready) {
		register int ncpus = smp_num_cpus;
		unsigned long flags;

		spin_lock_irqsave(&cross_call_lock, flags);

		/* Init function glue. */
		ccall_info.func = func;
		ccall_info.arg1 = arg1;
		ccall_info.arg2 = arg2;
		ccall_info.arg3 = arg3;
		ccall_info.arg4 = arg4;
		ccall_info.arg5 = arg5;

		/* Init receive/complete mapping, plus fire the IPIs off. */
		{
			register unsigned long mask;
			register int i;

			mask = (cpu_present_map & ~(1 << smp_processor_id()));
			for(i = 0; i < ncpus; i++) {
				if(mask & (1 << i)) {
					ccall_info.processors_in[i] = 0;
					ccall_info.processors_out[i] = 0;
					set_cpu_int(mid_xlate[i], IRQ_CROSS_CALL);
				} else {
					/* Mark non-targets done up front. */
					ccall_info.processors_in[i] = 1;
					ccall_info.processors_out[i] = 1;
				}
			}
		}

		{
			register int i;

			/* First wait for every target to enter the handler... */
			i = 0;
			do {
				while(!ccall_info.processors_in[i])
					barrier();
			} while(++i < ncpus);

			/* ...then for every target to finish running func. */
			i = 0;
			do {
				while(!ccall_info.processors_out[i])
					barrier();
			} while(++i < ncpus);
		}

		spin_unlock_irqrestore(&cross_call_lock, flags);
	}
}

/* Running cross calls. */
void smp4m_cross_call_irq(void)
{
	int i = smp_processor_id();

	ccall_info.processors_in[i] = 1;
	ccall_info.func(ccall_info.arg1, ccall_info.arg2, ccall_info.arg3,
			ccall_info.arg4, ccall_info.arg5);
	ccall_info.processors_out[i] = 1;
}

extern unsigned int prof_multiplier[NR_CPUS];
extern unsigned int prof_counter[NR_CPUS];

extern void sparc_do_profile(unsigned long pc, unsigned long o7);

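/* Level-14 per-CPU timer tick: profile the interrupted PC when we are
 * in the kernel, and run the normal timer bookkeeping once every
 * prof_multiplier ticks.
 */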
void smp4m_percpu_timer_interrupt(struct pt_regs *regs)
{
	int cpu = smp_processor_id();

	clear_profile_irq(mid_xlate[cpu]);

	if(!user_mode(regs))
		sparc_do_profile(regs->pc, regs->u_regs[UREG_RETPC]);

	if(!--prof_counter[cpu]) {
		int user = user_mode(regs);

		irq_enter(cpu, 0);
		update_process_times(user);
		irq_exit(cpu, 0);

		prof_counter[cpu] = prof_multiplier[cpu];
	}
}

extern unsigned int lvl14_resolution;

static void __init smp_setup_percpu_timer(void)
{
	int cpu = smp_processor_id();

	prof_counter[cpu] = prof_multiplier[cpu] = 1;
	load_profile_irq(mid_xlate[cpu], lvl14_resolution);

	if(cpu == boot_cpu_id)
		enable_pil_irq(14);
}

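/* BTFIXUP "blackboxes": at boot these routines overwrite a template
 * instruction sequence in place.  The destination register field is
 * taken from bits 29:25 of the first template instruction; shifting
 * it right by 11 re-positions the same register number into the rs1
 * field (bits 18:14) of the generated instructions.
 */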
void __init smp4m_blackbox_id(unsigned *addr)
{
	int rd = *addr & 0x3e000000;
	int rs1 = rd >> 11;

	addr[0] = 0x81580000 | rd;		/* rd %tbr, reg */
	addr[1] = 0x8130200c | rd | rs1;	/* srl reg, 0xc, reg */
	addr[2] = 0x80082003 | rd | rs1;	/* and reg, 3, reg */
}

void __init smp4m_blackbox_current(unsigned *addr)
{
	int rd = *addr & 0x3e000000;
	int rs1 = rd >> 11;

	addr[0] = 0x81580000 | rd;		/* rd %tbr, reg */
	addr[2] = 0x8130200a | rd | rs1;	/* srl reg, 0xa, reg */
	addr[4] = 0x8008200c | rd | rs1;	/* and reg, 0xc, reg */
}

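/* Wire the generic SMP entry points to the sun4m implementations;
 * BTFIXUP patches the call sites once at boot, so there is no
 * indirection left at run time.
 */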
void __init sun4m_init_smp(void)
{
	BTFIXUPSET_BLACKBOX(smp_processor_id, smp4m_blackbox_id);
	BTFIXUPSET_BLACKBOX(load_current, smp4m_blackbox_current);
	BTFIXUPSET_CALL(smp_cross_call, smp4m_cross_call, BTFIXUPCALL_NORM);
	BTFIXUPSET_CALL(smp_message_pass, smp4m_message_pass, BTFIXUPCALL_NORM);
	BTFIXUPSET_CALL(__smp_processor_id, __smp4m_processor_id, BTFIXUPCALL_NORM);
}