1 /*
2 * Copyright (C) 1999,2001-2003 Silicon Graphics, Inc. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License
6 * as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public
20 * License along with this program; if not, write the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/NoticeExplan
31 */
32
33 #include <linux/config.h>
34 #include <linux/init.h>
35 #include <linux/delay.h>
36 #include <linux/kernel.h>
37 #include <linux/kdev_t.h>
38 #include <linux/string.h>
39 #include <linux/tty.h>
40 #include <linux/console.h>
41 #include <linux/timex.h>
42 #include <linux/sched.h>
43 #include <linux/ioport.h>
44 #include <linux/mm.h>
45 #include <linux/serial.h>
46 #include <linux/irq.h>
47 #include <linux/bootmem.h>
48 #include <linux/mmzone.h>
49 #include <linux/interrupt.h>
50 #include <linux/acpi.h>
51 #include <linux/compiler.h>
52 #include <linux/sched.h>
53
54 #include <asm/io.h>
55 #include <asm/sal.h>
56 #include <asm/machvec.h>
57 #include <asm/system.h>
58 #include <asm/processor.h>
59 #include <asm/pgalloc.h>
60 #include <asm/sn/sgi.h>
61 #include <asm/sn/io.h>
62 #include <asm/sn/pci/pciio.h>
63 #include <asm/sn/arch.h>
64 #include <asm/sn/addrs.h>
65 #include <asm/sn/pda.h>
66 #include <asm/sn/nodepda.h>
67 #include <asm/sn/sn_cpuid.h>
68 #include <asm/sn/sn_private.h>
69 #include <asm/sn/simulator.h>
70 #include <asm/sn/leds.h>
71 #include <asm/sn/bte.h>
72 #include <asm/sn/clksupport.h>
73 #include <asm/sn/sn_sal.h>
74 #include <asm/sn/sn2/shub.h>
75
/*
 * Build a nasid from a pxm value (presumably an ACPI proximity domain
 * number -- TODO confirm): the pxm is shifted left by one and OR-ed with
 * the bits of the local nasid above bit 8.
 */
#define pxm_to_nasid(pxm) (((pxm)<<1) | (get_nasid() & ~0x1ff))

#define MAX_PHYS_MEMORY (1UL << 49) /* 2^49 bytes, i.e. 512 TB (not 1 TB) */
79
/*
 * Routines and variables defined outside this file that the setup code
 * below calls or assigns.
 */
extern void bte_init_node (nodepda_t *, cnodeid_t);
extern void bte_init_cpu (void);
extern void sn_timer_init(void);
extern unsigned long last_time_offset;
extern void init_platform_hubinfo(nodepda_t **nodepdaindr);
/* Hooks installed by sn_setup(): idle marker and per-tick timer extras. */
extern void (*ia64_mark_idle)(int);
extern void (*ia64_platform_timer_extras)(void);
extern void sn_timer_interrupt_extras(void);
extern void snidle(int);
/* Cleared by sn_setup() when the PROM is too old to provide a FADT. */
extern unsigned char acpi_kbd_controller_present;
90
/* RTC frequency; set in sn_setup() from SAL, or guessed if SAL fails. */
unsigned long sn_rtc_cycles_per_second;

partid_t sn_partid = -1;
char sn_system_serial_number_string[128];
u64 sn_partition_serial_number;

/*
 * Indexed by nasid; holds the compact node id for that nasid, or -1 when
 * no node is known there.  Filled in by sn_setup() and scan_for_ionodes().
 */
short physical_node_map[MAX_PHYSNODE_ID];

/* Starts at numnodes in sn_init_pdas(); scan_for_ionodes() adds one per TIO. */
int numionodes;
/*
 * This is the address of the RRegs in the HSpace of the global
 * master.  It is used by a hack in serial.c (serial_[in|out]),
 * printk.c (early_printk), and kdb_io.c to put console output on that
 * node's Bedrock UART.  It is initialized here to 0, so that
 * early_printk won't try to access the UART before
 * master_node_bedrock_address is properly calculated.
 */
u64 master_node_bedrock_address;

static void sn_init_pdas(char **);


/* One nodepda pointer per compact node; allocated in sn_init_pdas(). */
static nodepda_t *nodepdaindr[MAX_COMPACT_NODES];

/* Single irqpda, allocated from bootmem by the boot cpu in sn_cpu_init(). */
irqpda_t *irqpdaindr;
116
117
118 /*
119 * The format of "screen_info" is strange, and due to early i386-setup
120 * code. This is just enough to make the console code think we're on a
121 * VGA color display.
122 */
123 struct screen_info sn_screen_info = {
124 orig_x: 0,
125 orig_y: 0,
126 orig_video_mode: 3,
127 orig_video_cols: 80,
128 orig_video_ega_bx: 3,
129 orig_video_lines: 25,
130 orig_video_isVGA: 1,
131 orig_video_points: 16
132 };
133
/*
 * This is here so we can use the CMOS detection in ide-probe.c to
 * determine what drives are present.  In theory, we don't need this
 * as the auto-detection could be done via ide-probe.c:do_probe() but
 * in practice that would be much slower, which is painful when
 * running in the simulator.  Note that passing zeroes in DRIVE_INFO
 * is sufficient (the IDE driver will autodetect the drive geometry).
 *
 * With CONFIG_IA64_GENERIC the array is only declared here and must be
 * defined elsewhere; otherwise this file provides the definition.
 */
#ifdef CONFIG_IA64_GENERIC
extern char drive_info[4*16];
#else
char drive_info[4*16];
#endif
147
/**
 * early_sn_setup - early setup routine for SN platforms
 *
 * Sets up an initial console to aid debugging.  Intended primarily
 * for bringup.  See start_kernel() in init/main.c.
 *
 * Walks the EFI configuration tables looking for the SAL system table,
 * then walks that table's descriptors looking for the SAL entry point
 * and registers it with ia64_sal_handler_init().
 */
#if defined(CONFIG_IA64_EARLY_PRINTK_SGI_SN) || defined(CONFIG_IA64_SGI_SN_SIM)

void __init
early_sn_setup(void)
{
	void ia64_sal_handler_init (void *entry_point, void *gpval);
	efi_system_table_t *efi_systab;
	efi_config_table_t *config_tables;
	struct ia64_sal_systab *sal_systab;
	struct ia64_sal_desc_entry_point *ep;
	char *p;
	int i, j;

	/*
	 * Parse enough of the SAL tables to locate the SAL entry point.  Since console
	 * IO on SN2 is done via SAL calls, early_printk won't work without this.
	 *
	 * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c.
	 * Any changes to those files may have to be made here as well.
	 */
	efi_systab = (efi_system_table_t *)__va(ia64_boot_param->efi_systab);
	config_tables = __va(efi_systab->tables);
	for (i = 0; i < efi_systab->nr_tables; i++) {
		if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) != 0)
			continue;

		sal_systab = __va(config_tables[i].table);
		p = (char *)(sal_systab + 1);

		/*
		 * Use a separate index for the descriptor walk: sharing
		 * the outer index would corrupt the config-table scan.
		 */
		for (j = 0; j < sal_systab->entry_count; j++) {
			if (*p == SAL_DESC_ENTRY_POINT) {
				ep = (struct ia64_sal_desc_entry_point *) p;
				ia64_sal_handler_init(__va(ep->sal_proc), __va(ep->gp));
				break;
			}
			p += SAL_DESC_SIZE(*p);
		}

		/* The SAL system table has been handled; stop scanning. */
		break;
	}

	if ( IS_RUNNING_ON_SIMULATOR() ) {
		master_node_bedrock_address = (u64)REMOTE_HUB(get_nasid(), SH_JUNK_BUS_UART0);
		printk(KERN_DEBUG "early_sn_setup: setting master_node_bedrock_address to 0x%lx\n", master_node_bedrock_address);
	}
}
#endif /* CONFIG_IA64_EARLY_PRINTK_SGI_SN || CONFIG_IA64_SGI_SN_SIM */
197
/* Only declared when MCA support is configured. */
#ifdef CONFIG_IA64_MCA
extern int platform_intr_list[];
#endif

/* Set to the boot cpu's nasid in sn_setup(). */
extern nasid_t master_nasid;
/* Set by sn_check_for_wars() when any node's SHub reports revision <= 2. */
static int shub_1_1_found __initdata;
204
205
/*
 * sn_check_for_wars
 *
 * Set the flag that enables SHub-specific workarounds (WARs).
 */
211
212 static inline int __init
is_shub_1_1(int nasid)213 is_shub_1_1(int nasid)
214 {
215 unsigned long id;
216 int rev;
217
218 id = REMOTE_HUB_L(nasid, SH_SHUB_ID);
219 rev = (id & SH_SHUB_ID_REVISION_MASK) >> SH_SHUB_ID_REVISION_SHFT;
220 return rev <= 2;
221 }
222
223 static void __init
sn_check_for_wars(void)224 sn_check_for_wars(void)
225 {
226 int cnode;
227
228 for (cnode=0; cnode< numnodes; cnode++)
229 if (is_shub_1_1(cnodeid_to_nasid(cnode)))
230 shub_1_1_found = 1;
231 }
232
233 /*
234 * SN2 requires very slightly different alternate data-TLB miss handle than what
235 * the mainline linux kernel provides. At some point this approach could be used
236 * to allow the use of the low-memory thrown away on other platforms when VGA is
237 * present.
238 *
239 * On SN2 we want to load small TCs for granule-0 (and aliases of) faulting
240 * addresses. The details of this are more sublte than at which they first
241 * appear.
242 */
243 static void __init
sn2_replace_ivt(void)244 sn2_replace_ivt(void)
245 {
246 extern unsigned char alt_dtlb_miss[], ia64_ivt_page_fault[];
247 extern unsigned char sn2_alt_dtlb_miss[], sn2_alt_dtlb_miss_end[];
248 extern unsigned char sn2_alt_dtlb_miss_patch1[];
249
250 unsigned char *s, *d;
251 u64 *p;
252 u64 len = (u64)sn2_alt_dtlb_miss_end - (u64)sn2_alt_dtlb_miss;
253 u64 broffs = (ia64_ivt_page_fault - alt_dtlb_miss) - (sn2_alt_dtlb_miss_patch1 - sn2_alt_dtlb_miss);
254 u64 psr;
255 int i;
256
257 /* printk(KERN_DEBUG "Replacing alternate data-TLB miss handler.\n"); */
258
259 /* Check the code isn't too large */
260 if (len > 1024) {
261 printk(KERN_ERR "SGI: Specific alt_dtlb_misse too large! Not replacing\n");
262 return;
263 }
264
265 /* check the offset is sane (should always be) */
266 if ((broffs>>4) + (1<<20) >= (1<<21)) {
267 printk(KERN_ERR "SGI: IVT patch ivt offset %ld invalid! Not replacing!\n", broffs);
268 return;
269 }
270
271 /* 2nd half of bundle to patch (has slot 2) */
272 p = (u64*)sn2_alt_dtlb_miss_patch1 + 1;
273 /* patch the offset into slot 2 (imm20b + s) */
274 *p = (*p & ~(0x8fffff000000000)) | ((broffs & 0x1000000) << 35) | ((broffs & 0x0fffff0) << 32);
275
276 /* don't want any interrupts when doing this */
277 psr = ia64_clear_ic();
278
279 /* copy over the existing code, flush i-cache as required */
280 d = alt_dtlb_miss;
281 s = sn2_alt_dtlb_miss;
282 for (i=0; i<len; ++i, ++s) {
283 *d++ = *s;
284 if ((((u64)s) & 63) == 63) {
285 ia64_insn_group_barrier();
286 ia64_fc((void*)s);
287 }
288 }
289 ia64_insn_group_barrier();
290 ia64_fc((void*)s);
291
292 /* sync & serialize instruction stream */
293 ia64_sync_i();
294 ia64_srlz_i();
295
296 /* restore interrupt status */
297 ia64_set_psr(psr);
298
299 /* flush any TC's we have had previously loaded that could cause problems here */
300 local_flush_tlb_all();
301
302 printk(KERN_DEBUG "SGI: Replaced alt_dtlb_miss handler.\n");
303 }
304
305 /**
306 * sn_setup - SN platform setup routine
307 * @cmdline_p: kernel command line
308 *
309 * Handles platform setup for SN machines. This includes determining
310 * the RTC frequency (via a SAL call), initializing secondary CPUs, and
311 * setting up per-node data areas. The console is also initialized here.
312 */
313 void __init
sn_setup(char ** cmdline_p)314 sn_setup(char **cmdline_p)
315 {
316 long status, ticks_per_sec, drift;
317 int pxm;
318 int major = sn_sal_rev_major(), minor = sn_sal_rev_minor();
319 extern void sn_cpu_init(void);
320
321 /*
322 * If the generic code has enabled vga console support - lets
323 * get rid of it again. This is a kludge for the fact that ACPI
324 * currtently has no way of informing us if legacy VGA is available
325 * or not.
326 */
327 #if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
328 if (conswitchp == &vga_con) {
329 printk(KERN_DEBUG "SGI: Disabling VGA console\n");
330 #ifdef CONFIG_DUMMY_CONSOLE
331 conswitchp = &dummy_con;
332 #else
333 conswitchp = NULL;
334 #endif /* CONFIG_DUMMY_CONSOLE */
335 }
336 #endif /* def(CONFIG_VT) && def(CONFIG_VGA_CONSOLE) */
337
338 MAX_DMA_ADDRESS = PAGE_OFFSET + MAX_PHYS_MEMORY;
339
340 memset(physical_node_map, -1, sizeof(physical_node_map));
341 for (pxm=0; pxm<MAX_PXM_DOMAINS; pxm++)
342 if (pxm_to_nid_map[pxm] != -1)
343 physical_node_map[pxm_to_nasid(pxm)] = pxm_to_nid_map[pxm];
344
345
346 /*
347 * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard
348 * support here so we don't have to listen to failed keyboard probe
349 * messages.
350 */
351 if ((major < 2 || (major == 2 && minor <= 9)) &&
352 acpi_kbd_controller_present) {
353 printk(KERN_INFO "Disabling legacy keyboard support as prom "
354 "is too old and doesn't provide FADT\n");
355 acpi_kbd_controller_present = 0;
356 }
357
358 printk("SGI SAL version %x.%02x\n", major, minor);
359
360 /*
361 * Confirm the SAL we're running on is recent enough...
362 */
363 if ((major < SN_SAL_MIN_MAJOR) || (major == SN_SAL_MIN_MAJOR &&
364 minor < SN_SAL_MIN_MINOR)) {
365 printk(KERN_ERR "This kernel needs SGI SAL version >= "
366 "%x.%02x\n", SN_SAL_MIN_MAJOR, SN_SAL_MIN_MINOR);
367 panic("PROM version too old\n");
368 }
369
370 /* Patch the ivt */
371 sn2_replace_ivt();
372
373 master_nasid = get_nasid();
374 (void)snia_get_console_nasid();
375 (void)snia_get_master_baseio_nasid();
376
377 status = ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, &drift);
378 if (status != 0 || ticks_per_sec < 100000) {
379 printk(KERN_WARNING "unable to determine platform RTC clock frequency, guessing.\n");
380 /* PROM gives wrong value for clock freq. so guess */
381 sn_rtc_cycles_per_second = 1000000000000UL/30000UL;
382 }
383 else
384 sn_rtc_cycles_per_second = ticks_per_sec;
385
386 platform_intr_list[ACPI_INTERRUPT_CPEI] = IA64_CPE_VECTOR;
387
388
389 if ( IS_RUNNING_ON_SIMULATOR() )
390 {
391 master_node_bedrock_address = (u64)REMOTE_HUB(get_nasid(), SH_JUNK_BUS_UART0);
392 printk(KERN_DEBUG "sn_setup: setting master_node_bedrock_address to 0x%lx\n",
393 master_node_bedrock_address);
394 }
395
396 /*
397 * we set the default root device to /dev/hda
398 * to make simulation easy
399 */
400 ROOT_DEV = to_kdev_t(0x0301);
401
402 /*
403 * Create the PDAs and NODEPDAs for all the cpus.
404 */
405 sn_init_pdas(cmdline_p);
406
407 ia64_mark_idle = &snidle;
408
409 /*
410 * For the bootcpu, we do this here. All other cpus will make the
411 * call as part of cpu_init in slave cpu initialization.
412 */
413 sn_cpu_init();
414
415 /*
416 * Setup hubinfo stuff. Has to happen AFTER sn_cpu_init(),
417 * because it uses the cnode to nasid tables.
418 */
419 init_platform_hubinfo(nodepdaindr);
420 #ifdef CONFIG_SMP
421 init_smp_config();
422 #endif
423 screen_info = sn_screen_info;
424
425 ia64_platform_timer_extras = &sn_timer_interrupt_extras;
426
427 sn_timer_init();
428 }
429
430 /**
431 * sn_init_pdas - setup node data areas
432 *
433 * One time setup for Node Data Area. Called by sn_setup().
434 */
435 void
sn_init_pdas(char ** cmdline_p)436 sn_init_pdas(char **cmdline_p)
437 {
438 cnodeid_t cnode;
439 void scan_for_ionodes(void);
440
441 /*
442 * Make sure that the PDA fits entirely in the same page as the
443 * cpu_data area.
444 */
445 if ((PDAADDR&~PAGE_MASK)+sizeof(pda_t) > PAGE_SIZE)
446 panic("overflow of cpu_data page");
447
448 memset(pda.cnodeid_to_nasid_table, -1, sizeof(pda.cnodeid_to_nasid_table));
449 for (cnode=0; cnode<numnodes; cnode++)
450 pda.cnodeid_to_nasid_table[cnode] = pxm_to_nasid(nid_to_pxm_map[cnode]);
451
452 numionodes = numnodes;
453 scan_for_ionodes();
454
455 /*
456 * Allocate & initalize the nodepda for each node.
457 */
458 for (cnode=0; cnode < numnodes; cnode++) {
459 nodepdaindr[cnode] = alloc_bootmem_node(NODE_DATA(cnode), sizeof(nodepda_t));
460 memset(nodepdaindr[cnode], 0, sizeof(nodepda_t));
461 }
462
463 /*
464 * Allocate & initialize nodepda for TIOs. For now, put them on node 0.
465 */
466 for (cnode = numnodes; cnode < numionodes; cnode ++) {
467 nodepdaindr[cnode] = alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t));
468 memset(nodepdaindr[cnode], 0, sizeof(nodepda_t));
469 }
470
471 /*
472 * Now copy the array of nodepda pointers to each nodepda.
473 */
474 for (cnode=0; cnode < numionodes; cnode++)
475 memcpy(nodepdaindr[cnode]->pernode_pdaindr, nodepdaindr, sizeof(nodepdaindr));
476
477
478 /*
479 * Set up IO related platform-dependent nodepda fields.
480 * The following routine actually sets up the hubinfo struct
481 * in nodepda.
482 */
483 for (cnode = 0; cnode < numnodes; cnode++) {
484 init_platform_nodepda(nodepdaindr[cnode], cnode);
485 spin_lock_init(&nodepdaindr[cnode]->bist_lock);
486 bte_init_node (nodepdaindr[cnode], cnode);
487 }
488
489 /*
490 * Handle TIO differently .. we do not do BTE init ..
491 */
492 for (cnode = numnodes; cnode < numionodes; cnode++) {
493 init_platform_nodepda(nodepdaindr[cnode], cnode);
494 }
495 }
496
497 /**
498 * sn_cpu_init - initialize per-cpu data areas
499 * @cpuid: cpuid of the caller
500 *
501 * Called during cpu initialization on each cpu as it starts.
502 * Currently, initializes the per-cpu data area for SNIA.
503 * Also sets up a few fields in the nodepda. Also known as
504 * platform_cpu_init() by the ia64 machvec code.
505 */
506 void __init
sn_cpu_init(void)507 sn_cpu_init(void)
508 {
509 int cpuid;
510 int cpuphyid;
511 int nasid;
512 int slice;
513 int cnode, i;
514 static int wars_have_been_checked = 0;
515
516 /*
517 * The boot cpu makes this call again after platform initialization is
518 * complete.
519 */
520 if (nodepdaindr[0] == NULL)
521 return;
522
523 cpuid = smp_processor_id();
524 cpuphyid = ((ia64_get_lid() >> 16) & 0xffff);
525 nasid = cpu_physical_id_to_nasid(cpuphyid);
526 cnode = nasid_to_cnodeid(nasid);
527 slice = cpu_physical_id_to_slice(cpuphyid);
528
529 printk("CPU %d: nasid %d, slice %d, cnode %d\n",
530 smp_processor_id(), nasid, slice, cnode);
531
532 memset(&pda, 0, sizeof(pda));
533 pda.p_nodepda = nodepdaindr[cnode];
534 pda.led_address = (typeof(pda.led_address)) (LED0 + (slice<<LED_CPU_SHIFT));
535 pda.led_state = LED_ALWAYS_SET;
536 pda.hb_count = HZ/2;
537 pda.hb_state = 0;
538 pda.idle_flag = 0;
539
540 memset(pda.cnodeid_to_nasid_table, -1, sizeof(pda.cnodeid_to_nasid_table));
541 for (i=0; i<numnodes; i++)
542 pda.cnodeid_to_nasid_table[i] = pxm_to_nasid(nid_to_pxm_map[i]);
543 /*
544 * Check for WARs.
545 * Only needs to be done once, on BSP.
546 * Has to be done after loop above, because it uses pda.cnodeid_to_nasid_table[i].
547 * Has to be done before assignment below.
548 */
549 if (!wars_have_been_checked) {
550 sn_check_for_wars();
551 wars_have_been_checked = 1;
552 }
553
554 pda.shub_1_1_found = shub_1_1_found;
555
556 if (local_node_data->active_cpu_count == 1)
557 nodepda->node_first_cpu = cpuid;
558
559
560
561 /*
562 * We must use different memory allocators for first cpu (bootmem
563 * allocator) than for the other cpus (regular allocator).
564 */
565 if (cpuid == 0)
566 irqpdaindr = alloc_bootmem_node(NODE_DATA(cpuid_to_cnodeid(cpuid)),sizeof(irqpda_t));
567
568 memset(irqpdaindr, 0, sizeof(irqpda_t));
569 irqpdaindr->irq_flags[SGI_PCIBR_ERROR] = SN2_IRQ_SHARED;
570 irqpdaindr->irq_flags[SGI_PCIBR_ERROR] |= SN2_IRQ_RESERVED;
571 irqpdaindr->irq_flags[SGI_II_ERROR] = SN2_IRQ_SHARED;
572 irqpdaindr->irq_flags[SGI_II_ERROR] |= SN2_IRQ_RESERVED;
573
574 pda.pio_write_status_addr = (volatile unsigned long *)
575 LOCAL_MMR_ADDR((slice < 2 ? SH_PIO_WRITE_STATUS_0 : SH_PIO_WRITE_STATUS_1 ) );
576 pda.mem_write_status_addr = (volatile u64 *)
577 LOCAL_MMR_ADDR((slice < 2 ? SH_MEMORY_WRITE_STATUS_0 : SH_MEMORY_WRITE_STATUS_1 ) );
578
579 if (nodepda->node_first_cpu == cpuid) {
580 int buddy_nasid;
581 buddy_nasid = cnodeid_to_nasid(numa_node_id() == numnodes-1 ? 0 : numa_node_id()+ 1);
582 pda.pio_shub_war_cam_addr = (volatile unsigned long*)GLOBAL_MMR_ADDR(nasid, SH_PI_CAM_CONTROL);
583 }
584
585 bte_init_cpu();
586 }
587
588 /*
589 * Scan klconfig for TIO's. Add the TIO nasids to the
590 * physical_node_map and the pda and increment numionodes.
591 */
592
593 void
scan_for_ionodes()594 scan_for_ionodes() {
595 int nasid = 0;
596 lboard_t *brd;
597
598 /* Scan all compute nodes. */
599 for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid +=2) {
600 /* if there's no nasid, don't try to read the klconfig on the node */
601 if (physical_node_map[nasid] == -1) continue;
602 brd = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid), KLTYPE_TIO);
603 while (brd) {
604 pda.cnodeid_to_nasid_table[numionodes] = brd->brd_nasid;
605 physical_node_map[brd->brd_nasid] = numionodes++;
606 brd = KLCF_NEXT(brd);
607 brd = find_lboard(brd, KLTYPE_TIO);
608 }
609 }
610 }
611