/*
 * Here is where the ball gets rolling as far as the kernel is concerned.
 * When control is transferred to _start, the bootload has already
 * loaded us to the correct address.  All that's left to do here is
 * to set up the kernel's global pointer and jump to the kernel
 * entry point.
 *
 * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 *	Stephane Eranian <eranian@hpl.hp.com>
 * Copyright (C) 1999 VA Linux Systems
 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
 * Copyright (C) 1999 Intel Corp.
 * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com>
 * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
 * Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com>
 *	-Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2.
 * Copyright (C) 2003 Silicon Graphics, Inc.
 */

#include <linux/config.h>

#include <asm/asmmacro.h>
#include <asm/fpu.h>
#include <asm/kregs.h>
#include <asm/mmu_context.h>
#include <asm/offsets.h>
#include <asm/pal.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/system.h>

	.section __special_page_section,"ax"

	// One page of zeroes, exported to C code as empty_zero_page.
	.global empty_zero_page
empty_zero_page:
	.skip PAGE_SIZE

	// Bootstrap page directory used for the initial kernel mapping.
	.global swapper_pg_dir
swapper_pg_dir:
	.skip PAGE_SIZE

	.rodata
halt_msg:
	stringz "Halting kernel\n"

	.text

	.global start_ap

	/*
	 * Start the kernel.  When the bootloader passes control to _start(), r28
	 * points to the address of the boot parameter area.  Execution reaches
	 * here in physical mode.
 */
GLOBAL_ENTRY(_start)
start_ap:
	.prologue
	.save rp, r4		// terminate unwind chain with a NULL rp
	mov r4=r0
	.body

	/*
	 * Initialize the region register for region 7 and install a translation register
	 * that maps the kernel's text and data:
	 */
	rsm psr.i | psr.ic	// interrupts and interruption collection off while we
	;;			// rewrite the region register and TRs
	srlz.i
	;;
	mov r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
	movl r21=(7<<61)	// base address of region 7
	;;
	mov rr[r21]=r20
	;;
	/*
	 * Now pin mappings into the TLB for kernel text and data
	 */
	mov r18=KERNEL_TR_PAGE_SHIFT<<2
	movl r17=KERNEL_START
	;;
	mov cr.itir=r18		// page size for the insert
	mov cr.ifa=r17		// virtual address to map (KERNEL_START)
	mov r16=IA64_TR_KERNEL	// TR slot used for the pinned kernel mapping
	mov r3=ip
	movl r18=PAGE_KERNEL
	;;
	dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT	// physical page frame of the current IP
	;;
	or r18=r2,r18		// PTE = phys frame | PAGE_KERNEL attributes
	;;
	srlz.i
	;;
	itr.i itr[r16]=r18	// pin kernel text...
	;;
	itr.d dtr[r16]=r18	// ...and kernel data with the same translation
	;;
	srlz.i

	/*
	 * Switch into virtual mode:
	 */
	movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
		  |IA64_PSR_DI)
	;;
	mov cr.ipsr=r16		// PSR to establish on rfi (address translation on)
	movl r17=1f
	;;
	mov cr.iip=r17		// resume at label 1 below, in virtual mode
	mov cr.ifs=r0
	;;
	rfi
	;;
1:	// now we are in virtual mode

	// set IVT entry point---can't access I/O ports without it
	movl r3=ia64_ivt
	;;
	mov cr.iva=r3
	movl r2=FPSR_DEFAULT
	;;
	srlz.i
	movl gp=__gp		// set up the kernel's global pointer

	mov ar.fpsr=r2		// establish the default FP status register
	;;

#ifdef CONFIG_IA64_EARLY_PRINTK
	// set up region 6 so early_printk can reach uncached I/O space
	mov r3=(6<<8) | (IA64_GRANULE_SHIFT<<2)
	movl r2=6<<61
	;;
	mov rr[r2]=r3
	;;
	srlz.i
	;;
#endif

#define isAP	p2	// are we an Application Processor?
#define isBP	p3	// are we the Bootstrap Processor?

#ifdef CONFIG_SMP
	/*
	 * Find the init_task for the currently booting CPU.  At poweron, and in
	 * UP mode, cpucount is 0.
	 */
	movl r3=cpucount
	;;
	ld4 r3=[r3]		// r3 <- smp_processor_id()
#else
	mov r3=0
#endif
	;;
	cmp4.ne isAP,isBP=r3,r0	// isAP <- (cpu != 0), isBP <- (cpu == 0)

	/*
	 * Make task struct pointer in init_tasks an identity mapped pointer.
	 * The value that is compiled into the array may not be identity mapped.
	 */
	movl r18=init_tasks
	;;
	shladd r18=r3,3,r18	// r18 = &init_tasks[cpu]
	;;
	ld8 r8=[r18]
	;;
	tpa r3=r8		// r3 == phys addr of task struct
	;;
	dep r2=-1,r3,61,3	// IMVA of task (identity-mapped: region-7 | phys)
	;;
	st8 [r18]=r2		// and save it back in init_tasks[thiscpu]

	// load mapping for stack (virtaddr in r2, physaddr in r3)
	// load dtr[2] only if the va for current (r2) isn't covered by the dtr[0]
	shr.u r18=r2,KERNEL_TR_PAGE_SHIFT	/* va of current in units of kernel-pages */
	movl r17=KERNEL_START>>KERNEL_TR_PAGE_SHIFT /* va of kernel-start in units of kernel-pages */
	;;
	cmp.eq p0,p6=r17,r18	// p6 <- stack NOT covered by the pinned kernel TR
	rsm psr.ic		// no interruption collection while inserting a TR
	movl r17=PAGE_KERNEL
	;;
	srlz.d
	dep r18=0,r3,0,12	// clear page-offset bits of the physical address
	;;
	or r18=r17,r18		// PTE for the stack mapping
	;;
	mov r17=rr[r2]		// page size for the insert comes from the region reg
	shr.u r16=r3,IA64_GRANULE_SHIFT	// granule number of the stack (for ar.k7 below)
	;;
	dep r17=0,r17,8,24	// keep only the page-size field
	;;
	mov cr.itir=r17
	mov cr.ifa=r2		// virtual address of the stack

	mov r19=IA64_TR_CURRENT_STACK
	;;
(p6)	itr.d dtr[r19]=r18	// pin the stack mapping only if not already covered
	;;
	ssm psr.ic
	srlz.d
	;;

	// load the "current" pointer (r13) and ar.k6 with the current task
	mov IA64_KR(CURRENT)=r3		// Physical address
	mov IA64_KR(CURRENT_STACK)=r16
	mov r13=r2

	/*
	 * Reserve space at the top of the stack for "struct pt_regs".  Kernel threads
	 * don't store interesting values in that structure, but the space still needs
	 * to be there because time-critical stuff such as the context switching can
	 * be implemented more efficiently (for example, __switch_to()
	 * always sets the psr.dfh bit of the task it is switching to).
	 */
	addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2	// sp below pt_regs + scratch
	addl r2=IA64_RBS_OFFSET,r2	// initialize the RSE
	mov ar.rsc=0		// place RSE in enforced lazy mode
	;;
	loadrs			// clear the dirty partition
	;;
	mov ar.bspstore=r2	// establish the new RSE stack
	;;
	mov ar.rsc=0x3		// place RSE in eager mode

(isBP)	dep r28=-1,r28,61,3	// make address virtual
(isBP)	movl r2=ia64_boot_param
	;;
(isBP)	st8 [r2]=r28		// save the address of the boot param area passed by the bootloader

#ifdef CONFIG_IA64_EARLY_PRINTK
	.rodata
alive_msg:
	stringz "I'm alive and well\n"
alive_msg_end:
	.previous

	alloc r2=ar.pfs,0,0,2,0
	movl out0=alive_msg
	movl out1=alive_msg_end-alive_msg-1	// length, excluding the trailing NUL
	;;
	br.call.sptk.many rp=early_printk
1:	// force new bundle
#endif /* CONFIG_IA64_EARLY_PRINTK */

#ifdef CONFIG_SMP
(isAP)	br.call.sptk.many rp=start_secondary
.ret0:
(isAP)	br.cond.sptk self	// start_secondary should not return; spin if it does
#endif

	// This is executed by the bootstrap processor (bsp) only:

#ifdef CONFIG_IA64_FW_EMU
	// initialize PAL & SAL emulator:
	br.call.sptk.many rp=sys_fw_init
.ret1:
#endif
	br.call.sptk.many rp=start_kernel
.ret2:	addl r3=@ltoff(halt_msg),gp	// start_kernel returned: announce the halt
	;;
	alloc r2=ar.pfs,8,0,2,0
	;;
	ld8 out0=[r3]
	br.call.sptk.many b0=console_print
self:	br.sptk.many self		// endless loop
END(_start)

/*
 * ia64_save_debug_regs(void *buf)
 *
 * Save the IA64_NUM_DBG_REGS data breakpoint registers (dbr) followed by the
 * instruction breakpoint registers (ibr) into the buffer at in0.  Layout:
 * dbr[0..N-1] at buf, ibr[0..N-1] at buf + IA64_NUM_DBG_REGS*8.
 * Preserves ar.lc.
 */
GLOBAL_ENTRY(ia64_save_debug_regs)
	alloc r16=ar.pfs,1,0,0,0
	mov r20=ar.lc			// preserve ar.lc
	mov ar.lc=IA64_NUM_DBG_REGS-1
	mov r18=0			// register index
	add r19=IA64_NUM_DBG_REGS*8,in0	// r19 = &buf[ibr area]
	;;
1:	mov r16=dbr[r18]
#ifdef CONFIG_ITANIUM
	;;
	srlz.d
#endif
	mov r17=ibr[r18]
	add r18=1,r18
	;;
	st8.nta [in0]=r16,8
	st8.nta [r19]=r17,8
	br.cloop.sptk.many 1b
	;;
	mov ar.lc=r20			// restore ar.lc
	br.ret.sptk.many rp
END(ia64_save_debug_regs)

/*
 * ia64_load_debug_regs(void *buf)
 *
 * Inverse of ia64_save_debug_regs(): reload dbr[] and ibr[] from the buffer
 * at in0 (same layout as above).  Preserves ar.lc.
 */
GLOBAL_ENTRY(ia64_load_debug_regs)
	alloc r16=ar.pfs,1,0,0,0
	lfetch.nta [in0]
	mov r20=ar.lc			// preserve ar.lc
	add r19=IA64_NUM_DBG_REGS*8,in0	// r19 = &buf[ibr area]
	mov ar.lc=IA64_NUM_DBG_REGS-1
	mov r18=-1			// pre-incremented to 0 on first iteration
	;;
1:	ld8.nta r16=[in0],8
	ld8.nta r17=[r19],8
	add r18=1,r18
	;;
	mov dbr[r18]=r16
#ifdef CONFIG_ITANIUM
	;;
	srlz.d				// Errata 132 (NoFix status)
#endif
	mov ibr[r18]=r17
	br.cloop.sptk.many 1b
	;;
	mov ar.lc=r20			// restore ar.lc
	br.ret.sptk.many rp
END(ia64_load_debug_regs)

/*
 * __ia64_save_fpu(struct ia64_fpreg *fpu)
 *
 * Spill f32-f127 (the 96 "high" FP registers) into the 96*16-byte buffer at
 * in0.  Four pointers (loc0-loc3) walk the buffer from the top down in
 * interleaved -256 byte strides; the ordering is scheduled for Itanium 2
 * (see the copyright note at the top of the file).
 */
GLOBAL_ENTRY(__ia64_save_fpu)
	alloc r2=ar.pfs,1,4,0,0
	adds loc0=96*16-16,in0
	adds loc1=96*16-16-128,in0
	;;
	stf.spill.nta [loc0]=f127,-256
	stf.spill.nta [loc1]=f119,-256
	;;
	stf.spill.nta [loc0]=f111,-256
	stf.spill.nta [loc1]=f103,-256
	;;
	stf.spill.nta [loc0]=f95,-256
	stf.spill.nta [loc1]=f87,-256
	;;
	stf.spill.nta [loc0]=f79,-256
	stf.spill.nta [loc1]=f71,-256
	;;
	stf.spill.nta [loc0]=f63,-256
	stf.spill.nta [loc1]=f55,-256
	adds loc2=96*16-32,in0
	;;
	stf.spill.nta [loc0]=f47,-256
	stf.spill.nta [loc1]=f39,-256
	adds loc3=96*16-32-128,in0
	;;
	stf.spill.nta [loc2]=f126,-256
	stf.spill.nta [loc3]=f118,-256
	;;
	stf.spill.nta [loc2]=f110,-256
	stf.spill.nta [loc3]=f102,-256
	;;
	stf.spill.nta [loc2]=f94,-256
	stf.spill.nta [loc3]=f86,-256
	;;
	stf.spill.nta [loc2]=f78,-256
	stf.spill.nta [loc3]=f70,-256
	;;
	stf.spill.nta [loc2]=f62,-256
	stf.spill.nta [loc3]=f54,-256
	adds loc0=96*16-48,in0
	;;
	stf.spill.nta [loc2]=f46,-256
	stf.spill.nta [loc3]=f38,-256
	adds loc1=96*16-48-128,in0
	;;
	stf.spill.nta [loc0]=f125,-256
	stf.spill.nta [loc1]=f117,-256
	;;
	stf.spill.nta [loc0]=f109,-256
	stf.spill.nta [loc1]=f101,-256
	;;
	stf.spill.nta [loc0]=f93,-256
	stf.spill.nta [loc1]=f85,-256
	;;
	stf.spill.nta [loc0]=f77,-256
	stf.spill.nta [loc1]=f69,-256
	;;
	stf.spill.nta [loc0]=f61,-256
	stf.spill.nta [loc1]=f53,-256
	adds loc2=96*16-64,in0
	;;
	stf.spill.nta [loc0]=f45,-256
	stf.spill.nta [loc1]=f37,-256
	adds loc3=96*16-64-128,in0
	;;
	stf.spill.nta [loc2]=f124,-256
	stf.spill.nta [loc3]=f116,-256
	;;
	stf.spill.nta [loc2]=f108,-256
	stf.spill.nta [loc3]=f100,-256
	;;
	stf.spill.nta [loc2]=f92,-256
	stf.spill.nta [loc3]=f84,-256
	;;
	stf.spill.nta [loc2]=f76,-256
	stf.spill.nta [loc3]=f68,-256
	;;
	stf.spill.nta [loc2]=f60,-256
	stf.spill.nta [loc3]=f52,-256
	adds loc0=96*16-80,in0
	;;
	stf.spill.nta [loc2]=f44,-256
	stf.spill.nta [loc3]=f36,-256
	adds loc1=96*16-80-128,in0
	;;
	stf.spill.nta [loc0]=f123,-256
	stf.spill.nta [loc1]=f115,-256
	;;
	stf.spill.nta [loc0]=f107,-256
	stf.spill.nta [loc1]=f99,-256
	;;
	stf.spill.nta [loc0]=f91,-256
	stf.spill.nta [loc1]=f83,-256
	;;
	stf.spill.nta [loc0]=f75,-256
	stf.spill.nta [loc1]=f67,-256
	;;
	stf.spill.nta [loc0]=f59,-256
	stf.spill.nta [loc1]=f51,-256
	adds loc2=96*16-96,in0
	;;
	stf.spill.nta [loc0]=f43,-256
	stf.spill.nta [loc1]=f35,-256
	adds loc3=96*16-96-128,in0
	;;
	stf.spill.nta [loc2]=f122,-256
	stf.spill.nta [loc3]=f114,-256
	;;
	stf.spill.nta [loc2]=f106,-256
	stf.spill.nta [loc3]=f98,-256
	;;
	stf.spill.nta [loc2]=f90,-256
	stf.spill.nta [loc3]=f82,-256
	;;
	stf.spill.nta [loc2]=f74,-256
	stf.spill.nta [loc3]=f66,-256
	;;
	stf.spill.nta [loc2]=f58,-256
	stf.spill.nta [loc3]=f50,-256
	adds loc0=96*16-112,in0
	;;
	stf.spill.nta [loc2]=f42,-256
	stf.spill.nta [loc3]=f34,-256
	adds loc1=96*16-112-128,in0
	;;
	stf.spill.nta [loc0]=f121,-256
	stf.spill.nta [loc1]=f113,-256
	;;
	stf.spill.nta [loc0]=f105,-256
	stf.spill.nta [loc1]=f97,-256
	;;
	stf.spill.nta [loc0]=f89,-256
	stf.spill.nta [loc1]=f81,-256
	;;
	stf.spill.nta [loc0]=f73,-256
	stf.spill.nta [loc1]=f65,-256
	;;
	stf.spill.nta [loc0]=f57,-256
	stf.spill.nta [loc1]=f49,-256
	adds loc2=96*16-128,in0
	;;
	stf.spill.nta [loc0]=f41,-256
	stf.spill.nta [loc1]=f33,-256
	adds loc3=96*16-128-128,in0
	;;
	stf.spill.nta [loc2]=f120,-256
	stf.spill.nta [loc3]=f112,-256
	;;
	stf.spill.nta [loc2]=f104,-256
	stf.spill.nta [loc3]=f96,-256
	;;
	stf.spill.nta [loc2]=f88,-256
	stf.spill.nta [loc3]=f80,-256
	;;
	stf.spill.nta [loc2]=f72,-256
	stf.spill.nta [loc3]=f64,-256
	;;
	stf.spill.nta [loc2]=f56,-256
	stf.spill.nta [loc3]=f48,-256
	;;
	stf.spill.nta [loc2]=f40
	stf.spill.nta [loc3]=f32
	br.ret.sptk.many rp
END(__ia64_save_fpu)

/*
 * __ia64_load_fpu(struct ia64_fpreg *fpu)
 *
 * Inverse of __ia64_save_fpu(): fill f32-f127 from the buffer at in0.
 * Four pointers (in0, r3, r14, r15) walk the buffer with +512/-1024+16
 * strides (loc0/loc1), four fills per instruction group.
 */
GLOBAL_ENTRY(__ia64_load_fpu)
	alloc r2=ar.pfs,1,2,0,0
	adds r3=128,in0
	adds r14=256,in0
	adds r15=384,in0
	mov loc0=512		// forward stride
	mov loc1=-1024+16	// rewind stride to the next interleave position
	;;
	ldf.fill.nta f32=[in0],loc0
	ldf.fill.nta f40=[r3],loc0
	ldf.fill.nta f48=[r14],loc0
	ldf.fill.nta f56=[r15],loc0
	;;
	ldf.fill.nta f64=[in0],loc0
	ldf.fill.nta f72=[r3],loc0
	ldf.fill.nta f80=[r14],loc0
	ldf.fill.nta f88=[r15],loc0
	;;
	ldf.fill.nta f96=[in0],loc1
	ldf.fill.nta f104=[r3],loc1
	ldf.fill.nta f112=[r14],loc1
	ldf.fill.nta f120=[r15],loc1
	;;
	ldf.fill.nta f33=[in0],loc0
	ldf.fill.nta f41=[r3],loc0
	ldf.fill.nta f49=[r14],loc0
	ldf.fill.nta f57=[r15],loc0
	;;
	ldf.fill.nta f65=[in0],loc0
	ldf.fill.nta f73=[r3],loc0
	ldf.fill.nta f81=[r14],loc0
	ldf.fill.nta f89=[r15],loc0
	;;
	ldf.fill.nta f97=[in0],loc1
	ldf.fill.nta f105=[r3],loc1
	ldf.fill.nta f113=[r14],loc1
	ldf.fill.nta f121=[r15],loc1
	;;
	ldf.fill.nta f34=[in0],loc0
	ldf.fill.nta f42=[r3],loc0
	ldf.fill.nta f50=[r14],loc0
	ldf.fill.nta f58=[r15],loc0
	;;
	ldf.fill.nta f66=[in0],loc0
	ldf.fill.nta f74=[r3],loc0
	ldf.fill.nta f82=[r14],loc0
	ldf.fill.nta f90=[r15],loc0
	;;
	ldf.fill.nta f98=[in0],loc1
	ldf.fill.nta f106=[r3],loc1
	ldf.fill.nta f114=[r14],loc1
	ldf.fill.nta f122=[r15],loc1
	;;
	ldf.fill.nta f35=[in0],loc0
	ldf.fill.nta f43=[r3],loc0
	ldf.fill.nta f51=[r14],loc0
	ldf.fill.nta f59=[r15],loc0
	;;
	ldf.fill.nta f67=[in0],loc0
	ldf.fill.nta f75=[r3],loc0
	ldf.fill.nta f83=[r14],loc0
	ldf.fill.nta f91=[r15],loc0
	;;
	ldf.fill.nta f99=[in0],loc1
	ldf.fill.nta f107=[r3],loc1
	ldf.fill.nta f115=[r14],loc1
	ldf.fill.nta f123=[r15],loc1
	;;
	ldf.fill.nta f36=[in0],loc0
	ldf.fill.nta f44=[r3],loc0
	ldf.fill.nta f52=[r14],loc0
	ldf.fill.nta f60=[r15],loc0
	;;
	ldf.fill.nta f68=[in0],loc0
	ldf.fill.nta f76=[r3],loc0
	ldf.fill.nta f84=[r14],loc0
	ldf.fill.nta f92=[r15],loc0
	;;
	ldf.fill.nta f100=[in0],loc1
	ldf.fill.nta f108=[r3],loc1
	ldf.fill.nta f116=[r14],loc1
	ldf.fill.nta f124=[r15],loc1
	;;
	ldf.fill.nta f37=[in0],loc0
	ldf.fill.nta f45=[r3],loc0
	ldf.fill.nta f53=[r14],loc0
	ldf.fill.nta f61=[r15],loc0
	;;
	ldf.fill.nta f69=[in0],loc0
	ldf.fill.nta f77=[r3],loc0
	ldf.fill.nta f85=[r14],loc0
	ldf.fill.nta f93=[r15],loc0
	;;
	ldf.fill.nta f101=[in0],loc1
	ldf.fill.nta f109=[r3],loc1
	ldf.fill.nta f117=[r14],loc1
	ldf.fill.nta f125=[r15],loc1
	;;
	ldf.fill.nta f38=[in0],loc0
	ldf.fill.nta f46=[r3],loc0
	ldf.fill.nta f54=[r14],loc0
	ldf.fill.nta f62=[r15],loc0
	;;
	ldf.fill.nta f70=[in0],loc0
	ldf.fill.nta f78=[r3],loc0
	ldf.fill.nta f86=[r14],loc0
	ldf.fill.nta f94=[r15],loc0
	;;
	ldf.fill.nta f102=[in0],loc1
	ldf.fill.nta f110=[r3],loc1
	ldf.fill.nta f118=[r14],loc1
	ldf.fill.nta f126=[r15],loc1
	;;
	ldf.fill.nta f39=[in0],loc0
	ldf.fill.nta f47=[r3],loc0
	ldf.fill.nta f55=[r14],loc0
	ldf.fill.nta f63=[r15],loc0
	;;
	ldf.fill.nta f71=[in0],loc0
	ldf.fill.nta f79=[r3],loc0
	ldf.fill.nta f87=[r14],loc0
	ldf.fill.nta f95=[r15],loc0
	;;
	ldf.fill.nta f103=[in0]
	ldf.fill.nta f111=[r3]
	ldf.fill.nta f119=[r14]
	ldf.fill.nta f127=[r15]
	br.ret.sptk.many rp
END(__ia64_load_fpu)

/*
 * __ia64_init_fpu(void)
 *
 * Initialize f32-f127 to zero.  f0 is spilled to [sp] once, then reloaded in
 * pairs via ldfps while other registers are zeroed with setf.s/mov f=f0; the
 * M0/M1/M2/M3/F annotations record the intended bundle slotting.
 */
GLOBAL_ENTRY(__ia64_init_fpu)
	stf.spill [sp]=f0		// M3
	mov	  f32=f0		// F
	nop.b	  0

	ldfps	  f33,f34=[sp]		// M0
	ldfps	  f35,f36=[sp]		// M1
	mov	  f37=f0		// F
	;;

	setf.s	  f38=r0		// M2
	setf.s	  f39=r0		// M3
	mov	  f40=f0		// F

	ldfps	  f41,f42=[sp]		// M0
	ldfps	  f43,f44=[sp]		// M1
	mov	  f45=f0		// F

	setf.s	  f46=r0		// M2
	setf.s	  f47=r0		// M3
	mov	  f48=f0		// F

	ldfps	  f49,f50=[sp]		// M0
	ldfps	  f51,f52=[sp]		// M1
	mov	  f53=f0		// F

	setf.s	  f54=r0		// M2
	setf.s	  f55=r0		// M3
	mov	  f56=f0		// F

	ldfps	  f57,f58=[sp]		// M0
	ldfps	  f59,f60=[sp]		// M1
	mov	  f61=f0		// F

	setf.s	  f62=r0		// M2
	setf.s	  f63=r0		// M3
	mov	  f64=f0		// F

	ldfps	  f65,f66=[sp]		// M0
	ldfps	  f67,f68=[sp]		// M1
	mov	  f69=f0		// F

	setf.s	  f70=r0		// M2
	setf.s	  f71=r0		// M3
	mov	  f72=f0		// F

	ldfps	  f73,f74=[sp]		// M0
	ldfps	  f75,f76=[sp]		// M1
	mov	  f77=f0		// F

	setf.s	  f78=r0		// M2
	setf.s	  f79=r0		// M3
	mov	  f80=f0		// F

	ldfps	  f81,f82=[sp]		// M0
	ldfps	  f83,f84=[sp]		// M1
	mov	  f85=f0		// F

	setf.s	  f86=r0		// M2
	setf.s	  f87=r0		// M3
	mov	  f88=f0		// F

	/*
	 * When the instructions are cached, it would be faster to initialize
	 * the remaining registers with simply mov instructions (F-unit).
	 * This gets the time down to ~29 cycles.  However, this would use up
	 * 33 bundles, whereas continuing with the above pattern yields
	 * 10 bundles and ~30 cycles.
	 */

	ldfps	  f89,f90=[sp]		// M0
	ldfps	  f91,f92=[sp]		// M1
	mov	  f93=f0		// F

	setf.s	  f94=r0		// M2
	setf.s	  f95=r0		// M3
	mov	  f96=f0		// F

	ldfps	  f97,f98=[sp]		// M0
	ldfps	  f99,f100=[sp]		// M1
	mov	  f101=f0		// F

	setf.s	  f102=r0		// M2
	setf.s	  f103=r0		// M3
	mov	  f104=f0		// F

	ldfps	  f105,f106=[sp]	// M0
	ldfps	  f107,f108=[sp]	// M1
	mov	  f109=f0		// F

	setf.s	  f110=r0		// M2
	setf.s	  f111=r0		// M3
	mov	  f112=f0		// F

	ldfps	  f113,f114=[sp]	// M0
	ldfps	  f115,f116=[sp]	// M1
	mov	  f117=f0		// F

	setf.s	  f118=r0		// M2
	setf.s	  f119=r0		// M3
	mov	  f120=f0		// F

	ldfps	  f121,f122=[sp]	// M0
	ldfps	  f123,f124=[sp]	// M1
	mov	  f125=f0		// F

	setf.s	  f126=r0		// M2
	setf.s	  f127=r0		// M3
	br.ret.sptk.many rp		// F
END(__ia64_init_fpu)

/*
 * Switch execution mode from virtual to physical
 *
 * Inputs:
 *	r16 = new psr to establish
 *
 * Note: RSE must already be in enforced lazy mode
 */
GLOBAL_ENTRY(ia64_switch_mode_phys)
 {
	alloc r2=ar.pfs,0,0,0,0
	rsm psr.i | psr.ic		// disable interrupts and interrupt collection
	mov r15=ip
 }
	;;
 {
	flushrs				// must be first insn in group
	srlz.i
 }
	;;
	mov cr.ipsr=r16			// set new PSR
	add r3=1f-ia64_switch_mode_phys,r15	// resume address (label 1), still virtual

	mov r17=ar.bsp
	mov r14=rp			// get return address into a general register
	;;

	// going to physical mode, use tpa to translate virt->phys
	tpa r17=r17
	tpa r3=r3
	tpa sp=sp
	tpa r14=r14
	;;

	mov r18=ar.rnat			// save ar.rnat
	mov ar.bspstore=r17		// this steps on ar.rnat
	mov cr.iip=r3
	mov cr.ifs=r0
	;;
	mov ar.rnat=r18			// restore ar.rnat
	rfi				// must be last insn in group
	;;
1:	mov rp=r14
	br.ret.sptk.many rp
END(ia64_switch_mode_phys)

/*
 * Switch execution mode from physical to virtual
 *
 * Inputs:
 *	r16 = new psr to establish
 *
 * Note: RSE must already be in enforced lazy mode
 */
GLOBAL_ENTRY(ia64_switch_mode_virt)
 {
	alloc r2=ar.pfs,0,0,0,0
	rsm psr.i | psr.ic		// disable interrupts and interrupt collection
	mov r15=ip
 }
	;;
 {
	flushrs				// must be first insn in group
	srlz.i
 }
	;;
	mov cr.ipsr=r16			// set new PSR
	add r3=1f-ia64_switch_mode_virt,r15	// resume address (label 1), still physical

	mov r17=ar.bsp
	mov r14=rp			// get return address into a general register
	;;

	// going to virtual
	//   - for code addresses, set upper bits of addr to KERNEL_START
	//   - for stack addresses, set upper 3 bits to 0xe.... Dont change any of the
	//     lower bits since we want it to stay identity mapped
	movl r18=KERNEL_START
	dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
	dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
	dep r17=-1,r17,61,3
	dep sp=-1,sp,61,3
	;;
	or r3=r3,r18
	or r14=r14,r18
	;;

	mov r18=ar.rnat			// save ar.rnat
	mov ar.bspstore=r17		// this steps on ar.rnat
	mov cr.iip=r3
	mov cr.ifs=r0
	;;
	mov ar.rnat=r18			// restore ar.rnat
	rfi				// must be last insn in group
	;;
1:	mov rp=r14
	br.ret.sptk.many rp
END(ia64_switch_mode_virt)

#ifdef CONFIG_IA64_BRL_EMU

/*
 * Assembly routines used by brl_emu.c to set preserved register state.
 */

#define SET_REG(reg)				\
 GLOBAL_ENTRY(ia64_set_##reg);			\
	alloc r16=ar.pfs,1,0,0,0;		\
	mov reg=r32;				\
	;;					\
	br.ret.sptk.many rp;			\
 END(ia64_set_##reg)

SET_REG(b1);
SET_REG(b2);
SET_REG(b3);
SET_REG(b4);
SET_REG(b5);

#endif /* CONFIG_IA64_BRL_EMU */

#ifdef CONFIG_SMP

	/*
	 * This routine handles spinlock contention.  It uses a simple exponential backoff
	 * algorithm to reduce unnecessary bus traffic.  The initial delay is selected from
	 * the low-order bits of the cycle counter (a cheap "randomizer").  I'm sure this
	 * could use additional tuning, especially on systems with a large number of CPUs.
	 * Also, I think the maximum delay should be made a function of the number of CPUs in
	 * the system. --davidm 00/08/05
	 *
	 * WARNING: This is not a normal procedure.  It gets called from C code without
	 * the compiler knowing about it.  Thus, we must not use any scratch registers
	 * beyond those that were declared "clobbered" at the call-site (see spin_lock()
	 * macro).  We may not even use the stacked registers, because that could overwrite
	 * output registers.  Similarly, we can't use the scratch stack area as it may be
	 * in use, too.
	 *
	 * Inputs:
	 *	ar.ccv = 0 (and available for use)
	 *	r28 = available for use
	 *	r29 = available for use
	 *	r30 = non-zero (and available for use)
	 *	r31 = address of lock we're trying to acquire
	 *	p15 = available for use
	 */

#	define delay	r28
#	define timeout	r29
#	define tmp	r30

GLOBAL_ENTRY(ia64_spinlock_contention)
	mov tmp=ar.itc			// cycle counter seeds the backoff
	;;
	and delay=0x3f,tmp		// initial delay from low-order itc bits
	;;

.retry:	add timeout=tmp,delay
	shl delay=delay,1		// exponential backoff
	;;
	dep delay=delay,r0,0,13		// limit delay to 8192 cycles
	;;
	// delay a little...
.wait:	sub tmp=tmp,timeout
#ifdef GAS_HAS_HINT_INSN
	hint @pause
#endif
	or delay=0xf,delay		// make sure delay is non-zero (otherwise we get stuck with 0)
	;;
	cmp.lt p15,p0=tmp,r0		// p15 <- still inside the delay window
	mov tmp=ar.itc
(p15)	br.cond.sptk .wait
	;;
	ld4 tmp=[r31]			// peek at the lock word
	;;
	cmp.ne p15,p0=tmp,r0
	mov tmp=ar.itc
(p15)	br.cond.sptk .retry		// lock is still busy
	;;
	// try acquiring lock (we know ar.ccv is still zero!):
	mov tmp=1
	;;
	cmpxchg4.acq tmp=[r31],tmp,ar.ccv
	;;
	cmp.eq p15,p0=tmp,r0		// p15 <- lock was 0, we now own it

	mov tmp=ar.itc
(p15)	br.ret.sptk.many b7		// got lock -> return
	br .retry			// still no luck, retry

END(ia64_spinlock_contention)

#endif