/*
 * String handling functions for PowerPC.
 *
 * Copyright (C) 1996 Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/config.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>

/*
 * Copy one 16-byte group from 4(r4) to 4(r6); the final lwzu/stwu
 * leave r4 and r6 advanced by 16.  Clobbers r7-r10.
 */
#define COPY_16_BYTES		\
	lwz	r7,4(r4);	\
	lwz	r8,8(r4);	\
	lwz	r9,12(r4);	\
	lwzu	r10,16(r4);	\
	stw	r7,4(r6);	\
	stw	r8,8(r6);	\
	stw	r9,12(r6);	\
	stwu	r10,16(r6)

/*
 * Same copy as COPY_16_BYTES, but every load/store carries a numeric
 * label 8<n><0..7> so a fault on any of them can be routed through
 * the exception table (see COPY_16_BYTES_EXCODE below).  n says which
 * 16-byte group of the cacheline this expansion handles.
 */
#define COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
8 ## n ## 7:			\
	stwu	r10,16(r6)

/*
 * Fixup code for COPY_16_BYTES_WITHEX(n): on a fault, compensate r5
 * (bytes left in the partial line) for the 16*n bytes this group sits
 * past, then branch to the common read-fault (104) or write-fault
 * (105) handler.  The __ex_table entries map each possibly-faulting
 * instruction to the right fixup (loads -> 9<n>0, stores -> 9<n>1).
 */
#define COPY_16_BYTES_EXCODE(n)			\
9 ## n ## 0:					\
	addi	r5,r5,-(16 * n);		\
	b	104f;				\
9 ## n ## 1:					\
	addi	r5,r5,-(16 * n);		\
	b	105f;				\
.section __ex_table,"a";			\
	.align	2;				\
	.long	8 ## n ## 0b,9 ## n ## 0b;	\
	.long	8 ## n ## 1b,9 ## n ## 0b;	\
	.long	8 ## n ## 2b,9 ## n ## 0b;	\
	.long	8 ## n ## 3b,9 ## n ## 0b;	\
	.long	8 ## n ## 4b,9 ## n ## 1b;	\
	.long	8 ## n ## 5b,9 ## n ## 1b;	\
	.long	8 ## n ## 6b,9 ## n ## 1b;	\
	.long	8 ## n ## 7b,9 ## n ## 1b;	\
	.text

	.text
	/* NOTE(review): these stabs reference a forward label 0f, but no
	   matching "0:" label is visible in this chunk -- confirm it was
	   not lost in transcription. */
	.stabs	"arch/ppc/lib/",N_SO,0,0,0f
	.stabs	"string.S",N_SO,0,0,0f

CACHELINE_BYTES = L1_CACHE_LINE_SIZE
LG_CACHELINE_BYTES = LG_L1_CACHE_LINE_SIZE
CACHELINE_MASK = (L1_CACHE_LINE_SIZE-1)

/*
 * char *strcpy(char *dest, const char *src)
 * In:  r3 = dest, r4 = src.  Out: r3 = dest (never modified here).
 * Copies bytes up to and including the terminating NUL.
 */
_GLOBAL(strcpy)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r5)
	bne	1b
	blr

/*
 * char *strncpy(char *dest, const char *src, size_t n)
 * In:  r3 = dest, r4 = src, r5 = n.  Out: r3 = dest.
 * This clears out any unused part of the destination buffer,
 * just as the libc version does.  -- paulus
 */
_GLOBAL(strncpy)
	cmpwi	0,r5,0		/* n == 0: nothing to do */
	beqlr
	mtctr	r5
	addi	r6,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r6)
	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
	bnelr			/* if we didn't hit a null char, we're done */
	mfctr	r5
	cmpwi	0,r5,0		/* any space left in destination buffer? */
	beqlr			/* we know r0 == 0 here */
2:	stbu	r0,1(r6)	/* clear it out if so */
	bdnz	2b
	blr

/*
 * char *strcat(char *dest, const char *src)
 * In:  r3 = dest, r4 = src.  Out: r3 = dest.
 * Scans to the NUL at the end of dest, then copies src (including
 * its NUL) starting there.
 */
_GLOBAL(strcat)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r5)	/* find the end of dest */
	cmpwi	0,r0,0
	bne	1b
	addi	r5,r5,-1	/* back up onto the NUL */
1:	lbzu	r0,1(r4)	/* append src, including its NUL */
	cmpwi	0,r0,0
	stbu	r0,1(r5)
	bne	1b
	blr

/*
 * int strcmp(const char *s1, const char *s2)
 * In:  r3 = s1, r4 = s2.
 * Out: r3 = difference of the first differing bytes, 0 if equal.
 * cr1 tracks whether the byte from s1 was the terminating NUL.
 */
_GLOBAL(strcmp)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r3,1(r5)
	cmpwi	1,r3,0		/* cr1: was the s1 byte NUL? */
	lbzu	r0,1(r4)
	subf.	r3,r0,r3	/* r3 = s1 byte - s2 byte */
	beqlr	1		/* end of s1: return the difference */
	beq	1b		/* bytes equal and not NUL: keep going */
	blr

/*
 * size_t strlen(const char *s)
 * In:  r3 = s.  Out: r3 = length, not counting the trailing NUL.
 */
_GLOBAL(strlen)
	addi	r4,r3,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	bne	1b
	subf	r3,r3,r4	/* r4 ends up pointing at the NUL */
	blr

/*
 * void cacheable_memzero(void *p, size_t count)
 * In:  r3 = p, r4 = count.
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 */
_GLOBAL(cacheable_memzero)
	mr	r5,r4		/* r5 = count; r4 becomes the fill value */
	li	r4,0
	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f		/* less than a word: byte loop only */
	stwu	r4,4(r6)	/* store first (possibly unaligned) word */
	beqlr
	andi.	r0,r6,3		/* realign r6 to a word boundary; the */
	add	r5,r0,r5	/* aligned stores overlap the word above */
	subf	r6,r0,r6
	clrlwi	r7,r6,32-LG_CACHELINE_BYTES	/* offset within cacheline */
	add	r8,r7,r5
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f
	xori	r0,r7,CACHELINE_MASK & ~3
	srwi.	r0,r0,2		/* words up to the first line boundary */
	beq	3f
	mtctr	r0
4:	stwu	r4,4(r6)
	bdnz	4b
3:	mtctr	r9
	li	r7,4
#if !defined(CONFIG_8xx)
10:	dcbz	r7,r6		/* zero a whole cacheline in one go */
#else
10:	stw	r4, 4(r6)	/* 8xx: avoid dcbz, store the line by hand */
	stw	r4, 8(r6)
	stw	r4, 12(r6)
	stw	r4, 16(r6)
#endif
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES	/* bytes left after the lines */
	addi	r5,r5,4
2:	srwi	r0,r5,2		/* remaining full words */
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3		/* remaining bytes */
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

/*
 * void *memset(void *p, int c, size_t count)
 * In:  r3 = p, r4 = fill byte, r5 = count.  Out: r3 = p.
 */
_GLOBAL(memset)
	rlwimi	r4,r4,8,16,23	/* replicate the fill byte across */
	rlwimi	r4,r4,16,0,15	/* all four bytes of r4 */
	addi	r6,r3,-4
	cmplwi	0,r5,4
	blt	7f		/* less than a word: byte loop only */
	stwu	r4,4(r6)	/* store first word, then word-align r6; */
	beqlr			/* the aligned stores overlap this word */
	andi.	r0,r6,3
	add	r5,r0,r5
	subf	r6,r0,r6
	srwi	r0,r5,2		/* full words */
	mtctr	r0
	bdz	6f
1:	stwu	r4,4(r6)
	bdnz	1b
6:	andi.	r5,r5,3		/* trailing bytes */
7:	cmpwi	0,r5,0
	beqlr
	mtctr	r5
	addi	r6,r6,3
8:	stbu	r4,1(r6)
	bdnz	8b
	blr

/*
 * void bcopy(const void *src, void *dest, size_t n)
 * BSD argument order: swap src/dest and tail-call memcpy.
 */
_GLOBAL(bcopy)
	mr	r6,r3
	mr	r3,r4
	mr	r4,r6
	b	memcpy

/*
 * void *cacheable_memcpy(void *dst, const void *src, size_t n)
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 */
_GLOBAL(cacheable_memcpy)
	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5
	cmplw	0,r4,r7
	cmplw	1,r3,r8
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	memcpy			/* if regions overlap */

	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	mtctr	r0
	beq	63f
53:
#if !defined(CONFIG_8xx)
	dcbz	r11,r6			/* pre-zero the destination line so
					   it need not be fetched from memory */
#endif
	COPY_16_BYTES
#if L1_CACHE_LINE_SIZE >= 32
	COPY_16_BYTES
#if L1_CACHE_LINE_SIZE >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_LINE_SIZE >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2			/* trailing full words */
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3			/* trailing bytes */
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	blr

/*
 * void *memmove(void *dest, const void *src, size_t n)
 * Copies backwards when dest > src so overlapping moves are safe.
 */
_GLOBAL(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
	/* fall through */

/*
 * void *memcpy(void *dest, const void *src, size_t n)
 * In:  r3 = dest, r4 = src, r5 = n.  Out: r3 = dest (untouched).
 * Moves 8 bytes per loop iteration once dest is word-aligned.
 */
_GLOBAL(memcpy)
	srwi.	r7,r5,3		/* r7 = number of 8-byte groups */
	addi	r6,r3,-4
	addi	r4,r4,-4
	beq	2f		/* if less than 8 bytes to do */
	andi.	r0,r6,3		/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,4(r4)	/* main loop: two words per iteration */
	lwzu	r8,8(r4)
	stw	r7,4(r6)
	stwu	r8,8(r6)
	bdnz	1b
	andi.	r5,r5,7		/* leftover bytes (0..7) */
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,4(r4)	/* one more whole word */
	addi	r5,r5,-4
	stwu	r0,4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5		/* final byte loop */
	addi	r4,r4,3
	addi	r6,r6,3
4:	lbzu	r0,1(r4)
	stbu	r0,1(r6)
	bdnz	4b
	blr
5:	subfic	r0,r0,4		/* copy 1..3 bytes to align dest */
	mtctr	r0
6:	lbz	r7,4(r4)
	addi	r4,r4,1
	stb	r7,4(r6)
	addi	r6,r6,1
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31	/* recompute the 8-byte group count */
	beq	2b
	mtctr	r7
	b	1b

/*
 * void *backwards_memcpy(void *dest, const void *src, size_t n)
 * Same contract as memcpy but copies from the top end downwards,
 * for overlapping regions with dest > src (used by memmove).
 */
_GLOBAL(backwards_memcpy)
	rlwinm.	r7,r5,32-3,3,31	/* r7 = r5 >> 3 */
	add	r6,r3,r5	/* start just past the end */
	add	r4,r4,r5
	beq	2f
	andi.	r0,r6,3		/* get dest word aligned */
	mtctr	r7
	bne	5f
1:	lwz	r7,-4(r4)	/* main loop: two words per iteration */
	lwzu	r8,-8(r4)
	stw	r7,-4(r6)
	stwu	r8,-8(r6)
	bdnz	1b
	andi.	r5,r5,7		/* leftover bytes (0..7) */
2:	cmplwi	0,r5,4
	blt	3f
	lwzu	r0,-4(r4)
	subi	r5,r5,4
	stwu	r0,-4(r6)
3:	cmpwi	0,r5,0
	beqlr
	mtctr	r5		/* final byte loop */
4:	lbzu	r0,-1(r4)
	stbu	r0,-1(r6)
	bdnz	4b
	blr
5:	mtctr	r0		/* copy 1..3 bytes to align dest */
6:	lbzu	r7,-1(r4)
	stbu	r7,-1(r6)
	bdnz	6b
	subf	r5,r0,r5
	rlwinm.	r7,r5,32-3,3,31
	beq	2b
	mtctr	r7
	b	1b

/*
 * int memcmp(const void *s1, const void *s2, size_t n)
 * Out: r3 = difference of the first differing bytes, 0 if equal.
 *      Also returns 0 when n <= 0 (n is treated as signed here).
 */
_GLOBAL(memcmp)
	cmpwi	0,r5,0
	ble-	2f
	mtctr	r5
	addi	r6,r3,-1
	addi	r4,r4,-1
1:	lbzu	r3,1(r6)
	lbzu	r0,1(r4)
	subf.	r3,r0,r3
	bdnzt	2,1b		/* loop while ctr != 0 && bytes equal */
	blr
2:	li	r3,0
	blr

/*
 * void *memchr(const void *s, int c, size_t n)
 * Out: r3 = address of the first byte equal to c, or 0 if no match
 *      within n bytes (or if n <= 0, treated as signed).
 */
_GLOBAL(memchr)
	cmpwi	0,r5,0
	ble-	2f
	mtctr	r5
	addi	r3,r3,-1
1:	lbzu	r0,1(r3)
	cmpw	0,r0,r4
	bdnzf	2,1b		/* loop while ctr != 0 && no match */
	beqlr			/* r3 points at the matching byte */
2:	li	r3,0
	blr

/*
 * unsigned long __copy_tofrom_user(void *to, const void *from,
 *				    unsigned long size)
 * In:  r3 = to, r4 = from, r5 = size.
 * Out: r3 = number of bytes NOT copied (0 on complete success).
 * Either side may be user memory, so every load and store is covered
 * by an __ex_table entry; the fixup code below works out how much
 * was left undone.  After a read fault the destination bytes that
 * were claimed as copied are cleared.
 */
_GLOBAL(__copy_tofrom_user)
	addi	r4,r4,-4
	addi	r6,r3,-4
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
70:	lbz	r9,4(r4)		/* do some bytes */
71:	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
73:	stwu	r9,4(r6)
	bdnz	72b

	.section __ex_table,"a"
	.align	2
	.long	70b,100f
	.long	71b,101f
	.long	72b,102f
	.long	73b,103f
	.text

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

#ifdef CONFIG_8xx
	/* Don't use prefetch on 8xx */
	mtctr	r0
53:	COPY_16_BYTES_WITHEX(0)
	bdnz	53b

#else /* not CONFIG_8xx */
	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0		/* r7 = # lines prefetched ahead of the copy */
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4		/* prime the prefetch pipeline */
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0	/* copy r8 lines while still prefetching, */
	mr	r0,r7		/* then the last r7 with no more prefetch */
	mtctr	r8

53:	dcbt	r3,r4		/* prefetch source ahead of the copy */
54:	dcbz	r11,r6		/* pre-zero the destination line */
	.section __ex_table,"a"
	.align	2
	.long	54b,105f
	.text
/* the main body of the cacheline loop */
	COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_LINE_SIZE >= 32
	COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_LINE_SIZE >= 64
	COPY_16_BYTES_WITHEX(2)
	COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_LINE_SIZE >= 128
	COPY_16_BYTES_WITHEX(4)
	COPY_16_BYTES_WITHEX(5)
	COPY_16_BYTES_WITHEX(6)
	COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0		/* any non-prefetched lines left over? */
	li	r3,4
	li	r7,0		/* second pass copies them with r7 = 0 */
	bne	114b
#endif /* CONFIG_8xx */

63:	srwi.	r0,r5,2		/* trailing full words */
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3		/* trailing bytes */
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
41:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	li	r3,0		/* everything copied successfully */
	blr

/* read fault, initial single-byte copy */
100:	li	r9,0
	b	90f
/* write fault, initial single-byte copy */
101:	li	r9,1
90:	subf	r5,r8,r5
	li	r3,0
	b	99f
/* read fault, initial word copy */
102:	li	r9,0
	b	91f
/* write fault, initial word copy */
103:	li	r9,1
91:	li	r3,2
	b	99f

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * 104f (if in read part) or 105f (if in write part), after updating r5
 */
	COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_LINE_SIZE >= 32
	COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_LINE_SIZE >= 64
	COPY_16_BYTES_EXCODE(2)
	COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_LINE_SIZE >= 128
	COPY_16_BYTES_EXCODE(4)
	COPY_16_BYTES_EXCODE(5)
	COPY_16_BYTES_EXCODE(6)
	COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

/* read fault in cacheline loop */
104:	li	r9,0
	b	92f
/* fault on dcbz (effectively a write fault) */
/* or write fault in cacheline loop */
105:	li	r9,1
92:	li	r3,LG_CACHELINE_BYTES
	b	99f
/* read fault in final word loop */
108:	li	r9,0
	b	93f
/* write fault in final word loop */
109:	li	r9,1
93:	andi.	r5,r5,3
	li	r3,2
	b	99f
/* read fault in final byte loop */
110:	li	r9,0
	b	94f
/* write fault in final byte loop */
111:	li	r9,1
94:	li	r5,0
	li	r3,0
/*
 * At this stage the number of bytes not copied is
 * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
 */
99:	mfctr	r0
	slw	r3,r0,r3
	add.	r3,r3,r5	/* r3 = total bytes not copied */
	beq	120f		/* shouldn't happen */
	cmpwi	0,r9,0
	bne	120f
/* for a read fault, first try to continue the copy one byte at a time */
	mtctr	r3
130:	lbz	r0,4(r4)
131:	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	130b
/* then clear out the destination: r3 bytes starting at 4(r6) */
132:	mfctr	r3
	srwi.	r0,r3,2
	li	r9,0
	mtctr	r0
	beq	113f
112:	stwu	r9,4(r6)
	bdnz	112b
113:	andi.	r0,r3,3
	mtctr	r0
	beq	120f
114:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	114b
120:	blr

	.section __ex_table,"a"
	.align	2
	.long	30b,108b
	.long	31b,109b
	.long	40b,110b
	.long	41b,111b
	.long	130b,132b
	.long	131b,120b
	.long	112b,120b
	.long	114b,120b
	.text

/*
 * unsigned long __clear_user(void *addr, unsigned long size)
 * In:  r3 = addr (user space), r4 = size.
 * Out: r3 = number of bytes NOT cleared (0 on success).
 * Stores may fault; the fixups at 90/91/92 below compute the
 * remaining count from ctr and the leftover byte count.
 */
_GLOBAL(__clear_user)
	addi	r6,r3,-4
	li	r3,0		/* anticipate success */
	li	r5,0		/* fill value */
	cmplwi	0,r4,4
	blt	7f
	/* clear a single word */
11:	stwu	r5,4(r6)
	beqlr
	/* clear word sized chunks */
	andi.	r0,r6,3
	add	r4,r0,r4
	subf	r6,r0,r6
	srwi	r0,r4,2
	andi.	r4,r4,3		/* r4 = leftover bytes after the words */
	mtctr	r0
	bdz	7f
1:	stwu	r5,4(r6)
	bdnz	1b
	/* clear byte sized chunks */
7:	cmpwi	0,r4,0
	beqlr
	mtctr	r4
	addi	r6,r6,3
8:	stbu	r5,1(r6)
	bdnz	8b
	blr
/* fault on the initial word store: nothing was cleared */
90:	mr	r3,r4
	blr
/* fault in the word loop: remaining = 4*ctr + leftover bytes */
91:	mfctr	r3
	slwi	r3,r3,2
	add	r3,r3,r4
	blr
/* fault in the byte loop: remaining = ctr */
92:	mfctr	r3
	blr

	.section __ex_table,"a"
	.align	2
	.long	11b,90b
	.long	1b,91b
	.long	8b,92b
	.text

/*
 * long __strncpy_from_user(char *dst, const char *src, long count)
 * In:  r3 = dst, r4 = src (user space), r5 = count.
 * Out: r3 = number of bytes copied, not counting a terminating NUL,
 *      or -EFAULT if a user-space access faulted.
 */
_GLOBAL(__strncpy_from_user)
	addi	r6,r3,-1
	addi	r4,r4,-1
	cmpwi	0,r5,0
	beq	2f
	mtctr	r5
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r6)
	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
	beq	3f		/* hit the NUL: don't count it */
2:	addi	r6,r6,1
3:	subf	r3,r3,r6	/* r3 = bytes stored before the NUL */
	blr
99:	li	r3,-EFAULT	/* the user-space load faulted */
	blr

	.section __ex_table,"a"
	.align	2
	.long	1b,99b
	.text

/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */
/*
 * long __strnlen_user(const char *str, long len, unsigned long top)
 * Out: r3 = string length INCLUDING the trailing NUL,
 *      len + 1 if no NUL was found within len bytes,
 *      0 if a user access faulted.
 * Never reads past `top', the highest valid user address.
 */
_GLOBAL(__strnlen_user)
	addi	r7,r3,-1
	subf	r6,r7,r5	/* top+1 - str */
	cmplw	0,r4,r6
	bge	0f
	mr	r6,r4
0:	mtctr	r6		/* ctr = min(len, top - str) */
1:	lbzu	r0,1(r7)	/* get next byte */
	cmpwi	0,r0,0
	bdnzf	2,1b		/* loop if --ctr != 0 && byte != 0 */
	addi	r7,r7,1
	subf	r3,r3,r7	/* number of bytes we have looked at */
	beqlr			/* return if we found a 0 byte */
	cmpw	0,r3,r4		/* did we look at all len bytes? */
	blt	99f		/* if not, must have hit top */
	addi	r3,r4,1		/* return len + 1 to indicate no null found */
	blr
99:	li	r3,0		/* bad address, return 0 */
	blr

	.section __ex_table,"a"
	.align	2
	.long	1b,99b