/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Original implementation written by Andy Polyakov, @dot-asm.
 * This is an adaptation of the original code for kernel use.
 *
 * Copyright (C) 2006-2019 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved.
 */

#include <linux/linkage.h>
#include <asm/nospec-insn.h>
#include <asm/vx-insn.h>

#define SP	%r15
#define FRAME	(16 * 8 + 4 * 8)

.data
.align	32

.Lsigma:
.long	0x61707865,0x3320646e,0x79622d32,0x6b206574	# endian-neutral
.long	1,0,0,0
.long	2,0,0,0
.long	3,0,0,0
.long	0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c	# byte swap

.long	0,1,2,3
.long	0x61707865,0x61707865,0x61707865,0x61707865	# smashed sigma
.long	0x3320646e,0x3320646e,0x3320646e,0x3320646e
.long	0x79622d32,0x79622d32,0x79622d32,0x79622d32
.long	0x6b206574,0x6b206574,0x6b206574,0x6b206574

.previous

	GEN_BR_THUNK %r14

.text

#############################################################################
# void chacha20_vx_4x(u8 *out, const u8 *inp, size_t len,
#		      const u32 *key, const u32 *counter)
# (a caller-side usage sketch appears at the end of this file)

#define OUT	%r2
#define INP	%r3
#define LEN	%r4
#define KEY	%r5
#define COUNTER	%r6

#define BEPERM	%v31
#define CTR	%v26

#define K0	%v16
#define K1	%v17
#define K2	%v18
#define K3	%v19

#define XA0	%v0
#define XA1	%v1
#define XA2	%v2
#define XA3	%v3

#define XB0	%v4
#define XB1	%v5
#define XB2	%v6
#define XB3	%v7

#define XC0	%v8
#define XC1	%v9
#define XC2	%v10
#define XC3	%v11

#define XD0	%v12
#define XD1	%v13
#define XD2	%v14
#define XD3	%v15

#define XT0	%v27
#define XT1	%v28
#define XT2	%v29
#define XT3	%v30

ENTRY(chacha20_vx_4x)
	stmg	%r6,%r7,6*8(SP)

	larl	%r7,.Lsigma
	lhi	%r0,10
	lhi	%r1,0

	VL	K0,0,,%r7		# load sigma
	VL	K1,0,,KEY		# load key
	VL	K2,16,,KEY
	VL	K3,0,,COUNTER		# load counter

	VL	BEPERM,0x40,,%r7
	VL	CTR,0x50,,%r7

	VLM	XA0,XA3,0x60,%r7,4	# load [smashed] sigma

	VREPF	XB0,K1,0		# smash the key
	VREPF	XB1,K1,1
	VREPF	XB2,K1,2
	VREPF	XB3,K1,3

	VREPF	XD0,K3,0
	VREPF	XD1,K3,1
	VREPF	XD2,K3,2
	VREPF	XD3,K3,3
	VAF	XD0,XD0,CTR

	VREPF	XC0,K2,0
	VREPF	XC1,K2,1
	VREPF	XC2,K2,2
	VREPF	XC3,K2,3

.Loop_4x:
	# column round: quarter-rounds on (XAi,XBi,XCi,XDi), four blocks at a time
	VAF	XA0,XA0,XB0
	VX	XD0,XD0,XA0
	VERLLF	XD0,XD0,16

	VAF	XA1,XA1,XB1
	VX	XD1,XD1,XA1
	VERLLF	XD1,XD1,16

	VAF	XA2,XA2,XB2
	VX	XD2,XD2,XA2
	VERLLF	XD2,XD2,16

	VAF	XA3,XA3,XB3
	VX	XD3,XD3,XA3
	VERLLF	XD3,XD3,16

	VAF	XC0,XC0,XD0
	VX	XB0,XB0,XC0
	VERLLF	XB0,XB0,12

	VAF	XC1,XC1,XD1
	VX	XB1,XB1,XC1
	VERLLF	XB1,XB1,12

	VAF	XC2,XC2,XD2
	VX	XB2,XB2,XC2
	VERLLF	XB2,XB2,12

	VAF	XC3,XC3,XD3
	VX	XB3,XB3,XC3
	VERLLF	XB3,XB3,12

	VAF	XA0,XA0,XB0
	VX	XD0,XD0,XA0
	VERLLF	XD0,XD0,8

	VAF	XA1,XA1,XB1
	VX	XD1,XD1,XA1
	VERLLF	XD1,XD1,8

	VAF	XA2,XA2,XB2
	VX	XD2,XD2,XA2
	VERLLF	XD2,XD2,8

	VAF	XA3,XA3,XB3
	VX	XD3,XD3,XA3
	VERLLF	XD3,XD3,8

	VAF	XC0,XC0,XD0
	VX	XB0,XB0,XC0
	VERLLF	XB0,XB0,7

	VAF	XC1,XC1,XD1
	VX	XB1,XB1,XC1
	VERLLF	XB1,XB1,7

	VAF	XC2,XC2,XD2
	VX	XB2,XB2,XC2
	VERLLF	XB2,XB2,7

	VAF	XC3,XC3,XD3
	VX	XB3,XB3,XC3
	VERLLF	XB3,XB3,7

	# diagonal round: quarter-rounds on (XA0,XB1,XC2,XD3), (XA1,XB2,XC3,XD0), ...
	VAF	XA0,XA0,XB1
	VX	XD3,XD3,XA0
	VERLLF	XD3,XD3,16

	VAF	XA1,XA1,XB2
	VX	XD0,XD0,XA1
	VERLLF	XD0,XD0,16

	VAF	XA2,XA2,XB3
	VX	XD1,XD1,XA2
	VERLLF	XD1,XD1,16

	VAF	XA3,XA3,XB0
	VX	XD2,XD2,XA3
	VERLLF	XD2,XD2,16

	VAF	XC2,XC2,XD3
	VX	XB1,XB1,XC2
	VERLLF	XB1,XB1,12

	VAF	XC3,XC3,XD0
	VX	XB2,XB2,XC3
	VERLLF	XB2,XB2,12

	VAF	XC0,XC0,XD1
	VX	XB3,XB3,XC0
	VERLLF	XB3,XB3,12

	VAF	XC1,XC1,XD2
	VX	XB0,XB0,XC1
	VERLLF	XB0,XB0,12

	VAF	XA0,XA0,XB1
	VX	XD3,XD3,XA0
	VERLLF	XD3,XD3,8

	VAF	XA1,XA1,XB2
	VX	XD0,XD0,XA1
	VERLLF	XD0,XD0,8

	VAF	XA2,XA2,XB3
	VX	XD1,XD1,XA2
	VERLLF	XD1,XD1,8

	VAF	XA3,XA3,XB0
	VX	XD2,XD2,XA3
	VERLLF	XD2,XD2,8

	VAF	XC2,XC2,XD3
	VX	XB1,XB1,XC2
	VERLLF	XB1,XB1,7

	VAF	XC3,XC3,XD0
	VX	XB2,XB2,XC3
	VERLLF	XB2,XB2,7

	VAF	XC0,XC0,XD1
	VX	XB3,XB3,XC0
	VERLLF	XB3,XB3,7

	VAF	XC1,XC1,XD2
	VX	XB0,XB0,XC1
	VERLLF	XB0,XB0,7
	brct	%r0,.Loop_4x

	VAF	XD0,XD0,CTR

	VMRHF	XT0,XA0,XA1		# transpose data
	VMRHF	XT1,XA2,XA3
	VMRLF	XT2,XA0,XA1
	VMRLF	XT3,XA2,XA3
	VPDI	XA0,XT0,XT1,0b0000
	VPDI	XA1,XT0,XT1,0b0101
	VPDI	XA2,XT2,XT3,0b0000
	VPDI	XA3,XT2,XT3,0b0101

	VMRHF	XT0,XB0,XB1
	VMRHF	XT1,XB2,XB3
	VMRLF	XT2,XB0,XB1
	VMRLF	XT3,XB2,XB3
	VPDI	XB0,XT0,XT1,0b0000
	VPDI	XB1,XT0,XT1,0b0101
	VPDI	XB2,XT2,XT3,0b0000
	VPDI	XB3,XT2,XT3,0b0101

	VMRHF	XT0,XC0,XC1
	VMRHF	XT1,XC2,XC3
	VMRLF	XT2,XC0,XC1
	VMRLF	XT3,XC2,XC3
	VPDI	XC0,XT0,XT1,0b0000
	VPDI	XC1,XT0,XT1,0b0101
	VPDI	XC2,XT2,XT3,0b0000
	VPDI	XC3,XT2,XT3,0b0101

	VMRHF	XT0,XD0,XD1
	VMRHF	XT1,XD2,XD3
	VMRLF	XT2,XD0,XD1
	VMRLF	XT3,XD2,XD3
	VPDI	XD0,XT0,XT1,0b0000
	VPDI	XD1,XT0,XT1,0b0101
	VPDI	XD2,XT2,XT3,0b0000
	VPDI	XD3,XT2,XT3,0b0101

	VAF	XA0,XA0,K0
	VAF	XB0,XB0,K1
	VAF	XC0,XC0,K2
	VAF	XD0,XD0,K3

	VPERM	XA0,XA0,XA0,BEPERM
	VPERM	XB0,XB0,XB0,BEPERM
	VPERM	XC0,XC0,XC0,BEPERM
	VPERM	XD0,XD0,XD0,BEPERM

	VLM	XT0,XT3,0,INP,0

	VX	XT0,XT0,XA0
	VX	XT1,XT1,XB0
	VX	XT2,XT2,XC0
	VX	XT3,XT3,XD0

	VSTM	XT0,XT3,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40

	VAF	XA0,XA1,K0
	VAF	XB0,XB1,K1
	VAF	XC0,XC1,K2
	VAF	XD0,XD1,K3

	VPERM	XA0,XA0,XA0,BEPERM
	VPERM	XB0,XB0,XB0,BEPERM
	VPERM	XC0,XC0,XC0,BEPERM
	VPERM	XD0,XD0,XD0,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_4x

	VLM	XT0,XT3,0,INP,0

	VX	XT0,XT0,XA0
	VX	XT1,XT1,XB0
	VX	XT2,XT2,XC0
	VX	XT3,XT3,XD0

	VSTM	XT0,XT3,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_4x

	VAF	XA0,XA2,K0
	VAF	XB0,XB2,K1
	VAF	XC0,XC2,K2
	VAF	XD0,XD2,K3

	VPERM	XA0,XA0,XA0,BEPERM
	VPERM	XB0,XB0,XB0,BEPERM
	VPERM	XC0,XC0,XC0,BEPERM
	VPERM	XD0,XD0,XD0,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_4x

	VLM	XT0,XT3,0,INP,0

	VX	XT0,XT0,XA0
	VX	XT1,XT1,XB0
	VX	XT2,XT2,XC0
	VX	XT3,XT3,XD0

	VSTM	XT0,XT3,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_4x

	VAF	XA0,XA3,K0
	VAF	XB0,XB3,K1
	VAF	XC0,XC3,K2
	VAF	XD0,XD3,K3

	VPERM	XA0,XA0,XA0,BEPERM
	VPERM	XB0,XB0,XB0,BEPERM
	VPERM	XC0,XC0,XC0,BEPERM
	VPERM	XD0,XD0,XD0,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_4x

	VLM	XT0,XT3,0,INP,0

	VX	XT0,XT0,XA0
	VX	XT1,XT1,XB0
	VX	XT2,XT2,XC0
	VX	XT3,XT3,XD0

	VSTM	XT0,XT3,0,OUT,0

.Ldone_4x:
	lmg	%r6,%r7,6*8(SP)
	BR_EX	%r14

.Ltail_4x:
	VLR	XT0,XC0
	VLR	XT1,XD0

	VST	XA0,8*8+0x00,,SP
	VST	XB0,8*8+0x10,,SP
	VST	XT0,8*8+0x20,,SP
	VST	XT1,8*8+0x30,,SP

	lghi	%r1,0

.Loop_tail_4x:
	llgc	%r5,0(%r1,INP)
	llgc	%r6,8*8(%r1,SP)
	xr	%r6,%r5
	stc	%r6,0(%r1,OUT)
	la	%r1,1(%r1)
	brct	LEN,.Loop_tail_4x

	lmg	%r6,%r7,6*8(SP)
	BR_EX	%r14
ENDPROC(chacha20_vx_4x)

#undef OUT
#undef INP
#undef LEN
#undef KEY
#undef COUNTER

#undef BEPERM

#undef K0
#undef K1
#undef K2
#undef K3


#############################################################################
# void chacha20_vx(u8 *out, const u8 *inp, size_t len,
#		   const u32 *key, const u32 *counter)
# (a caller-side usage sketch appears at the end of this file)

#define OUT	%r2
#define INP	%r3
#define LEN	%r4
#define KEY	%r5
#define COUNTER	%r6

#define BEPERM	%v31

#define K0	%v27
#define K1	%v24
#define K2	%v25
#define K3	%v26

#define A0	%v0
#define B0	%v1
#define C0	%v2
#define D0	%v3

#define A1	%v4
#define B1	%v5
#define C1	%v6
#define D1	%v7

#define A2	%v8
#define B2	%v9
#define C2	%v10
#define D2	%v11

#define A3	%v12
#define B3	%v13
#define C3	%v14
#define D3	%v15

#define A4	%v16
#define B4	%v17
#define C4	%v18
#define D4	%v19

#define A5	%v20
#define B5	%v21
#define C5	%v22
#define D5	%v23

#define T0	%v27
#define T1	%v28
#define T2	%v29
#define T3	%v30

ENTRY(chacha20_vx)
	clgfi	LEN,256
	jle	chacha20_vx_4x
	stmg	%r6,%r7,6*8(SP)

	lghi	%r1,-FRAME
	lgr	%r0,SP
	la	SP,0(%r1,SP)
	stg	%r0,0(SP)		# back-chain

	larl	%r7,.Lsigma
	lhi	%r0,10

	VLM	K1,K2,0,KEY,0		# load key
	VL	K3,0,,COUNTER		# load counter

	VLM	K0,BEPERM,0,%r7,4	# load sigma, increments, ...

.Loop_outer_vx:
	VLR	A0,K0
	VLR	B0,K1
	VLR	A1,K0
	VLR	B1,K1
	VLR	A2,K0
	VLR	B2,K1
	VLR	A3,K0
	VLR	B3,K1
	VLR	A4,K0
	VLR	B4,K1
	VLR	A5,K0
	VLR	B5,K1

	VLR	D0,K3
	VAF	D1,K3,T1		# K[3]+1
	VAF	D2,K3,T2		# K[3]+2
	VAF	D3,K3,T3		# K[3]+3
	VAF	D4,D2,T2		# K[3]+4
	VAF	D5,D2,T3		# K[3]+5

	VLR	C0,K2
	VLR	C1,K2
	VLR	C2,K2
	VLR	C3,K2
	VLR	C4,K2
	VLR	C5,K2

	VLR	T1,D1
	VLR	T2,D2
	VLR	T3,D3

.Loop_vx:
	VAF	A0,A0,B0
	VAF	A1,A1,B1
	VAF	A2,A2,B2
	VAF	A3,A3,B3
	VAF	A4,A4,B4
	VAF	A5,A5,B5
	VX	D0,D0,A0
	VX	D1,D1,A1
	VX	D2,D2,A2
	VX	D3,D3,A3
	VX	D4,D4,A4
	VX	D5,D5,A5
	VERLLF	D0,D0,16
	VERLLF	D1,D1,16
	VERLLF	D2,D2,16
	VERLLF	D3,D3,16
	VERLLF	D4,D4,16
	VERLLF	D5,D5,16

	VAF	C0,C0,D0
	VAF	C1,C1,D1
	VAF	C2,C2,D2
	VAF	C3,C3,D3
	VAF	C4,C4,D4
	VAF	C5,C5,D5
	VX	B0,B0,C0
	VX	B1,B1,C1
	VX	B2,B2,C2
	VX	B3,B3,C3
	VX	B4,B4,C4
	VX	B5,B5,C5
	VERLLF	B0,B0,12
	VERLLF	B1,B1,12
	VERLLF	B2,B2,12
	VERLLF	B3,B3,12
	VERLLF	B4,B4,12
	VERLLF	B5,B5,12

	VAF	A0,A0,B0
	VAF	A1,A1,B1
	VAF	A2,A2,B2
	VAF	A3,A3,B3
	VAF	A4,A4,B4
	VAF	A5,A5,B5
	VX	D0,D0,A0
	VX	D1,D1,A1
	VX	D2,D2,A2
	VX	D3,D3,A3
	VX	D4,D4,A4
	VX	D5,D5,A5
	VERLLF	D0,D0,8
	VERLLF	D1,D1,8
	VERLLF	D2,D2,8
	VERLLF	D3,D3,8
	VERLLF	D4,D4,8
	VERLLF	D5,D5,8

	VAF	C0,C0,D0
	VAF	C1,C1,D1
	VAF	C2,C2,D2
	VAF	C3,C3,D3
	VAF	C4,C4,D4
	VAF	C5,C5,D5
	VX	B0,B0,C0
	VX	B1,B1,C1
	VX	B2,B2,C2
	VX	B3,B3,C3
	VX	B4,B4,C4
	VX	B5,B5,C5
	VERLLF	B0,B0,7
	VERLLF	B1,B1,7
	VERLLF	B2,B2,7
	VERLLF	B3,B3,7
	VERLLF	B4,B4,7
	VERLLF	B5,B5,7

	# rotate B/C/D so the diagonals line up in the columns
	VSLDB	C0,C0,C0,8
	VSLDB	C1,C1,C1,8
	VSLDB	C2,C2,C2,8
	VSLDB	C3,C3,C3,8
	VSLDB	C4,C4,C4,8
	VSLDB	C5,C5,C5,8
	VSLDB	B0,B0,B0,4
	VSLDB	B1,B1,B1,4
	VSLDB	B2,B2,B2,4
	VSLDB	B3,B3,B3,4
	VSLDB	B4,B4,B4,4
	VSLDB	B5,B5,B5,4
	VSLDB	D0,D0,D0,12
	VSLDB	D1,D1,D1,12
	VSLDB	D2,D2,D2,12
	VSLDB	D3,D3,D3,12
	VSLDB	D4,D4,D4,12
	VSLDB	D5,D5,D5,12

	VAF	A0,A0,B0
	VAF	A1,A1,B1
	VAF	A2,A2,B2
	VAF	A3,A3,B3
	VAF	A4,A4,B4
	VAF	A5,A5,B5
	VX	D0,D0,A0
	VX	D1,D1,A1
	VX	D2,D2,A2
	VX	D3,D3,A3
	VX	D4,D4,A4
	VX	D5,D5,A5
	VERLLF	D0,D0,16
	VERLLF	D1,D1,16
	VERLLF	D2,D2,16
	VERLLF	D3,D3,16
	VERLLF	D4,D4,16
	VERLLF	D5,D5,16

	VAF	C0,C0,D0
	VAF	C1,C1,D1
	VAF	C2,C2,D2
	VAF	C3,C3,D3
	VAF	C4,C4,D4
	VAF	C5,C5,D5
	VX	B0,B0,C0
	VX	B1,B1,C1
	VX	B2,B2,C2
	VX	B3,B3,C3
	VX	B4,B4,C4
	VX	B5,B5,C5
	VERLLF	B0,B0,12
	VERLLF	B1,B1,12
	VERLLF	B2,B2,12
	VERLLF	B3,B3,12
	VERLLF	B4,B4,12
	VERLLF	B5,B5,12

	VAF	A0,A0,B0
	VAF	A1,A1,B1
	VAF	A2,A2,B2
	VAF	A3,A3,B3
	VAF	A4,A4,B4
	VAF	A5,A5,B5
	VX	D0,D0,A0
	VX	D1,D1,A1
	VX	D2,D2,A2
	VX	D3,D3,A3
	VX	D4,D4,A4
	VX	D5,D5,A5
	VERLLF	D0,D0,8
	VERLLF	D1,D1,8
	VERLLF	D2,D2,8
	VERLLF	D3,D3,8
	VERLLF	D4,D4,8
	VERLLF	D5,D5,8

	VAF	C0,C0,D0
	VAF	C1,C1,D1
	VAF	C2,C2,D2
	VAF	C3,C3,D3
	VAF	C4,C4,D4
	VAF	C5,C5,D5
	VX	B0,B0,C0
	VX	B1,B1,C1
	VX	B2,B2,C2
	VX	B3,B3,C3
	VX	B4,B4,C4
	VX	B5,B5,C5
	VERLLF	B0,B0,7
	VERLLF	B1,B1,7
	VERLLF	B2,B2,7
	VERLLF	B3,B3,7
	VERLLF	B4,B4,7
	VERLLF	B5,B5,7

	# rotate B/C/D back to their column positions
	VSLDB	C0,C0,C0,8
	VSLDB	C1,C1,C1,8
	VSLDB	C2,C2,C2,8
	VSLDB	C3,C3,C3,8
	VSLDB	C4,C4,C4,8
	VSLDB	C5,C5,C5,8
	VSLDB	B0,B0,B0,12
	VSLDB	B1,B1,B1,12
	VSLDB	B2,B2,B2,12
	VSLDB	B3,B3,B3,12
	VSLDB	B4,B4,B4,12
	VSLDB	B5,B5,B5,12
	VSLDB	D0,D0,D0,4
	VSLDB	D1,D1,D1,4
	VSLDB	D2,D2,D2,4
	VSLDB	D3,D3,D3,4
	VSLDB	D4,D4,D4,4
	VSLDB	D5,D5,D5,4
	brct	%r0,.Loop_vx

	VAF	A0,A0,K0
	VAF	B0,B0,K1
	VAF	C0,C0,K2
	VAF	D0,D0,K3
	VAF	A1,A1,K0
	VAF	D1,D1,T1		# +K[3]+1

	VPERM	A0,A0,A0,BEPERM
	VPERM	B0,B0,B0,BEPERM
	VPERM	C0,C0,C0,BEPERM
	VPERM	D0,D0,D0,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_vx

	VAF	D2,D2,T2		# +K[3]+2
	VAF	D3,D3,T3		# +K[3]+3
	VLM	T0,T3,0,INP,0

	VX	A0,A0,T0
	VX	B0,B0,T1
	VX	C0,C0,T2
	VX	D0,D0,T3

	VLM	K0,T3,0,%r7,4		# re-load sigma and increments

	VSTM	A0,D0,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_vx

	VAF	B1,B1,K1
	VAF	C1,C1,K2

	VPERM	A0,A1,A1,BEPERM
	VPERM	B0,B1,B1,BEPERM
	VPERM	C0,C1,C1,BEPERM
	VPERM	D0,D1,D1,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_vx

	VLM	A1,D1,0,INP,0

	VX	A0,A0,A1
	VX	B0,B0,B1
	VX	C0,C0,C1
	VX	D0,D0,D1

	VSTM	A0,D0,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_vx

	VAF	A2,A2,K0
	VAF	B2,B2,K1
	VAF	C2,C2,K2

	VPERM	A0,A2,A2,BEPERM
	VPERM	B0,B2,B2,BEPERM
	VPERM	C0,C2,C2,BEPERM
	VPERM	D0,D2,D2,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_vx

	VLM	A1,D1,0,INP,0

	VX	A0,A0,A1
	VX	B0,B0,B1
	VX	C0,C0,C1
	VX	D0,D0,D1

	VSTM	A0,D0,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_vx

	VAF	A3,A3,K0
	VAF	B3,B3,K1
	VAF	C3,C3,K2
	VAF	D2,K3,T3		# K[3]+3

	VPERM	A0,A3,A3,BEPERM
	VPERM	B0,B3,B3,BEPERM
	VPERM	C0,C3,C3,BEPERM
	VPERM	D0,D3,D3,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_vx

	VAF	D3,D2,T1		# K[3]+4
	VLM	A1,D1,0,INP,0

	VX	A0,A0,A1
	VX	B0,B0,B1
	VX	C0,C0,C1
	VX	D0,D0,D1

	VSTM	A0,D0,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_vx

	VAF	A4,A4,K0
	VAF	B4,B4,K1
	VAF	C4,C4,K2
	VAF	D4,D4,D3		# +K[3]+4
	VAF	D3,D3,T1		# K[3]+5
	VAF	K3,D2,T3		# K[3]+=6

	VPERM	A0,A4,A4,BEPERM
	VPERM	B0,B4,B4,BEPERM
	VPERM	C0,C4,C4,BEPERM
	VPERM	D0,D4,D4,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_vx

	VLM	A1,D1,0,INP,0

	VX	A0,A0,A1
	VX	B0,B0,B1
	VX	C0,C0,C1
	VX	D0,D0,D1

	VSTM	A0,D0,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_vx

	VAF	A5,A5,K0
	VAF	B5,B5,K1
	VAF	C5,C5,K2
	VAF	D5,D5,D3		# +K[3]+5

	VPERM	A0,A5,A5,BEPERM
	VPERM	B0,B5,B5,BEPERM
	VPERM	C0,C5,C5,BEPERM
	VPERM	D0,D5,D5,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_vx

	VLM	A1,D1,0,INP,0

	VX	A0,A0,A1
	VX	B0,B0,B1
	VX	C0,C0,C1
	VX	D0,D0,D1

	VSTM	A0,D0,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	lhi	%r0,10
	aghi	LEN,-0x40
	jne	.Loop_outer_vx

.Ldone_vx:
	lmg	%r6,%r7,FRAME+6*8(SP)
	la	SP,FRAME(SP)
	BR_EX	%r14

.Ltail_vx:
	VSTM	A0,D0,8*8,SP,3
	lghi	%r1,0

.Loop_tail_vx:
	llgc	%r5,0(%r1,INP)
	llgc	%r6,8*8(%r1,SP)
	xr	%r6,%r5
	stc	%r6,0(%r1,OUT)
	la	%r1,1(%r1)
	brct	LEN,.Loop_tail_vx

	lmg	%r6,%r7,FRAME+6*8(SP)
	la	SP,FRAME(SP)
	BR_EX	%r14
ENDPROC(chacha20_vx)

.previous
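
/*
 * Caller-side usage sketch (documentation only, nothing below is assembled).
 *
 * A minimal sketch of how C glue code might call these routines, assuming
 * the conventional ChaCha20 layout: key points at eight 32-bit key words and
 * counter at four 32-bit words, the first of which is the per-64-byte-block
 * counter (the word the code above increments per parallel block).  The
 * helper name chacha20_crypt_sketch() and its counter bookkeeping are
 * hypothetical illustrations, not part of this file; only the two prototypes
 * match the declarations documented above.  chacha20_vx_4x() emits its first
 * 64-byte block before testing the remaining length, and neither routine
 * writes the counter back to memory, so the sketch assumes len is larger
 * than one block and leaves counter maintenance to the caller.
 *
 *	void chacha20_vx(u8 *out, const u8 *inp, size_t len,
 *			 const u32 *key, const u32 *counter);
 *	void chacha20_vx_4x(u8 *out, const u8 *inp, size_t len,
 *			    const u32 *key, const u32 *counter);
 *
 *	static void chacha20_crypt_sketch(u8 *out, const u8 *in, size_t len,
 *					  const u32 key[8], u32 counter[4])
 *	{
 *		// chacha20_vx() branches to chacha20_vx_4x() itself for
 *		// requests of 256 bytes or less, so one entry point suffices.
 *		chacha20_vx(out, in, len, key, counter);
 *
 *		// The asm leaves the counter untouched in memory; advance
 *		// the block counter here before any follow-up call.
 *		counter[0] += (len + 63) / 64;
 *	}
 */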