/* x86-64 __mpn_lshift --
   Copyright (C) 2007-2022 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <https://www.gnu.org/licenses/>.  */

#include "sysdep.h"
#include "asm-syntax.h"

/* __mpn_lshift -- shift an n-limb number left by cnt bits.

   Syntax: GAS, AT&T operand order, run through the C preprocessor.
   Presumably the glibc copy of GMP's
     mp_limb_t mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n,
			   unsigned int cnt)
   (the C prototype is not visible in this file -- confirm against gmp.h).

   In (the first four System V AMD64 integer argument registers):
     rp  (%rdi)	destination, n 64-bit limbs
     up  (%rsi)	source, n 64-bit limbs
     n   (%rdx)	limb count.  Must be >= 1; this is not checked, and n = 0
		would take the L(b00) path and touch memory below both
		operands.
     cnt (%cl)	bit count.  GMP documents 1 <= cnt <= 63; not checked here.

   Out:
     %rax	the bits shifted out of the most significant source limb,
		i.e. up[n-1] >> (64 - cnt) for cnt in 1..63.

   Clobbers %rdx, %rsi, %rdi, %r8-%r11 and the flags.  No stack is used and
   no callee-saved register is touched.

   Method: rp and up are first moved to the most significant limb, and the
   limbs are then handled from the top downwards.  Each result limb is
     (limb << cnt) | (next lower limb >> (64 - cnt))
   which is exactly what "shld cnt, lower, limb" computes; the lowest limb
   is finished with a plain shl.  The main loop is unrolled four times; the
   set-up code dispatches on n mod 4, preloads the limbs the chosen stage
   expects and jumps into the middle of the loop.  */

#define rp	%rdi
#define up	%rsi
#define n	%rdx
#define cnt	%cl

	.text
ENTRY (__mpn_lshift)
	lea	-8(rp,n,8), rp		/* rp = &rp[n-1] */
	lea	-8(up,n,8), up		/* up = &up[n-1] */

	mov	%edx, %eax		/* low 32 bits of n are enough ... */
	and	$3, %eax		/* ... to get n mod 4 */
	jne	L(nb00)

L(b00):	/* n = 4, 8, 12, ... */
	mov	(up), %r10
	mov	-8(up), %r11
	xor	%eax, %eax
	shld	cnt, %r10, %rax		/* return value: bits leaving the top limb */
	mov	-16(up), %r8
	lea	24(rp), rp		/* bias rp so that -24(rp) is the top limb */
	sub	$4, n
	jmp	L(00)

L(nb00):	/* n mod 4 != 0; %eax = n mod 4 */
	cmp	$2, %eax
	jae	L(nb01)

L(b01):	/* n = 1, 5, 9, ... */
	mov	(up), %r9
	xor	%eax, %eax
	shld	cnt, %r9, %rax		/* return value: bits leaving the top limb */
	sub	$2, n
	jb	L(le1)			/* borrow: n was 1 */
	mov	-8(up), %r10
	mov	-16(up), %r11
	lea	-8(up), up
	lea	16(rp), rp		/* bias rp so that -16(rp) is the top limb */
	jmp	L(01)

L(le1):	/* n = 1: the only limb is also the lowest one */
	shl	cnt, %r9
	mov	%r9, (rp)
	ret

L(nb01):	/* n mod 4 is 2 or 3; flags still hold the result of cmp $2, %eax */
	jne	L(b11)

L(b10):	/* n = 2, 6, 10, ... */
	mov	(up), %r8
	mov	-8(up), %r9
	xor	%eax, %eax
	shld	cnt, %r8, %rax		/* return value: bits leaving the top limb */
	sub	$3, n
	jb	L(le2)			/* borrow: n was 2 */
	mov	-16(up), %r10
	lea	-16(up), up
	lea	8(rp), rp		/* bias rp so that -8(rp) is the top limb */
	jmp	L(10)

L(le2):	/* n = 2: finish both limbs without entering the loop */
	shld	cnt, %r9, %r8
	mov	%r8, (rp)
	shl	cnt, %r9
	mov	%r9, -8(rp)
	ret

	.p2align 4		/* performance critical! */
L(b11):	/* n = 3, 7, 11, ... */
	mov	(up), %r11
	mov	-8(up), %r8
	xor	%eax, %eax
	shld	cnt, %r11, %rax		/* return value: bits leaving the top limb */
	mov	-16(up), %r9
	lea	-24(up), up
	sub	$4, n
	jb	L(end)			/* borrow: n was 3, only the tail is left */

	/* Main loop, four limbs per iteration.  Every stage merges one limb
	   with its lower neighbour, loads a limb for a later stage and stores
	   the merged limb.  The live limbs rotate through %r11, %r8, %r9 and
	   %r10.  L(10), L(01) and L(00) double as entry points for the set-up
	   code above.  */
	.p2align 4
L(top):	shld	cnt, %r8, %r11
	mov	(up), %r10
	mov	%r11, (rp)
L(10):	shld	cnt, %r9, %r8
	mov	-8(up), %r11
	mov	%r8, -8(rp)
L(01):	shld	cnt, %r10, %r9
	mov	-16(up), %r8
	mov	%r9, -16(rp)
L(00):	shld	cnt, %r11, %r10
	mov	-24(up), %r9
	mov	%r10, -24(rp)
	add	$-32, up
	lea	-32(rp), rp		/* lea keeps the flags out of the way */
	sub	$4, n
	jnc	L(top)			/* loop until the counter borrows */

	/* Tail: %r11, %r8 and %r9 hold the three lowest source limbs, highest
	   first; rp points at the destination of the first of them.  */
L(end):	shld	cnt, %r8, %r11
	mov	%r11, (rp)
	shld	cnt, %r9, %r8
	mov	%r8, -8(rp)
	shl	cnt, %r9		/* lowest limb: zeros are shifted in */
	mov	%r9, -16(rp)
	ret
END (__mpn_lshift)