/* AMD64 __mpn_mul_1 -- Multiply a limb vector with a limb and store
   the result in a second limb vector.
   Copyright (C) 2003-2022 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "asm-syntax.h"

#define rp	%rdi
#define up	%rsi
#define n_param	%rdx
#define vl	%rcx

#define n	%r11

	.text
ENTRY (__mpn_mul_1)
	push	%rbx
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbx, 0)
	xor	%r10, %r10
	mov	(up), %rax		/* read first u limb early */
	mov	n_param, %rbx		/* move away n from rdx, mul uses it */
	mul	vl
	mov	%rbx, %r11

	add	%r10, %rax
	adc	$0, %rdx

	/* Dispatch on n mod 4 so the 4-way unrolled loop below is
	   entered at the matching point.  */
	and	$3, %ebx
	jz	L(b0)
	cmp	$2, %ebx
	jz	L(b2)
	jg	L(b3)

L(b1):	dec	n			/* n mod 4 == 1 */
	jne	L(gt1)
	mov	%rax, (rp)		/* n == 1: store the only limb */
	jmp	L(ret)
L(gt1):	lea	8(up,n,8), up
	lea	-8(rp,n,8), rp
	neg	n
	xor	%r10, %r10
	xor	%ebx, %ebx
	mov	%rax, %r9
	mov	(up,n,8), %rax
	mov	%rdx, %r8
	jmp	L(L1)

L(b0):	lea	(up,n,8), up		/* n mod 4 == 0 */
	lea	-16(rp,n,8), rp
	neg	n
	xor	%r10, %r10
	mov	%rax, %r8
	mov	%rdx, %rbx
	jmp	L(L0)

L(b3):	lea	-8(up,n,8), up		/* n mod 4 == 3 */
	lea	-24(rp,n,8), rp
	neg	n
	mov	%rax, %rbx
	mov	%rdx, %r10
	jmp	L(L3)

L(b2):	lea	-16(up,n,8), up		/* n mod 4 == 2 */
	lea	-32(rp,n,8), rp
	neg	n
	xor	%r8, %r8
	xor	%ebx, %ebx
	mov	%rax, %r10
	mov	24(up,n,8), %rax
	mov	%rdx, %r9
	jmp	L(L2)

	/* Main loop, 4-way unrolled: each mul leaves a 128-bit product in
	   %rdx:%rax; the low half is added into a result limb and the high
	   half is carried into the next one.  */
	.p2align 4
L(top):	mov	%r10, (rp,n,8)
	add	%rax, %r9
	mov	(up,n,8), %rax
	adc	%rdx, %r8
	mov	$0, %r10d
L(L1):	mul	vl
	mov	%r9, 8(rp,n,8)
	add	%rax, %r8
	adc	%rdx, %rbx
L(L0):	mov	8(up,n,8), %rax
	mul	vl
	mov	%r8, 16(rp,n,8)
	add	%rax, %rbx
	adc	%rdx, %r10
L(L3):	mov	16(up,n,8), %rax
	mul	vl
	mov	%rbx, 24(rp,n,8)
	mov	$0, %r8d		/* zero */
	mov	%r8, %rbx		/* zero */
	add	%rax, %r10
	mov	24(up,n,8), %rax
	mov	%r8, %r9		/* zero */
	adc	%rdx, %r9
L(L2):	mul	vl
	add	$4, n
	js	L(top)

	/* Wind down: store the last two result limbs and fold the final
	   carry into %rdx (%r8 is zero on every path that reaches here).  */
	mov	%r10, (rp,n,8)
	add	%rax, %r9
	adc	%r8, %rdx
	mov	%r9, 8(rp,n,8)
	add	%r8, %rdx
L(ret):	mov	%rdx, %rax		/* return the carry-out limb */

	pop	%rbx
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbx)
	ret
END (__mpn_mul_1)
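
/* For reference, the routine above computes the same thing as this minimal
   C sketch.  It is an illustration, not the code glibc builds: the limb_t
   typedef is a stand-in for GMP's 64-bit mp_limb_t, mul_1_ref is a
   hypothetical name, and a compiler with unsigned __int128 is assumed.

     typedef unsigned long long limb_t;   // 64-bit limb (assumption)

     limb_t
     mul_1_ref (limb_t *rp, const limb_t *up, long n, limb_t vl)
     {
       limb_t carry = 0;
       for (long i = 0; i < n; i++)
         {
           // 64x64 -> 128-bit multiply, mirroring mul's %rdx:%rax result
           unsigned __int128 p = (unsigned __int128) up[i] * vl + carry;
           rp[i] = (limb_t) p;            // low half -> result limb
           carry = (limb_t) (p >> 64);    // high half -> next carry
         }
       return carry;                      // carry-out, left in %rax above
     }

   A caller forming the full (n+1)-limb product {rp, n+1} = {up, n} * vl
   stores the returned limb at rp[n].  */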