/* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
   sum in a third limb vector.
   Copyright (C) 2006-2022 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <https://www.gnu.org/licenses/>.  */

#include "sysdep.h"
#include "asm-syntax.h"

/* Syntax: AT&T, fed through the C preprocessor.

   Register aliases:
     rp  destination limb pointer
     up  first source limb pointer
     vp  second source limb pointer
     n   limb count on entry (must be > 0); negated during setup and then
	 used as a negative index that climbs towards zero
     cy  scratch that is zeroed and shifted right purely to force CF = 0
	 before the first ADCSBB; the same register is reused afterwards
	 as a limb temporary

   Other registers written: %r9, %r10, %r11 (limb temporaries), %eax
   (n mod 4 during dispatch, then the return value) and the flags.
   NOTE(review): these aliases match the first four System V AMD64 integer
   argument registers, i.e. a call of the shape f (rp, up, vp, n) -- confirm
   against the C prototype.  */
#define rp	%rdi
#define up	%rsi
#define vp	%rdx
#define n	%rcx
#define cy	%r8

/* A file that defines func before including this one is expected to
   supply its own ADCSBB as well; both defaults are set together here.  */
#ifndef func
# define func __mpn_add_n
# define ADCSBB adc
#endif

/* Layout of the routine:

   The main loop is unrolled four ways and software pipelined: each step
   loads the next pair of source limbs into one register pair while the
   previously loaded pair (held in the other register pair) is combined
   with ADCSBB and stored.  %r8/%r9 and %r10/%r11 alternate in those two
   roles.

   After setup, up and vp address the last limb of their vectors, rp
   addresses the limb before the last one, and n holds minus the limb
   count rounded down to a multiple of four.  The dispatch on the limb
   count mod 4 jumps into the loop body at the matching entry label so the
   leftover limbs are consumed by a partial first pass.

   Between consecutive ADCSBB instructions only mov, lea, jrcxz and jmp
   are executed; none of them writes the flags, so the carry chain stays
   intact across the whole vector.

   Result: %eax = final carry flag (0 or 1).  */

	.text
ENTRY (func)
	xorq	cy, cy			/* cy = 0, see the shrq below */
	movq	(up), %r10		/* preload limb 0 of each source */
	movq	(vp), %r11

	leaq	-8(up,n,8), up		/* up -> last source limb */
	leaq	-8(vp,n,8), vp		/* vp -> last source limb */
	leaq	-16(rp,n,8), rp		/* rp -> limb before the last */
	movl	%ecx, %eax
	negq	n			/* index now runs upwards to 0 */
	andl	$3, %eax		/* eax = limb count mod 4 */
	je	L(mod0)
	addq	%rax, n			/* n = -(count rounded down to 4k), so
					   jrcxz can detect the end */
	cmpl	$2, %eax
	jl	L(mod1)
	je	L(mod2)

	/* Each dispatch stub shifts the zeroed cy register right, which
	   leaves CF = 0 after the cmp above may have set it.  */
L(mod3):
	shrq	cy			/* CF = 0 */
	jmp	L(enter3)

L(mod0):
	shrq	cy			/* CF = 0 */
	movq	%r10, %r8		/* this entry expects limb 0 in */
	movq	%r11, %r9		/* the %r8/%r9 pair */
	leaq	4(n), n			/* compensate: no partial pass */
	jmp	L(enter0)

L(mod1):
	shrq	cy			/* CF = 0 */
	jmp	L(enter1)

L(mod2):
	shrq	cy			/* CF = 0 */
	movq	%r10, %r8		/* this entry expects limb 0 in */
	movq	%r11, %r9		/* the %r8/%r9 pair */
	jmp	L(enter2)

	/* Drain the pipeline: one limb pair is still pending in
	   %r10/%r11 when the index reaches zero.  */
L(tail):
	ADCSBB	%r11, %r10
	movq	%r10, 8(rp)		/* store the most significant limb */
	movl	%ecx, %eax		/* eax = 0 (rcx is zero here); mov
					   leaves CF untouched */
	adcl	%eax, %eax		/* eax = CF, the return value */
	ret

	.p2align 4
L(loop):
	movq	-24(up,n,8), %r8
	movq	-24(vp,n,8), %r9
	ADCSBB	%r11, %r10
	movq	%r10, -24(rp,n,8)
L(enter0):
	movq	-16(up,n,8), %r10
	movq	-16(vp,n,8), %r11
	ADCSBB	%r9, %r8
	movq	%r8, -16(rp,n,8)
L(enter3):
	movq	-8(up,n,8), %r8
	movq	-8(vp,n,8), %r9
	ADCSBB	%r11, %r10
	movq	%r10, -8(rp,n,8)
L(enter2):
	movq	(up,n,8), %r10
	movq	(vp,n,8), %r11
	ADCSBB	%r9, %r8
	movq	%r8, (rp,n,8)
L(enter1):
	jrcxz	L(tail)			/* flag-free test for index == 0 */
	leaq	4(n), n			/* advance four limbs, CF preserved */
	jmp	L(loop)
END (func)