/* PowerPC64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
   the result to a second limb vector.
   Copyright (C) 1999-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* The same body provides two entry points.  When USE_AS_SUBMUL is defined
   the products are subtracted from the RP vector and the routine is named
   __mpn_submul_1; otherwise they are added and it is named __mpn_addmul_1.
   ADDSUB starts a carry chain, ADDSUBC continues one.  */
#ifdef USE_AS_SUBMUL
# define FUNC        __mpn_submul_1
# define ADDSUBC     subfe
# define ADDSUB      subfc
#else
# define FUNC        __mpn_addmul_1
# define ADDSUBC     adde
# define ADDSUB      addc
#endif

/* Argument registers.  */
#define RP  r3		/* Vector that is read, updated and stored back.  */
#define UP  r4		/* Vector that is multiplied (only read).  */
#define N   r5		/* Limb count; scratch once copied to ctr.  */
#define VL  r6		/* Multiplier limb.  */

/* Save slots for the non-volatile registers used below.  They sit at
   negative offsets from r1; the stack pointer itself is never moved.  */
#define R27SAVE  (-40)
#define R28SAVE  (-32)
#define R29SAVE  (-24)
#define R30SAVE  (-16)
#define R31SAVE  (-8)

/* FUNC (RP, UP, N, VL)

   For i = 0 .. N-1, multiply the doubleword UP[i] by VL (mulld/mulhdu give
   the low and high halves), and add the low half plus the running carry
   limb to RP[i] (or subtract it from RP[i] under USE_AS_SUBMUL), storing
   the result back to RP[i].

   In:   r3 = RP, r4 = UP, r5 = N, r6 = VL.
	 N must be at least 1: there is no N == 0 check and the
	 N % 4 == 0 path loads and stores limbs unconditionally.
   Out:  r3 = carry-out limb (high half of the last product plus the
	 outstanding carries/borrow).
   Uses: r0, r5, r7-r12, r27-r31, ctr, cr0, cr6 and the CA bit.  r27-r31 are
	 saved on entry and reloaded before the final return; the one-limb
	 early return does not touch them.

   Structure: ctr = (N + 3) / 4.  An entry stub chosen by N % 4 handles
   1 (b11), 2 (b00), 3 (b01/gt1) or 0 (b10) limbs and preloads the next two
   UP limbs into r9/r27; every L(top) iteration then handles 4 limbs and
   L(end) handles the final 2.  r12 carries the pending high limb between
   stages and CA carries the pending carry.

   In the subtracting variant subfc/subfe leave CA = 1 when there was NO
   borrow.  The pair "subfe r11,r11,r11; addic r11,r11,1" turns that into
   CA = 1 when there WAS a borrow, so the following adde/addze can add the
   borrow into the product chain.  */
ENTRY_TOCLESS (FUNC, 5)
	std	r31, R31SAVE(r1)
	rldicl.	r0, N, 0, 62		/* r0 = N & 3, result recorded in cr0.  */
	std	r30, R30SAVE(r1)
	cmpdi	cr6, r0, 2		/* cr6 = (N & 3) compared with 2.  */
	std	r29, R29SAVE(r1)
	addi	N, N, 3
	std	r28, R28SAVE(r1)
	srdi	N, N, 2			/* N = (N + 3) / 4.  */
	std	r27, R27SAVE(r1)
	cfi_offset(r31, R31SAVE)
	cfi_offset(r30, R30SAVE)
	cfi_offset(r29, R29SAVE)
	cfi_offset(r28, R28SAVE)
	cfi_offset(r27, R27SAVE)
	mtctr	N			/* From here on r5 is scratch.  */
	beq	cr0, L(b00)		/* N % 4 == 0  */
	blt	cr6, L(b01)		/* N % 4 == 1  */
	beq	cr6, L(b10)		/* N % 4 == 2  */

	/* N % 4 == 3: handle one limb, then join the loop at L(bot).  */
L(b11):	ld	r9, 0(UP)
	ld	r28, 0(RP)
	mulld	r0, r9, VL
	mulhdu	r12, r9, VL		/* r12 = pending high limb.  */
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	addi	RP, RP, 8
	ld	r9, 8(UP)		/* Preload the next two UP limbs.  */
	ld	r27, 16(UP)
	addi	UP, UP, 24
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* r11 = CA - 1; finished at L(bot).  */
#endif
	b	L(bot)

	.align 4
	/* N % 4 == 0: handle two limbs, then join the loop at L(bot).  */
L(b00):	ld	r9, 0(UP)
	ld	r27, 8(UP)
	ld	r28, 0(RP)
	ld	r29, 8(RP)
	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	addc	r7, r7, N		/* Chain high(limb 0) into low(limb 1).  */
	addze	r12, r8			/* r12 = pending high limb.  */
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
	addi	RP, RP, 16
	ld	r9, 16(UP)		/* Preload the next two UP limbs.  */
	ld	r27, 24(UP)
	addi	UP, UP, 32
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* r11 = CA - 1; finished at L(bot).  */
#endif
	b	L(bot)

	.align 4
	/* N % 4 == 1.  If ctr drops to zero here N was exactly 1 and the
	   single limb is handled inline with an early return; r27-r31 are
	   not modified on that path, so nothing is reloaded.  */
L(b01):	bdnz	L(gt1)
	ld	r9, 0(UP)
	ld	r11, 0(RP)
	mulld	r0, r9, VL
	mulhdu	r8, r9, VL
	ADDSUB	r0, r0, r11
	std	r0, 0(RP)
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* Convert "no borrow" CA ...  */
	addic	r11, r11, 1		/* ... into CA = borrow.  */
#endif
	addze	RP, r8			/* Return high limb + CA.  */
	blr

	/* N % 4 == 1 and N > 1: handle three limbs, then join at L(bot).  */
L(gt1):	ld	r9, 0(UP)
	ld	r27, 8(UP)
	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	ld	r9, 16(UP)
	ld	r28, 0(RP)
	ld	r29, 8(RP)
	ld	r30, 16(RP)
	mulld	r11, r9, VL
	mulhdu	r10, r9, VL
	addc	r7, r7, N		/* Chain each high half into the  */
	adde	r11, r11, r8		/* next limb's low half.  */
	addze	r12, r10		/* r12 = pending high limb.  */
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
	ADDSUBC	r11, r11, r30
	std	r11, 16(RP)
	addi	RP, RP, 24
	ld	r9, 24(UP)		/* Preload the next two UP limbs.  */
	ld	r27, 32(UP)
	addi	UP, UP, 40
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* r11 = CA - 1; finished at L(bot).  */
#endif
	b	L(bot)

	/* N % 4 == 2: nothing to pre-process.  Start with no pending carry
	   and no pending high limb, then either go straight to the
	   two-limb tail (N == 2) or fall into the loop.  */
L(b10):	addic	r0, r0, 0		/* Adding 0 clears CA.  */
	li	r12, 0
	ld	r9, 0(UP)
	ld	r27, 8(UP)
	bdz	L(end)
	addi	UP, UP, 16

	.align 4
	/* Main loop, four limbs per iteration.  On entry r9/r27 hold the
	   first two UP limbs of this group, r12 the pending high limb and CA
	   the pending carry.  */
L(top):	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	ld	r9, 0(UP)
	ld	r28, 0(RP)
	ld	r27, 8(UP)
	ld	r29, 8(RP)
	adde	r0, r0, r12		/* Add pending high limb and carry.  */
	adde	r7, r7, N
	mulld	N, r9, VL
	mulhdu	r10, r9, VL
	mulld	r11, r27, VL
	mulhdu	r12, r27, VL
	ld	r9, 16(UP)		/* Preload two limbs for the next  */
	ld	r30, 16(RP)		/* iteration or for L(end).  */
	ld	r27, 24(UP)
	ld	r31, 24(RP)
	adde	N, N, r8
	adde	r11, r11, r10
	addze	r12, r12		/* r12 = new pending high limb.  */
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
	ADDSUBC	N, N, r30
	std	N, 16(RP)
	ADDSUBC	r11, r11, r31
	std	r11, 24(RP)
	addi	UP, UP, 32
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* r11 = CA - 1.  */
#endif
	addi	RP, RP, 32
L(bot):
#ifdef USE_AS_SUBMUL
	addic	r11, r11, 1		/* CA = borrow from the subtraction.  */
#endif
	bdnz	L(top)

	/* Tail: the last two limbs, already loaded in r9/r27.  */
L(end):	mulld	r0, r9, VL
	mulhdu	N, r9, VL
	mulld	r7, r27, VL
	mulhdu	r8, r27, VL
	ld	r28, 0(RP)
	ld	r29, 8(RP)
	adde	r0, r0, r12		/* Add pending high limb and carry.  */
	adde	r7, r7, N
	addze	r8, r8
	ADDSUB	r0, r0, r28
	std	r0, 0(RP)
	ADDSUBC	r7, r7, r29
	std	r7, 8(RP)
#ifdef USE_AS_SUBMUL
	subfe	r11, r11, r11		/* Convert "no borrow" CA ...  */
	addic	r11, r11, 1		/* ... into CA = borrow.  */
#endif
	addze	RP, r8			/* Return high limb + CA.  */
	ld	r31, R31SAVE(r1)
	ld	r30, R30SAVE(r1)
	ld	r29, R29SAVE(r1)
	ld	r28, R28SAVE(r1)
	ld	r27, R27SAVE(r1)
	blr
END(FUNC)