! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
! store difference in a third limb vector.
!
! Copyright (C) 1995-2022 Free Software Foundation, Inc.
!
! This file is part of the GNU MP Library.
!
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Lesser General Public License as published by
! the Free Software Foundation; either version 2.1 of the License, or (at your
! option) any later version.
!
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
! License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with the GNU MP Library; see the file COPYING.LIB.  If not,
! see <https://www.gnu.org/licenses/>.


! INPUT PARAMETERS
#define RES_PTR	%o0
#define S1_PTR	%o1
#define S2_PTR	%o2
#define SIZE	%o3

#include <sysdep.h>

! mp_limb_t __mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr,
!                        mp_srcptr s2_ptr, mp_size_t size)
!
! Computes {RES_PTR,SIZE} = {S1_PTR,SIZE} - {S2_PTR,SIZE} and returns the
! borrow out of the most significant limb in %o0.
!
! Three variants are selected on the mutual 8-byte alignment of the
! pointers, so that the inner loops can use 64-bit ldd/std accesses:
!   V1a: S2_PTR/RES_PTR share alignment  -> ldd from S2_PTR, std to RES_PTR
!   V1b: S1_PTR/RES_PTR share alignment  -> ldd from S1_PTR, std to RES_PTR
!   V2:  S1_PTR/S2_PTR share alignment   -> ldd from both sources, st results
!
! Carry (borrow) protocol: the loop bookkeeping (addcc on SIZE) clobbers
! the carry flag, so the borrow is parked in a register and re-created,
! typically from a branch delay slot:
!   addx  %g0,%g0,%o4    ! %o4 = C  (0 + 0 + C)      -- "save cy"
!   subcc %g0,%o4,%g0    ! 0 - %o4 sets C iff %o4!=0 -- "restore cy"

ENTRY(__mpn_sub_n)
	xor	S2_PTR,RES_PTR,%g1
	andcc	%g1,4,%g0
	bne	LOC(1)			! branch if alignment differs
	nop
! **  V1a  **
	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
	be	LOC(v1)			! if no, branch
	nop
/* Subtract least significant limb separately to align RES_PTR and S2_PTR */
	ld	[S1_PTR],%g4
	add	S1_PTR,4,S1_PTR
	ld	[S2_PTR],%g2
	add	S2_PTR,4,S2_PTR
	add	SIZE,-1,SIZE
	subcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR
LOC(v1):
	addx	%g0,%g0,%o4		! save cy in register
	cmp	SIZE,2			! if SIZE < 2 ...
	bl	LOC(end2)		! ... branch to tail code
	subcc	%g0,%o4,%g0		! restore cy

	ld	[S1_PTR+0],%g4
	addcc	SIZE,-10,SIZE
	ld	[S1_PTR+4],%g1
	ldd	[S2_PTR+0],%g2
	blt	LOC(fin1)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
LOC(loop1):
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+8],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+12],%g1
	ldd	[S2_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+16],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+20],%g1
	ldd	[S2_PTR+16],%g2
	std	%o4,[RES_PTR+8]
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+24],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+28],%g1
	ldd	[S2_PTR+24],%g2
	std	%o4,[RES_PTR+16]
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+32],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+36],%g1
	ldd	[S2_PTR+32],%g2
	std	%o4,[RES_PTR+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop1)
	subcc	%g0,%o4,%g0		! restore cy

LOC(fin1):
	addcc	SIZE,8-2,SIZE
	blt	LOC(end1)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
LOC(loope1):
	subxcc	%g4,%g2,%o4
	ld	[S1_PTR+8],%g4
	subxcc	%g1,%g3,%o5
	ld	[S1_PTR+12],%g1
	ldd	[S2_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope1)
	subcc	%g0,%o4,%g0		! restore cy
LOC(end1):
	subxcc	%g4,%g2,%o4
	subxcc	%g1,%g3,%o5
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register

	andcc	SIZE,1,%g0		! one odd limb left over?
	be	LOC(ret1)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract last limb */
	ld	[S1_PTR+8],%g4
	ld	[S2_PTR+8],%g2
	subxcc	%g4,%g2,%o4
	st	%o4,[RES_PTR+8]

LOC(ret1):
	retl
	addx	%g0,%g0,%o0		! return carry-out from most sign. limb

LOC(1):	xor	S1_PTR,RES_PTR,%g1
	andcc	%g1,4,%g0
	bne	LOC(2)
	nop
! **  V1b  **
	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
	be	LOC(v1b)		! if no, branch
	nop
/* Subtract least significant limb separately to align RES_PTR and S1_PTR */
	ld	[S2_PTR],%g4
	add	S2_PTR,4,S2_PTR
	ld	[S1_PTR],%g2
	add	S1_PTR,4,S1_PTR
	add	SIZE,-1,SIZE
	subcc	%g2,%g4,%o4
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR
LOC(v1b):
	addx	%g0,%g0,%o4		! save cy in register
	cmp	SIZE,2			! if SIZE < 2 ...
	bl	LOC(end2)		! ... branch to tail code
	subcc	%g0,%o4,%g0		! restore cy

	ld	[S2_PTR+0],%g4
	addcc	SIZE,-10,SIZE
	ld	[S2_PTR+4],%g1
	ldd	[S1_PTR+0],%g2
	blt	LOC(fin1b)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
LOC(loop1b):
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+8],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+12],%g1
	ldd	[S1_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+16],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+20],%g1
	ldd	[S1_PTR+16],%g2
	std	%o4,[RES_PTR+8]
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+24],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+28],%g1
	ldd	[S1_PTR+24],%g2
	std	%o4,[RES_PTR+16]
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+32],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+36],%g1
	ldd	[S1_PTR+32],%g2
	std	%o4,[RES_PTR+24]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop1b)
	subcc	%g0,%o4,%g0		! restore cy

LOC(fin1b):
	addcc	SIZE,8-2,SIZE
	blt	LOC(end1b)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
LOC(loope1b):
	subxcc	%g2,%g4,%o4
	ld	[S2_PTR+8],%g4
	subxcc	%g3,%g1,%o5
	ld	[S2_PTR+12],%g1
	ldd	[S1_PTR+8],%g2
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope1b)
	subcc	%g0,%o4,%g0		! restore cy
LOC(end1b):
	subxcc	%g2,%g4,%o4
	subxcc	%g3,%g1,%o5
	std	%o4,[RES_PTR+0]
	addx	%g0,%g0,%o4		! save cy in register

	andcc	SIZE,1,%g0		! one odd limb left over?
	be	LOC(ret1b)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract last limb */
	ld	[S2_PTR+8],%g4
	ld	[S1_PTR+8],%g2
	subxcc	%g2,%g4,%o4
	st	%o4,[RES_PTR+8]

LOC(ret1b):
	retl
	addx	%g0,%g0,%o0		! return carry-out from most sign. limb

! **  V2  **
/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
   alignment of S2_PTR and RES_PTR differ.  Since there are only two ways
   things can be aligned (that we care about) we now know that the alignment
   of S1_PTR and S2_PTR are the same.  */

LOC(2):	cmp	SIZE,1
	be	LOC(jone)
	nop
	andcc	S1_PTR,4,%g0		! S1_PTR unaligned? Side effect: cy=0
	be	LOC(v2)			! if no, branch
	nop
/* Subtract least significant limb separately to align S1_PTR and S2_PTR */
	ld	[S1_PTR],%g4
	add	S1_PTR,4,S1_PTR
	ld	[S2_PTR],%g2
	add	S2_PTR,4,S2_PTR
	add	SIZE,-1,SIZE
	subcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]
	add	RES_PTR,4,RES_PTR

LOC(v2):
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	blt	LOC(fin2)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
LOC(loop2):
	ldd	[S1_PTR+0],%g2
	ldd	[S2_PTR+0],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+0]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+4]
	ldd	[S1_PTR+8],%g2
	ldd	[S2_PTR+8],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+8]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+12]
	ldd	[S1_PTR+16],%g2
	ldd	[S2_PTR+16],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+16]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+20]
	ldd	[S1_PTR+24],%g2
	ldd	[S2_PTR+24],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+24]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+28]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-8,SIZE
	add	S1_PTR,32,S1_PTR
	add	S2_PTR,32,S2_PTR
	add	RES_PTR,32,RES_PTR
	bge	LOC(loop2)
	subcc	%g0,%o4,%g0		! restore cy

LOC(fin2):
	addcc	SIZE,8-2,SIZE
	blt	LOC(end2)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
LOC(loope2):
	ldd	[S1_PTR+0],%g2
	ldd	[S2_PTR+0],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[RES_PTR+0]
	subxcc	%g3,%o5,%g3
	st	%g3,[RES_PTR+4]
	addx	%g0,%g0,%o4		! save cy in register
	addcc	SIZE,-2,SIZE
	add	S1_PTR,8,S1_PTR
	add	S2_PTR,8,S2_PTR
	add	RES_PTR,8,RES_PTR
	bge	LOC(loope2)
	subcc	%g0,%o4,%g0		! restore cy
LOC(end2):
	andcc	SIZE,1,%g0		! one odd limb left over?
	be	LOC(ret2)
	subcc	%g0,%o4,%g0		! restore cy
/* Subtract last limb */
LOC(jone):
	ld	[S1_PTR],%g4
	ld	[S2_PTR],%g2
	subxcc	%g4,%g2,%o4
	st	%o4,[RES_PTR]

LOC(ret2):
	retl
	addx	%g0,%g0,%o0		! return carry-out from most sign. limb

END(__mpn_sub_n)