/* From the Intel IA-64 Optimization Guide, choose the minimum latency
   alternative.

   Every routine below is exported only under a GLIBC_2.2 symbol version
   (see the .symver directives) and is assembled only when SHLIB_COMPAT
   says that version range is still supported.  */

#include <sysdep.h>
#undef ret

#include <shlib-compat.h>

#if SHLIB_COMPAT(libc, GLIBC_2_2, GLIBC_2_2_6)

/* __divtf3
   Compute an 80-bit IEEE double-extended quotient.
   farg0 holds the dividend (a).  farg1 holds the divisor (b).

   frcpa writes a reciprocal approximation y0 to f10 and a predicate to
   p6.  All refinement steps run under p6; when p6 is clear, f10 is
   returned unchanged through the p7 path.  p7 is forced true first and
   cleared only under p6, so p6 and p7 are mutually exclusive (this is
   what .pred.rel.mutex tells the assembler).  */

ENTRY(___divtf3)
	cmp.eq p7, p0 = r0, r0			/* p7 = true */
	frcpa.s0 f10, p6 = farg0, farg1		/* f10 = y0 ~ 1/b */
	;;
(p6)	cmp.ne p7, p0 = r0, r0			/* p6 set => p7 = false */
	.pred.rel.mutex p6, p7
(p6)	fnma.s1 f11 = farg1, f10, f1		/* e  = 1 - b*y0 */
(p6)	fmpy.s1 f12 = farg0, f10		/* q0 = a*y0 */
	;;
(p6)	fmpy.s1 f13 = f11, f11			/* f13 = e^2 */
(p6)	fma.s1 f14 = f11, f11, f11		/* f14 = e + e^2 */
	;;
(p6)	fma.s1 f11 = f13, f13, f11		/* f11 = e + e^4 */
(p6)	fma.s1 f13 = f14, f10, f10		/* y1 = y0*(1 + e + e^2) */
	;;
(p6)	fma.s1 f10 = f13, f11, f10		/* y2 = y0 + y1*(e + e^4) */
(p6)	fnma.s1 f11 = farg1, f12, farg0		/* r0 = a - b*q0 */
	;;
(p6)	fma.s1 f11 = f11, f10, f12		/* q1 = q0 + r0*y2 */
(p6)	fnma.s1 f12 = farg1, f10, f1		/* e2 = 1 - b*y2 */
	;;
(p6)	fma.s1 f10 = f12, f10, f10		/* y3 = y2 + e2*y2 */
(p6)	fnma.s1 f12 = farg1, f11, farg0		/* r1 = a - b*q1 */
	;;
(p6)	fma.s0 fret0 = f12, f10, f11		/* result = q1 + r1*y3 */
(p7)	mov fret0 = f10				/* result = frcpa output */
	br.ret.sptk rp
END(___divtf3)
	.symver ___divtf3, __divtf3@GLIBC_2.2

/* __divdf3
   Compute a 64-bit IEEE double quotient.
   farg0 holds the dividend (a).  farg1 holds the divisor (b).

   Same p6/p7 scheme as __divtf3.  The quotient estimate (f11) and the
   reciprocal estimate (f10) are refined side by side with the error
   terms e, e^2 and e^4.
   NOTE(review): the remainder step writes f8 while reading farg0, and
   the last step reads f8 while writing fret0; this presumes the
   farg0/fret0 aliases (defined outside this file) make that safe --
   confirm against sysdep.h.  */

ENTRY(___divdf3)
	cmp.eq p7, p0 = r0, r0			/* p7 = true */
	frcpa.s0 f10, p6 = farg0, farg1		/* f10 = y0 ~ 1/b */
	;;
(p6)	cmp.ne p7, p0 = r0, r0			/* p6 set => p7 = false */
	.pred.rel.mutex p6, p7
(p6)	fmpy.s1 f11 = farg0, f10		/* q0 = a*y0 */
(p6)	fnma.s1 f12 = farg1, f10, f1		/* e  = 1 - b*y0 */
	;;
(p6)	fma.s1 f11 = f12, f11, f11		/* q1 = q0 + e*q0 */
(p6)	fmpy.s1 f13 = f12, f12			/* f13 = e^2 */
	;;
(p6)	fma.s1 f10 = f12, f10, f10		/* y1 = y0 + e*y0 */
(p6)	fma.s1 f11 = f13, f11, f11		/* q2 = q1 + e^2*q1 */
	;;
(p6)	fmpy.s1 f12 = f13, f13			/* f12 = e^4 */
(p6)	fma.s1 f10 = f13, f10, f10		/* y2 = y1 + e^2*y1 */
	;;
(p6)	fma.d.s1 f11 = f12, f11, f11		/* q3 = q2 + e^4*q2 */
(p6)	fma.s1 f10 = f12, f10, f10		/* y3 = y2 + e^4*y2 */
	;;
(p6)	fnma.d.s1 f8 = farg1, f11, farg0	/* r  = a - b*q3 */
	;;
(p6)	fma.d fret0 = f8, f10, f11		/* result = q3 + r*y3 */
(p7)	mov fret0 = f10				/* result = frcpa output */
	br.ret.sptk rp
	;;
END(___divdf3)
	.symver ___divdf3, __divdf3@GLIBC_2.2

/* __divsf3
   Compute a 32-bit IEEE float quotient.
   farg0 holds the dividend (a).  farg1 holds the divisor (b).

   Same p6/p7 scheme as __divtf3.  f8 carries the running quotient and
   f9 the running error power; the reciprocal in f10 is not refined.
   NOTE(review): f8/f9 are overwritten by the same instructions that
   read farg0/farg1, which presumes those names alias f8/f9 (defined
   outside this file) -- confirm against sysdep.h.  */

ENTRY(___divsf3)
	cmp.eq p7, p0 = r0, r0			/* p7 = true */
	frcpa.s0 f10, p6 = farg0, farg1		/* f10 = y0 ~ 1/b */
	;;
(p6)	cmp.ne p7, p0 = r0, r0			/* p6 set => p7 = false */
	.pred.rel.mutex p6, p7
(p6)	fmpy.s1 f8 = farg0, f10			/* q0 = a*y0 */
(p6)	fnma.s1 f9 = farg1, f10, f1		/* e  = 1 - b*y0 */
	;;
(p6)	fma.s1 f8 = f9, f8, f8			/* q1 = q0 + e*q0 */
(p6)	fmpy.s1 f9 = f9, f9			/* f9 = e^2 */
	;;
(p6)	fma.s1 f8 = f9, f8, f8			/* q2 = q1 + e^2*q1 */
(p6)	fmpy.s1 f9 = f9, f9			/* f9 = e^4 */
	;;
(p6)	fma.d.s1 f10 = f9, f8, f8		/* q3 = q2 + e^4*q2 */
	;;
(p6)	fnorm.s.s0 fret0 = f10			/* result = q3, single precision */
(p7)	mov fret0 = f10				/* result = frcpa output */
	br.ret.sptk rp
	;;
END(___divsf3)
	.symver ___divsf3, __divsf3@GLIBC_2.2

/* __divdi3
   Compute a 64-bit integer quotient.
   in0 holds the dividend (a).  in1 holds the divisor (b).

   The operands are moved to FP registers, converted from signed
   integers, divided with a frcpa-seeded refinement, and the quotient
   is truncated back to an integer.  The refinement runs only under
   p6; when p6 is clear, the value frcpa left in f10 is what gets
   converted.  */

ENTRY(___divdi3)
	.regstk 2,0,0,0
	/* Transfer inputs to FP registers.  */
	setf.sig f8 = in0
	setf.sig f9 = in1
	;;
	/* Convert the inputs to FP, so that they won't be treated as
	   unsigned.  */
	fcvt.xf f8 = f8
	fcvt.xf f9 = f9
	;;
	/* Compute the reciprocal approximation.  */
	frcpa.s1 f10, p6 = f8, f9		/* f10 = y0 ~ 1/b */
	;;
	/* 3 Newton-Raphson iterations.  */
(p6)	fnma.s1 f11 = f9, f10, f1		/* e  = 1 - b*y0 */
(p6)	fmpy.s1 f12 = f8, f10			/* q0 = a*y0 */
	;;
(p6)	fmpy.s1 f13 = f11, f11			/* f13 = e^2 */
(p6)	fma.s1 f12 = f11, f12, f12		/* q1 = q0 + e*q0 */
	;;
(p6)	fma.s1 f10 = f11, f10, f10		/* y1 = y0 + e*y0 */
(p6)	fma.s1 f11 = f13, f12, f12		/* q2 = q1 + e^2*q1 */
	;;
(p6)	fma.s1 f10 = f13, f10, f10		/* y2 = y1 + e^2*y1 */
(p6)	fnma.s1 f12 = f9, f11, f8		/* r  = a - b*q2 */
	;;
(p6)	fma.s1 f10 = f12, f10, f11		/* q3 = q2 + r*y2 */
	;;
	/* Truncate quotient to a signed integer.  */
	fcvt.fx.trunc.s1 f10 = f10
	;;
	/* Transfer result to GP registers.  */
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
END(___divdi3)
	.symver ___divdi3, __divdi3@GLIBC_2.2

/* __moddi3
   Compute a 64-bit integer modulus.
   in0 holds the dividend (a).  in1 holds the divisor (b).

   The quotient q is computed as in __divdi3.  The original integer a
   is kept in f14, and -b is loaded into f9 once the FP value of b is
   no longer needed, so the remainder falls out of a single integer
   multiply-add: r = q * (-b) + a.  */

ENTRY(___moddi3)
	.regstk 2,0,0,0
	/* Transfer inputs to FP registers.  */
	setf.sig f14 = in0			/* f14 keeps integer a */
	setf.sig f9 = in1
	;;
	/* Convert the inputs to FP, so that they won't be treated as
	   unsigned.  */
	fcvt.xf f8 = f14
	fcvt.xf f9 = f9
	;;
	/* Compute the reciprocal approximation.  */
	frcpa.s1 f10, p6 = f8, f9		/* f10 = y0 ~ 1/b */
	;;
	/* 3 Newton-Raphson iterations.  */
(p6)	fmpy.s1 f12 = f8, f10			/* q0 = a*y0 */
(p6)	fnma.s1 f11 = f9, f10, f1		/* e  = 1 - b*y0 */
	;;
(p6)	fma.s1 f12 = f11, f12, f12		/* q1 = q0 + e*q0 */
(p6)	fmpy.s1 f13 = f11, f11			/* f13 = e^2 */
	;;
(p6)	fma.s1 f10 = f11, f10, f10		/* y1 = y0 + e*y0 */
(p6)	fma.s1 f11 = f13, f12, f12		/* q2 = q1 + e^2*q1 */
	;;
	sub in1 = r0, in1			/* in1 = -b */
(p6)	fma.s1 f10 = f13, f10, f10		/* y2 = y1 + e^2*y1 */
(p6)	fnma.s1 f12 = f9, f11, f8		/* r  = a - b*q2 (last FP use of b) */
	;;
	setf.sig f9 = in1			/* f9 = integer -b */
(p6)	fma.s1 f10 = f12, f10, f11		/* q3 = q2 + r*y2 */
	;;
	/* Truncate quotient to a signed integer.  */
	fcvt.fx.trunc.s1 f10 = f10
	;;
	/* r = q * (-b) + a */
	xma.l f10 = f10, f9, f14
	;;
	/* Transfer result to GP registers.  */
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
END(___moddi3)
	.symver ___moddi3, __moddi3@GLIBC_2.2

/* __udivdi3
   Compute a 64-bit unsigned integer quotient.
   in0 holds the dividend (a).  in1 holds the divisor (b).

   Identical in shape to __divdi3, but the integer<->FP conversions
   use the unsigned forms (fcvt.xuf / fcvt.fxu).  */

ENTRY(___udivdi3)
	.regstk 2,0,0,0
	/* Transfer inputs to FP registers.  */
	setf.sig f8 = in0
	setf.sig f9 = in1
	;;
	/* Convert the inputs to FP, to avoid FP software-assist faults.  */
	fcvt.xuf.s1 f8 = f8
	fcvt.xuf.s1 f9 = f9
	;;
	/* Compute the reciprocal approximation.  */
	frcpa.s1 f10, p6 = f8, f9		/* f10 = y0 ~ 1/b */
	;;
	/* 3 Newton-Raphson iterations.  */
(p6)	fnma.s1 f11 = f9, f10, f1		/* e  = 1 - b*y0 */
(p6)	fmpy.s1 f12 = f8, f10			/* q0 = a*y0 */
	;;
(p6)	fmpy.s1 f13 = f11, f11			/* f13 = e^2 */
(p6)	fma.s1 f12 = f11, f12, f12		/* q1 = q0 + e*q0 */
	;;
(p6)	fma.s1 f10 = f11, f10, f10		/* y1 = y0 + e*y0 */
(p6)	fma.s1 f11 = f13, f12, f12		/* q2 = q1 + e^2*q1 */
	;;
(p6)	fma.s1 f10 = f13, f10, f10		/* y2 = y1 + e^2*y1 */
(p6)	fnma.s1 f12 = f9, f11, f8		/* r  = a - b*q2 */
	;;
(p6)	fma.s1 f10 = f12, f10, f11		/* q3 = q2 + r*y2 */
	;;
	/* Truncate quotient to an unsigned integer.  */
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	/* Transfer result to GP registers.  */
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
END(___udivdi3)
	.symver ___udivdi3, __udivdi3@GLIBC_2.2

/* __umoddi3
   Compute a 64-bit unsigned integer modulus.
   in0 holds the dividend (a).  in1 holds the divisor (b).

   Identical in shape to __moddi3, but the integer<->FP conversions
   use the unsigned forms.  f14 keeps the original integer a; f9 is
   reloaded with -b for the final r = q * (-b) + a.  */

ENTRY(___umoddi3)
	.regstk 2,0,0,0
	/* Transfer inputs to FP registers.  */
	setf.sig f14 = in0			/* f14 keeps integer a */
	setf.sig f9 = in1
	;;
	/* Convert the inputs to FP, to avoid FP software assist faults.  */
	fcvt.xuf.s1 f8 = f14
	fcvt.xuf.s1 f9 = f9
	;;
	/* Compute the reciprocal approximation.  */
	frcpa.s1 f10, p6 = f8, f9		/* f10 = y0 ~ 1/b */
	;;
	/* 3 Newton-Raphson iterations.  */
(p6)	fmpy.s1 f12 = f8, f10			/* q0 = a*y0 */
(p6)	fnma.s1 f11 = f9, f10, f1		/* e  = 1 - b*y0 */
	;;
(p6)	fma.s1 f12 = f11, f12, f12		/* q1 = q0 + e*q0 */
(p6)	fmpy.s1 f13 = f11, f11			/* f13 = e^2 */
	;;
(p6)	fma.s1 f10 = f11, f10, f10		/* y1 = y0 + e*y0 */
(p6)	fma.s1 f11 = f13, f12, f12		/* q2 = q1 + e^2*q1 */
	;;
	sub in1 = r0, in1			/* in1 = -b */
(p6)	fma.s1 f10 = f13, f10, f10		/* y2 = y1 + e^2*y1 */
(p6)	fnma.s1 f12 = f9, f11, f8		/* r  = a - b*q2 (last FP use of b) */
	;;
	setf.sig f9 = in1			/* f9 = integer -b */
(p6)	fma.s1 f10 = f12, f10, f11		/* q3 = q2 + r*y2 */
	;;
	/* Truncate quotient to an unsigned integer.  */
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	/* r = q * (-b) + a */
	xma.l f10 = f10, f9, f14
	;;
	/* Transfer result to GP registers.  */
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
END(___umoddi3)
	.symver ___umoddi3, __umoddi3@GLIBC_2.2

/* __multi3
   Compute a 128-bit multiply of 128-bit multiplicands.
   in0/in1 holds one multiplicand (a), in2/in3 holds the other one (b).
   The 128-bit product is returned in ret0 (low word) / ret1 (high word).

   in0 and in2 act as the low 64-bit words: only they feed ret0, while
   the cross products in0*in3 and in1*in2 are added into ret1 alone.
   The full 128-bit product in0*in2 is assembled from four 32x32-bit
   partial products, with
	a0 = in0 & 0xffffffff,  a1 = in0 >> 32,
	b0 = in2 & 0xffffffff,  b1 = in2 >> 32.  */

ENTRY(___multi3)
	.regstk 4,0,0,0
	setf.sig f6 = in1
	movl r19 = 0xffffffff			/* r19 = low-32-bit mask */
	setf.sig f7 = in2
	;;
	and r14 = r19, in0			/* r14 = a0 */
	;;
	setf.sig f10 = r14			/* f10 = a0 */
	and r14 = r19, in2			/* r14 = b0 */
	xmpy.l f9 = f6, f7			/* f9 = low 64 bits of in1*in2 */
	;;
	setf.sig f6 = r14			/* f6 = b0 */
	shr.u r14 = in0, 32			/* r14 = a1 */
	;;
	setf.sig f7 = r14			/* f7 = a1 */
	shr.u r14 = in2, 32			/* r14 = b1 */
	;;
	setf.sig f8 = r14			/* f8 = b1 */
	xmpy.l f11 = f10, f6			/* f11 = a0*b0 */
	xmpy.l f6 = f7, f6			/* f6 = a1*b0 */
	;;
	getf.sig r16 = f11			/* r16 = a0*b0 */
	xmpy.l f7 = f7, f8			/* f7 = a1*b1 */
	;;
	shr.u r14 = r16, 32			/* r14 = (a0*b0) >> 32 */
	and r16 = r19, r16			/* r16 = low 32 bits of a0*b0 */
	getf.sig r17 = f6			/* r17 = a1*b0 */
	setf.sig f6 = in0
	;;
	setf.sig f11 = r14
	getf.sig r21 = f7			/* r21 = a1*b1 */
	setf.sig f7 = in3
	;;
	xma.l f11 = f10, f8, f11		/* f11 = a0*b1 + ((a0*b0) >> 32) */
	xma.l f6 = f6, f7, f9			/* f6 = in0*in3 + in1*in2 (low 64 bits) */
	;;
	getf.sig r18 = f11
	;;
	add r18 = r18, r17			/* middle sum; may wrap past 64 bits */
	;;
	and r15 = r19, r18			/* r15 = low 32 bits of middle sum */
	cmp.ltu p7, p6 = r18, r17		/* p7 = the add above wrapped */
	;;
	getf.sig r22 = f6			/* r22 = cross-term sum */
(p7)	adds r14 = 1, r19			/* r14 = 1 << 32 (weight of the carry) */
	;;
(p7)	add r21 = r21, r14			/* fold the carry into a1*b1 */
	shr.u r14 = r18, 32			/* r14 = high 32 bits of middle sum */
	shl r15 = r15, 32			/* move middle low half to bits 32..63 */
	;;
	add r20 = r21, r14			/* r20 = high word of in0*in2 */
	;;
	add ret0 = r15, r16			/* low word of the product */
	add ret1 = r22, r20			/* high word: in0*in2 high + cross terms */
	br.ret.sptk rp
	;;
END(___multi3)
	.symver ___multi3, __multi3@GLIBC_2.2

#endif