1| 2| ssin.sa 3.3 7/29/91 3| 4| The entry point sSIN computes the sine of an input argument 5| sCOS computes the cosine, and sSINCOS computes both. The 6| corresponding entry points with a "d" computes the same 7| corresponding function values for denormalized inputs. 8| 9| Input: Double-extended number X in location pointed to 10| by address register a0. 11| 12| Output: The function value sin(X) or cos(X) returned in Fp0 if SIN or 13| COS is requested. Otherwise, for SINCOS, sin(X) is returned 14| in Fp0, and cos(X) is returned in Fp1. 15| 16| Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS. 17| 18| Accuracy and Monotonicity: The returned result is within 1 ulp in 19| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the 20| result is subsequently rounded to double precision. The 21| result is provably monotonic in double precision. 22| 23| Speed: The programs sSIN and sCOS take approximately 150 cycles for 24| input argument X such that |X| < 15Pi, which is the usual 25| situation. The speed for sSINCOS is approximately 190 cycles. 26| 27| Algorithm: 28| 29| SIN and COS: 30| 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. 31| 32| 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. 33| 34| 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let 35| k = N mod 4, so in particular, k = 0,1,2,or 3. Overwrite 36| k by k := k + AdjN. 37| 38| 4. If k is even, go to 6. 39| 40| 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r) 41| where cos(r) is approximated by an even polynomial in r, 42| 1 + r*r*(B1+s*(B2+ ... + s*B8)), s = r*r. 43| Exit. 44| 45| 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) 46| where sin(r) is approximated by an odd polynomial in r 47| r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. 48| Exit. 49| 50| 7. If |X| > 1, go to 9. 51| 52| 8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1. 53| 54| 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3. 55| 56| SINCOS: 57| 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. 58| 59| 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let 60| k = N mod 4, so in particular, k = 0,1,2,or 3. 61| 62| 3. If k is even, go to 5. 63| 64| 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e. 65| j1 exclusive or with the l.s.b. of k. 66| sgn1 := (-1)**j1, sgn2 := (-1)**j2. 67| SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where 68| sin(r) and cos(r) are computed as odd and even polynomials 69| in r, respectively. Exit 70| 71| 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. 72| SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where 73| sin(r) and cos(r) are computed as odd and even polynomials 74| in r, respectively. Exit 75| 76| 6. If |X| > 1, go to 8. 77| 78| 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. 79| 80| 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2. 81| 82 83| Copyright (C) Motorola, Inc. 1990 84| All Rights Reserved 85| 86| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 87| The copyright notice above does not evidence any 88| actual or intended publication of such source code. 89 90|SSIN idnt 2,1 | Motorola 040 Floating Point Software Package 91 92 |section 8 93 94 .include "fpsp.h" 95 96BOUNDS1: .long 0x3FD78000,0x4004BC7E 97TWOBYPI: .long 0x3FE45F30,0x6DC9C883 98 99SINA7: .long 0xBD6AAA77,0xCCC994F5 100SINA6: .long 0x3DE61209,0x7AAE8DA1 101 102SINA5: .long 0xBE5AE645,0x2A118AE4 103SINA4: .long 0x3EC71DE3,0xA5341531 104 105SINA3: .long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000 106 107SINA2: .long 0x3FF80000,0x88888888,0x888859AF,0x00000000 108 109SINA1: .long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000 110 111COSB8: .long 0x3D2AC4D0,0xD6011EE3 112COSB7: .long 0xBDA9396F,0x9F45AC19 113 114COSB6: .long 0x3E21EED9,0x0612C972 115COSB5: .long 0xBE927E4F,0xB79D9FCF 116 117COSB4: .long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000 118 119COSB3: .long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000 120 121COSB2: .long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E 122COSB1: .long 0xBF000000 123 124INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A 125 126TWOPI1: .long 0x40010000,0xC90FDAA2,0x00000000,0x00000000 127TWOPI2: .long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000 128 129 |xref PITBL 130 131 .set INARG,FP_SCR4 132 133 .set X,FP_SCR5 134 .set XDCARE,X+2 135 .set XFRAC,X+4 136 137 .set RPRIME,FP_SCR1 138 .set SPRIME,FP_SCR2 139 140 .set POSNEG1,L_SCR1 141 .set TWOTO63,L_SCR1 142 143 .set ENDFLAG,L_SCR2 144 .set N,L_SCR2 145 146 .set ADJN,L_SCR3 147 148 | xref t_frcinx 149 |xref t_extdnrm 150 |xref sto_cos 151 152 .global ssind 153ssind: 154|--SIN(X) = X FOR DENORMALIZED X 155 bra t_extdnrm 156 157 .global scosd 158scosd: 159|--COS(X) = 1 FOR DENORMALIZED X 160 161 fmoves #0x3F800000,%fp0 162| 163| 9D25B Fix: Sometimes the previous fmove.s sets fpsr bits 164| 165 fmovel #0,%fpsr 166| 167 bra t_frcinx 168 169 .global ssin 170ssin: 171|--SET ADJN TO 0 172 movel #0,ADJN(%a6) 173 bras SINBGN 174 175 .global scos 176scos: 177|--SET ADJN TO 1 178 movel #1,ADJN(%a6) 179 180SINBGN: 181|--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE 182 183 fmovex (%a0),%fp0 | ...LOAD INPUT 184 185 movel (%a0),%d0 186 movew 4(%a0),%d0 187 fmovex %fp0,X(%a6) 188 andil #0x7FFFFFFF,%d0 | ...COMPACTIFY X 189 190 cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)? 191 bges SOK1 192 bra SINSM 193 194SOK1: 195 cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI? 196 blts SINMAIN 197 bra REDUCEX 198 199SINMAIN: 200|--THIS IS THE USUAL CASE, |X| <= 15 PI. 201|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. 202 fmovex %fp0,%fp1 203 fmuld TWOBYPI,%fp1 | ...X*2/PI 204 205|--HIDE THE NEXT THREE INSTRUCTIONS 206 lea PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32 207 208 209|--FP1 IS NOW READY 210 fmovel %fp1,N(%a6) | ...CONVERT TO INTEGER 211 212 movel N(%a6),%d0 213 asll #4,%d0 214 addal %d0,%a1 | ...A1 IS THE ADDRESS OF N*PIBY2 215| ...WHICH IS IN TWO PIECES Y1 & Y2 216 217 fsubx (%a1)+,%fp0 | ...X-Y1 218|--HIDE THE NEXT ONE 219 fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2 220 221SINCONT: 222|--continuation from REDUCEX 223 224|--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED 225 movel N(%a6),%d0 226 addl ADJN(%a6),%d0 | ...SEE IF D0 IS ODD OR EVEN 227 rorl #1,%d0 | ...D0 WAS ODD IFF D0 IS NEGATIVE 228 cmpil #0,%d0 229 blt COSPOLY 230 231SINPOLY: 232|--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. 233|--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY 234|--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE 235|--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS 236|--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))]) 237|--WHERE T=S*S. 238|--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION 239|--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT. 240 fmovex %fp0,X(%a6) | ...X IS R 241 fmulx %fp0,%fp0 | ...FP0 IS S 242|---HIDE THE NEXT TWO WHILE WAITING FOR FP0 243 fmoved SINA7,%fp3 244 fmoved SINA6,%fp2 245|--FP0 IS NOW READY 246 fmovex %fp0,%fp1 247 fmulx %fp1,%fp1 | ...FP1 IS T 248|--HIDE THE NEXT TWO WHILE WAITING FOR FP1 249 250 rorl #1,%d0 251 andil #0x80000000,%d0 252| ...LEAST SIG. BIT OF D0 IN SIGN POSITION 253 eorl %d0,X(%a6) | ...X IS NOW R'= SGN*R 254 255 fmulx %fp1,%fp3 | ...TA7 256 fmulx %fp1,%fp2 | ...TA6 257 258 faddd SINA5,%fp3 | ...A5+TA7 259 faddd SINA4,%fp2 | ...A4+TA6 260 261 fmulx %fp1,%fp3 | ...T(A5+TA7) 262 fmulx %fp1,%fp2 | ...T(A4+TA6) 263 264 faddd SINA3,%fp3 | ...A3+T(A5+TA7) 265 faddx SINA2,%fp2 | ...A2+T(A4+TA6) 266 267 fmulx %fp3,%fp1 | ...T(A3+T(A5+TA7)) 268 269 fmulx %fp0,%fp2 | ...S(A2+T(A4+TA6)) 270 faddx SINA1,%fp1 | ...A1+T(A3+T(A5+TA7)) 271 fmulx X(%a6),%fp0 | ...R'*S 272 273 faddx %fp2,%fp1 | ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] 274|--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING 275|--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING 276 277 278 fmulx %fp1,%fp0 | ...SIN(R')-R' 279|--FP1 RELEASED. 280 281 fmovel %d1,%FPCR |restore users exceptions 282 faddx X(%a6),%fp0 |last inst - possible exception set 283 bra t_frcinx 284 285 286COSPOLY: 287|--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. 288|--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY 289|--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE 290|--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS 291|--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))]) 292|--WHERE T=S*S. 293|--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION 294|--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2 295|--AND IS THEREFORE STORED AS SINGLE PRECISION. 296 297 fmulx %fp0,%fp0 | ...FP0 IS S 298|---HIDE THE NEXT TWO WHILE WAITING FOR FP0 299 fmoved COSB8,%fp2 300 fmoved COSB7,%fp3 301|--FP0 IS NOW READY 302 fmovex %fp0,%fp1 303 fmulx %fp1,%fp1 | ...FP1 IS T 304|--HIDE THE NEXT TWO WHILE WAITING FOR FP1 305 fmovex %fp0,X(%a6) | ...X IS S 306 rorl #1,%d0 307 andil #0x80000000,%d0 308| ...LEAST SIG. BIT OF D0 IN SIGN POSITION 309 310 fmulx %fp1,%fp2 | ...TB8 311|--HIDE THE NEXT TWO WHILE WAITING FOR THE XU 312 eorl %d0,X(%a6) | ...X IS NOW S'= SGN*S 313 andil #0x80000000,%d0 314 315 fmulx %fp1,%fp3 | ...TB7 316|--HIDE THE NEXT TWO WHILE WAITING FOR THE XU 317 oril #0x3F800000,%d0 | ...D0 IS SGN IN SINGLE 318 movel %d0,POSNEG1(%a6) 319 320 faddd COSB6,%fp2 | ...B6+TB8 321 faddd COSB5,%fp3 | ...B5+TB7 322 323 fmulx %fp1,%fp2 | ...T(B6+TB8) 324 fmulx %fp1,%fp3 | ...T(B5+TB7) 325 326 faddd COSB4,%fp2 | ...B4+T(B6+TB8) 327 faddx COSB3,%fp3 | ...B3+T(B5+TB7) 328 329 fmulx %fp1,%fp2 | ...T(B4+T(B6+TB8)) 330 fmulx %fp3,%fp1 | ...T(B3+T(B5+TB7)) 331 332 faddx COSB2,%fp2 | ...B2+T(B4+T(B6+TB8)) 333 fadds COSB1,%fp1 | ...B1+T(B3+T(B5+TB7)) 334 335 fmulx %fp2,%fp0 | ...S(B2+T(B4+T(B6+TB8))) 336|--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING 337|--FP2 RELEASED. 338 339 340 faddx %fp1,%fp0 341|--FP1 RELEASED 342 343 fmulx X(%a6),%fp0 344 345 fmovel %d1,%FPCR |restore users exceptions 346 fadds POSNEG1(%a6),%fp0 |last inst - possible exception set 347 bra t_frcinx 348 349 350SINBORS: 351|--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. 352|--IF |X| < 2**(-40), RETURN X OR 1. 353 cmpil #0x3FFF8000,%d0 354 bgts REDUCEX 355 356 357SINSM: 358 movel ADJN(%a6),%d0 359 cmpil #0,%d0 360 bgts COSTINY 361 362SINTINY: 363 movew #0x0000,XDCARE(%a6) | ...JUST IN CASE 364 fmovel %d1,%FPCR |restore users exceptions 365 fmovex X(%a6),%fp0 |last inst - possible exception set 366 bra t_frcinx 367 368 369COSTINY: 370 fmoves #0x3F800000,%fp0 371 372 fmovel %d1,%FPCR |restore users exceptions 373 fsubs #0x00800000,%fp0 |last inst - possible exception set 374 bra t_frcinx 375 376 377REDUCEX: 378|--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW. 379|--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING 380|--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE. 381 382 fmovemx %fp2-%fp5,-(%a7) | ...save FP2 through FP5 383 movel %d2,-(%a7) 384 fmoves #0x00000000,%fp1 385|--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that 386|--there is a danger of unwanted overflow in first LOOP iteration. In this 387|--case, reduce argument by one remainder step to make subsequent reduction 388|--safe. 389 cmpil #0x7ffeffff,%d0 |is argument dangerously large? 390 bnes LOOP 391 movel #0x7ffe0000,FP_SCR2(%a6) |yes 392| ;create 2**16383*PI/2 393 movel #0xc90fdaa2,FP_SCR2+4(%a6) 394 clrl FP_SCR2+8(%a6) 395 ftstx %fp0 |test sign of argument 396 movel #0x7fdc0000,FP_SCR3(%a6) |create low half of 2**16383* 397| ;PI/2 at FP_SCR3 398 movel #0x85a308d3,FP_SCR3+4(%a6) 399 clrl FP_SCR3+8(%a6) 400 fblt red_neg 401 orw #0x8000,FP_SCR2(%a6) |positive arg 402 orw #0x8000,FP_SCR3(%a6) 403red_neg: 404 faddx FP_SCR2(%a6),%fp0 |high part of reduction is exact 405 fmovex %fp0,%fp1 |save high result in fp1 406 faddx FP_SCR3(%a6),%fp0 |low part of reduction 407 fsubx %fp0,%fp1 |determine low component of result 408 faddx FP_SCR3(%a6),%fp1 |fp0/fp1 are reduced argument. 409 410|--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4. 411|--integer quotient will be stored in N 412|--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1) 413 414LOOP: 415 fmovex %fp0,INARG(%a6) | ...+-2**K * F, 1 <= F < 2 416 movew INARG(%a6),%d0 417 movel %d0,%a1 | ...save a copy of D0 418 andil #0x00007FFF,%d0 419 subil #0x00003FFF,%d0 | ...D0 IS K 420 cmpil #28,%d0 421 bles LASTLOOP 422CONTLOOP: 423 subil #27,%d0 | ...D0 IS L := K-27 424 movel #0,ENDFLAG(%a6) 425 bras WORK 426LASTLOOP: 427 clrl %d0 | ...D0 IS L := 0 428 movel #1,ENDFLAG(%a6) 429 430WORK: 431|--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN 432|--THAT INT( X * (2/PI) / 2**(L) ) < 2**29. 433 434|--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63), 435|--2**L * (PIby2_1), 2**L * (PIby2_2) 436 437 movel #0x00003FFE,%d2 | ...BIASED EXPO OF 2/PI 438 subl %d0,%d2 | ...BIASED EXPO OF 2**(-L)*(2/PI) 439 440 movel #0xA2F9836E,FP_SCR1+4(%a6) 441 movel #0x4E44152A,FP_SCR1+8(%a6) 442 movew %d2,FP_SCR1(%a6) | ...FP_SCR1 is 2**(-L)*(2/PI) 443 444 fmovex %fp0,%fp2 445 fmulx FP_SCR1(%a6),%fp2 446|--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN 447|--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N 448|--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT 449|--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE 450|--US THE DESIRED VALUE IN FLOATING POINT. 451 452|--HIDE SIX CYCLES OF INSTRUCTION 453 movel %a1,%d2 454 swap %d2 455 andil #0x80000000,%d2 456 oril #0x5F000000,%d2 | ...D2 IS SIGN(INARG)*2**63 IN SGL 457 movel %d2,TWOTO63(%a6) 458 459 movel %d0,%d2 460 addil #0x00003FFF,%d2 | ...BIASED EXPO OF 2**L * (PI/2) 461 462|--FP2 IS READY 463 fadds TWOTO63(%a6),%fp2 | ...THE FRACTIONAL PART OF FP1 IS ROUNDED 464 465|--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2 466 movew %d2,FP_SCR2(%a6) 467 clrw FP_SCR2+2(%a6) 468 movel #0xC90FDAA2,FP_SCR2+4(%a6) 469 clrl FP_SCR2+8(%a6) | ...FP_SCR2 is 2**(L) * Piby2_1 470 471|--FP2 IS READY 472 fsubs TWOTO63(%a6),%fp2 | ...FP2 is N 473 474 addil #0x00003FDD,%d0 475 movew %d0,FP_SCR3(%a6) 476 clrw FP_SCR3+2(%a6) 477 movel #0x85A308D3,FP_SCR3+4(%a6) 478 clrl FP_SCR3+8(%a6) | ...FP_SCR3 is 2**(L) * Piby2_2 479 480 movel ENDFLAG(%a6),%d0 481 482|--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and 483|--P2 = 2**(L) * Piby2_2 484 fmovex %fp2,%fp4 485 fmulx FP_SCR2(%a6),%fp4 | ...W = N*P1 486 fmovex %fp2,%fp5 487 fmulx FP_SCR3(%a6),%fp5 | ...w = N*P2 488 fmovex %fp4,%fp3 489|--we want P+p = W+w but |p| <= half ulp of P 490|--Then, we need to compute A := R-P and a := r-p 491 faddx %fp5,%fp3 | ...FP3 is P 492 fsubx %fp3,%fp4 | ...W-P 493 494 fsubx %fp3,%fp0 | ...FP0 is A := R - P 495 faddx %fp5,%fp4 | ...FP4 is p = (W-P)+w 496 497 fmovex %fp0,%fp3 | ...FP3 A 498 fsubx %fp4,%fp1 | ...FP1 is a := r - p 499 500|--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but 501|--|r| <= half ulp of R. 502 faddx %fp1,%fp0 | ...FP0 is R := A+a 503|--No need to calculate r if this is the last loop 504 cmpil #0,%d0 505 bgt RESTORE 506 507|--Need to calculate r 508 fsubx %fp0,%fp3 | ...A-R 509 faddx %fp3,%fp1 | ...FP1 is r := (A-R)+a 510 bra LOOP 511 512RESTORE: 513 fmovel %fp2,N(%a6) 514 movel (%a7)+,%d2 515 fmovemx (%a7)+,%fp2-%fp5 516 517 518 movel ADJN(%a6),%d0 519 cmpil #4,%d0 520 521 blt SINCONT 522 bras SCCONT 523 524 .global ssincosd 525ssincosd: 526|--SIN AND COS OF X FOR DENORMALIZED X 527 528 fmoves #0x3F800000,%fp1 529 bsr sto_cos |store cosine result 530 bra t_extdnrm 531 532 .global ssincos 533ssincos: 534|--SET ADJN TO 4 535 movel #4,ADJN(%a6) 536 537 fmovex (%a0),%fp0 | ...LOAD INPUT 538 539 movel (%a0),%d0 540 movew 4(%a0),%d0 541 fmovex %fp0,X(%a6) 542 andil #0x7FFFFFFF,%d0 | ...COMPACTIFY X 543 544 cmpil #0x3FD78000,%d0 | ...|X| >= 2**(-40)? 545 bges SCOK1 546 bra SCSM 547 548SCOK1: 549 cmpil #0x4004BC7E,%d0 | ...|X| < 15 PI? 550 blts SCMAIN 551 bra REDUCEX 552 553 554SCMAIN: 555|--THIS IS THE USUAL CASE, |X| <= 15 PI. 556|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. 557 fmovex %fp0,%fp1 558 fmuld TWOBYPI,%fp1 | ...X*2/PI 559 560|--HIDE THE NEXT THREE INSTRUCTIONS 561 lea PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32 562 563 564|--FP1 IS NOW READY 565 fmovel %fp1,N(%a6) | ...CONVERT TO INTEGER 566 567 movel N(%a6),%d0 568 asll #4,%d0 569 addal %d0,%a1 | ...ADDRESS OF N*PIBY2, IN Y1, Y2 570 571 fsubx (%a1)+,%fp0 | ...X-Y1 572 fsubs (%a1),%fp0 | ...FP0 IS R = (X-Y1)-Y2 573 574SCCONT: 575|--continuation point from REDUCEX 576 577|--HIDE THE NEXT TWO 578 movel N(%a6),%d0 579 rorl #1,%d0 580 581 cmpil #0,%d0 | ...D0 < 0 IFF N IS ODD 582 bge NEVEN 583 584NODD: 585|--REGISTERS SAVED SO FAR: D0, A0, FP2. 586 587 fmovex %fp0,RPRIME(%a6) 588 fmulx %fp0,%fp0 | ...FP0 IS S = R*R 589 fmoved SINA7,%fp1 | ...A7 590 fmoved COSB8,%fp2 | ...B8 591 fmulx %fp0,%fp1 | ...SA7 592 movel %d2,-(%a7) 593 movel %d0,%d2 594 fmulx %fp0,%fp2 | ...SB8 595 rorl #1,%d2 596 andil #0x80000000,%d2 597 598 faddd SINA6,%fp1 | ...A6+SA7 599 eorl %d0,%d2 600 andil #0x80000000,%d2 601 faddd COSB7,%fp2 | ...B7+SB8 602 603 fmulx %fp0,%fp1 | ...S(A6+SA7) 604 eorl %d2,RPRIME(%a6) 605 movel (%a7)+,%d2 606 fmulx %fp0,%fp2 | ...S(B7+SB8) 607 rorl #1,%d0 608 andil #0x80000000,%d0 609 610 faddd SINA5,%fp1 | ...A5+S(A6+SA7) 611 movel #0x3F800000,POSNEG1(%a6) 612 eorl %d0,POSNEG1(%a6) 613 faddd COSB6,%fp2 | ...B6+S(B7+SB8) 614 615 fmulx %fp0,%fp1 | ...S(A5+S(A6+SA7)) 616 fmulx %fp0,%fp2 | ...S(B6+S(B7+SB8)) 617 fmovex %fp0,SPRIME(%a6) 618 619 faddd SINA4,%fp1 | ...A4+S(A5+S(A6+SA7)) 620 eorl %d0,SPRIME(%a6) 621 faddd COSB5,%fp2 | ...B5+S(B6+S(B7+SB8)) 622 623 fmulx %fp0,%fp1 | ...S(A4+...) 624 fmulx %fp0,%fp2 | ...S(B5+...) 625 626 faddd SINA3,%fp1 | ...A3+S(A4+...) 627 faddd COSB4,%fp2 | ...B4+S(B5+...) 628 629 fmulx %fp0,%fp1 | ...S(A3+...) 630 fmulx %fp0,%fp2 | ...S(B4+...) 631 632 faddx SINA2,%fp1 | ...A2+S(A3+...) 633 faddx COSB3,%fp2 | ...B3+S(B4+...) 634 635 fmulx %fp0,%fp1 | ...S(A2+...) 636 fmulx %fp0,%fp2 | ...S(B3+...) 637 638 faddx SINA1,%fp1 | ...A1+S(A2+...) 639 faddx COSB2,%fp2 | ...B2+S(B3+...) 640 641 fmulx %fp0,%fp1 | ...S(A1+...) 642 fmulx %fp2,%fp0 | ...S(B2+...) 643 644 645 646 fmulx RPRIME(%a6),%fp1 | ...R'S(A1+...) 647 fadds COSB1,%fp0 | ...B1+S(B2...) 648 fmulx SPRIME(%a6),%fp0 | ...S'(B1+S(B2+...)) 649 650 movel %d1,-(%sp) |restore users mode & precision 651 andil #0xff,%d1 |mask off all exceptions 652 fmovel %d1,%FPCR 653 faddx RPRIME(%a6),%fp1 | ...COS(X) 654 bsr sto_cos |store cosine result 655 fmovel (%sp)+,%FPCR |restore users exceptions 656 fadds POSNEG1(%a6),%fp0 | ...SIN(X) 657 658 bra t_frcinx 659 660 661NEVEN: 662|--REGISTERS SAVED SO FAR: FP2. 663 664 fmovex %fp0,RPRIME(%a6) 665 fmulx %fp0,%fp0 | ...FP0 IS S = R*R 666 fmoved COSB8,%fp1 | ...B8 667 fmoved SINA7,%fp2 | ...A7 668 fmulx %fp0,%fp1 | ...SB8 669 fmovex %fp0,SPRIME(%a6) 670 fmulx %fp0,%fp2 | ...SA7 671 rorl #1,%d0 672 andil #0x80000000,%d0 673 faddd COSB7,%fp1 | ...B7+SB8 674 faddd SINA6,%fp2 | ...A6+SA7 675 eorl %d0,RPRIME(%a6) 676 eorl %d0,SPRIME(%a6) 677 fmulx %fp0,%fp1 | ...S(B7+SB8) 678 oril #0x3F800000,%d0 679 movel %d0,POSNEG1(%a6) 680 fmulx %fp0,%fp2 | ...S(A6+SA7) 681 682 faddd COSB6,%fp1 | ...B6+S(B7+SB8) 683 faddd SINA5,%fp2 | ...A5+S(A6+SA7) 684 685 fmulx %fp0,%fp1 | ...S(B6+S(B7+SB8)) 686 fmulx %fp0,%fp2 | ...S(A5+S(A6+SA7)) 687 688 faddd COSB5,%fp1 | ...B5+S(B6+S(B7+SB8)) 689 faddd SINA4,%fp2 | ...A4+S(A5+S(A6+SA7)) 690 691 fmulx %fp0,%fp1 | ...S(B5+...) 692 fmulx %fp0,%fp2 | ...S(A4+...) 693 694 faddd COSB4,%fp1 | ...B4+S(B5+...) 695 faddd SINA3,%fp2 | ...A3+S(A4+...) 696 697 fmulx %fp0,%fp1 | ...S(B4+...) 698 fmulx %fp0,%fp2 | ...S(A3+...) 699 700 faddx COSB3,%fp1 | ...B3+S(B4+...) 701 faddx SINA2,%fp2 | ...A2+S(A3+...) 702 703 fmulx %fp0,%fp1 | ...S(B3+...) 704 fmulx %fp0,%fp2 | ...S(A2+...) 705 706 faddx COSB2,%fp1 | ...B2+S(B3+...) 707 faddx SINA1,%fp2 | ...A1+S(A2+...) 708 709 fmulx %fp0,%fp1 | ...S(B2+...) 710 fmulx %fp2,%fp0 | ...s(a1+...) 711 712 713 714 fadds COSB1,%fp1 | ...B1+S(B2...) 715 fmulx RPRIME(%a6),%fp0 | ...R'S(A1+...) 716 fmulx SPRIME(%a6),%fp1 | ...S'(B1+S(B2+...)) 717 718 movel %d1,-(%sp) |save users mode & precision 719 andil #0xff,%d1 |mask off all exceptions 720 fmovel %d1,%FPCR 721 fadds POSNEG1(%a6),%fp1 | ...COS(X) 722 bsr sto_cos |store cosine result 723 fmovel (%sp)+,%FPCR |restore users exceptions 724 faddx RPRIME(%a6),%fp0 | ...SIN(X) 725 726 bra t_frcinx 727 728SCBORS: 729 cmpil #0x3FFF8000,%d0 730 bgt REDUCEX 731 732 733SCSM: 734 movew #0x0000,XDCARE(%a6) 735 fmoves #0x3F800000,%fp1 736 737 movel %d1,-(%sp) |save users mode & precision 738 andil #0xff,%d1 |mask off all exceptions 739 fmovel %d1,%FPCR 740 fsubs #0x00800000,%fp1 741 bsr sto_cos |store cosine result 742 fmovel (%sp)+,%FPCR |restore users exceptions 743 fmovex X(%a6),%fp0 744 bra t_frcinx 745 746 |end 747