1| 2| satan.sa 3.3 12/19/90 3| 4| The entry point satan computes the arctangent of an 5| input value. satand does the same except the input value is a 6| denormalized number. 7| 8| Input: Double-extended value in memory location pointed to by address 9| register a0. 10| 11| Output: Arctan(X) returned in floating-point register Fp0. 12| 13| Accuracy and Monotonicity: The returned result is within 2 ulps in 14| 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the 15| result is subsequently rounded to double precision. The 16| result is provably monotonic in double precision. 17| 18| Speed: The program satan takes approximately 160 cycles for input 19| argument X such that 1/16 < |X| < 16. For the other arguments, 20| the program will run no worse than 10% slower. 21| 22| Algorithm: 23| Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. 24| 25| Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3. 26| Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits 27| of X with a bit-1 attached at the 6-th bit position. Define u 28| to be u = (X-F) / (1 + X*F). 29| 30| Step 3. Approximate arctan(u) by a polynomial poly. 31| 32| Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values 33| calculated beforehand. Exit. 34| 35| Step 5. If |X| >= 16, go to Step 7. 36| 37| Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. 38| 39| Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'. 40| Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. 41| 42 43| Copyright (C) Motorola, Inc. 1990 44| All Rights Reserved 45| 46| THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 47| The copyright notice above does not evidence any 48| actual or intended publication of such source code. 49 50|satan idnt 2,1 | Motorola 040 Floating Point Software Package 51 52 |section 8 53 54 .include "fpsp.h" 55 56BOUNDS1: .long 0x3FFB8000,0x4002FFFF 57 58ONE: .long 0x3F800000 59 60 .long 0x00000000 61 62ATANA3: .long 0xBFF6687E,0x314987D8 63ATANA2: .long 0x4002AC69,0x34A26DB3 64 65ATANA1: .long 0xBFC2476F,0x4E1DA28E 66ATANB6: .long 0x3FB34444,0x7F876989 67 68ATANB5: .long 0xBFB744EE,0x7FAF45DB 69ATANB4: .long 0x3FBC71C6,0x46940220 70 71ATANB3: .long 0xBFC24924,0x921872F9 72ATANB2: .long 0x3FC99999,0x99998FA9 73 74ATANB1: .long 0xBFD55555,0x55555555 75ATANC5: .long 0xBFB70BF3,0x98539E6A 76 77ATANC4: .long 0x3FBC7187,0x962D1D7D 78ATANC3: .long 0xBFC24924,0x827107B8 79 80ATANC2: .long 0x3FC99999,0x9996263E 81ATANC1: .long 0xBFD55555,0x55555536 82 83PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 84NPIBY2: .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000 85PTINY: .long 0x00010000,0x80000000,0x00000000,0x00000000 86NTINY: .long 0x80010000,0x80000000,0x00000000,0x00000000 87 88ATANTBL: 89 .long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000 90 .long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000 91 .long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000 92 .long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000 93 .long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000 94 .long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000 95 .long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000 96 .long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000 97 .long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000 98 .long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000 99 .long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000 100 .long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000 101 .long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000 102 .long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000 103 .long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000 104 .long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000 105 .long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000 106 .long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000 107 .long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000 108 .long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000 109 .long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000 110 .long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000 111 .long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000 112 .long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000 113 .long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000 114 .long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000 115 .long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000 116 .long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000 117 .long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000 118 .long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000 119 .long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000 120 .long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000 121 .long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000 122 .long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000 123 .long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000 124 .long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000 125 .long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000 126 .long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000 127 .long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000 128 .long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000 129 .long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000 130 .long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000 131 .long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000 132 .long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000 133 .long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000 134 .long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000 135 .long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000 136 .long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000 137 .long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000 138 .long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000 139 .long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000 140 .long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000 141 .long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000 142 .long 0x3FFE0000,0x97731420,0x365E538C,0x00000000 143 .long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000 144 .long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000 145 .long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000 146 .long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000 147 .long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000 148 .long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000 149 .long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000 150 .long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000 151 .long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000 152 .long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000 153 .long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000 154 .long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000 155 .long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000 156 .long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000 157 .long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000 158 .long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000 159 .long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000 160 .long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000 161 .long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000 162 .long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000 163 .long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000 164 .long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000 165 .long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000 166 .long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000 167 .long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000 168 .long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000 169 .long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000 170 .long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000 171 .long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000 172 .long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000 173 .long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000 174 .long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000 175 .long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000 176 .long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000 177 .long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000 178 .long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000 179 .long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000 180 .long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000 181 .long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000 182 .long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000 183 .long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000 184 .long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000 185 .long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000 186 .long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000 187 .long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000 188 .long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000 189 .long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000 190 .long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000 191 .long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000 192 .long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000 193 .long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000 194 .long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000 195 .long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000 196 .long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000 197 .long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000 198 .long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000 199 .long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000 200 .long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000 201 .long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000 202 .long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000 203 .long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000 204 .long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000 205 .long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000 206 .long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000 207 .long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000 208 .long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000 209 .long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000 210 .long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000 211 .long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000 212 .long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000 213 .long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000 214 .long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000 215 .long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000 216 .long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000 217 218 .set X,FP_SCR1 219 .set XDCARE,X+2 220 .set XFRAC,X+4 221 .set XFRACLO,X+8 222 223 .set ATANF,FP_SCR2 224 .set ATANFHI,ATANF+4 225 .set ATANFLO,ATANF+8 226 227 228 | xref t_frcinx 229 |xref t_extdnrm 230 231 .global satand 232satand: 233|--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT 234 235 bra t_extdnrm 236 237 .global satan 238satan: 239|--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S 240 241 fmovex (%a0),%fp0 | ...LOAD INPUT 242 243 movel (%a0),%d0 244 movew 4(%a0),%d0 245 fmovex %fp0,X(%a6) 246 andil #0x7FFFFFFF,%d0 247 248 cmpil #0x3FFB8000,%d0 | ...|X| >= 1/16? 249 bges ATANOK1 250 bra ATANSM 251 252ATANOK1: 253 cmpil #0x4002FFFF,%d0 | ...|X| < 16 ? 254 bles ATANMAIN 255 bra ATANBIG 256 257 258|--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE 259|--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ). 260|--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN 261|--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE 262|--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS 263|--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR 264|--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO 265|--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDED UNDER THE 266|--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL 267|--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATE 268|--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION 269|--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION 270|--WILL INVOLVE A VERY LONG POLYNOMIAL. 271 272|--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS 273|--WE CHOSE F TO BE +-2^K * 1.BBBB1 274|--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE 275|--SIXTH BITS IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE 276|--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS 277|-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|). 278 279ATANMAIN: 280 281 movew #0x0000,XDCARE(%a6) | ...CLEAN UP X JUST IN CASE 282 andil #0xF8000000,XFRAC(%a6) | ...FIRST 5 BITS 283 oril #0x04000000,XFRAC(%a6) | ...SET 6-TH BIT TO 1 284 movel #0x00000000,XFRACLO(%a6) | ...LOCATION OF X IS NOW F 285 286 fmovex %fp0,%fp1 | ...FP1 IS X 287 fmulx X(%a6),%fp1 | ...FP1 IS X*F, NOTE THAT X*F > 0 288 fsubx X(%a6),%fp0 | ...FP0 IS X-F 289 fadds #0x3F800000,%fp1 | ...FP1 IS 1 + X*F 290 fdivx %fp1,%fp0 | ...FP0 IS U = (X-F)/(1+X*F) 291 292|--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|) 293|--CREATE ATAN(F) AND STORE IT IN ATANF, AND 294|--SAVE REGISTERS FP2. 295 296 movel %d2,-(%a7) | ...SAVE d2 TEMPORARILY 297 movel %d0,%d2 | ...THE EXPO AND 16 BITS OF X 298 andil #0x00007800,%d0 | ...4 VARYING BITS OF F'S FRACTION 299 andil #0x7FFF0000,%d2 | ...EXPONENT OF F 300 subil #0x3FFB0000,%d2 | ...K+4 301 asrl #1,%d2 302 addl %d2,%d0 | ...THE 7 BITS IDENTIFYING F 303 asrl #7,%d0 | ...INDEX INTO TBL OF ATAN(|F|) 304 lea ATANTBL,%a1 305 addal %d0,%a1 | ...ADDRESS OF ATAN(|F|) 306 movel (%a1)+,ATANF(%a6) 307 movel (%a1)+,ATANFHI(%a6) 308 movel (%a1)+,ATANFLO(%a6) | ...ATANF IS NOW ATAN(|F|) 309 movel X(%a6),%d0 | ...LOAD SIGN AND EXPO. AGAIN 310 andil #0x80000000,%d0 | ...SIGN(F) 311 orl %d0,ATANF(%a6) | ...ATANF IS NOW SIGN(F)*ATAN(|F|) 312 movel (%a7)+,%d2 | ...RESTORE d2 313 314|--THAT'S ALL I HAVE TO DO FOR NOW, 315|--BUT ALAS, THE DIVIDE IS STILL CRANKING! 316 317|--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS 318|--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U 319|--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT. 320|--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3)) 321|--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3. 322|--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT 323|--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED 324 325 326 fmovex %fp0,%fp1 327 fmulx %fp1,%fp1 328 fmoved ATANA3,%fp2 329 faddx %fp1,%fp2 | ...A3+V 330 fmulx %fp1,%fp2 | ...V*(A3+V) 331 fmulx %fp0,%fp1 | ...U*V 332 faddd ATANA2,%fp2 | ...A2+V*(A3+V) 333 fmuld ATANA1,%fp1 | ...A1*U*V 334 fmulx %fp2,%fp1 | ...A1*U*V*(A2+V*(A3+V)) 335 336 faddx %fp1,%fp0 | ...ATAN(U), FP1 RELEASED 337 fmovel %d1,%FPCR |restore users exceptions 338 faddx ATANF(%a6),%fp0 | ...ATAN(X) 339 bra t_frcinx 340 341ATANBORS: 342|--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED. 343|--FP0 IS X AND |X| <= 1/16 OR |X| >= 16. 344 cmpil #0x3FFF8000,%d0 345 bgt ATANBIG | ...I.E. |X| >= 16 346 347ATANSM: 348|--|X| <= 1/16 349|--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE 350|--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))) 351|--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6)] ) 352|--WHERE Y = X*X, AND Z = Y*Y. 353 354 cmpil #0x3FD78000,%d0 355 blt ATANTINY 356|--COMPUTE POLYNOMIAL 357 fmulx %fp0,%fp0 | ...FP0 IS Y = X*X 358 359 360 movew #0x0000,XDCARE(%a6) 361 362 fmovex %fp0,%fp1 363 fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y 364 365 fmoved ATANB6,%fp2 366 fmoved ATANB5,%fp3 367 368 fmulx %fp1,%fp2 | ...Z*B6 369 fmulx %fp1,%fp3 | ...Z*B5 370 371 faddd ATANB4,%fp2 | ...B4+Z*B6 372 faddd ATANB3,%fp3 | ...B3+Z*B5 373 374 fmulx %fp1,%fp2 | ...Z*(B4+Z*B6) 375 fmulx %fp3,%fp1 | ...Z*(B3+Z*B5) 376 377 faddd ATANB2,%fp2 | ...B2+Z*(B4+Z*B6) 378 faddd ATANB1,%fp1 | ...B1+Z*(B3+Z*B5) 379 380 fmulx %fp0,%fp2 | ...Y*(B2+Z*(B4+Z*B6)) 381 fmulx X(%a6),%fp0 | ...X*Y 382 383 faddx %fp2,%fp1 | ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] 384 385 386 fmulx %fp1,%fp0 | ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) 387 388 fmovel %d1,%FPCR |restore users exceptions 389 faddx X(%a6),%fp0 390 391 bra t_frcinx 392 393ATANTINY: 394|--|X| < 2^(-40), ATAN(X) = X 395 movew #0x0000,XDCARE(%a6) 396 397 fmovel %d1,%FPCR |restore users exceptions 398 fmovex X(%a6),%fp0 |last inst - possible exception set 399 400 bra t_frcinx 401 402ATANBIG: 403|--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE, 404|--RETURN SIGN(X)*PI/2 + ATAN(-1/X). 405 cmpil #0x40638000,%d0 406 bgt ATANHUGE 407 408|--APPROXIMATE ATAN(-1/X) BY 409|--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X' 410|--THIS CAN BE RE-WRITTEN AS 411|--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y. 412 413 fmoves #0xBF800000,%fp1 | ...LOAD -1 414 fdivx %fp0,%fp1 | ...FP1 IS -1/X 415 416 417|--DIVIDE IS STILL CRANKING 418 419 fmovex %fp1,%fp0 | ...FP0 IS X' 420 fmulx %fp0,%fp0 | ...FP0 IS Y = X'*X' 421 fmovex %fp1,X(%a6) | ...X IS REALLY X' 422 423 fmovex %fp0,%fp1 424 fmulx %fp1,%fp1 | ...FP1 IS Z = Y*Y 425 426 fmoved ATANC5,%fp3 427 fmoved ATANC4,%fp2 428 429 fmulx %fp1,%fp3 | ...Z*C5 430 fmulx %fp1,%fp2 | ...Z*B4 431 432 faddd ATANC3,%fp3 | ...C3+Z*C5 433 faddd ATANC2,%fp2 | ...C2+Z*C4 434 435 fmulx %fp3,%fp1 | ...Z*(C3+Z*C5), FP3 RELEASED 436 fmulx %fp0,%fp2 | ...Y*(C2+Z*C4) 437 438 faddd ATANC1,%fp1 | ...C1+Z*(C3+Z*C5) 439 fmulx X(%a6),%fp0 | ...X'*Y 440 441 faddx %fp2,%fp1 | ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] 442 443 444 fmulx %fp1,%fp0 | ...X'*Y*([B1+Z*(B3+Z*B5)] 445| ... +[Y*(B2+Z*(B4+Z*B6))]) 446 faddx X(%a6),%fp0 447 448 fmovel %d1,%FPCR |restore users exceptions 449 450 btstb #7,(%a0) 451 beqs pos_big 452 453neg_big: 454 faddx NPIBY2,%fp0 455 bra t_frcinx 456 457pos_big: 458 faddx PPIBY2,%fp0 459 bra t_frcinx 460 461ATANHUGE: 462|--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY 463 btstb #7,(%a0) 464 beqs pos_huge 465 466neg_huge: 467 fmovex NPIBY2,%fp0 468 fmovel %d1,%fpcr 469 fsubx NTINY,%fp0 470 bra t_frcinx 471 472pos_huge: 473 fmovex PPIBY2,%fp0 474 fmovel %d1,%fpcr 475 fsubx PTINY,%fp0 476 bra t_frcinx 477 478 |end 479