.file "log2f.s"


// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 09/11/00 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
//
// API
//==============================================================
// float log2f(float)
//
// Overview of operation
//==============================================================
// Background
//
// Implementation
//
// Let x = 2^l * m, where     m=1.b1 b2 ... b8 b9 ... b52
//     y=frcpa(m),   r=m*y-1, f=b1 b2 .. b8 (table index)
// j=0 if f<128; j=1 if f>=128
// T is a table that stores log2(1/y) (in entries 1..255) as 8-byte
//   double precision values; f is used as an index; T[255]=0
//
// If f=0 and b9=0, r is set to 2^{-8}* 0.b9 b10 ... b52 = m-1 (fractional part of m),
//                  and 0 is used instead of T[0]
//                  (polynomial evaluation only, for m=1+r, 0<=r<2^{-9})
// If f=255, r is set to (m-2)/2  (T[255]=0, and only polynomial evaluation is used
//                                 for m=2(1-r'), 0<=r'<2^{-9})
//
// log2f(x) is approximated as
//     (l-j) + T[f] + (c1*r+c2*r^2+c3*r^3+c4*r^4), if f>0
//


// Special values
//==============================================================
//  log2f(0)=-inf, raises Divide by Zero
//  log2f(+inf)=inf
//  log2f(x)=NaN,  raises Invalid if x<0
//


// Registers used
//==============================================================
//   f6-f14
//   r2-r3, r23-r30
//   p6,p7,p8,p10,p12
//
// Input:  f8 = x
// Output: f8 = log2f(x)
//
// Note: f10 has two roles.  On the main path it holds C_3 (and later P34);
// on the special-value path it is FR_X, the copy of the original argument
// that __libm_error_region stores as "Parameter 1".
//


GR_SAVE_B0                    = r33
GR_SAVE_PFS                   = r34
GR_SAVE_GP                    = r35 // This reg. can safely be used
GR_SAVE_SP                    = r36

GR_Parameter_X                = r37
GR_Parameter_Y                = r38
GR_Parameter_RESULT           = r39
GR_Parameter_TAG              = r40

FR_X             = f10
FR_Y             = f1
FR_RESULT        = f8




// Data tables
//==============================================================

RODATA

.align 16

LOCAL_OBJECT_START(poly_coeffs)

data8 0x3fdec709dc3a03fd, 0xbfd71547652b82fe //C_3 and C_4
data8 0xb8aa3b295c17f0bc, 0x00003fff  // C_1
data8 0xb8aa3b295c17f0bc, 0x0000bffe  // C_2
LOCAL_OBJECT_END(poly_coeffs)


LOCAL_OBJECT_START(T_table)

data8 0x3f671b0ea42e5fda, 0x3f815cfe8eaec830
data8 0x3f8cfee70c5ce5dc, 0x3f94564a62192834
data8 0x3f997723ace35766, 0x3f9f5923c69b54a1
data8 0x3fa2a094a085d693, 0x3fa538941776b01e
data8 0x3fa8324c9b914bc7, 0x3faacf54ce07d7e9
data8 0x3fadced958dadc12, 0x3fb0387efbca869e
data8 0x3fb18ac6067479c0, 0x3fb30edd3e13530d
data8 0x3fb463c15936464e, 0x3fb5b9e13c3fa21d
data8 0x3fb7113f3259e07a, 0x3fb869dd8d1b2035
data8 0x3fb9c3bea49d3214, 0x3fbb1ee4d7961701
data8 0x3fbc7b528b70f1c5, 0x3fbdd90a2c676ed4
data8 0x3fbf05d4976c2028, 0x3fc032fbbaee6d65
data8 0x3fc0e3b5a9f3284a, 0x3fc195195c7d125b
data8 0x3fc22dadc2ab3497, 0x3fc2e050231df57d
data8 0x3fc379f79c2b255b, 0x3fc42ddd2ba1b4a9
data8 0x3fc4c89b9e6807f5, 0x3fc563dc29ffacb2
data8 0x3fc619a25f5d798d, 0x3fc6b5ffbf367644
data8 0x3fc752e1f660f8d6, 0x3fc7f049e753e7cf
data8 0x3fc8a8980abfbd32, 0x3fc94724cca657be
data8 0x3fc9e63a24971f46, 0x3fca85d8feb202f7
data8 0x3fcb2602497d5346, 0x3fcbc6b6f5ee1c9b
data8 0x3fcc67f7f770a67e, 0x3fcceec4b2234fba
data8 0x3fcd91097ad13982, 0x3fce33dd57f3d335
data8 0x3fced74146bc7b10, 0x3fcf7b3646fef683
data8 0x3fd00223a943dc19, 0x3fd054a474bf0eb7
data8 0x3fd0999d9b9259a1, 0x3fd0eca66d3b2581
data8 0x3fd13ffa2e85b475, 0x3fd185a444fa0a7b
data8 0x3fd1cb8312f27eff, 0x3fd21fa1441ce5e8
data8 0x3fd265f526e603cb, 0x3fd2baa0c34be1ec
data8 0x3fd3016b45de21ce, 0x3fd3486c38aa29a8
data8 0x3fd38fa3efaa8262, 0x3fd3e562c0816a02
data8 0x3fd42d141f53b646, 0x3fd474fd543f222c
data8 0x3fd4bd1eb680e548, 0x3fd505789e234bd1
data8 0x3fd54e0b64003b70, 0x3fd596d761c3c1f0
data8 0x3fd5dfdcf1eeae0e, 0x3fd6291c6fd9329c
data8 0x3fd6729637b59418, 0x3fd6bc4aa692e0fd
data8 0x3fd7063a1a5fb4f2, 0x3fd75064f1ed0715
data8 0x3fd79acb8cf10390, 0x3fd7d67c1e43ae5c
data8 0x3fd8214f4068afa7, 0x3fd86c5f36dea3dc
data8 0x3fd8b7ac64dd7f9d, 0x3fd8f4167a0c6f92
data8 0x3fd93fd2d5e1bf1d, 0x3fd98bcd84296946
data8 0x3fd9c8c333e6e9a5, 0x3fda152f142981b4
data8 0x3fda527fd95fd8ff, 0x3fda9f5e3edeb9e6
data8 0x3fdadd0b2b5755a7, 0x3fdb2a5d6f51ff83
data8 0x3fdb686799b00be3, 0x3fdbb62f1b887cd8
data8 0x3fdbf4979f666668, 0x3fdc332a6e8399d4
data8 0x3fdc819dc2d45fe4, 0x3fdcc0908e19b7bd
data8 0x3fdcffae611ad12b, 0x3fdd3ef776d43ff4
data8 0x3fdd8e5002710128, 0x3fddcdfb486cb9a1
data8 0x3fde0dd294245fe4, 0x3fde4dd622a28840
data8 0x3fde8e06317114f0, 0x3fdece62fe9a9915
data8 0x3fdf1f164a15389a, 0x3fdf5fd8a9063e35
data8 0x3fdfa0c8937e7d5d, 0x3fdfe1e649bb6335
data8 0x3fe011990641535a, 0x3fe032560e91e59e
data8 0x3fe0532a5ebcd44a, 0x3fe0741617f5fc28
data8 0x3fe08cd653f38839, 0x3fe0adeb55c1103b
data8 0x3fe0cf181d5d1dd0, 0x3fe0f05ccd0aced7
data8 0x3fe111b9875788ab, 0x3fe1332e6f1bcf73
data8 0x3fe154bba77c2088, 0x3fe16df59bfa06c1
data8 0x3fe18fadb6e2d3c2, 0x3fe1b17e849adc26
data8 0x3fe1caeb6a0de814, 0x3fe1ece7c830eec9
data8 0x3fe20efd3dae01df, 0x3fe2289de375d901
data8 0x3fe24adf9b6a6fe0, 0x3fe26d3ad1aebcfc
data8 0x3fe287100c2771f4, 0x3fe2a9983b3c1b28
data8 0xbfda78e146f7bef4, 0xbfda33760a7f6051
data8 0xbfd9ff43476fb5f7, 0xbfd9b97c3c4eec8f
data8 0xbfd98504431717fc, 0xbfd93ee07535f967
data8 0xbfd90a228d5712b2, 0xbfd8c3a104cb24f5
data8 0xbfd88e9c72e0b226, 0xbfd847bc33d8618e
data8 0xbfd812703988bb69, 0xbfd7dd0569c04bff
data8 0xbfd7959c202292f1, 0xbfd75fe8d2c5d48f
data8 0xbfd72a1637cbc183, 0xbfd6e221cd9d0cde
data8 0xbfd6ac059985503b, 0xbfd675c99ce81f92
data8 0xbfd63f6db2590482, 0xbfd5f6c138136489
data8 0xbfd5c01a39fbd688, 0xbfd58952cf519193
data8 0xbfd5526ad18493ce, 0xbfd51b6219bfe6ea
data8 0xbfd4d1cdf8b4846f, 0xbfd49a784bcd1b8b
data8 0xbfd4630161832547, 0xbfd42b6911cf5465
data8 0xbfd3f3af3461e1c4, 0xbfd3bbd3a0a1dcfb
data8 0xbfd383d62dac7ae7, 0xbfd34bb6b2546218
data8 0xbfd313750520f520, 0xbfd2db10fc4d9aaf
data8 0xbfd2a28a6dc90387, 0xbfd269e12f346e2c
data8 0xbfd2311515e2e855, 0xbfd1f825f6d88e13
data8 0xbfd1bf13a6c9c69f, 0xbfd185ddfa1a7ed0
data8 0xbfd14c84c4dd6128, 0xbfd11307dad30b76
data8 0xbfd0d9670f6941fe, 0xbfd09fa235ba2020
data8 0xbfd0790adbb03009, 0xbfd03f09858c55fb
data8 0xbfd004e3a7c97cbd, 0xbfcf9532288fcf69
data8 0xbfcf205339208f27, 0xbfceab2a23a5b83e
data8 0xbfce5ce55fdd37a5, 0xbfcde73fe3b1480f
data8 0xbfcd714f44623927, 0xbfccfb1321b8c400
data8 0xbfccac163c770dc9, 0xbfcc355b67195dd0
data8 0xbfcbbe540a3f036f, 0xbfcb6ecf175f95e9
data8 0xbfcaf74751e1be33, 0xbfca7f71fb7bab9d
data8 0xbfca2f632320b86b, 0xbfc9b70ba539dfae
data8 0xbfc93e6587910444, 0xbfc8edcae8352b6c
data8 0xbfc874a0db01a719, 0xbfc7fb27199df16d
data8 0xbfc7a9fec7d05ddf, 0xbfc72fff456ac70d
data8 0xbfc6de7d66023dbc, 0xbfc663f6fac91316
data8 0xbfc6121ac74813cf, 0xbfc5970c478fff4a
data8 0xbfc51bab907a5c8a, 0xbfc4c93d33151b24
data8 0xbfc44d527fdadf55, 0xbfc3fa87be0f3a1b
data8 0xbfc3a797cd35d959, 0xbfc32ae9e278ae1a
data8 0xbfc2d79c6937efdd, 0xbfc25a619370d9dc
data8 0xbfc206b5bde2f8b8, 0xbfc188ecbd1d16be
data8 0xbfc134e1b489062e, 0xbfc0b6894488e95f
data8 0xbfc0621e2f556b5c, 0xbfc00d8c711a12cc
data8 0xbfbf1cd21257e18c, 0xbfbe72ec117fa5b2
data8 0xbfbdc8b7c49a1ddb, 0xbfbcc8d5e467b710
data8 0xbfbc1ddc9c39c7a1, 0xbfbb7294093cdd0f
data8 0xbfba7111df348494, 0xbfb9c501cdf75872
data8 0xbfb918a16e46335b, 0xbfb81579a73e83c6
data8 0xbfb7684f39f4ff2d, 0xbfb6bad3758efd87
data8 0xbfb60d060d7e41ac, 0xbfb507b836033bb7
data8 0xbfb4591d6310d85a, 0xbfb3aa2fdd27f1c3
data8 0xbfb2faef55ccb372, 0xbfb1f3723b4ae6db
data8 0xbfb14360d6136ffa, 0xbfb092fb594145c1
data8 0xbfafc482e8b48a7e, 0xbfae6265ace11ae4
data8 0xbfacff9e5c4341d0, 0xbfaaea3316095f72
data8 0xbfa985bfc3495194, 0xbfa820a01ac754cb
data8 0xbfa6bad3758efd87, 0xbfa554592bb8cd58
data8 0xbfa3ed3094685a26, 0xbfa2855905ca70f6
data8 0xbfa11cd1d5133413, 0xbf9dfd78881399f1
data8 0xbf9b28f618cc85df, 0xbf98530faa3c087b
data8 0xbf957bc3dddcd7fa, 0xbf92a3115322f9e6
data8 0xbf8f91ed4eef8370, 0xbf89dae4ec6b8b2e
data8 0xbf842106b1499209, 0xbf7cc89f97d67594
data8 0xbf71497accf7e11d, 0x0000000000000000
LOCAL_OBJECT_END(T_table)


.section .text
WEAK_LIBM_ENTRY(log2f)

{ .mfi
  alloc r32=ar.pfs,1,4,4,0
  // y=frcpa(x)
  frcpa.s1 f6,p0=f1,f8
  // will form significand of 1.5 (to test whether the index is 128 or above)
  mov r24=0xc
}
{.mfi
  nop.m 0
  // normalize x
  fma.s1 f7=f8,f1,f0
  // r2 = address of the linkage-table entry for poly_coeffs
  // (C_1...C_4, followed in memory by T_table)
  addl r2 = @ltoff(poly_coeffs), gp;;
}
{.mfi
  // get significand
  getf.sig r25=f8
  // f8 denormal ?
  fclass.m p8,p10=f8,0x9
  // will form significand of 1.5 (to test whether the index is 128 or above)
  shl r24=r24,60
}
{.mfi
  mov r26=0x804
  nop.f 0
  // r23=bias-1
  mov r23=0xfffe;;
}

{.mmf
  getf.exp r29=f8
  // load start address for C_1...C_4 followed by T_table
  ld8 r2=[r2]
  // will continue only for positive normal/denormal numbers
  fclass.nm.unc p12,p7 = f8, 0x19 ;;
}

.pred.rel "mutex",p8,p10
{.mfi
  // denormal input, repeat get significand (after normalization)
  (p8) getf.sig r25=f7
  // x=1 ?
  fcmp.eq.s0 p6,p0=f8,f1
  // get T_index
  (p10) shr.u r28=r25,63-8
}
{.mfi
  // f12=0.5
  setf.exp f12=r23
  nop.f 0
  // r27=bias
  mov r27=0xffff;;
}

{.mfb
  // denormal input, repeat get exponent (after normalization)
  (p8) getf.exp r29=f7
  nop.f 0
  // zero, negative, infinity, NaN: handled out of line
  (p12) br.cond.spnt SPECIAL_log2f
}
{.mfi
  cmp.geu p12,p0=r25,r24
  nop.f 0
  mov r23=0xff;;
}

{.mfi
  add r3=32,r2
  // r=x*y-1
  fms.s1 f6=f6,f8,f1
  // r26=0x80400...0 (threshold for using polynomial approximation)
  shl r26=r26,64-12
}
{.mfi
  // load C_3, C_4
  ldfpd f10,f11=[r2],16
  nop.f 0
  // r27=bias-1 (if index >=128, will add exponent+1)
  (p12) mov r27=0xfffe;;
}

{.mfi
  // load C_1
  ldfe f14=[r2],32
  // x=1, return 0
  (p6) fma.s.s0 f8=f0,f0,f0
  (p8) shr.u r28=r25,63-8
}
{.mib
  // load C_2
  ldfe f13=[r3]
  // r29=exponent-bias
  sub r29=r29,r27
  // x=1, return
  (p6) br.ret.spnt b0;;
}


{.mfi
  // get T_index
  and r28=r28,r23
  fmerge.se f7=f1,f7
  // if first 9 bits after leading 1 are all zero, then p8=1
  cmp.ltu p8,p12=r25,r26;;
}
{.mfi
  // f8=expon - bias
  setf.sig f8=r29
  nop.f 0
  // get T address
  shladd r2=r28,3,r2
}
{.mfi
  // first 8 bits after leading 1 are all ones ?
  cmp.eq p10,p0=r23,r28
  // if first 8 bits after leading bit are 0, use polynomial approx. only
  (p8) fms.s1 f6=f7,f1,f1
  nop.i 0;;
}
{.mfi
  //r26=1
  mov r26=1
  // if first 8 bits after leading 1 are all ones, use polynomial approx. only
  (p10) fms.s1 f6=f7,f12,f1
  nop.i 0;;
}

.pred.rel "mutex",p8,p12
{.mmf
  // load T (unless first 9 bits after leading 1 are 0)
  (p12) ldfd f12=[r2]
  nop.m 0
  // set T=0 (if first 9 bits after leading 1 are 0)
  (p8) fma.s1 f12=f0,f0,f0;;
}

{.mfi
  nop.m 0
  // P34=C_3+C_4*r
  fma.s1 f10=f11,f6,f10
  // r26=2^{63}
  shl r26=r26,63
}
{.mfi
  nop.m 0
  // r2=r*r
  fma.s1 f11=f6,f6,f0
  nop.i 0;;
}
{.mfi
  // significand of x is 1 ?
  cmp.eq p0,p6=r25,r26
  // P12=C_1+C_2*r
  fma.s1 f14=f13,f6,f14
  nop.i 0;;
}
{.mfi
  nop.m 0
  // normalize additive term (l=exponent of x)
  fcvt.xf f8=f8
  // if significand(x)=1, return exponent (l)
  nop.i 0;;
}
{.mfi
  nop.m 0
  // add T+l
  (p6) fma.s1 f8=f8,f1,f12
  nop.i 0
}
{.mfi
  nop.m 0
  // P14=P12+r2*P34
  (p6) fma.s1 f13=f10,f11,f14
  nop.i 0;;
}

{.mfb
  nop.m 0
  // result=T+l+r*P14
  (p6) fma.s.s0 f8=f13,f6,f8
  // return
  br.ret.sptk b0;;
}


SPECIAL_log2f:
{.mfi
  nop.m 0
  // Keep a copy of the original argument for the error handler.
  // f8 is overwritten with the result below, and __libm_error_region
  // stores FR_X as the input value.  FR_X (f10) has not been written on
  // this path: its only other writer, the C_3 load, sits after the branch
  // that brought us here.
  mov FR_X=f8
  nop.i 0
}
{.mfi
  nop.m 0
  // x=+Infinity ?
  fclass.m p7,p0=f8,0x21
  nop.i 0;;
}
{.mfi
  nop.m 0
  // x=+/-Zero ?
  fclass.m p8,p0=f8,0x7
  nop.i 0;;
}
{.mfi
  nop.m 0
  // x=-Infinity, -normal, -denormal ?
  fclass.m p6,p0=f8,0x3a
  nop.i 0;;
}
{.mfb
  nop.m 0
  // log2f(+Infinity)=+Infinity
  nop.f 0
  (p7) br.ret.spnt b0;;
}
{.mfi
  (p8) mov GR_Parameter_TAG = 172
  // log2f(+/-0)=-infinity, raises Divide by Zero
  // set f8=-0
  (p8) fmerge.ns f8=f0,f8
  nop.i 0;;
}
{.mfb
  nop.m 0
  (p8) frcpa.s0 f8,p0=f1,f8
  (p8) br.cond.sptk __libm_error_region;;
}
{.mfb
  (p6) mov GR_Parameter_TAG = 173
  // x<0: return NaN, raise Invalid
  (p6) frcpa.s0 f8,p0=f0,f0
  (p6) br.cond.sptk __libm_error_region;;
}


{.mfb
  nop.m 0
  // Remaining cases: NaNs
  fma.s.s0 f8=f8,f1,f0
  br.ret.sptk b0;;
}

WEAK_LIBM_END(log2f)
libm_alias_float_other (__log2, log2)
#ifdef SHARED
.symver log2f,log2f@@GLIBC_2.27
.weak __log2f_compat
.set __log2f_compat,__log2f
.symver __log2f_compat,log2f@GLIBC_2.2
#endif


// __libm_error_region
//==============================================================
// Entered from SPECIAL_log2f with GR_Parameter_TAG set, FR_X holding the
// original argument and FR_RESULT (f8) holding the default result.
// Stores FR_Y, FR_X and FR_RESULT into a 64-byte stack frame, passes their
// addresses in GR_Parameter_Y/X/RESULT to __libm_error_support, then
// reloads f8 from the result slot and returns to log2f's caller.
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
        nop.f 0
.save   ar.pfs,GR_SAVE_PFS
        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
}
{ .mfi
.fframe 64
        add sp=-64,sp                           // Create new stack
        nop.f 0
        mov GR_SAVE_GP=gp                       // Save gp
};;
{ .mmi
        stfs [GR_Parameter_Y] = FR_Y,16         // STORE Parameter 2 on stack
        add GR_Parameter_X = 16,sp              // Parameter 1 address
.save   b0, GR_SAVE_B0
        mov GR_SAVE_B0=b0                       // Save b0
};;
.body
{ .mib
        stfs [GR_Parameter_X] = FR_X            // STORE Parameter 1 on stack
        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
        nop.b 0
}
{ .mib
        stfs [GR_Parameter_Y] = FR_RESULT       // STORE Parameter 3 on stack
        add   GR_Parameter_Y = -16,GR_Parameter_Y
        br.call.sptk b0=__libm_error_support#   // Call error handling function
};;
{ .mmi
        nop.m 0
        nop.m 0
        add   GR_Parameter_RESULT = 48,sp
};;
{ .mmi
        ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
.restore sp
        add   sp = 64,sp                       // Restore stack pointer
        mov   b0 = GR_SAVE_B0                  // Restore return address
};;
{ .mib
        mov   gp = GR_SAVE_GP                  // Restore gp
        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
        br.ret.sptk     b0                     // Return
};;

LOCAL_LIBM_END(__libm_error_region)
.type   __libm_error_support#,@function
.global __libm_error_support#