1 .file "reg_round.S" 2/*---------------------------------------------------------------------------+ 3 | reg_round.S | 4 | | 5 | Rounding/truncation/etc for FPU basic arithmetic functions. | 6 | | 7 | Copyright (C) 1993,1995,1997 | 8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | 9 | Australia. E-mail billm@suburbia.net | 10 | | 11 | This code has four possible entry points. | 12 | The following must be entered by a jmp instruction: | 13 | fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit. | 14 | | 15 | The FPU_round entry point is intended to be used by C code. | 16 | From C, call as: | 17 | int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w) | 18 | | 19 | Return value is the tag of the answer, or-ed with FPU_Exception if | 20 | one was raised, or -1 on internal error. | 21 | | 22 | For correct "up" and "down" rounding, the argument must have the correct | 23 | sign. | 24 | | 25 +---------------------------------------------------------------------------*/ 26 27/*---------------------------------------------------------------------------+ 28 | Four entry points. | 29 | | 30 | Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points: | 31 | %eax:%ebx 64 bit significand | 32 | %edx 32 bit extension of the significand | 33 | %edi pointer to an FPU_REG for the result to be stored | 34 | stack calling function must have set up a C stack frame and | 35 | pushed %esi, %edi, and %ebx | 36 | | 37 | Needed just for the fpu_reg_round_sqrt entry point: | 38 | %cx A control word in the same format as the FPU control word. | 39 | Otherwise, PARAM4 must give such a value. | 40 | | 41 | | 42 | The significand and its extension are assumed to be exact in the | 43 | following sense: | 44 | If the significand by itself is the exact result then the significand | 45 | extension (%edx) must contain 0, otherwise the significand extension | 46 | must be non-zero. | 47 | If the significand extension is non-zero then the significand is | 48 | smaller than the magnitude of the correct exact result by an amount | 49 | greater than zero and less than one ls bit of the significand. | 50 | The significand extension is only required to have three possible | 51 | non-zero values: | 52 | less than 0x80000000 <=> the significand is less than 1/2 an ls | 53 | bit smaller than the magnitude of the | 54 | true exact result. | 55 | exactly 0x80000000 <=> the significand is exactly 1/2 an ls bit | 56 | smaller than the magnitude of the true | 57 | exact result. | 58 | greater than 0x80000000 <=> the significand is more than 1/2 an ls | 59 | bit smaller than the magnitude of the | 60 | true exact result. | 61 | | 62 +---------------------------------------------------------------------------*/ 63 64/*---------------------------------------------------------------------------+ 65 | The code in this module has become quite complex, but it should handle | 66 | all of the FPU flags which are set at this stage of the basic arithmetic | 67 | computations. | 68 | There are a few rare cases where the results are not set identically to | 69 | a real FPU. These require a bit more thought because at this stage the | 70 | results of the code here appear to be more consistent... | 71 | This may be changed in a future version. | 72 +---------------------------------------------------------------------------*/ 73 74 75#include "fpu_emu.h" 76#include "exception.h" 77#include "control_w.h" 78 79/* Flags for FPU_bits_lost */ 80#define LOST_DOWN $1 81#define LOST_UP $2 82 83/* Flags for FPU_denormal */ 84#define DENORMAL $1 85#define UNMASKED_UNDERFLOW $2 86 87 88#ifndef NON_REENTRANT_FPU 89/* Make the code re-entrant by putting 90 local storage on the stack: */ 91#define FPU_bits_lost (%esp) 92#define FPU_denormal 1(%esp) 93 94#else 95/* Not re-entrant, so we can gain speed by putting 96 local storage in a static area: */ 97.data 98 .align 4,0 99FPU_bits_lost: 100 .byte 0 101FPU_denormal: 102 .byte 0 103#endif /* NON_REENTRANT_FPU */ 104 105 106.text 107.globl fpu_reg_round 108.globl fpu_reg_round_sqrt 109.globl fpu_Arith_exit 110 111/* Entry point when called from C */ 112ENTRY(FPU_round) 113 pushl %ebp 114 movl %esp,%ebp 115 pushl %esi 116 pushl %edi 117 pushl %ebx 118 119 movl PARAM1,%edi 120 movl SIGH(%edi),%eax 121 movl SIGL(%edi),%ebx 122 movl PARAM2,%edx 123 124fpu_reg_round: /* Normal entry point */ 125 movl PARAM4,%ecx 126 127#ifndef NON_REENTRANT_FPU 128 pushl %ebx /* adjust the stack pointer */ 129#endif /* NON_REENTRANT_FPU */ 130 131#ifdef PARANOID 132/* Cannot use this here yet */ 133/* orl %eax,%eax */ 134/* jns L_entry_bugged */ 135#endif /* PARANOID */ 136 137 cmpw EXP_UNDER,EXP(%edi) 138 jle L_Make_denorm /* The number is a de-normal */ 139 140 movb $0,FPU_denormal /* 0 -> not a de-normal */ 141 142Denorm_done: 143 movb $0,FPU_bits_lost /* No bits yet lost in rounding */ 144 145 movl %ecx,%esi 146 andl CW_PC,%ecx 147 cmpl PR_64_BITS,%ecx 148 je LRound_To_64 149 150 cmpl PR_53_BITS,%ecx 151 je LRound_To_53 152 153 cmpl PR_24_BITS,%ecx 154 je LRound_To_24 155 156#ifdef PECULIAR_486 157/* With the precision control bits set to 01 "(reserved)", a real 80486 158 behaves as if the precision control bits were set to 11 "64 bits" */ 159 cmpl PR_RESERVED_BITS,%ecx 160 je LRound_To_64 161#ifdef PARANOID 162 jmp L_bugged_denorm_486 163#endif /* PARANOID */ 164#else 165#ifdef PARANOID 166 jmp L_bugged_denorm /* There is no bug, just a bad control word */ 167#endif /* PARANOID */ 168#endif /* PECULIAR_486 */ 169 170 171/* Round etc to 24 bit precision */ 172LRound_To_24: 173 movl %esi,%ecx 174 andl CW_RC,%ecx 175 cmpl RC_RND,%ecx 176 je LRound_nearest_24 177 178 cmpl RC_CHOP,%ecx 179 je LCheck_truncate_24 180 181 cmpl RC_UP,%ecx /* Towards +infinity */ 182 je LUp_24 183 184 cmpl RC_DOWN,%ecx /* Towards -infinity */ 185 je LDown_24 186 187#ifdef PARANOID 188 jmp L_bugged_round24 189#endif /* PARANOID */ 190 191LUp_24: 192 cmpb SIGN_POS,PARAM5 193 jne LCheck_truncate_24 /* If negative then up==truncate */ 194 195 jmp LCheck_24_round_up 196 197LDown_24: 198 cmpb SIGN_POS,PARAM5 199 je LCheck_truncate_24 /* If positive then down==truncate */ 200 201LCheck_24_round_up: 202 movl %eax,%ecx 203 andl $0x000000ff,%ecx 204 orl %ebx,%ecx 205 orl %edx,%ecx 206 jnz LDo_24_round_up 207 jmp L_Re_normalise 208 209LRound_nearest_24: 210 /* Do rounding of the 24th bit if needed (nearest or even) */ 211 movl %eax,%ecx 212 andl $0x000000ff,%ecx 213 cmpl $0x00000080,%ecx 214 jc LCheck_truncate_24 /* less than half, no increment needed */ 215 216 jne LGreater_Half_24 /* greater than half, increment needed */ 217 218 /* Possibly half, we need to check the ls bits */ 219 orl %ebx,%ebx 220 jnz LGreater_Half_24 /* greater than half, increment needed */ 221 222 orl %edx,%edx 223 jnz LGreater_Half_24 /* greater than half, increment needed */ 224 225 /* Exactly half, increment only if 24th bit is 1 (round to even) */ 226 testl $0x00000100,%eax 227 jz LDo_truncate_24 228 229LGreater_Half_24: /* Rounding: increment at the 24th bit */ 230LDo_24_round_up: 231 andl $0xffffff00,%eax /* Truncate to 24 bits */ 232 xorl %ebx,%ebx 233 movb LOST_UP,FPU_bits_lost 234 addl $0x00000100,%eax 235 jmp LCheck_Round_Overflow 236 237LCheck_truncate_24: 238 movl %eax,%ecx 239 andl $0x000000ff,%ecx 240 orl %ebx,%ecx 241 orl %edx,%ecx 242 jz L_Re_normalise /* No truncation needed */ 243 244LDo_truncate_24: 245 andl $0xffffff00,%eax /* Truncate to 24 bits */ 246 xorl %ebx,%ebx 247 movb LOST_DOWN,FPU_bits_lost 248 jmp L_Re_normalise 249 250 251/* Round etc to 53 bit precision */ 252LRound_To_53: 253 movl %esi,%ecx 254 andl CW_RC,%ecx 255 cmpl RC_RND,%ecx 256 je LRound_nearest_53 257 258 cmpl RC_CHOP,%ecx 259 je LCheck_truncate_53 260 261 cmpl RC_UP,%ecx /* Towards +infinity */ 262 je LUp_53 263 264 cmpl RC_DOWN,%ecx /* Towards -infinity */ 265 je LDown_53 266 267#ifdef PARANOID 268 jmp L_bugged_round53 269#endif /* PARANOID */ 270 271LUp_53: 272 cmpb SIGN_POS,PARAM5 273 jne LCheck_truncate_53 /* If negative then up==truncate */ 274 275 jmp LCheck_53_round_up 276 277LDown_53: 278 cmpb SIGN_POS,PARAM5 279 je LCheck_truncate_53 /* If positive then down==truncate */ 280 281LCheck_53_round_up: 282 movl %ebx,%ecx 283 andl $0x000007ff,%ecx 284 orl %edx,%ecx 285 jnz LDo_53_round_up 286 jmp L_Re_normalise 287 288LRound_nearest_53: 289 /* Do rounding of the 53rd bit if needed (nearest or even) */ 290 movl %ebx,%ecx 291 andl $0x000007ff,%ecx 292 cmpl $0x00000400,%ecx 293 jc LCheck_truncate_53 /* less than half, no increment needed */ 294 295 jnz LGreater_Half_53 /* greater than half, increment needed */ 296 297 /* Possibly half, we need to check the ls bits */ 298 orl %edx,%edx 299 jnz LGreater_Half_53 /* greater than half, increment needed */ 300 301 /* Exactly half, increment only if 53rd bit is 1 (round to even) */ 302 testl $0x00000800,%ebx 303 jz LTruncate_53 304 305LGreater_Half_53: /* Rounding: increment at the 53rd bit */ 306LDo_53_round_up: 307 movb LOST_UP,FPU_bits_lost 308 andl $0xfffff800,%ebx /* Truncate to 53 bits */ 309 addl $0x00000800,%ebx 310 adcl $0,%eax 311 jmp LCheck_Round_Overflow 312 313LCheck_truncate_53: 314 movl %ebx,%ecx 315 andl $0x000007ff,%ecx 316 orl %edx,%ecx 317 jz L_Re_normalise 318 319LTruncate_53: 320 movb LOST_DOWN,FPU_bits_lost 321 andl $0xfffff800,%ebx /* Truncate to 53 bits */ 322 jmp L_Re_normalise 323 324 325/* Round etc to 64 bit precision */ 326LRound_To_64: 327 movl %esi,%ecx 328 andl CW_RC,%ecx 329 cmpl RC_RND,%ecx 330 je LRound_nearest_64 331 332 cmpl RC_CHOP,%ecx 333 je LCheck_truncate_64 334 335 cmpl RC_UP,%ecx /* Towards +infinity */ 336 je LUp_64 337 338 cmpl RC_DOWN,%ecx /* Towards -infinity */ 339 je LDown_64 340 341#ifdef PARANOID 342 jmp L_bugged_round64 343#endif /* PARANOID */ 344 345LUp_64: 346 cmpb SIGN_POS,PARAM5 347 jne LCheck_truncate_64 /* If negative then up==truncate */ 348 349 orl %edx,%edx 350 jnz LDo_64_round_up 351 jmp L_Re_normalise 352 353LDown_64: 354 cmpb SIGN_POS,PARAM5 355 je LCheck_truncate_64 /* If positive then down==truncate */ 356 357 orl %edx,%edx 358 jnz LDo_64_round_up 359 jmp L_Re_normalise 360 361LRound_nearest_64: 362 cmpl $0x80000000,%edx 363 jc LCheck_truncate_64 364 365 jne LDo_64_round_up 366 367 /* Now test for round-to-even */ 368 testb $1,%bl 369 jz LCheck_truncate_64 370 371LDo_64_round_up: 372 movb LOST_UP,FPU_bits_lost 373 addl $1,%ebx 374 adcl $0,%eax 375 376LCheck_Round_Overflow: 377 jnc L_Re_normalise 378 379 /* Overflow, adjust the result (significand to 1.0) */ 380 rcrl $1,%eax 381 rcrl $1,%ebx 382 incw EXP(%edi) 383 jmp L_Re_normalise 384 385LCheck_truncate_64: 386 orl %edx,%edx 387 jz L_Re_normalise 388 389LTruncate_64: 390 movb LOST_DOWN,FPU_bits_lost 391 392L_Re_normalise: 393 testb $0xff,FPU_denormal 394 jnz Normalise_result 395 396L_Normalised: 397 movl TAG_Valid,%edx 398 399L_deNormalised: 400 cmpb LOST_UP,FPU_bits_lost 401 je L_precision_lost_up 402 403 cmpb LOST_DOWN,FPU_bits_lost 404 je L_precision_lost_down 405 406L_no_precision_loss: 407 /* store the result */ 408 409L_Store_significand: 410 movl %eax,SIGH(%edi) 411 movl %ebx,SIGL(%edi) 412 413 cmpw EXP_OVER,EXP(%edi) 414 jge L_overflow 415 416 movl %edx,%eax 417 418 /* Convert the exponent to 80x87 form. */ 419 addw EXTENDED_Ebias,EXP(%edi) 420 andw $0x7fff,EXP(%edi) 421 422fpu_reg_round_signed_special_exit: 423 424 cmpb SIGN_POS,PARAM5 425 je fpu_reg_round_special_exit 426 427 orw $0x8000,EXP(%edi) /* Negative sign for the result. */ 428 429fpu_reg_round_special_exit: 430 431#ifndef NON_REENTRANT_FPU 432 popl %ebx /* adjust the stack pointer */ 433#endif /* NON_REENTRANT_FPU */ 434 435fpu_Arith_exit: 436 popl %ebx 437 popl %edi 438 popl %esi 439 leave 440 ret 441 442 443/* 444 * Set the FPU status flags to represent precision loss due to 445 * round-up. 446 */ 447L_precision_lost_up: 448 push %edx 449 push %eax 450 call SYMBOL_NAME(set_precision_flag_up) 451 popl %eax 452 popl %edx 453 jmp L_no_precision_loss 454 455/* 456 * Set the FPU status flags to represent precision loss due to 457 * truncation. 458 */ 459L_precision_lost_down: 460 push %edx 461 push %eax 462 call SYMBOL_NAME(set_precision_flag_down) 463 popl %eax 464 popl %edx 465 jmp L_no_precision_loss 466 467 468/* 469 * The number is a denormal (which might get rounded up to a normal) 470 * Shift the number right the required number of bits, which will 471 * have to be undone later... 472 */ 473L_Make_denorm: 474 /* The action to be taken depends upon whether the underflow 475 exception is masked */ 476 testb CW_Underflow,%cl /* Underflow mask. */ 477 jz Unmasked_underflow /* Do not make a denormal. */ 478 479 movb DENORMAL,FPU_denormal 480 481 pushl %ecx /* Save */ 482 movw EXP_UNDER+1,%cx 483 subw EXP(%edi),%cx 484 485 cmpw $64,%cx /* shrd only works for 0..31 bits */ 486 jnc Denorm_shift_more_than_63 487 488 cmpw $32,%cx /* shrd only works for 0..31 bits */ 489 jnc Denorm_shift_more_than_32 490 491/* 492 * We got here without jumps by assuming that the most common requirement 493 * is for a small de-normalising shift. 494 * Shift by [1..31] bits 495 */ 496 addw %cx,EXP(%edi) 497 orl %edx,%edx /* extension */ 498 setne %ch /* Save whether %edx is non-zero */ 499 xorl %edx,%edx 500 shrd %cl,%ebx,%edx 501 shrd %cl,%eax,%ebx 502 shr %cl,%eax 503 orb %ch,%dl 504 popl %ecx 505 jmp Denorm_done 506 507/* Shift by [32..63] bits */ 508Denorm_shift_more_than_32: 509 addw %cx,EXP(%edi) 510 subb $32,%cl 511 orl %edx,%edx 512 setne %ch 513 orb %ch,%bl 514 xorl %edx,%edx 515 shrd %cl,%ebx,%edx 516 shrd %cl,%eax,%ebx 517 shr %cl,%eax 518 orl %edx,%edx /* test these 32 bits */ 519 setne %cl 520 orb %ch,%bl 521 orb %cl,%bl 522 movl %ebx,%edx 523 movl %eax,%ebx 524 xorl %eax,%eax 525 popl %ecx 526 jmp Denorm_done 527 528/* Shift by [64..) bits */ 529Denorm_shift_more_than_63: 530 cmpw $64,%cx 531 jne Denorm_shift_more_than_64 532 533/* Exactly 64 bit shift */ 534 addw %cx,EXP(%edi) 535 xorl %ecx,%ecx 536 orl %edx,%edx 537 setne %cl 538 orl %ebx,%ebx 539 setne %ch 540 orb %ch,%cl 541 orb %cl,%al 542 movl %eax,%edx 543 xorl %eax,%eax 544 xorl %ebx,%ebx 545 popl %ecx 546 jmp Denorm_done 547 548Denorm_shift_more_than_64: 549 movw EXP_UNDER+1,EXP(%edi) 550/* This is easy, %eax must be non-zero, so.. */ 551 movl $1,%edx 552 xorl %eax,%eax 553 xorl %ebx,%ebx 554 popl %ecx 555 jmp Denorm_done 556 557 558Unmasked_underflow: 559 movb UNMASKED_UNDERFLOW,FPU_denormal 560 jmp Denorm_done 561 562 563/* Undo the de-normalisation. */ 564Normalise_result: 565 cmpb UNMASKED_UNDERFLOW,FPU_denormal 566 je Signal_underflow 567 568/* The number must be a denormal if we got here. */ 569#ifdef PARANOID 570 /* But check it... just in case. */ 571 cmpw EXP_UNDER+1,EXP(%edi) 572 jne L_norm_bugged 573#endif /* PARANOID */ 574 575#ifdef PECULIAR_486 576 /* 577 * This implements a special feature of 80486 behaviour. 578 * Underflow will be signalled even if the number is 579 * not a denormal after rounding. 580 * This difference occurs only for masked underflow, and not 581 * in the unmasked case. 582 * Actual 80486 behaviour differs from this in some circumstances. 583 */ 584 orl %eax,%eax /* ms bits */ 585 js LPseudoDenormal /* Will be masked underflow */ 586#else 587 orl %eax,%eax /* ms bits */ 588 js L_Normalised /* No longer a denormal */ 589#endif /* PECULIAR_486 */ 590 591 jnz LDenormal_adj_exponent 592 593 orl %ebx,%ebx 594 jz L_underflow_to_zero /* The contents are zero */ 595 596LDenormal_adj_exponent: 597 decw EXP(%edi) 598 599LPseudoDenormal: 600 testb $0xff,FPU_bits_lost /* bits lost == underflow */ 601 movl TAG_Special,%edx 602 jz L_deNormalised 603 604 /* There must be a masked underflow */ 605 push %eax 606 pushl EX_Underflow 607 call EXCEPTION 608 popl %eax 609 popl %eax 610 movl TAG_Special,%edx 611 jmp L_deNormalised 612 613 614/* 615 * The operations resulted in a number too small to represent. 616 * Masked response. 617 */ 618L_underflow_to_zero: 619 push %eax 620 call SYMBOL_NAME(set_precision_flag_down) 621 popl %eax 622 623 push %eax 624 pushl EX_Underflow 625 call EXCEPTION 626 popl %eax 627 popl %eax 628 629/* Reduce the exponent to EXP_UNDER */ 630 movw EXP_UNDER,EXP(%edi) 631 movl TAG_Zero,%edx 632 jmp L_Store_significand 633 634 635/* The operations resulted in a number too large to represent. */ 636L_overflow: 637 addw EXTENDED_Ebias,EXP(%edi) /* Set for unmasked response. */ 638 push %edi 639 call SYMBOL_NAME(arith_overflow) 640 pop %edi 641 jmp fpu_reg_round_signed_special_exit 642 643 644Signal_underflow: 645 /* The number may have been changed to a non-denormal */ 646 /* by the rounding operations. */ 647 cmpw EXP_UNDER,EXP(%edi) 648 jle Do_unmasked_underflow 649 650 jmp L_Normalised 651 652Do_unmasked_underflow: 653 /* Increase the exponent by the magic number */ 654 addw $(3*(1<<13)),EXP(%edi) 655 push %eax 656 pushl EX_Underflow 657 call EXCEPTION 658 popl %eax 659 popl %eax 660 jmp L_Normalised 661 662 663#ifdef PARANOID 664#ifdef PECULIAR_486 665L_bugged_denorm_486: 666 pushl EX_INTERNAL|0x236 667 call EXCEPTION 668 popl %ebx 669 jmp L_exception_exit 670#else 671L_bugged_denorm: 672 pushl EX_INTERNAL|0x230 673 call EXCEPTION 674 popl %ebx 675 jmp L_exception_exit 676#endif /* PECULIAR_486 */ 677 678L_bugged_round24: 679 pushl EX_INTERNAL|0x231 680 call EXCEPTION 681 popl %ebx 682 jmp L_exception_exit 683 684L_bugged_round53: 685 pushl EX_INTERNAL|0x232 686 call EXCEPTION 687 popl %ebx 688 jmp L_exception_exit 689 690L_bugged_round64: 691 pushl EX_INTERNAL|0x233 692 call EXCEPTION 693 popl %ebx 694 jmp L_exception_exit 695 696L_norm_bugged: 697 pushl EX_INTERNAL|0x234 698 call EXCEPTION 699 popl %ebx 700 jmp L_exception_exit 701 702L_entry_bugged: 703 pushl EX_INTERNAL|0x235 704 call EXCEPTION 705 popl %ebx 706L_exception_exit: 707 mov $-1,%eax 708 jmp fpu_reg_round_special_exit 709#endif /* PARANOID */ 710