.file "fdiml.s"


// Copyright (c) 2001 - 2003, Intel Corporation
// All rights reserved.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 06/08/01 Initial version
// 08/23/01 Corrected error tag number
// 05/20/02 Cleaned up namespace and sf0 syntax
// 01/28/03 Improved performance; fixed parameters for call to error routine
//
// API
//==============================================================
// long double fdiml( long double x, long double y );
// input  floating point f8, f9
// output floating point f8
//
//
// Overview of operation
//==============================================================
// fdiml determines the positive difference between the arguments
//   Result = x - y   if x >  y
//          = +0      if x <= y
//
// Error support is called if x-y overflows for x > y
//
// Predicate usage in the main path:
//   p6        x <= y                       (result is +0)
//   p7        complement of p6             (result is x - y)
//   p8        result >= fBig, i.e. overflow; forced to 0 when p9 is set
//   p9 / p10  x or y is natval, nan or inf / neither of them is
//

// Registers used
//==============================================================
// General purpose registers: r14, r32 - r39

rExpBig             = r14

// r36-39 parameters for libm_error_support
GR_SAVE_B0          = r33
GR_SAVE_GP          = r34
GR_SAVE_PFS         = r35

GR_Parameter_X      = r36
GR_Parameter_Y      = r37
GR_Parameter_RESULT = r38
GR_Parameter_TAG    = r39

// Floating-point registers: f8 - f12

f_tmp_result        = f10
fBig                = f11
fNormX              = f12

// Predicate registers: p6 - p10


.section .text
GLOBAL_LIBM_ENTRY(fdiml)

// 0x13fff is a biased register-format exponent (bias 0xffff), so the
// constant built from it by setf.exp below is 2^16384.
{ .mfi
      mov        rExpBig = 0x13fff           // Exponent to indicate overflow
      fcmp.le.s1 p6,p7 = f8, f9              // Is x <= y?
      nop.i      0
}
{ .mfi
      nop.m      0
      fnorm.s1   fNormX = f8                 // Save x (f8 is overwritten below)
      nop.i      0
}
;;

{ .mfi
      setf.exp   fBig = rExpBig              // Constant to test for overflow
      fcmp.eq.s0 p8,p0 = f8, f9              // Dummy op to set Denormal or Invalid
      nop.i      0
}
;;

{ .mfi
      nop.m      0
      fclass.m   p9,p10 = f8, 0x1e3          // Test for x natval, nan, inf
      nop.i      0
}
;;

// Exactly one of the next two instructions executes (p6/p7 are complements).
{ .mfi
      nop.m      0
(p6)  fmerge.s   f8 = f0, f0                 // Result is +0 if x <= y
      nop.i      0
}
{ .mfi
      nop.m      0
(p7)  fms.s0     f8 = f8, f1, f9             // Result is x - y if x > y
      nop.i      0
}
;;

// y is only classified when x was not special; otherwise p9 stays set.
{ .mfi
      nop.m      0
(p10) fclass.m   p9,p10 = f9, 0x1e3          // Test for y natval, nan, inf
      nop.i      0
}
;;

{ .mfi
      nop.m      0
(p10) fcmp.ge.s1 p8,p0 = f8, fBig            // Test result for overflow
      nop.i      0
}
;;

{ .mbb
(p9)  cmp.ne     p8,p0 = r0,r0               // Clear p8 if x or y natval,nan,inf
(p8)  br.cond.spnt FDIM_OVERFLOW             // Branch if result overflows
      br.ret.sptk   b0                       // Normal return
}
;;


// Here if result will overflow
// alloc sets up 2 inputs (r32-r33), 2 locals (r34-r35) and 4 outputs
// (r36-r39); the outputs carry the libm_error_support parameters.
// NOTE(review): f8 was already overwritten with x - y by the predicated fms
// above, so this instruction computes (x - y) - y; the original x is kept in
// fNormX. Confirm that using f8 here is intended.
FDIM_OVERFLOW:
{ .mfi
      alloc      r32=ar.pfs,2,2,4,0
      fms.s0     f_tmp_result = f8,f1,f9     // Normalize result force overflow
      nop.i      0
}
{ .mfb
      mov        GR_Parameter_TAG = 195      // Error code
      nop.f      0
      br.cond.sptk __libm_error_region       // Branch to error code
}
;;

GLOBAL_LIBM_END(fdiml)
libm_alias_ldouble_other (fdim, fdim)


LOCAL_LIBM_ENTRY(__libm_error_region)
// Call error support to report possible range error
//
// A 64-byte frame is created; relative to the new sp the stub stores
//   sp+16  x (fNormX)           -> GR_Parameter_X
//   sp+32  y (f9)               -> GR_Parameter_Y
//   sp+48  result (f_tmp_result)-> GR_Parameter_RESULT
// After the call the value at sp+48 is reloaded into f8 as the return value.
.prologue

{ .mfi
      add   GR_Parameter_Y=-32,sp            // Parameter 2 value
      nop.f 0
.save   ar.pfs,GR_SAVE_PFS
      mov  GR_SAVE_PFS=ar.pfs                // Save ar.pfs
}
{ .mfi
.fframe 64
      add sp=-64,sp                          // Create new stack
      nop.f 0
      mov GR_SAVE_GP=gp                      // Save gp
};;

{ .mmi
      stfe [GR_Parameter_Y] = f9,16          // STORE Parameter 2 on stack
      add GR_Parameter_X = 16,sp             // Parameter 1 address
.save   b0, GR_SAVE_B0
      mov GR_SAVE_B0=b0                      // Save b0
};;

.body
{ .mib
      stfe [GR_Parameter_X] = fNormX         // STORE Parameter 1 on stack
      add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
      nop.b 0
}
{ .mib
      stfe [GR_Parameter_Y] = f_tmp_result   // STORE Parameter 3 on stack
      add   GR_Parameter_Y = -16,GR_Parameter_Y     // Point back at parameter 2
      br.call.sptk b0=__libm_error_support#  // Call error handling function
};;

{ .mmi
      add   GR_Parameter_RESULT = 48,sp      // Recompute address of result slot
      nop.m 0
      nop.i 0
};;

{ .mmi
      ldfe  f8 = [GR_Parameter_RESULT]       // Get return result off stack
.restore sp
      add   sp = 64,sp                       // Restore stack pointer
      mov   b0 = GR_SAVE_B0                  // Restore return address
};;

{ .mib
      mov   gp = GR_SAVE_GP                  // Restore gp
      mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
      br.ret.sptk     b0                     // Return
};;

LOCAL_LIBM_END(__libm_error_region)


.type   __libm_error_support#,@function
.global __libm_error_support#