1.file "libm_scalblnf.s"
2
3
4// Copyright (c) 2001 - 2003, Intel Corporation
5// All rights reserved.
6//
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11//
12// * Redistributions of source code must retain the above copyright
13// notice, this list of conditions and the following disclaimer.
14//
15// * Redistributions in binary form must reproduce the above copyright
16// notice, this list of conditions and the following disclaimer in the
17// documentation and/or other materials provided with the distribution.
18//
19// * The name of Intel Corporation may not be used to endorse or promote
20// products derived from this software without specific prior written
21// permission.
22
23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34//
35// Intel Corporation is the author of this code, and requests that all
36// problem reports or change requests be submitted to it directly at
37// http://www.intel.com/software/products/opensource/libraries/num.htm.
38//
39// History
40//==============================================================
41// 08/03/01 Initial version
42// 08/23/01 Corrected error tag number
43// 02/06/02 Corrected to handle 32- or 64-bit integers
44// 05/20/02 Cleaned up namespace and sf0 syntax
45// 02/10/03 Reordered header: .section, .global, .proc, .align
46// 08/25/03 Improved performance
47//
48// API
49//==============================================================
50// float __libm_scalblnf  (float x, long int n, int long_int_type)
51// input  floating point f8 and long int n (r33)
52// input  long_int_type = 0 if long int defined as 32 bits, = 1 if 64 bits
53// output floating point f8
54//
55// Returns x* 2**n using an fma and detects overflow
56// and underflow.
57//
58//
59// Strategy:
60//  Compute biased exponent of result exp_Result = N + exp_X
61//  Break into ranges:
62//   exp_Result > 0x1007e                 -> Certain overflow
63//   exp_Result = 0x1007e                 -> Possible overflow
64//   0x0ff81 <= exp_Result < 0x1007e      -> No over/underflow (main path)
65//   0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow
66//   exp_Result < 0x0ff81 - 23            -> Certain underflow
67
// ---- Floating-point register roles -------------------------------------
// f8 is both the incoming x and the returned result.
68FR_Big         = f6   // Positive overflow threshold (setf.exp from GR_pos_ov_limit)
69FR_NBig        = f7   // Negative overflow threshold (setf.exp from GR_neg_ov_limit)
70FR_Floating_X  = f8   // Input x
71FR_Result      = f8   // Result (same register as the input x)
72FR_Result2     = f9   // Product recomputed under status field s2
73FR_Result3     = f10  // Product recomputed under status field s3
74FR_Norm_X      = f11  // fnorm.s1 of x
75FR_Two_N       = f12  // 2**N, built with setf.exp from GR_N_Biased
76
// ---- Scratch general registers ------------------------------------------
77GR_neg_ov_limit= r14  // 0x3007f
78GR_N_Biased    = r15  // GR_Bias + N
79GR_Big         = r16  // 35000: N at or above this is clamped
80GR_NBig        = r17  // -35000: N at or below this is clamped
81GR_exp_Result  = r18  // GR_exp_X + N
82GR_pos_ov_limit= r19  // 0x1007f
83GR_Bias        = r20  // 0x0ffff
84GR_N_as_int    = r21  // N as a 64-bit value (sign-extended or copied from r33), later clamped
85GR_signexp_X   = r22  // getf.exp of x (or of normalized x on the unorm path)
86GR_exp_X       = r23  // GR_signexp_X & GR_exp_mask
87GR_exp_mask    = r24  // 0x1ffff
88GR_max_exp     = r25  // 0x1007e
89GR_min_exp     = r26  // 0x0ff81
90GR_min_den_exp = r27  // 0x0ff81 - 23
91
// ---- Registers used by __libm_error_region ------------------------------
// r33 and r34 carry the inputs N and long_int_type on entry; they are reused
// here as save slots once those inputs have been consumed.
92GR_SAVE_B0          = r32  // Saved b0
93GR_SAVE_GP          = r33  // Saved gp (same register as incoming N)
94GR_SAVE_PFS         = r34  // Saved ar.pfs (same register as incoming long_int_type)
95GR_Parameter_X      = r35  // Address of x on the stack
96GR_Parameter_Y      = r36  // Address of N on the stack
97GR_Parameter_RESULT = r37  // Address of the result on the stack
98GR_Tag              = r38  // Error tag (205 overflow, 206 underflow)
99
100.section .text
//
// __libm_scalblnf
//   In:   f8  = x
//         r33 = N (sign-extended from 32 bits when r34 == 0, copied as-is otherwise)
//         r34 = long_int_type
//   Out:  f8  = x * 2**N rounded to single precision
//
//   - x = NaN/Inf/zero, or r33 == 0: returns x * 1.0 (fma.s.s0) immediately.
//   - N >= 35000 or N <= -35000 is clamped to +-35000 before 2**N is built.
//   - The result exponent GR_exp_Result = exp(x) + N selects the exit path:
//     main-path return, certain/possible overflow, certain/possible underflow.
//   - Overflow and underflow exits branch to __libm_error_region with
//     GR_Tag = 205 / 206 respectively.
//
101GLOBAL_LIBM_ENTRY(__libm_scalblnf)
102
103//
104//   Is x NAN, INF, ZERO, +-?
105//   Build the exponent Bias
106//
107{    .mfi
108     getf.exp      GR_signexp_X = FR_Floating_X // Get signexp of x
109     fclass.m      p6,p0 = FR_Floating_X, 0xe7  // @snan | @qnan | @inf | @zero
110     mov           GR_Bias = 0x0ffff
111}
112//
113//   Normalize x
114//   Is long integer type 32 bits?
115//
116{    .mfi
117     mov           GR_Big = 35000      // If N this big then certain overflow
118     fnorm.s1      FR_Norm_X = FR_Floating_X
119     cmp.eq        p8,p9 = r34,r0      // p8: long_int_type == 0, p9: otherwise
120}
121;;
122
123//   Sign extend N if long int is 32 bits
//   Note: p9 is consumed by the first slot and then redefined by fclass.m
//   as the "x is unorm" predicate used by the branch at the end of the next group.
124{    .mfi
125(p9) mov           GR_N_as_int = r33     // Copy N if long int is 64 bits
126     fclass.m      p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm
127(p8) sxt4          GR_N_as_int = r33     // Sign extend N if long int is 32 bits
128}
129{ .mfi
130     mov           GR_NBig = -35000    // If N this small then certain underflow
131     nop.f         0
132     mov           GR_max_exp = 0x1007e      // Exponent of maximum float
133}
134;;
135
136//   Create biased exponent for 2**N
//   p7 / p8 flag N outside the open range (-35000, 35000); they drive the
//   clamping at SCALBNF_COMMON.
137{    .mfi
138     add           GR_N_Biased = GR_Bias,GR_N_as_int
139     nop.f         0
140     cmp.ge        p7, p0 = GR_N_as_int, GR_Big  // Certain overflow?
141}
142{    .mib
143     cmp.le        p8, p0 = GR_N_as_int, GR_NBig // Certain underflow?
144     mov           GR_min_exp = 0x0ff81      // Exponent of minimum float
145(p9) br.cond.spnt  SCALBNF_UNORM              // Branch if x=unorm
146}
147;;
148
149SCALBNF_COMMON:
150// Main path continues.  Also return here from x=unorm path.
151//   Create 2**N
//   2**N is built unconditionally from the unclamped biased exponent; when
//   p7 ends up set it is rebuilt below from the clamped N.
//   .pred.rel declares p7 and p8 mutually exclusive (N cannot be both
//   >= 35000 and <= -35000), covering the two predicated writes to GR_N_as_int.
152.pred.rel "mutex",p7,p8
153{    .mfi
154     setf.exp      FR_Two_N = GR_N_Biased
155     nop.f         0
156(p7) mov           GR_N_as_int = GR_Big      // Limit max N
157}
158{    .mfi
159(p8) mov           GR_N_as_int = GR_NBig     // Limit min N
160     nop.f         0
161(p8) cmp.eq        p7,p0 = r0,r0             // Set p7 if |N| big
162}
163;;
164
165//
166//   Create biased exponent for 2**N for N big
167//   Is N zero?
168//
//   The zero test reads the raw input register r33 (not GR_N_as_int) and is
//   OR-ed into p6, which already holds the NaN/Inf/zero classification of x.
169{    .mfi
170(p7) add           GR_N_Biased = GR_Bias,GR_N_as_int
171     nop.f         0
172     cmp.eq.or     p6,p0 = r33,r0
173}
174{    .mfi
175     mov           GR_pos_ov_limit = 0x1007f // Exponent for positive overflow
176     nop.f         0
177     mov           GR_exp_mask = 0x1ffff     // Exponent mask
178}
179;;
180
181//
182//   Create 2**N for N big
183//   Return x when N = 0 or X = Nan, Inf, Zero
184//
185{    .mfi
186(p7) setf.exp      FR_Two_N = GR_N_Biased
187     nop.f         0
188     mov           GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float
189}
190{    .mfb
191     and           GR_exp_X = GR_exp_mask, GR_signexp_X // Strip sign, keep biased exponent
192(p6) fma.s.s0      FR_Result = FR_Floating_X, f1, f0    // Result = x * 1.0 + 0.0
193(p6) br.ret.spnt   b0
194}
195;;
196
197//
198//   Raise Denormal operand flag with compare
199//   Compute biased result exponent
200//
//   The fcmp is issued for its status-flag side effect; p11 is not read
//   anywhere else in this file.
201{    .mfi
202     add           GR_exp_Result = GR_exp_X, GR_N_as_int
203     fcmp.ge.s0    p0,p11 = FR_Floating_X,f0
204     mov           GR_neg_ov_limit = 0x3007f // Exponent for negative overflow
205}
206;;
207
208//
209//   Do final operation
210//
//   p7: exp_Result <  max_exp (no overflow), p6: exp_Result >= max_exp
//   p9: exp_Result <  min_den_exp (certain underflow)
211{    .mfi
212     cmp.lt        p7,p6 = GR_exp_Result, GR_max_exp  // Test no overflow
213     fma.s.s0      FR_Result = FR_Two_N,FR_Norm_X,f0
214     cmp.lt        p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
215}
216{    .mfb
217     nop.m         0
218     nop.f         0
219(p9) br.cond.spnt  SCALBNF_UNDERFLOW           // Branch if certain underflow
220}
221;;
222
//   Refine the classification (.unc clears both targets when the qualifying
//   predicate is false):
//     p6 = exp_Result >  max_exp  (certain overflow)
//     p8 = exp_Result == max_exp  (possible overflow)
//     p7 = exp_Result >= min_exp  (no over/underflow)
//     p9 = exp_Result <  min_exp  (possible underflow)
//   NOTE(review): the br.ret relies on seeing the p7 written by the cmp.ge.unc
//   in this same group -- confirm against the IA-64 compare-to-branch
//   dependency rule.
223{    .mib
224(p6) cmp.gt.unc    p6,p8 = GR_exp_Result, GR_max_exp  // Test sure overflow
225(p7) cmp.ge.unc    p7,p9 = GR_exp_Result, GR_min_exp  // Test no over/underflow
226(p7) br.ret.sptk   b0                         // Return from main path
227}
228;;
229
230{    .bbb
231(p6) br.cond.spnt  SCALBNF_OVERFLOW            // Branch if certain overflow
232(p8) br.cond.spnt  SCALBNF_POSSIBLE_OVERFLOW   // Branch if possible overflow
233(p9) br.cond.spnt  SCALBNF_POSSIBLE_UNDERFLOW  // Branch if possible underflow
234}
235;;
236
237// Here if possible underflow.
238// Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81
239SCALBNF_POSSIBLE_UNDERFLOW:
240//
241// Here if possible overflow.
242// Resulting exponent: 0x1007e = exp_Result
// Both labels mark the same address: the possible-overflow and
// possible-underflow cases share the code below.
243SCALBNF_POSSIBLE_OVERFLOW:
244
245//   Set up necessary status fields
246//
247//   S0 user supplied status
248//   S2 user supplied status + WRE + TD  (Overflows)
249//   S3 user supplied status + FZ + TD   (Underflows)
250//
251{    .mfi
252     nop.m         0
253     fsetc.s3      0x7F,0x41
254     nop.i         0
255}
256{    .mfi
257     nop.m         0
258     fsetc.s2      0x7F,0x42
259     nop.i         0
260}
261;;
262
263//
264//   Do final operation with s2 and s3
265//
//   The same product is recomputed under s3 (-> FR_Result3) and s2
//   (-> FR_Result2); the overflow thresholds FR_NBig / FR_Big are
//   materialized from their exponent constants in the M slots.
266{    .mfi
267     setf.exp      FR_NBig = GR_neg_ov_limit
268     fma.s.s3      FR_Result3 = FR_Two_N,FR_Norm_X,f0
269     nop.i         0
270}
271{    .mfi
272     setf.exp      FR_Big = GR_pos_ov_limit
273     fma.s.s2      FR_Result2 = FR_Two_N,FR_Norm_X,f0
274     nop.i         0
275}
276;;
277
278//   Check for overflow or underflow.
279//   Restore s3
280//   Restore s2
281//
282{    .mfi
283     nop.m         0
284     fsetc.s3      0x7F,0x40
285     nop.i         0
286}
287{    .mfi
288     nop.m         0
289     fsetc.s2      0x7F,0x40
290     nop.i         0
291}
292;;
293
294//
295//   Is the result zero?
296//
//   p6: the s3 result is zero.
//   p7: the s2 result >= FR_Big (positive overflow); p8: otherwise.
297{    .mfi
298     nop.m         0
299     fclass.m      p6, p0 =  FR_Result3, 0x007
300     nop.i         0
301}
302{    .mfi
303     nop.m         0
304     fcmp.ge.s1    p7, p8 = FR_Result2 , FR_Big
305     nop.i         0
306}
307;;
308
309//
310//   Detect masked underflow - Tiny + Inexact Only
311//
//   Only when the s3 result was zero: p6 stays set if the s0 result
//   differs from the s2 result.
312{    .mfi
313     nop.m         0
314(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
315     nop.i         0
316}
317;;
318
319//
320//   Is result bigger than the allowed range?
321//   Branch out for underflow
322//
//   When not a positive overflow (p8): p9 = the s2 result <= FR_NBig
//   (negative overflow). p10 is written but not read in this file.
323{    .mfb
324     nop.m          0
325(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
326(p6) br.cond.spnt   SCALBNF_UNDERFLOW
327}
328;;
329
330//
331//   Branch out for overflow
332//
333{ .bbb
334(p7) br.cond.spnt   SCALBNF_OVERFLOW
335(p9) br.cond.spnt   SCALBNF_OVERFLOW
336     br.ret.sptk    b0             //   No overflow or underflow detected: return FR_Result.
337}
338;;
339
340// Here if result overflows
// Allocates a frame with 3 inputs and 4 outputs (r35-r38) for the error
// support call, sets the tag and branches (not calls) to the error region.
341SCALBNF_OVERFLOW:
342{ .mib
343     alloc         r32=ar.pfs,3,0,4,0
344     addl          GR_Tag = 205, r0    // Set error tag for overflow
345     br.cond.sptk  __libm_error_region // Call error support for overflow
346}
347;;
348
349// Here if result underflows
// Same frame setup as the overflow stub, with the underflow tag.
350SCALBNF_UNDERFLOW:
351{ .mib
352     alloc         r32=ar.pfs,3,0,4,0
353     addl          GR_Tag = 206, r0    // Set error tag for underflow
354     br.cond.sptk  __libm_error_region // Call error support for underflow
355}
356;;
357
358// Here if x=unorm
// The exponent read at entry came from the unnormalized x; re-read it from
// the normalized copy so that GR_exp_X reflects FR_Norm_X.
359SCALBNF_UNORM:
360{ .mib
361     getf.exp      GR_signexp_X = FR_Norm_X // Get signexp of normalized x
362     nop.i         0
363     br.cond.sptk  SCALBNF_COMMON            // Return to main path
364}
365;;
366
367
368GLOBAL_LIBM_END(__libm_scalblnf)
//
// __libm_error_region
//   Reached by branch (not call) from SCALBNF_OVERFLOW / SCALBNF_UNDERFLOW,
//   which have already executed alloc and set GR_Tag. It builds a 64-byte
//   stack frame, stores N, x and the computed result in it, calls
//   __libm_error_support, reloads the result into f8 and returns to the
//   address held in b0 on entry.
//
//   Frame layout relative to the new sp (sp_entry - 64):
//     sp+16 : x       (FR_Norm_X, single)     <- GR_Parameter_X
//     sp+32 : N       (GR_N_as_int, 8 bytes)  <- GR_Parameter_Y at the call
//     sp+48 : result  (FR_Result, single)     <- GR_Parameter_RESULT
//
369LOCAL_LIBM_ENTRY(__libm_error_region)
370
371//
372// Get stack address of N
373//
// GR_Parameter_Y = sp_entry - 32, i.e. new sp + 32 once sp is adjusted.
// GR_SAVE_PFS (r34) is free here: long_int_type has already been consumed.
374.prologue
375{ .mfi
376    add   GR_Parameter_Y=-32,sp
377    nop.f 0
378.save   ar.pfs,GR_SAVE_PFS
379    mov  GR_SAVE_PFS=ar.pfs
380}
381//
382// Adjust sp
383//
// GR_SAVE_GP (r33) overwrites the incoming N; its value lives on in GR_N_as_int.
384{ .mfi
385.fframe 64
386   add sp=-64,sp
387   nop.f 0
388   mov GR_SAVE_GP=gp
389};;
390
391//
392//  Store N on stack in correct position
393//  Locate the address of x on stack
394//
// The st8 post-increments GR_Parameter_Y by 16, leaving it at sp+48.
395{ .mmi
396   st8 [GR_Parameter_Y] =  GR_N_as_int,16
397   add GR_Parameter_X = 16,sp
398.save   b0, GR_SAVE_B0
399   mov GR_SAVE_B0=b0
400};;
401
402//
403// Store x on the stack.
404// Get address for result on stack.
405//
// x is stored from FR_Norm_X because f8 now holds the result.
// GR_Parameter_RESULT = sp+48; after the result is stored there,
// GR_Parameter_Y is moved back to sp+32 (the address of N) for the call.
406.body
407{ .mib
408   stfs [GR_Parameter_X] = FR_Norm_X
409   add   GR_Parameter_RESULT = 0,GR_Parameter_Y
410   nop.b 0
411}
412{ .mib
413   stfs [GR_Parameter_Y] = FR_Result
414   add   GR_Parameter_Y = -16,GR_Parameter_Y
415   br.call.sptk b0=__libm_error_support#
416};;
417
418//
419//  Get location of result on stack
420//
// Recomputed from sp rather than reusing the value passed to the callee.
421{ .mmi
422   add   GR_Parameter_RESULT = 48,sp
423   nop.m 0
424   nop.i 0
425};;
426
427//
428//  Get the new result
429//
// Reload the result slot into f8, release the frame and restore b0.
430{ .mmi
431   ldfs  FR_Result = [GR_Parameter_RESULT]
432.restore sp
433   add   sp = 64,sp
434   mov   b0 = GR_SAVE_B0
435};;
436
437//
438//  Restore gp, ar.pfs and return
439//
// b0 was saved on entry, so this returns to the caller of __libm_scalblnf.
440{ .mib
441   mov   gp = GR_SAVE_GP
442   mov   ar.pfs = GR_SAVE_PFS
443   br.ret.sptk     b0
444};;
445
446LOCAL_LIBM_END(__libm_error_region)
447
// __libm_error_support is called from __libm_error_region but is not defined
// in the visible code; declare it here as a global function symbol.
448.type   __libm_error_support#,@function
449.global __libm_error_support#
450