1.file "fmodf.s"
2
3
4// Copyright (c) 2000 - 2003, Intel Corporation
5// All rights reserved.
6//
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11//
12// * Redistributions of source code must retain the above copyright
13// notice, this list of conditions and the following disclaimer.
14//
15// * Redistributions in binary form must reproduce the above copyright
16// notice, this list of conditions and the following disclaimer in the
17// documentation and/or other materials provided with the distribution.
18//
19// * The name of Intel Corporation may not be used to endorse or promote
20// products derived from this software without specific prior written
21// permission.
22
23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34//
35// Intel Corporation is the author of this code, and requests that all
36// problem reports or change requests be submitted to it directly at
37// http://www.intel.com/software/products/opensource/libraries/num.htm.
38//
39// History
40//====================================================================
41// 02/02/00 Initial version
42// 03/02/00 New Algorithm
43// 04/04/00 Unwind support added
44// 08/15/00 Bundle added after call to __libm_error_support to properly
45//          set [the previously overwritten] GR_Parameter_RESULT.
46// 11/28/00 Set FR_Y to f9
47// 03/11/02 Fixed flags for fmodf(qnan,zero)
48// 05/20/02 Cleaned up namespace and sf0 syntax
49// 02/10/03 Reordered header: .section, .global, .proc, .align
50// 04/28/03 Fix: fmod(sNaN,0) no longer sets errno
51//
52// API
53//====================================================================
54// float fmodf(float,float);
55//
56// Overview of operation
57//====================================================================
58//  fmod(a,b)=a-i*b,
59//  where i is an integer such that, if b!=0,
60//  |i|<|a/b| and |a/b-i|<1
61
62// Algorithm
63//====================================================================
64// a). if |a|<|b|, return a
65// b). get quotient and reciprocal overestimates accurate to
66//     33 bits (q2,y2)
67// c). if the exponent difference (exponent(a)-exponent(b))
68//     is less than 32, truncate quotient to integer and
69//     finish in one iteration
70// d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
71//     round quotient estimate to single precision (k=RN(q2)),
72//     calculate partial remainder (a'=a-k*b),
73//     get quotient estimate (a'*y2), and repeat from c).
74
75// Special cases
76//====================================================================
77// b=+/-0: return NaN, call libm_error_support
78// a=+/-Inf, a=NaN or b=NaN: return NaN
79
80// Registers used
81//====================================================================
82// Predicate registers: p6-p11
83// General registers:   r2,r29,r32 (ar.pfs), r33-r39
84// Floating point registers: f6-f15
85
86GR_SAVE_B0                    = r33
87GR_SAVE_PFS                   = r34
88GR_SAVE_GP                    = r35
89GR_SAVE_SP                    = r36
90
91GR_Parameter_X                = r37
92GR_Parameter_Y                = r38
93GR_Parameter_RESULT           = r39
94GR_Parameter_TAG              = r40
95
96FR_X             = f10
97FR_Y             = f9
98FR_RESULT        = f8
99
100
101.section .text
102GLOBAL_IEEE754_ENTRY(fmodf)
103
104// inputs in f8, f9
105// result in f8
106
107{ .mfi
108  alloc r32=ar.pfs,1,4,4,0
109  // f6=|a|
110  fmerge.s f6=f0,f8
111  mov r2 = 0x0ffdd
112}
113  {.mfi
114  nop.m 0
115  // f7=|b|
116  fmerge.s f7=f0,f9
117  nop.i 0;;
118}
119
120{ .mfi
121  setf.exp f11 = r2
122  // (1) y0
123  frcpa.s1 f10,p6=f6,f7
124  nop.i 0
125}
126
127// eliminate special cases
128// Y +-NAN, +-inf, +-0?     p7
129{ .mfi
130      nop.m 999
131      fclass.m.unc  p7,p0 = f9, 0xe7
132      nop.i 999;;
133}
134
135// qnan snan inf norm     unorm 0 -+
136// 1    1    1   0        0     0 11
137// e                      3
138// X +-NAN, +-inf, ?        p9
139
140{ .mfi
141      nop.m 999
142      fclass.m.unc  p9,p0 = f8, 0xe3
143      nop.i 999
144}
145
146// |x| < |y|? Return x p8
147{ .mfi
148      nop.m 999
149      fcmp.lt.unc.s1 p8,p0 = f6,f7
150      nop.i 999 ;;
151}
152
153{ .mfi
154  nop.m 0
155  // normalize y (if |x|<|y|)
156  (p8) fma.s0 f9=f9,f1,f0
157  nop.i 0;;
158}
159
160  { .mfi
161  mov r2=0x1001f
162  // (2) q0=a*y0
163  (p6) fma.s1 f13=f6,f10,f0
164  nop.i 0
165}
166{ .mfi
167  nop.m 0
168  // (3) e0 = 1 - b * y0
169  (p6) fnma.s1 f12=f7,f10,f1
170  nop.i 0;;
171}
172
173  {.mfi
174  nop.m 0
175  // normalize x (if |x|<|y|)
176  (p8) fma.s.s0 f8=f8,f1,f0
177  nop.i 0
178}
179{.bbb
180  (p9) br.cond.spnt FMOD_X_NAN_INF
181  (p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO
182  // if |x|<|y|, return
183  (p8) br.ret.spnt    b0;;
184}
185
186  {.mfi
187  nop.m 0
188  // normalize x
189  fma.s0 f6=f6,f1,f0
190  nop.i 0
191}
192{.mfi
193  nop.m 0
194  // normalize y
195  fma.s0 f7=f7,f1,f0
196  nop.i 0;;
197}
198
199
200  {.mfi
201  // f15=2^32
202  setf.exp f15=r2
203  // (4) q1=q0+e0*q0
204  (p6) fma.s1 f13=f12,f13,f13
205  nop.i 0
206}
207{ .mfi
208  nop.m 0
209  // (5) e1 = e0 * e0 + 2^-34
210  (p6) fma.s1 f14=f12,f12,f11
211  nop.i 0;;
212}
213{.mlx
214  nop.m 0
215  movl r2=0x33a00000;;
216}
217{ .mfi
218  nop.m 0
219  // (6) y1 = y0 + e0 * y0
220  (p6) fma.s1 f10=f12,f10,f10
221  nop.i 0;;
222}
223{.mfi
224  // set f12=1.25*2^{-24}
225  setf.s f12=r2
226  // (7) q2=q1+e1*q1
227  (p6) fma.s1 f13=f13,f14,f13
228  nop.i 0;;
229}
230{.mfi
231  nop.m 0
232  fmerge.s f9=f8,f9
233  nop.i 0
234}
235{ .mfi
236  nop.m 0
237  // (8) y2 = y1 + e1 * y1
238  (p6) fma.s1 f10=f14,f10,f10
239  // set p6=0, p10=0
240  cmp.ne.and p6,p10=r0,r0;;
241}
242
243.align 32
244loop24:
245  {.mfi
246  nop.m 0
247  // compare q2, 2^32
248  fcmp.lt.unc.s1 p8,p7=f13,f15
249  nop.i 0
250}
251  {.mfi
252  nop.m 0
253  // will truncate quotient to integer, if exponent<32 (in advance)
254  fcvt.fx.trunc.s1 f11=f13
255  nop.i 0;;
256}
257  {.mfi
258  nop.m 0
259  // if exponent>32, round quotient to single precision (perform in advance)
260  fma.s.s1 f13=f13,f1,f0
261  nop.i 0;;
262}
263  {.mfi
264  nop.m 0
265  // set f12=sgn(a)
266  (p8) fmerge.s f12=f8,f1
267  nop.i 0
268}
269  {.mfi
270  nop.m 0
271  // normalize truncated quotient
272  (p8) fcvt.xf f13=f11
273  nop.i 0;;
274}
275  { .mfi
276  nop.m 0
277  // calculate remainder (assuming f13=RZ(Q))
278  (p7) fnma.s1 f14=f13,f7,f6
279  nop.i 0
280}
281  {.mfi
282  nop.m 0
283  // also if exponent>32, round quotient to single precision
284  // and subtract 1 ulp: q=q-q*(1.25*2^{-24})
285  (p7) fnma.s.s1 f11=f13,f12,f13
286  nop.i 0;;
287}
288
289  {.mfi
290  nop.m 0
291  // (p8) calculate remainder (82-bit format)
292  (p8) fnma.s1 f11=f13,f7,f6
293  nop.i 0
294}
295  {.mfi
296  nop.m 0
297  // (p7) calculate remainder (assuming f11=RZ(Q))
298  (p7) fnma.s1 f6=f11,f7,f6
299  nop.i 0;;
300}
301
302
303  {.mfi
304  nop.m 0
305  // Final iteration (p8): is f6 the correct remainder (quotient was not overestimated) ?
306  (p8) fcmp.lt.unc.s1 p6,p10=f11,f0
307  nop.i 0;;
308}
309  {.mfi
310  nop.m 0
311  // get new quotient estimation: a'*y2
312  (p7) fma.s1 f13=f14,f10,f0
313  nop.i 0
314}
315  {.mfb
316  nop.m 0
317  // was f14=RZ(Q) ? (then new remainder f14>=0)
318  (p7) fcmp.lt.unc.s1 p7,p9=f14,f0
319  nop.b 0;;
320}
321
322
323.pred.rel "mutex",p6,p10
324  {.mfb
325  nop.m 0
326  // add b to estimated remainder (to cover the case when the quotient was overestimated)
327  // also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
328  (p6) fma.s.s0 f8=f11,f12,f9
329  nop.b 0
330}
331  {.mfb
332  nop.m 0
333  // calculate remainder (single precision)
334  // set correct sign of result before returning
335  (p10) fma.s.s0 f8=f11,f12,f0
336  (p8) br.ret.sptk b0;;
337}
338  {.mfi
339  nop.m 0
340  // if f13!=RZ(Q), get alternative quotient estimation: a''*y2
341  (p7) fma.s1 f13=f6,f10,f0
342  nop.i 0
343}
344  {.mfb
345  nop.m 0
346  // if f14 was RZ(Q), set remainder to f14
347  (p9) mov f6=f14
348  br.cond.sptk loop24;;
349}
350
351  {  .mmb
352    nop.m 0
353    nop.m 0
354    br.ret.sptk b0;;
355 }
356
357FMOD_X_NAN_INF:
358
359
360// Y zero ?
361{.mfi
362  nop.m 0
363  fclass.m p10,p0=f8,0xc3     // Test x=nan
364  nop.i 0
365}
366{.mfi
367  nop.m 0
368  fma.s1 f10=f9,f1,f0
369  nop.i 0;;
370}
371
372{.mfi
373  nop.m 0
374  fma.s0 f8=f8,f1,f0
375  nop.i 0
376}
377{.mfi
378  nop.m 0
379(p10) fclass.m p10,p0=f9,0x07 // Test x=nan, and y=zero
380  nop.i 0;;
381}
382{.mfb
383 nop.m 0
384 fcmp.eq.unc.s1 p11,p0=f10,f0
385(p10) br.ret.spnt b0;;        // Exit with result=x if x=nan and y=zero
386}
387{.mib
388  nop.m 0
389  nop.i 0
390  // if Y zero
391  (p11) br.cond.spnt FMOD_Y_ZERO;;
392}
393
394// X infinity? Return QNAN indefinite
395{ .mfi
396      nop.m 999
397      fclass.m.unc  p8,p9 = f8, 0x23
398      nop.i 999;;
399}
400// Y NaN ?
401{.mfi
402     nop.m 999
403(p8) fclass.m p9,p8=f9,0xc3
404     nop.i 0;;
405}
406{.mfi
407    nop.m 999
408(p8)  frcpa.s0 f8,p0 = f8,f8
409    nop.i 0
410}
411{ .mfi
412      nop.m 999
413    // also set Denormal flag if necessary
414(p8)  fma.s0 f9=f9,f1,f0
415      nop.i 999 ;;
416}
417
418{ .mfb
419      nop.m 999
420(p8)  fma.s.s0 f8=f8,f1,f0
421      nop.b 999 ;;
422}
423
424{ .mfb
425      nop.m 999
426(p9)  frcpa.s0 f8,p7=f8,f9
427      br.ret.sptk    b0 ;;
428}
429
430
431FMOD_Y_NAN_INF_ZERO:
432
433// Y INF
434{ .mfi
435      nop.m 999
436      fclass.m.unc  p7,p0 = f9, 0x23
437      nop.i 999 ;;
438}
439
440{ .mfb
441      nop.m 999
442(p7)  fma.s.s0 f8=f8,f1,f0
443(p7)  br.ret.spnt    b0 ;;
444}
445
446// Y NAN?
447{ .mfi
448      nop.m 999
449      fclass.m.unc  p9,p0 = f9, 0xc3
450      nop.i 999 ;;
451}
452
453{ .mfb
454      nop.m 999
455(p9)  fma.s.s0 f8=f9,f1,f0
456(p9)  br.ret.spnt    b0 ;;
457}
458
459FMOD_Y_ZERO:
460// Y zero? Must be zero at this point
461// because it is the only choice left.
462// Return QNAN indefinite
463
464{.mfi
465  nop.m 0
466  // set Invalid
467  frcpa.s0 f12,p0=f0,f0
468  nop.i 999
469}
470// X NAN?
471{ .mfi
472      nop.m 999
473      fclass.m.unc  p9,p10 = f8, 0xc3
474      nop.i 999 ;;
475}
476{ .mfi
477      nop.m 999
478(p10)  fclass.nm  p9,p10 = f8, 0xff
479      nop.i 999 ;;
480}
481
482{.mfi
483 nop.m 999
484 (p9) frcpa.s0 f11,p7=f8,f0
485 nop.i 0;;
486}
487
488{ .mfi
489      nop.m 999
490(p10) frcpa.s0 f11,p7 = f0,f0
491nop.i 999;;
492}
493
494{ .mfi
495      nop.m 999
496      fmerge.s      f10 = f8, f8
497      nop.i 999
498}
499
500{ .mfi
501      nop.m 999
502      fma.s.s0 f8=f11,f1,f0
503      nop.i 999;;
504}
505
506EXP_ERROR_RETURN:
507
508
509{ .mib
510      nop.m 0
511      mov GR_Parameter_TAG=122
512      br.sptk __libm_error_region;;
513}
514
515GLOBAL_IEEE754_END(fmodf)
516libm_alias_float_other (__fmod, fmod)
517
518LOCAL_LIBM_ENTRY(__libm_error_region)
519.prologue
520{ .mfi
521        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
522        nop.f 0
523.save   ar.pfs,GR_SAVE_PFS
524        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
525}
526{ .mfi
527.fframe 64
528        add sp=-64,sp                           // Create new stack
529        nop.f 0
530        mov GR_SAVE_GP=gp                       // Save gp
531};;
532{ .mmi
533        stfs [GR_Parameter_Y] = FR_Y,16         // Save Parameter 2 on stack
534        add GR_Parameter_X = 16,sp              // Parameter 1 address
535.save   b0, GR_SAVE_B0
536        mov GR_SAVE_B0=b0                       // Save b0
537};;
538.body
539{ .mib
540        stfs [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
541        add   GR_Parameter_RESULT = 0,GR_Parameter_Y
542    nop.b 0                                 // Parameter 3 address
543}
544{ .mib
545        stfs [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack
546        add   GR_Parameter_Y = -16,GR_Parameter_Y
547        br.call.sptk b0=__libm_error_support#;;  // Call error handling function
548}
549{ .mmi
550        nop.m 0
551        nop.m 0
552        add   GR_Parameter_RESULT = 48,sp
553};;
554{ .mmi
555        ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
556.restore sp
557        add   sp = 64,sp                       // Restore stack pointer
558        mov   b0 = GR_SAVE_B0                  // Restore return address
559};;
560{ .mib
561        mov   gp = GR_SAVE_GP                  // Restore gp
562        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
563        br.ret.sptk     b0                     // Return
564};;
565
566LOCAL_LIBM_END(__libm_error_region)
567
568.type   __libm_error_support#,@function
569.global __libm_error_support#
570