1.file "fmod.s"
2
3
4// Copyright (c) 2000 - 2003, Intel Corporation
5// All rights reserved.
6//
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11//
12// * Redistributions of source code must retain the above copyright
13// notice, this list of conditions and the following disclaimer.
14//
15// * Redistributions in binary form must reproduce the above copyright
16// notice, this list of conditions and the following disclaimer in the
17// documentation and/or other materials provided with the distribution.
18//
19// * The name of Intel Corporation may not be used to endorse or promote
20// products derived from this software without specific prior written
21// permission.
22
23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34//
35// Intel Corporation is the author of this code, and requests that all
36// problem reports or change requests be submitted to it directly at
37// http://www.intel.com/software/products/opensource/libraries/num.htm.
38//
39// History
40//====================================================================
41// 02/02/00 Initial version
42// 03/02/00 New Algorithm
43// 04/04/00 Unwind support added
44// 08/15/00 Bundle added after call to __libm_error_support to properly
45//          set [the previously overwritten] GR_Parameter_RESULT.
46// 11/28/00 Set FR_Y to f9
47// 03/11/02 Fixed flags for fmod(qnan,zero)
48// 05/20/02 Cleaned up namespace and sf0 syntax
49// 02/10/03 Reordered header: .section, .global, .proc, .align
50// 04/28/03 Fix: fmod(sNaN,0) no longer sets errno
51//
52// API
53//====================================================================
54// double fmod(double,double);
55//
56// Overview of operation
57//====================================================================
58//  fmod(a,b)=a-i*b,
59//  where i is an integer such that, if b!=0,
60//  |i|<|a/b| and |a/b-i|<1
61//
62// Algorithm
63//====================================================================
64// a). if |a|<|b|, return a
65// b). get quotient and reciprocal overestimates accurate to
66//     33 bits (q2,y2)
67// c). if the exponent difference (exponent(a)-exponent(b))
68//     is less than 32, truncate quotient to integer and
69//     finish in one iteration
70// d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
71//     round quotient estimate to single precision (k=RN(q2)),
72//     calculate partial remainder (a'=a-k*b),
73//     get quotient estimate (a'*y2), and repeat from c).
74//
75// Special cases
76//====================================================================
77// b=+/-0: return NaN, call libm_error_support
78// a=+/-Inf, a=NaN or b=NaN: return NaN
79//
80// Registers used
81//====================================================================
82// Predicate registers: p6-p11
83// General registers:   r2,r29,r32 (ar.pfs), r33-r39
84// Floating point registers: f6-f15
85
86GR_SAVE_B0                    = r33
87GR_SAVE_PFS                   = r34
88GR_SAVE_GP                    = r35
89GR_SAVE_SP                    = r36
90
91GR_Parameter_X                = r37
92GR_Parameter_Y                = r38
93GR_Parameter_RESULT           = r39
94GR_Parameter_TAG              = r40
95
96FR_X             = f10
97FR_Y             = f9
98FR_RESULT        = f8
99
100
101.section .text
102GLOBAL_IEEE754_ENTRY(fmod)
103
104// inputs in f8, f9
105// result in f8
106
107{ .mfi
108  alloc r32=ar.pfs,1,4,4,0
109  // f6=|a|
110  fmerge.s f6=f0,f8
111  mov r2 = 0x0ffdd
112}
113  {.mfi
114  nop.m 0
115  // f7=|b|
116  fmerge.s f7=f0,f9
117  nop.i 0;;
118}
119
120{ .mfi
121  setf.exp f11 = r2
122  // (1) y0
123  frcpa.s1 f10,p6=f6,f7
124  nop.i 0
125}
126
127// Y +-NAN, +-inf, +-0?     p7
128{ .mfi
129      nop.m 999
130      fclass.m.unc  p7,p0 = f9, 0xe7
131      nop.i 999;;
132}
133
134// qnan snan inf norm     unorm 0 -+
135// 1    1    1   0        0     0 11
136// e                      3
137// X +-NAN, +-inf, ?        p9
138
139{ .mfi
140      nop.m 999
141      fclass.m.unc  p9,p0 = f8, 0xe3
142      nop.i 999
143}
144
145// |x| < |y|? Return x p8
146{ .mfi
147      nop.m 999
148      fcmp.lt.unc.s1 p8,p0 = f6,f7
149      nop.i 999 ;;
150}
151
152{ .mfi
153  nop.m 0
154  // normalize y (if |x|<|y|)
155  (p8) fma.s0 f9=f9,f1,f0
156  nop.i 0;;
157}
158
159  { .mfi
160  mov r2=0x1001f
161  // (2) q0=a*y0
162  (p6) fma.s1 f13=f6,f10,f0
163  nop.i 0
164}
165{ .mfi
166  nop.m 0
167  // (3) e0 = 1 - b * y0
168  (p6) fnma.s1 f12=f7,f10,f1
169  nop.i 0;;
170}
171
172  {.mfi
173  nop.m 0
174  // normalize x (if |x|<|y|)
175  (p8) fma.d.s0 f8=f8,f1,f0
176  nop.i 0
177}
178{.bbb
179  (p9) br.cond.spnt FMOD_X_NAN_INF
180  (p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO
181  // if |x|<|y|, return
182  (p8) br.ret.spnt    b0;;
183}
184
185  {.mfi
186  nop.m 0
187  // normalize x
188  fma.s0 f6=f6,f1,f0
189  nop.i 0
190}
191{.mfi
192  nop.m 0
193  // normalize y
194  fma.s0 f7=f7,f1,f0
195  nop.i 0;;
196}
197
198  {.mfi
199  // f15=2^32
200  setf.exp f15=r2
201  // (4) q1=q0+e0*q0
202  (p6) fma.s1 f13=f12,f13,f13
203  nop.i 0
204}
205{ .mfi
206  nop.m 0
207  // (5) e1 = e0 * e0 + 2^-34
208  (p6) fma.s1 f14=f12,f12,f11
209  nop.i 0;;
210}
211{.mlx
212  nop.m 0
213  movl r2=0x33a00000;;
214}
215{ .mfi
216  nop.m 0
217  // (6) y1 = y0 + e0 * y0
218  (p6) fma.s1 f10=f12,f10,f10
219  nop.i 0;;
220}
221{.mfi
222  // set f12=1.25*2^{-24}
223  setf.s f12=r2
224  // (7) q2=q1+e1*q1
225  (p6) fma.s1 f13=f13,f14,f13
226  nop.i 0;;
227}
228{.mfi
229  nop.m 0
230  fmerge.s f9=f8,f9
231  nop.i 0
232}
233{ .mfi
234  nop.m 0
235  // (8) y2 = y1 + e1 * y1
236  (p6) fma.s1 f10=f14,f10,f10
237  // set p6=0, p10=0
238  cmp.ne.and p6,p10=r0,r0;;
239}
240
241.align 32
242loop53:
243  {.mfi
244  nop.m 0
245  // compare q2, 2^32
246  fcmp.lt.unc.s1 p8,p7=f13,f15
247  nop.i 0
248}
249  {.mfi
250  nop.m 0
251  // will truncate quotient to integer, if exponent<32 (in advance)
252  fcvt.fx.trunc.s1 f11=f13
253  nop.i 0;;
254}
255  {.mfi
256  nop.m 0
257  // if exponent>32, round quotient to single precision (perform in advance)
258  fma.s.s1 f13=f13,f1,f0
259  nop.i 0;;
260}
261  {.mfi
262  nop.m 0
263  // set f12=sgn(a)
264  (p8) fmerge.s f12=f8,f1
265  nop.i 0
266}
267  {.mfi
268  nop.m 0
269  // normalize truncated quotient
270  (p8) fcvt.xf f13=f11
271  nop.i 0;;
272}
273  { .mfi
274  nop.m 0
275  // calculate remainder (assuming f13=RZ(Q))
276  (p7) fnma.s1 f14=f13,f7,f6
277  nop.i 0
278}
279  {.mfi
280  nop.m 0
281  // also if exponent>32, round quotient to single precision
282  // and subtract 1 ulp: q=q-q*(1.25*2^{-24})
283  (p7) fnma.s.s1 f11=f13,f12,f13
284  nop.i 0;;
285}
286
287  {.mfi
288  nop.m 0
289  // (p8) calculate remainder (82-bit format)
290  (p8) fnma.s1 f11=f13,f7,f6
291  nop.i 0
292}
293  {.mfi
294  nop.m 0
295  // (p7) calculate remainder (assuming f11=RZ(Q))
296  (p7) fnma.s1 f6=f11,f7,f6
297  nop.i 0;;
298}
299
300
301  {.mfi
302  nop.m 0
303  // Final iteration (p8): is f6 the correct remainder (quotient was not overestimated) ?
304  (p8) fcmp.lt.unc.s1 p6,p10=f11,f0
305  nop.i 0;;
306}
307  {.mfi
308  nop.m 0
309  // get new quotient estimation: a'*y2
310  (p7) fma.s1 f13=f14,f10,f0
311  nop.i 0
312}
313  {.mfb
314  nop.m 0
315  // was f14=RZ(Q) ? (then new remainder f14>=0)
316  (p7) fcmp.lt.unc.s1 p7,p9=f14,f0
317  nop.b 0;;
318}
319
320
321.pred.rel "mutex",p6,p10
322  {.mfb
323  nop.m 0
324  // add b to estimated remainder (to cover the case when the quotient was overestimated)
325  // also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
326  (p6) fma.d.s0 f8=f11,f12,f9
327  nop.b 0
328}
329  {.mfb
330  nop.m 0
331  // calculate remainder (single precision)
332  // set correct sign of result before returning
333  (p10) fma.d.s0 f8=f11,f12,f0
334  (p8) br.ret.sptk b0;;
335}
336  {.mfi
337  nop.m 0
338  // if f13!=RZ(Q), get alternative quotient estimation: a''*y2
339  (p7) fma.s1 f13=f6,f10,f0
340  nop.i 0
341}
342  {.mfb
343  nop.m 0
344  // if f14 was RZ(Q), set remainder to f14
345  (p9) mov f6=f14
346  br.cond.sptk loop53;;
347}
348
349
350
351FMOD_X_NAN_INF:
352
353// Y zero ?
354{.mfi
355  nop.m 0
356  fclass.m p10,p0=f8,0xc3     // Test x=nan
357  nop.i 0
358}
359{.mfi
360  nop.m 0
361  fma.s1 f10=f9,f1,f0
362  nop.i 0;;
363}
364
365{.mfi
366  nop.m 0
367  fma.s0 f8=f8,f1,f0
368  nop.i 0
369}
370{.mfi
371  nop.m 0
372(p10) fclass.m p10,p0=f9,0x07 // Test x=nan, and y=zero
373  nop.i 0;;
374}
375
376{.mfb
377 nop.m 0
378 fcmp.eq.unc.s1 p11,p0=f10,f0
379(p10) br.ret.spnt b0;;        // Exit with result=x if x=nan and y=zero
380}
381{.mib
382  nop.m 0
383  nop.i 0
384  // if Y zero
385  (p11) br.cond.spnt FMOD_Y_ZERO;;
386}
387
388// X infinity? Return QNAN indefinite
389{ .mfi
390      nop.m 999
391      fclass.m.unc  p8,p9 = f8, 0x23
392      nop.i 999;;
393}
394// Y NaN ?
395{.mfi
396     nop.m 999
397(p8) fclass.m p9,p8=f9,0xc3
398     nop.i 0;;
399}
400{.mfi
401      nop.m 999
402(p8)  frcpa.s0 f8,p0 = f8,f8
403      nop.i 0
404}
405{ .mfi
406      nop.m 999
407    // also set Denormal flag if necessary
408(p8)  fma.s0 f9=f9,f1,f0
409      nop.i 999 ;;
410}
411
412{ .mfb
413      nop.m 999
414(p8)  fma.d.s0 f8=f8,f1,f0
415      nop.b 999 ;;
416}
417
418{ .mfb
419      nop.m 999
420(p9)  frcpa.s0 f8,p7=f8,f9
421      br.ret.sptk   b0 ;;
422}
423
424
425FMOD_Y_NAN_INF_ZERO:
426
427// Y INF
428{ .mfi
429      nop.m 999
430      fclass.m.unc  p7,p0 = f9, 0x23
431      nop.i 999 ;;
432}
433
434{ .mfb
435      nop.m 999
436(p7)  fma.d.s0 f8=f8,f1,f0
437(p7)  br.ret.spnt    b0 ;;
438}
439
440// Y NAN?
441{ .mfi
442      nop.m 999
443      fclass.m.unc  p9,p0 = f9, 0xc3
444      nop.i 999 ;;
445}
446
447{ .mfb
448      nop.m 999
449(p9)  fma.d.s0 f8=f9,f1,f0
450(p9)  br.ret.spnt    b0 ;;
451}
452
453FMOD_Y_ZERO:
454// Y zero? Must be zero at this point
455// because it is the only choice left.
456// Return QNAN indefinite
457
458{.mfi
459  nop.m 0
460  // set Invalid
461  frcpa.s0 f12,p0=f0,f0
462  nop.i 0
463}
464// X NAN?
465{ .mfi
466      nop.m 999
467      fclass.m.unc  p9,p10 = f8, 0xc3
468      nop.i 999 ;;
469}
470{ .mfi
471      nop.m 999
472(p10)  fclass.nm  p9,p10 = f8, 0xff
473      nop.i 999 ;;
474}
475
476{.mfi
477 nop.m 999
478 (p9) frcpa.s0 f11,p7=f8,f0
479 nop.i 0;;
480}
481
482{ .mfi
483      nop.m 999
484(p10)  frcpa.s0         f11,p7 = f9,f9
485      mov        GR_Parameter_TAG = 121 ;;
486}
487
488{ .mfi
489      nop.m 999
490      fmerge.s      f10 = f8, f8
491      nop.i 999
492}
493
494{ .mfb
495      nop.m 999
496      fma.d.s0 f8=f11,f1,f0
497      br.sptk __libm_error_region;;
498}
499
500GLOBAL_IEEE754_END(fmod)
501libm_alias_double_other (__fmod, fmod)
502
503LOCAL_LIBM_ENTRY(__libm_error_region)
504.prologue
505{ .mfi
506        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
507        nop.f 0
508.save   ar.pfs,GR_SAVE_PFS
509        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
510}
511{ .mfi
512.fframe 64
513        add sp=-64,sp                           // Create new stack
514        nop.f 0
515        mov GR_SAVE_GP=gp                       // Save gp
516};;
517{ .mmi
518        stfd [GR_Parameter_Y] = FR_Y,16         // Save Parameter 2 on stack
519        add GR_Parameter_X = 16,sp              // Parameter 1 address
520.save   b0, GR_SAVE_B0
521        mov GR_SAVE_B0=b0                       // Save b0
522};;
523.body
524{ .mib
525        stfd [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
526        add   GR_Parameter_RESULT = 0,GR_Parameter_Y
527    nop.b 0                                 // Parameter 3 address
528}
529{ .mib
530        stfd [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack
531        add   GR_Parameter_Y = -16,GR_Parameter_Y
532        br.call.sptk b0=__libm_error_support#  // Call error handling function
533};;
534{ .mmi
535        nop.m 0
536        nop.m 0
537        add   GR_Parameter_RESULT = 48,sp
538};;
539{ .mmi
540        ldfd  f8 = [GR_Parameter_RESULT]       // Get return result off stack
541.restore sp
542        add   sp = 64,sp                       // Restore stack pointer
543        mov   b0 = GR_SAVE_B0                  // Restore return address
544};;
545{ .mib
546        mov   gp = GR_SAVE_GP                  // Restore gp
547        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
548        br.ret.sptk     b0                     // Return
549};;
550
551LOCAL_LIBM_END(__libm_error_region)
552
553
554.type   __libm_error_support#,@function
555.global __libm_error_support#
556