1/* Function asinhf vectorized with AVX-512.
2   Copyright (C) 2021-2022 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   https://www.gnu.org/licenses/.  */
18
19/*
20 * ALGORITHM DESCRIPTION:
21 *
22 *   Compute asinh(x) as log(x + sqrt(x*x + 1))
23 *   using RSQRT instructions for starting the
24 *   square root approximation, and small table lookups for log
25 *   that map to AVX-512 permute instructions
26 *
27 *   Special cases:
28 *
29 *   asinh(NaN) = quiet NaN, and raise invalid exception
30 *   asinh(INF) = that INF
31 *   asinh(0)   = that 0
32 *
33 */
34
/* Offsets for data table __svml_sasinh_data_internal_avx512
   Each field of the table is one ZMM-register-wide (64 bytes) and
   64-byte aligned; consecutive offsets therefore differ by 64
   (128 for the two 32-entry log tables).  These values must stay
   in sync with the layout of __svml_sasinh_data_internal_avx512
   defined at the bottom of this file.  */
#define Log_tbl_H			0
#define Log_tbl_L			128
#define One				256
#define AbsMask				320
#define SmallThreshold			384
#define Threshold			448
#define LargeThreshold			512
#define ca1				576
#define c2s				640
#define c1s				704
#define AddB5				768
#define RcpBitMask			832
#define OneEighth			896
#define Four				960
#define poly_coeff3			1024
#define poly_coeff2			1088
#define poly_coeff1			1152
#define L2H				1216
#define L2L				1280
56
57#include <sysdep.h>
58
59	.section .text.exex512, "ax", @progbits
60ENTRY(_ZGVeN16v_asinhf_skx)
61	pushq	%rbp
62	cfi_def_cfa_offset(16)
63	movq	%rsp, %rbp
64	cfi_def_cfa(6, 16)
65	cfi_offset(6, -16)
66	andq	$-64, %rsp
67	subq	$192, %rsp
68	vmovaps	%zmm0, %zmm10
69
70	/* x^2 */
71	vmulps	{rn-sae}, %zmm10, %zmm10, %zmm0
72	vmovups	One+__svml_sasinh_data_internal_avx512(%rip), %zmm2
73
74	/* polynomial computation for small inputs */
75	vmovups	ca1+__svml_sasinh_data_internal_avx512(%rip), %zmm1
76
77	/* not a very small input ? */
78	vmovups	SmallThreshold+__svml_sasinh_data_internal_avx512(%rip), %zmm11
79
80	/* 1+x^2 */
81	vaddps	{rn-sae}, %zmm2, %zmm0, %zmm7
82
83	/* |input| */
84	vandps	AbsMask+__svml_sasinh_data_internal_avx512(%rip), %zmm10, %zmm12
85
86	/* A=max(x^2, 1); */
87	vmaxps	{sae}, %zmm0, %zmm2, %zmm14
88	vrsqrt14ps %zmm7, %zmm8
89
90	/* B=min(x^2, 1); */
91	vminps	{sae}, %zmm0, %zmm2, %zmm15
92	vcmpps	$21, {sae}, %zmm11, %zmm12, %k2
93
94	/* B_high */
95	vsubps	{rn-sae}, %zmm14, %zmm7, %zmm9
96
97	/* sign bit */
98	vxorps	%zmm10, %zmm12, %zmm13
99
100	/* Sh ~sqrt(1+x^2) */
101	vmulps	{rn-sae}, %zmm8, %zmm7, %zmm6
102	vmovups	LargeThreshold+__svml_sasinh_data_internal_avx512(%rip), %zmm14
103
104	/* B_low */
105	vsubps	{rn-sae}, %zmm9, %zmm15, %zmm3
106
107	/* Sh+x */
108	vaddps	{rn-sae}, %zmm12, %zmm6, %zmm15
109
110	/* (Yh*R0)_low */
111	vfmsub213ps {rn-sae}, %zmm6, %zmm8, %zmm7
112	vmulps	{rn-sae}, %zmm1, %zmm0, %zmm9
113	vcmpps	$22, {sae}, %zmm14, %zmm12, %k0
114	vmovups	c1s+__svml_sasinh_data_internal_avx512(%rip), %zmm1
115
116	/* polynomial computation for small inputs */
117	vfmadd213ps {rn-sae}, %zmm12, %zmm12, %zmm9
118	kmovw	%k0, %edx
119
120	/* (x^2)_low */
121	vmovaps	%zmm10, %zmm4
122	vfmsub213ps {rn-sae}, %zmm0, %zmm10, %zmm4
123
124	/* Yl = (x^2)_low + B_low */
125	vaddps	{rn-sae}, %zmm4, %zmm3, %zmm5
126
127	/* rel. error term: Eh=1-Sh*R0 */
128	vmovaps	%zmm2, %zmm0
129	vfnmadd231ps {rn-sae}, %zmm6, %zmm8, %zmm0
130
131	/* Sl = (Yh*R0)_low+(R0*Yl) */
132	vfmadd213ps {rn-sae}, %zmm7, %zmm8, %zmm5
133
134	/* very large inputs ? */
135	vmovups	Threshold+__svml_sasinh_data_internal_avx512(%rip), %zmm7
136
137	/* rel. error term: Eh=(1-Sh*R0)-Sl*R0 */
138	vfnmadd231ps {rn-sae}, %zmm5, %zmm8, %zmm0
139
140	/* sqrt(1+x^2) ~ Sh + Sl + Sh*Eh*poly_s */
141	vmovups	c2s+__svml_sasinh_data_internal_avx512(%rip), %zmm8
142	vcmpps	$21, {sae}, %zmm7, %zmm12, %k1
143
144	/* Sh*Eh */
145	vmulps	{rn-sae}, %zmm0, %zmm6, %zmm4
146	vfmadd231ps {rn-sae}, %zmm0, %zmm8, %zmm1
147
148	/* Sl + Sh*Eh*poly_s */
149	vfmadd213ps {rn-sae}, %zmm5, %zmm1, %zmm4
150
151	/* Xh */
152	vsubps	{rn-sae}, %zmm6, %zmm15, %zmm5
153
154	/* fixup for very large inputs */
155	vmovups	OneEighth+__svml_sasinh_data_internal_avx512(%rip), %zmm6
156
157	/* Xin0+Sl+Sh*Eh*poly_s ~ x+sqrt(1+x^2) */
158	vaddps	{rn-sae}, %zmm4, %zmm15, %zmm3
159
160	/* Xl */
161	vsubps	{rn-sae}, %zmm5, %zmm12, %zmm5
162
163	/* Sl_high */
164	vsubps	{rn-sae}, %zmm15, %zmm3, %zmm0
165	vmulps	{rn-sae}, %zmm6, %zmm12, %zmm3{%k1}
166
167	/* -K*L2H + Th */
168	vmovups	L2H+__svml_sasinh_data_internal_avx512(%rip), %zmm15
169
170	/* Sl_l */
171	vsubps	{rn-sae}, %zmm0, %zmm4, %zmm1
172	vrcp14ps %zmm3, %zmm6
173
174	/* Table lookups */
175	vmovups	__svml_sasinh_data_internal_avx512(%rip), %zmm0
176
177	/* Xin_low */
178	vaddps	{rn-sae}, %zmm5, %zmm1, %zmm7
179
180	/* round reciprocal to 1+4b mantissas */
181	vpaddd	AddB5+__svml_sasinh_data_internal_avx512(%rip), %zmm6, %zmm4
182	vmovups	poly_coeff1+__svml_sasinh_data_internal_avx512(%rip), %zmm5
183	vandps	RcpBitMask+__svml_sasinh_data_internal_avx512(%rip), %zmm4, %zmm8
184
185	/* fixup for very large inputs */
186	vxorps	%zmm7, %zmm7, %zmm7{%k1}
187
188	/* polynomial */
189	vmovups	poly_coeff3+__svml_sasinh_data_internal_avx512(%rip), %zmm4
190
191	/* reduced argument for log(): (Rcp*Xin-1)+Rcp*Xin_low */
192	vfmsub231ps {rn-sae}, %zmm8, %zmm3, %zmm2
193	vmovups	Four+__svml_sasinh_data_internal_avx512(%rip), %zmm3
194
195	/* exponents */
196	vgetexpps {sae}, %zmm8, %zmm1
197
198	/* Prepare table index */
199	vpsrld	$18, %zmm8, %zmm14
200	vfmadd231ps {rn-sae}, %zmm8, %zmm7, %zmm2
201	vmovups	poly_coeff2+__svml_sasinh_data_internal_avx512(%rip), %zmm7
202	vsubps	{rn-sae}, %zmm3, %zmm1, %zmm1{%k1}
203	vpermt2ps Log_tbl_H+64+__svml_sasinh_data_internal_avx512(%rip), %zmm14, %zmm0
204	vmovups	Log_tbl_L+__svml_sasinh_data_internal_avx512(%rip), %zmm3
205	vfmadd231ps {rn-sae}, %zmm2, %zmm4, %zmm7
206	vfnmadd231ps {rn-sae}, %zmm1, %zmm15, %zmm0
207
208	/* R^2 */
209	vmulps	{rn-sae}, %zmm2, %zmm2, %zmm6
210	vfmadd213ps {rn-sae}, %zmm5, %zmm2, %zmm7
211	vpermt2ps Log_tbl_L+64+__svml_sasinh_data_internal_avx512(%rip), %zmm14, %zmm3
212
213	/* -K*L2L + Tl */
214	vmovups	L2L+__svml_sasinh_data_internal_avx512(%rip), %zmm14
215	vfnmadd213ps {rn-sae}, %zmm3, %zmm14, %zmm1
216
217	/* Tl + R^2*Poly */
218	vfmadd213ps {rn-sae}, %zmm1, %zmm6, %zmm7
219
220	/* R+Tl + R^2*Poly */
221	vaddps	{rn-sae}, %zmm2, %zmm7, %zmm2
222	vaddps	{rn-sae}, %zmm2, %zmm0, %zmm9{%k2}
223	vxorps	%zmm13, %zmm9, %zmm0
224	testl	%edx, %edx
225
226	/* Go to special inputs processing branch */
227	jne	L(SPECIAL_VALUES_BRANCH)
228	# LOE rbx r12 r13 r14 r15 edx zmm0 zmm10
229
230	/* Restore registers
231	 * and exit the function
232	 */
233
234L(EXIT):
235	movq	%rbp, %rsp
236	popq	%rbp
237	cfi_def_cfa(7, 8)
238	cfi_restore(6)
239	ret
240	cfi_def_cfa(6, 16)
241	cfi_offset(6, -16)
242
243	/* Branch to process
244	 * special inputs
245	 */
246
247L(SPECIAL_VALUES_BRANCH):
248	vmovups	%zmm10, 64(%rsp)
249	vmovups	%zmm0, 128(%rsp)
250	# LOE rbx r12 r13 r14 r15 edx zmm0
251
252	xorl	%eax, %eax
253	# LOE rbx r12 r13 r14 r15 eax edx
254
255	vzeroupper
256	movq	%r12, 16(%rsp)
257	/*  DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus)  */
258	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
259	movl	%eax, %r12d
260	movq	%r13, 8(%rsp)
261	/*  DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus)  */
262	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
263	movl	%edx, %r13d
264	movq	%r14, (%rsp)
265	/*  DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus)  */
266	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
267	# LOE rbx r15 r12d r13d
268
269	/* Range mask
270	 * bits check
271	 */
272
273L(RANGEMASK_CHECK):
274	btl	%r12d, %r13d
275
276	/* Call scalar math function */
277	jc	L(SCALAR_MATH_CALL)
278	# LOE rbx r15 r12d r13d
279
280	/* Special inputs
281	 * processing loop
282	 */
283
284L(SPECIAL_VALUES_LOOP):
285	incl	%r12d
286	cmpl	$16, %r12d
287
288	/* Check bits in range mask */
289	jl	L(RANGEMASK_CHECK)
290	# LOE rbx r15 r12d r13d
291
292	movq	16(%rsp), %r12
293	cfi_restore(12)
294	movq	8(%rsp), %r13
295	cfi_restore(13)
296	movq	(%rsp), %r14
297	cfi_restore(14)
298	vmovups	128(%rsp), %zmm0
299
300	/* Go to exit */
301	jmp	L(EXIT)
302	/*  DW_CFA_expression: r12 (r12) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -176; DW_OP_plus)  */
303	.cfi_escape 0x10, 0x0c, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x50, 0xff, 0xff, 0xff, 0x22
304	/*  DW_CFA_expression: r13 (r13) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -184; DW_OP_plus)  */
305	.cfi_escape 0x10, 0x0d, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x48, 0xff, 0xff, 0xff, 0x22
306	/*  DW_CFA_expression: r14 (r14) (DW_OP_lit8; DW_OP_minus; DW_OP_const4s: -64; DW_OP_and; DW_OP_const4s: -192; DW_OP_plus)  */
307	.cfi_escape 0x10, 0x0e, 0x0e, 0x38, 0x1c, 0x0d, 0xc0, 0xff, 0xff, 0xff, 0x1a, 0x0d, 0x40, 0xff, 0xff, 0xff, 0x22
308	# LOE rbx r12 r13 r14 r15 zmm0
309
310	/* Scalar math fucntion call
311	 * to process special input
312	 */
313
314L(SCALAR_MATH_CALL):
315	movl	%r12d, %r14d
316	vmovss	64(%rsp, %r14, 4), %xmm0
317	call	asinhf@PLT
318	# LOE rbx r14 r15 r12d r13d xmm0
319
320	vmovss	%xmm0, 128(%rsp, %r14, 4)
321
322	/* Process special inputs in loop */
323	jmp	L(SPECIAL_VALUES_LOOP)
324	# LOE rbx r15 r12d r13d
325END(_ZGVeN16v_asinhf_skx)
326
	.section .rodata, "a"
	.align	64

/* Layout mirror of the table below for reference tooling; not
   compiled as part of this assembly file (guarded by an #ifdef that
   is never defined here).  Field order and 64-byte alignment must
   match the offset macros at the top of this file.  */
#ifdef __svml_sasinh_data_internal_avx512_typedef
typedef unsigned int VUINT32;
typedef struct {
	__declspec(align(64)) VUINT32 Log_tbl_H[32][1];
	__declspec(align(64)) VUINT32 Log_tbl_L[32][1];
	__declspec(align(64)) VUINT32 One[16][1];
	__declspec(align(64)) VUINT32 AbsMask[16][1];
	__declspec(align(64)) VUINT32 SmallThreshold[16][1];
	__declspec(align(64)) VUINT32 Threshold[16][1];
	__declspec(align(64)) VUINT32 LargeThreshold[16][1];
	__declspec(align(64)) VUINT32 ca1[16][1];
	__declspec(align(64)) VUINT32 c2s[16][1];
	__declspec(align(64)) VUINT32 c1s[16][1];
	__declspec(align(64)) VUINT32 AddB5[16][1];
	__declspec(align(64)) VUINT32 RcpBitMask[16][1];
	__declspec(align(64)) VUINT32 OneEighth[16][1];
	__declspec(align(64)) VUINT32 Four[16][1];
	__declspec(align(64)) VUINT32 poly_coeff3[16][1];
	__declspec(align(64)) VUINT32 poly_coeff2[16][1];
	__declspec(align(64)) VUINT32 poly_coeff1[16][1];
	__declspec(align(64)) VUINT32 L2H[16][1];
	__declspec(align(64)) VUINT32 L2L[16][1];
} __svml_sasinh_data_internal_avx512;
#endif
__svml_sasinh_data_internal_avx512:
	/* Log_tbl_H: 32-entry log lookup table, high parts (IEEE-754
	   single-precision bit patterns).  NOTE(review): values appear to
	   be -log(1+j/32) split hi/lo with Log_tbl_L below — confirm
	   against the generator before relying on this.  */
	.long	0x00000000
	.long	0xbcfc0000
	.long	0xbd788000
	.long	0xbdb78000
	.long	0xbdf14000
	.long	0xbe14a000
	.long	0xbe300000
	.long	0xbe4aa000
	.long	0xbe648000
	.long	0xbe7dc000
	.long	0xbe8b4000
	.long	0xbe974000
	.long	0xbea31000
	.long	0xbeae9000
	.long	0xbeb9d000
	.long	0xbec4d000
	.long	0xbecfa000
	.long	0xbeda2000
	.long	0xbee48000
	.long	0xbeeea000
	.long	0xbef89000
	.long	0xbf012800
	.long	0xbf05f000
	.long	0xbf0aa800
	.long	0xbf0f4000
	.long	0xbf13c800
	.long	0xbf184000
	.long	0xbf1ca000
	.long	0xbf20f000
	.long	0xbf252800
	.long	0xbf295000
	.long	0xbf2d6800
	/* Log_tbl_L: low-order correction parts for Log_tbl_H */
	.align	64
	.long	0x80000000
	.long	0xb726c39e
	.long	0x3839e7fe
	.long	0xb7528ae5
	.long	0x377891d5
	.long	0xb8297c10
	.long	0x37cf8f58
	.long	0x3852b186
	.long	0x35838656
	.long	0xb80c36af
	.long	0x38235454
	.long	0xb862bae1
	.long	0x37e87bc7
	.long	0x37848150
	.long	0x37202511
	.long	0xb74e1b05
	.long	0x385c1340
	.long	0xb8777bcd
	.long	0x36038656
	.long	0xb7d40984
	.long	0xb80f5faf
	.long	0xb8254b4c
	.long	0xb865c84a
	.long	0x37f0b42d
	.long	0xb83ebce1
	.long	0xb83c2513
	.long	0x37a332c4
	.long	0x3779654f
	.long	0x38602f73
	.long	0x367449f8
	.long	0xb7b4996f
	.long	0xb800986b
	/* One = 1.0f broadcast across 16 lanes */
	.align	64
	.long	0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
	/* AbsMask: clears the sign bit */
	.align	64
	.long	0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
	/* SmallThreshold = 2^-6: below this the small-input polynomial is used */
	.align	64
	.long	0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000, 0x3c800000
	/* Threshold = 2^63: very-large-input fixup cutoff */
	.align	64
	.long	0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000, 0x5f000000
	/* LargeThreshold = FLT_MAX: beyond this, lanes go to the scalar path */
	.align	64
	.long	0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff
	/* ca1 ~ -1/6: small-input polynomial coefficient */
	.align	64
	.long	0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE, 0xbe2AA5DE
	/* c2s = 0.375: sqrt refinement coefficient */
	.align	64
	.long	0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000, 0x3ec00000
	/* c1s = 0.5: sqrt refinement coefficient */
	.align	64
	.long	0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000
	/* AddB5: rounding bias for the 1+4-bit reciprocal mantissa */
	.align	64
	.long	0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000, 0x00020000
	/* RcpBitMask: keeps sign/exponent and top 5 mantissa bits */
	.align	64
	.long	0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000, 0xfffc0000
	/* OneEighth = 0.125: scale factor for very large inputs */
	.align	64
	.long	0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000, 0x3e000000
	/* Four = 4.0: exponent adjustment for the large-input fixup */
	.align	64
	.long	0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000, 0x40800000
	/* poly_coeff3: log() polynomial coefficient */
	.align	64
	.long	0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810, 0xbe800810
	/* poly_coeff2: log() polynomial coefficient (~1/3) */
	.align	64
	.long	0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e, 0x3eaab11e
	/* poly_coeff1 = -0.5: log() polynomial coefficient */
	.align	64
	.long	0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000, 0xbf000000
	/* L2H = log(2)_high */
	.align	64
	.long	0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000, 0x3f317000
	/* L2L = log(2)_low */
	.align	64
	.long	0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4, 0x3805fdf4
	.align	64
	.type	__svml_sasinh_data_internal_avx512, @object
	.size	__svml_sasinh_data_internal_avx512, .-__svml_sasinh_data_internal_avx512
476