/* Function sincos vectorized with AVX-512. Wrapper to AVX2 version.
   Copyright (C) 2014-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_d_wrapper_impl.h"

	.text
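/* The vl8l8 variant takes the sin and cos destinations as two flat
   arrays, so the generic fFF wrapper from svml_d_wrapper_impl.h,
   which splits the zmm input into two AVX2 calls, is sufficient.  */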
ENTRY (_ZGVeN8vl8l8_sincos)
WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos
END (_ZGVeN8vl8l8_sincos)

/* AVX512 ISA version as wrapper to AVX2 ISA version (for vector
   function declared with #pragma omp declare simd notinbranch).  */
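/* Unlike the vl8l8 entry point above, the vvv variant receives the
   eight sin and eight cos destination addresses as vectors of
   pointers in %zmm1 and %zmm2, so the results computed into a scratch
   buffer must be stored through each lane's pointer individually.  */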
.macro WRAPPER_IMPL_AVX512_fFF_vvv callee
#ifndef __ILP32__
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        andq      $-64, %rsp
        subq      $320, %rsp
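        /* Scratch frame layout (64-byte aligned):
             0(%rsp)    sin results (8 doubles, filled by two AVX2 calls)
             64(%rsp)   cos results (8 doubles)
             128(%rsp)  sin pointers (copy of %zmm1)
             192(%rsp)  cos pointers (copy of %zmm2)
             256(%rsp)  input vector (copy of %zmm0).  */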
        vmovups   %zmm0, 256(%rsp)
        lea       (%rsp), %rdi
        vmovups   %zmm1, 128(%rdi)
        vmovups   %zmm2, 192(%rdi)
        lea       64(%rsp), %rsi
        call      HIDDEN_JUMPTARGET(\callee)
        vmovdqu   288(%rsp), %ymm0
        lea       32(%rsp), %rdi
        lea       96(%rsp), %rsi
        call      HIDDEN_JUMPTARGET(\callee)
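        /* Scatter the results: the sin value at 8*i(%rsp) is stored
           through the pointer at 128+8*i(%rsp), and the cos value at
           64+8*i(%rsp) through the pointer at 192+8*i(%rsp).  Loads
           and stores are interleaved across registers to avoid serial
           dependencies.  */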
        movq      128(%rsp), %rdx
        movq      192(%rsp), %rsi
        movq      136(%rsp), %r8
        movq      200(%rsp), %r10
        movq      (%rsp), %rax
        movq      64(%rsp), %rcx
        movq      8(%rsp), %rdi
        movq      72(%rsp), %r9
        movq      %rax, (%rdx)
        movq      %rcx, (%rsi)
        movq      144(%rsp), %rax
        movq      208(%rsp), %rcx
        movq      %rdi, (%r8)
        movq      %r9, (%r10)
        movq      152(%rsp), %rdi
        movq      216(%rsp), %r9
        movq      16(%rsp), %r11
        movq      80(%rsp), %rdx
        movq      24(%rsp), %rsi
        movq      88(%rsp), %r8
        movq      %r11, (%rax)
        movq      %rdx, (%rcx)
        movq      160(%rsp), %r11
        movq      224(%rsp), %rdx
        movq      %rsi, (%rdi)
        movq      %r8, (%r9)
        movq      168(%rsp), %rsi
        movq      232(%rsp), %r8
        movq      32(%rsp), %r10
        movq      96(%rsp), %rax
        movq      40(%rsp), %rcx
        movq      104(%rsp), %rdi
        movq      %r10, (%r11)
        movq      %rax, (%rdx)
        movq      176(%rsp), %r10
        movq      240(%rsp), %rax
        movq      %rcx, (%rsi)
        movq      %rdi, (%r8)
        movq      184(%rsp), %rcx
        movq      248(%rsp), %rdi
        movq      48(%rsp), %r9
        movq      112(%rsp), %r11
        movq      56(%rsp), %rdx
        movq      120(%rsp), %rsi
        movq      %r9, (%r10)
        movq      %r11, (%rax)
        movq      %rdx, (%rcx)
        movq      %rsi, (%rdi)
        movq      %rbp, %rsp
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret
#else
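        /* x32: pointers are only 4 bytes wide, so the eight sin and
           cos destination addresses arrive in %ymm1 and %ymm2 rather
           than in full zmm registers.  */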
        leal    8(%rsp), %r10d
        .cfi_def_cfa 10, 0
        andl    $-64, %esp
        pushq   -8(%r10d)
        pushq   %rbp
        .cfi_escape 0x10,0x6,0x2,0x76,0
        movl    %esp, %ebp
        pushq   %r12
        leal    -112(%rbp), %esi
        pushq   %r10
        .cfi_escape 0xf,0x3,0x76,0x70,0x6
        .cfi_escape 0x10,0xc,0x2,0x76,0x78
        leal    -176(%rbp), %edi
        movq    %rsi, %r12
        pushq   %rbx
        .cfi_escape 0x10,0x3,0x2,0x76,0x68
        movq    %rdi, %rbx
        subl    $280, %esp
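        /* Scratch frame layout relative to %ebp:
             -304  input vector (copy of %zmm0)
             -240  cos pointers (copy of %ymm2)
             -208  sin pointers (copy of %ymm1)
             -176  sin results (8 doubles, address kept in %rbx)
             -112  cos results (8 doubles, address kept in %r12).  */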
        vmovdqa %ymm1, -208(%ebp)
        vmovdqa %ymm2, -240(%ebp)
        vmovapd %zmm0, -304(%ebp)
        call    HIDDEN_JUMPTARGET(\callee)
        leal    32(%r12), %esi
        vmovupd -272(%ebp), %ymm0
        leal    32(%rbx), %edi
        call    HIDDEN_JUMPTARGET(\callee)
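        /* Scatter: load each 32-bit destination pointer and store the
           corresponding double result through it.  */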
        movl    -208(%ebp), %eax
        vmovsd  -176(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        movl    -204(%ebp), %eax
        vmovsd  -168(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        movl    -200(%ebp), %eax
        vmovsd  -160(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        movl    -196(%ebp), %eax
        vmovsd  -152(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        movl    -192(%ebp), %eax
        vmovsd  -144(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        movl    -188(%ebp), %eax
        vmovsd  -136(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        movl    -184(%ebp), %eax
        vmovsd  -128(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        movl    -180(%ebp), %eax
        vmovsd  -120(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        movl    -240(%ebp), %eax
        vmovsd  -112(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        movl    -236(%ebp), %eax
        vmovsd  -104(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        movl    -232(%ebp), %eax
        vmovsd  -96(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        movl    -228(%ebp), %eax
        vmovsd  -88(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        movl    -224(%ebp), %eax
        vmovsd  -80(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        movl    -220(%ebp), %eax
        vmovsd  -72(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        movl    -216(%ebp), %eax
        vmovsd  -64(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        movl    -212(%ebp), %eax
        vmovsd  -56(%ebp), %xmm0
        vmovsd  %xmm0, (%eax)
        addl    $280, %esp
        popq    %rbx
        popq    %r10
        .cfi_def_cfa 10, 0
        popq    %r12
        popq    %rbp
        leal    -8(%r10), %esp
        .cfi_def_cfa 7, 8
        ret
#endif
.endm

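/* For reference, an illustrative sketch (the function and array names
   below are hypothetical, not part of this file) of C code that can
   lead the vectorizer to emit calls to _ZGVeN8vvv_sincos when
   compiling for AVX-512:

       #pragma omp declare simd notinbranch
       extern void sincos (double, double *, double *);

       void
       apply (double *x, double **sinp, double **cosp, int n)
       {
         for (int i = 0; i < n; i++)
           sincos (x[i], sinp[i], cosp[i]);
       }

   Per-iteration pointer values make all three arguments vector
   ("vvv"); linear &s[i]/&c[i] destinations map to the vl8l8 entry
   point instead.  */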
ENTRY (_ZGVeN8vvv_sincos)
WRAPPER_IMPL_AVX512_fFF_vvv _ZGVdN4vl8l8_sincos
END (_ZGVeN8vvv_sincos)