1 /* Wrapper implementations of vector math functions.
2    Copyright (C) 2014-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
/* SSE2 ISA version as wrapper to scalar.

   Expands to the body of a 2-lane double-precision vector function.
   The packed argument arrives in %xmm0; \callee is called once per lane
   with that lane in the low half of %xmm0, and the two scalar results
   are packed back into %xmm0.

   Stack frame (24 bytes; with the return address already pushed this
   leaves %rsp 16-byte aligned for the movaps and for both calls):
      0(%rsp)  lane 0 of the argument
      8(%rsp)  lane 1 of the argument
     16(%rsp)  result for lane 0  */
.macro WRAPPER_IMPL_SSE2 callee
        subq      $24, %rsp
        cfi_adjust_cfa_offset (24)
        movaps    %xmm0, (%rsp)
        /* Lane 0 is already in the low half of %xmm0.  */
        call      JUMPTARGET(\callee)
        movsd     %xmm0, 16(%rsp)
        movsd     8(%rsp), %xmm0
        call      JUMPTARGET(\callee)
        /* Lane 1's result stays in %xmm0; no need to spill it.  */
        movsd     16(%rsp), %xmm1
        unpcklpd  %xmm0, %xmm1
        movaps    %xmm1, %xmm0
        addq      $24, %rsp
        cfi_adjust_cfa_offset (-24)
        ret
.endm
36 
/* 2 argument SSE2 ISA version as wrapper to scalar.

   Expands to the body of a 2-lane, 2-argument double-precision vector
   function.  The packed arguments arrive in %xmm0 and %xmm1; \callee is
   called once per lane with the matching lanes in the low halves of
   %xmm0 and %xmm1, and the two scalar results are packed into %xmm0.

   Stack frame (40 bytes; with the return address already pushed this
   leaves %rsp 16-byte aligned for the movaps stores and both calls):
      0(%rsp)  first argument, lanes 0 and 1
     16(%rsp)  second argument, lanes 0 and 1
     32(%rsp)  result for lane 0  */
.macro WRAPPER_IMPL_SSE2_ff callee
        subq      $40, %rsp
        cfi_adjust_cfa_offset (40)
        movaps    %xmm0, (%rsp)
        movaps    %xmm1, 16(%rsp)
        /* Lane 0 of both arguments is already in place.  */
        call      JUMPTARGET(\callee)
        movsd     %xmm0, 32(%rsp)
        movsd     8(%rsp), %xmm0
        movsd     24(%rsp), %xmm1
        call      JUMPTARGET(\callee)
        /* Lane 1's result stays in %xmm0; no need to spill it.  */
        movsd     32(%rsp), %xmm1
        unpcklpd  %xmm0, %xmm1
        movaps    %xmm1, %xmm0
        addq      $40, %rsp
        cfi_adjust_cfa_offset (-40)
        ret
.endm
56 
/* 3 argument SSE2 ISA version as wrapper to scalar.

   %xmm0 carries the two input lanes; %rdi and %rsi point to the two
   output arrays.  \callee is invoked once per lane with the lane value
   in %xmm0 and with %rdi/%rsi pointing at stack temporaries.  The two
   values it leaves there are then copied to the matching element of
   the caller's arrays.

   Register use: %rbp = caller's %rdi, %rbx = caller's %rsi (both are
   callee-saved, hence the push/pop pair around the body).

   Stack frame after the prologue:
      0(%rsp)  saved input vector
     16(%rsp)  temporary written through %rsi
     24(%rsp)  temporary written through %rdi  */
.macro WRAPPER_IMPL_SSE2_fFF callee
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        pushq     %rbx
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbx, 0)
        subq      $40, %rsp
        cfi_adjust_cfa_offset (40)
        movq      %rdi, %rbp
        movq      %rsi, %rbx
        movaps    %xmm0, (%rsp)
        /* Lane 0: the argument is already in the low half of %xmm0.  */
        leaq      24(%rsp), %rdi
        leaq      16(%rsp), %rsi
        call      JUMPTARGET(\callee)
        movsd     24(%rsp), %xmm0
        movsd     16(%rsp), %xmm1
        movsd     %xmm0, (%rbp)
        movsd     %xmm1, (%rbx)
        /* Lane 1: reload the argument, %rdi and %rsi; the call may have
           clobbered all of them.  */
        movsd     8(%rsp), %xmm0
        leaq      24(%rsp), %rdi
        leaq      16(%rsp), %rsi
        call      JUMPTARGET(\callee)
        movsd     24(%rsp), %xmm0
        movsd     16(%rsp), %xmm1
        movsd     %xmm0, 8(%rbp)
        movsd     %xmm1, 8(%rbx)
        addq      $40, %rsp
        cfi_adjust_cfa_offset (-40)
        popq      %rbx
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbx)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret
.endm
97 
/* AVX/AVX2 ISA version as wrapper to SSE ISA version.

   The 256-bit argument in %ymm0 is processed as two 128-bit halves.
   \callee is called first on the low half (left in %xmm0) and then on
   the high half (spilled to the stack beforehand); the two 128-bit
   results are joined into %ymm0.

   %rbp is used as frame pointer so that %rsp can be realigned.
   Scratch area after the prologue (%rsp is 32-byte aligned):
      0(%rsp)  high half of the argument
     16(%rsp)  result for the low half  */
.macro WRAPPER_IMPL_AVX callee
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        andq      $-32, %rsp
        subq      $32, %rsp
        /* Keep the high half; the low half stays in %xmm0.  */
        vextractf128 $1, %ymm0, (%rsp)
        vzeroupper
        call      HIDDEN_JUMPTARGET(\callee)
        vmovaps   %xmm0, 16(%rsp)
        vmovaps   (%rsp), %xmm0
        call      HIDDEN_JUMPTARGET(\callee)
        /* %ymm0 = { low-half result, high-half result }.  */
        vmovaps   16(%rsp), %xmm1
        vinsertf128 $1, %xmm0, %ymm1, %ymm0
        movq      %rbp, %rsp
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret
.endm
123 
/* 2 argument AVX/AVX2 ISA version as wrapper to SSE ISA version.

   The 256-bit arguments in %ymm0 and %ymm1 are processed as two pairs
   of 128-bit halves.  \callee is called first on the low halves (left
   in %xmm0/%xmm1) and then on the high halves (spilled to the stack
   beforehand); the two 128-bit results are joined into %ymm0.

   %rbp is used as frame pointer so that %rsp can be realigned.
   Scratch area after the prologue (%rsp is 32-byte aligned):
      0(%rsp)  high half of the second argument
     16(%rsp)  high half of the first argument
     32(%rsp)  result for the low halves  */
.macro WRAPPER_IMPL_AVX_ff callee
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        andq      $-32, %rsp
        subq      $64, %rsp
        /* Keep the high halves; the low halves stay in %xmm0/%xmm1.  */
        vextractf128 $1, %ymm1, (%rsp)
        vextractf128 $1, %ymm0, 16(%rsp)
        vzeroupper
        call      HIDDEN_JUMPTARGET(\callee)
        vmovaps   %xmm0, 32(%rsp)
        vmovaps   (%rsp), %xmm1
        vmovaps   16(%rsp), %xmm0
        call      HIDDEN_JUMPTARGET(\callee)
        /* %ymm0 = { low-half result, high-half result }.  */
        vmovaps   32(%rsp), %xmm1
        vinsertf128 $1, %xmm0, %ymm1, %ymm0
        movq      %rbp, %rsp
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret
.endm
151 
/* 3 argument AVX/AVX2 ISA version as wrapper to SSE ISA version.

   %ymm0 carries the input; %rdi and %rsi point to the two output
   arrays.  \callee is called twice, each time on one 128-bit half of
   the input:
     - low half: \callee receives the caller's %rdi/%rsi unchanged and
       stores straight into the output arrays;
     - high half: \callee stores into two 16-byte stack temporaries,
       which are then copied to byte offset 16 of each output array.

   Register use: %r13 = caller's %rdi, %r14 = caller's %rsi.

   %r13/%r14 are pushed before %rsp is realigned so that their save
   slots sit at fixed offsets from %rbp (the CFA register) and can be
   described by the unwind information.

   Scratch area after the prologue (%rsp is 32-byte aligned):
      0(%rsp)  temporary written through %rdi (high half)
     16(%rsp)  temporary written through %rsi (high half)
     32(%rsp)  high half of the input  */
.macro WRAPPER_IMPL_AVX_fFF callee
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        /* The CFA is %rbp-based from here on, so the pushes must not
           adjust the CFA offset; the saves are at -8/-16 from %rbp.  */
        pushq     %r13
        cfi_rel_offset (%r13, -8)
        pushq     %r14
        cfi_rel_offset (%r14, -16)
        /* Reserve at least 48 bytes, then realign.  */
        subq      $48, %rsp
        andq      $-32, %rsp
        movq      %rsi, %r14
        movq      %rdi, %r13
        vextractf128 $1, %ymm0, 32(%rsp)
        vzeroupper
        call      HIDDEN_JUMPTARGET(\callee)
        vmovaps   32(%rsp), %xmm0
        lea       (%rsp), %rdi
        lea       16(%rsp), %rsi
        call      HIDDEN_JUMPTARGET(\callee)
        vmovapd   (%rsp), %xmm0
        vmovapd   16(%rsp), %xmm1
        /* Nothing here establishes 16-byte alignment of the caller's
           arrays, so store unaligned.  */
        vmovupd   %xmm0, 16(%r13)
        vmovupd   %xmm1, 16(%r14)
        /* Point %rsp at the %r14 save slot, below the realigned area.  */
        leaq      -16(%rbp), %rsp
        popq      %r14
        cfi_restore (%r14)
        popq      %r13
        cfi_restore (%r13)
        /* %rsp == %rbp again, so the CFA can switch back to %rsp.  */
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret
.endm
194 
/* AVX512 ISA version as wrapper to AVX2 ISA version.

   The 512-bit argument in %zmm0 is spilled and processed as two 256-bit
   halves; \callee is called once per half with the half in %ymm0, and
   the two 256-bit results are joined into %zmm0.

   %rbp is used as frame pointer so that %rsp can be realigned.
   Scratch area after the prologue (%rsp is 64-byte aligned):
      0(%rsp)  argument, low half
     32(%rsp)  argument, high half
     64(%rsp)  result for the low half  */
.macro WRAPPER_IMPL_AVX512 callee
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        andq      $-64, %rsp
        subq      $128, %rsp
        vmovups   %zmm0, (%rsp)
        /* Low half.  */
        vmovupd   (%rsp), %ymm0
        call      HIDDEN_JUMPTARGET(\callee)
        vmovupd   %ymm0, 64(%rsp)
        /* High half.  */
        vmovupd   32(%rsp), %ymm0
        call      HIDDEN_JUMPTARGET(\callee)
        /* %zmm0 = { low-half result, high-half result }.  */
        vmovupd   64(%rsp), %ymm1
        vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
        movq      %rbp, %rsp
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret
.endm
219 
/* 2 argument AVX512 ISA version as wrapper to AVX2 ISA version.

   The 512-bit arguments in %zmm0 and %zmm1 are spilled and processed as
   two pairs of 256-bit halves; \callee is called once per pair with the
   halves in %ymm0/%ymm1, and the two 256-bit results are joined into
   %zmm0.

   %rbp is used as frame pointer so that %rsp can be realigned.
   Scratch area after the prologue (%rsp is 64-byte aligned):
       0(%rsp)  first argument, low half
      32(%rsp)  first argument, high half
      64(%rsp)  second argument, low half
      96(%rsp)  second argument, high half
     128(%rsp)  result for the low halves  */
.macro WRAPPER_IMPL_AVX512_ff callee
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        andq      $-64, %rsp
        subq      $192, %rsp
        vmovups   %zmm0, (%rsp)
        vmovups   %zmm1, 64(%rsp)
        /* Low halves.  */
        vmovupd   (%rsp), %ymm0
        vmovupd   64(%rsp), %ymm1
        call      HIDDEN_JUMPTARGET(\callee)
        vmovupd   %ymm0, 128(%rsp)
        /* High halves.  */
        vmovupd   32(%rsp), %ymm0
        vmovupd   96(%rsp), %ymm1
        call      HIDDEN_JUMPTARGET(\callee)
        /* %zmm0 = { low-half result, high-half result }.  */
        vmovupd   128(%rsp), %ymm1
        vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
        movq      %rbp, %rsp
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret
.endm
247 
/* 3 argument AVX512 ISA version as wrapper to AVX2 ISA version.

   %zmm0 carries the input; %rdi and %rsi point to the two output
   arrays.  \callee is called twice, each time on one 256-bit half of
   the input:
     - low half: \callee receives the caller's %rdi/%rsi unchanged and
       stores straight into the output arrays;
     - high half: \callee stores into two 32-byte stack temporaries,
       which are then copied to byte offset 32 of each output array.

   Register use: %r12 = caller's %rdi, %r13 = caller's %rsi.

   %r12/%r13 are pushed before %rsp is realigned so that their save
   slots sit at fixed offsets from %rbp (the CFA register) and can be
   described by the unwind information.

   Scratch area after the prologue (%rsp is 64-byte aligned):
      0(%rsp)  input, low half
     32(%rsp)  input, high half
     64(%rsp)  temporary written through %rdi (high half)
     96(%rsp)  temporary written through %rsi (high half)  */
.macro WRAPPER_IMPL_AVX512_fFF callee
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        /* The CFA is %rbp-based from here on, so the pushes must not
           adjust the CFA offset; the saves are at -8/-16 from %rbp.  */
        pushq     %r12
        cfi_rel_offset (%r12, -8)
        pushq     %r13
        cfi_rel_offset (%r13, -16)
        /* Reserve at least 128 bytes, then realign.  */
        subq      $128, %rsp
        andq      $-64, %rsp
        movq      %rsi, %r13
        movq      %rdi, %r12
        vmovups   %zmm0, (%rsp)
        vmovupd   (%rsp), %ymm0
        call      HIDDEN_JUMPTARGET(\callee)
        vmovupd   32(%rsp), %ymm0
        lea       64(%rsp), %rdi
        lea       96(%rsp), %rsi
        call      HIDDEN_JUMPTARGET(\callee)
        vmovupd   64(%rsp), %ymm0
        vmovupd   96(%rsp), %ymm1
        vmovupd   %ymm0, 32(%r12)
        vmovupd   %ymm1, 32(%r13)
        vzeroupper
        /* Point %rsp at the %r13 save slot, below the realigned area.  */
        leaq      -16(%rbp), %rsp
        popq      %r13
        cfi_restore (%r13)
        popq      %r12
        cfi_restore (%r12)
        /* %rsp == %rbp again, so the CFA can switch back to %rsp.  */
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret
.endm
291