/* Wrapper implementations of vector math functions.
   Copyright (C) 2014-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* SSE2 ISA version as wrapper to scalar.

   Expands to a complete function body (it ends in ret).
   In:   xmm0 = 128-bit vector holding two 64-bit lanes.
   Out:  xmm0 = {result for lane 0, result for lane 1}.
   The callee is invoked twice; each time the lane to process is in
   the low 64 bits of xmm0 and the result is read from the low 64 bits
   of xmm0.

   Frame (40 bytes):  0..15  spilled input vector
                     16..23  result for lane 0
                     24..39  unused padding
   Assuming the usual SysV entry state (rsp % 16 == 8), 40 bytes keep
   rsp 16-byte aligned for the movaps spill and at both calls.  The
   result for lane 1 is consumed straight from xmm0, so it is never
   written to the frame.  */
.macro WRAPPER_IMPL_SSE2 callee
        subq      $40, %rsp
        cfi_adjust_cfa_offset (40)
        movaps    %xmm0, (%rsp)           /* Spill both input lanes.  */
        call      JUMPTARGET(\callee)     /* Lane 0 is already in xmm0.  */
        movsd     %xmm0, 16(%rsp)         /* Keep result 0 across the call.  */
        movsd     8(%rsp), %xmm0          /* Lane 1 -> low half of xmm0.  */
        call      JUMPTARGET(\callee)
        movsd     16(%rsp), %xmm1         /* xmm1 low = result 0.  */
        unpcklpd  %xmm0, %xmm1            /* xmm1 = {result 0, result 1}.  */
        movaps    %xmm1, %xmm0
        addq      $40, %rsp
        cfi_adjust_cfa_offset (-40)
        ret
.endm

/* 2 argument SSE2 ISA version as wrapper to scalar.

   Expands to a complete function body (it ends in ret).
   In:   xmm0, xmm1 = two 128-bit vectors, two 64-bit lanes each.
   Out:  xmm0 = {result for lane 0, result for lane 1}.
   The callee is invoked once per lane with the matching lanes of both
   inputs in the low 64 bits of xmm0 and xmm1.

   Frame (56 bytes):  0..15  spilled first input vector
                     16..31  spilled second input vector
                     32..39  result for lane 0
                     40..55  unused padding
   Assuming the usual SysV entry state (rsp % 16 == 8), 56 bytes keep
   rsp 16-byte aligned for the movaps spills and at both calls.  The
   result for lane 1 is consumed straight from xmm0, so it is never
   written to the frame.  */
.macro WRAPPER_IMPL_SSE2_ff callee
        subq      $56, %rsp
        cfi_adjust_cfa_offset (56)
        movaps    %xmm0, (%rsp)           /* Spill first input.  */
        movaps    %xmm1, 16(%rsp)         /* Spill second input.  */
        call      JUMPTARGET(\callee)     /* Lane 0 of both is in place.  */
        movsd     %xmm0, 32(%rsp)         /* Keep result 0 across the call.  */
        movsd     8(%rsp), %xmm0          /* Lane 1 of first input.  */
        movsd     24(%rsp), %xmm1         /* Lane 1 of second input.  */
        call      JUMPTARGET(\callee)
        movsd     32(%rsp), %xmm1         /* xmm1 low = result 0.  */
        unpcklpd  %xmm0, %xmm1            /* xmm1 = {result 0, result 1}.  */
        movaps    %xmm1, %xmm0
        addq      $56, %rsp
        cfi_adjust_cfa_offset (-56)
        ret
.endm

/* 3 argument SSE2 ISA version as wrapper to scalar.
*/

/* WRAPPER_IMPL_SSE2_fFF callee

   Expands to a complete function body (it ends in ret).
   In:   xmm0 = 128-bit vector holding two 64-bit lanes,
         rdi, rsi = two output pointers.
   For each lane the callee is called with the lane in the low 64 bits
   of xmm0, rdi = address of the temporary at 24(%rsp) and rsi =
   address of the temporary at 16(%rsp).  Afterwards the 8-byte
   temporaries are copied to element 0 (first call) or element 1
   (second call) of the arrays addressed by the incoming rdi and rsi.

   rbp and rbx carry the incoming rdi and rsi across the calls; both
   are saved on entry and restored before returning.

   Frame below the two pushes (40 bytes):
          0..15  spilled input vector
         16..23  temporary written through rsi
         24..31  temporary written through rdi
         32..39  unused padding  */
.macro WRAPPER_IMPL_SSE2_fFF callee
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        pushq     %rbx
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbx, 0)
        subq      $40, %rsp
        cfi_adjust_cfa_offset (40)
        movq      %rsi, %rbx              /* rbx = second output pointer.  */
        movq      %rdi, %rbp              /* rbp = first output pointer.  */
        movaps    %xmm0, (%rsp)           /* Spill both input lanes.  */
        leaq      24(%rsp), %rdi
        leaq      16(%rsp), %rsi
        call      JUMPTARGET(\callee)     /* Lane 0 is already in xmm0.  */

        /* Publish the lane 0 outputs.  */
        movsd     24(%rsp), %xmm0
        movsd     %xmm0, (%rbp)
        movsd     16(%rsp), %xmm0
        movsd     %xmm0, (%rbx)

        /* Bring lane 1 into the low half of xmm0; rdi and rsi have to
           be rebuilt because the call may have clobbered them.  */
        movapd    (%rsp), %xmm1
        unpckhpd  %xmm1, %xmm1
        movapd    %xmm1, %xmm0
        leaq      24(%rsp), %rdi
        leaq      16(%rsp), %rsi
        call      JUMPTARGET(\callee)

        /* Publish the lane 1 outputs.  */
        movsd     24(%rsp), %xmm0
        movsd     %xmm0, 8(%rbp)
        movsd     16(%rsp), %xmm0
        movsd     %xmm0, 8(%rbx)

        addq      $40, %rsp
        cfi_adjust_cfa_offset (-40)
        popq      %rbx
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbx)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret
.endm

/* AVX/AVX2 ISA version as wrapper to SSE ISA version.

   Expands to a complete function body (it ends in ret).
   In:   ymm0 = 256-bit input vector.
   Out:  ymm0 = {callee (low 128 bits), callee (high 128 bits)}.
   The callee takes and returns a 128-bit vector in xmm0.

   rbp is used as frame pointer so that rsp can be rounded down to a
   32-byte boundary.  Frame (32 bytes):
          0..15  high half of the input
         16..31  result for the low half  */
.macro WRAPPER_IMPL_AVX callee
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        andq      $-32, %rsp
        subq      $32, %rsp
        vextractf128 $1, %ymm0, (%rsp)    /* Save the high half.  */
        vzeroupper                        /* xmm0 still holds the low half.  */
        call      HIDDEN_JUMPTARGET(\callee)
        vmovapd   %xmm0, 16(%rsp)         /* Keep the low result.  */
        vmovaps   (%rsp), %xmm0           /* High half becomes the argument.  */
        call      HIDDEN_JUMPTARGET(\callee)
        vmovapd   %xmm0, %xmm1            /* xmm1 = high result.  */
        vmovapd   16(%rsp), %xmm0         /* xmm0 = low result.  */
        vinsertf128 $1, %xmm1, %ymm0, %ymm0
        movq      %rbp, %rsp
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret
.endm

/* 2 argument AVX/AVX2 ISA version as wrapper to SSE ISA version.
*/

/* WRAPPER_IMPL_AVX_ff callee

   Expands to a complete function body (it ends in ret).
   In:   ymm0, ymm1 = two 256-bit input vectors.
   Out:  ymm0 = {callee (low halves), callee (high halves)}.
   The callee takes two 128-bit vectors in xmm0 and xmm1 and returns
   one in xmm0.

   rbp is used as frame pointer so that rsp can be rounded down to a
   32-byte boundary.  Frame (64 bytes):
          0..15  high half of the second input
         16..31  high half of the first input
         32..47  result for the low halves
         48..63  unused padding  */
.macro WRAPPER_IMPL_AVX_ff callee
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        andq      $-32, %rsp
        subq      $64, %rsp
        vextractf128 $1, %ymm0, 16(%rsp)  /* Save high half of input 1.  */
        vextractf128 $1, %ymm1, (%rsp)    /* Save high half of input 2.  */
        vzeroupper                        /* Low halves stay in xmm0/xmm1.  */
        call      HIDDEN_JUMPTARGET(\callee)
        vmovaps   %xmm0, 32(%rsp)         /* Keep the low result.  */
        vmovaps   16(%rsp), %xmm0
        vmovaps   (%rsp), %xmm1
        call      HIDDEN_JUMPTARGET(\callee)
        vmovaps   %xmm0, %xmm1            /* xmm1 = high result.  */
        vmovaps   32(%rsp), %xmm0         /* xmm0 = low result.  */
        vinsertf128 $1, %xmm1, %ymm0, %ymm0
        movq      %rbp, %rsp
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret
.endm

/* 3 argument AVX/AVX2 ISA version as wrapper to SSE ISA version.

   Expands to a complete function body (it ends in ret).
   In:   ymm0 = 256-bit input vector,
         rdi, rsi = two output pointers.
   The first call handles the low 128 bits and is given the incoming
   rdi and rsi unchanged, so the callee stores through them directly.
   The second call handles the high 128 bits and is pointed at two
   16-byte temporaries in the frame; these are then copied to
   16 (incoming rdi) and 16 (incoming rsi).

   r13 and r14 carry the incoming rdi and rsi across the calls.  They
   are pushed BEFORE rsp is realigned so that they live at fixed
   offsets from rbp.  Once rbp is the CFA register the CFA offset must
   not be adjusted for further pushes, and a save slot below a
   realigned rsp cannot be expressed as a plain CFA offset.

   Frame below the realigned rsp (64 bytes, keeps rsp 32-byte aligned):
          0..15  temporary written through rdi by the second call
         16..31  temporary written through rsi by the second call
         32..47  high half of the input
         48..63  unused padding

   NOTE(review): the final vmovapd stores fault unless the incoming
   rdi + 16 and rsi + 16 are 16-byte aligned -- confirm that callers
   guarantee this.  */
.macro WRAPPER_IMPL_AVX_fFF callee
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        pushq     %r13
        cfi_rel_offset (%r13, -8)         /* Saved at rbp - 8.  */
        pushq     %r14
        cfi_rel_offset (%r14, -16)        /* Saved at rbp - 16.  */
        andq      $-32, %rsp
        subq      $64, %rsp
        movq      %rsi, %r14
        movq      %rdi, %r13
        vextractf128 $1, %ymm0, 32(%rsp)  /* Save the high half.  */
        vzeroupper                        /* xmm0 still holds the low half.  */
        call      HIDDEN_JUMPTARGET(\callee)
        vmovaps   32(%rsp), %xmm0         /* High half becomes the argument.  */
        leaq      (%rsp), %rdi
        leaq      16(%rsp), %rsi
        call      HIDDEN_JUMPTARGET(\callee)
        vmovapd   (%rsp), %xmm0
        vmovapd   16(%rsp), %xmm1
        vmovapd   %xmm0, 16(%r13)
        vmovapd   %xmm1, 16(%r14)
        leaq      -16(%rbp), %rsp         /* rsp -> saved r14.  */
        popq      %r14
        cfi_restore (%r14)
        popq      %r13
        cfi_restore (%r13)
        /* rsp equals rbp again, so the CFA can be tracked via rsp.  */
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret
.endm

/* AVX512 ISA version as wrapper to AVX2 ISA version.
*/

/* WRAPPER_IMPL_AVX512 callee

   Expands to a complete function body (it ends in ret).
   In:   zmm0 = 512-bit input vector.
   Out:  zmm0 = {callee (low 256 bits), callee (high 256 bits)}.
   The callee takes and returns a 256-bit vector in ymm0.

   rbp is used as frame pointer so that rsp can be rounded down to a
   64-byte boundary.  Frame (128 bytes):
          0..63   spilled input vector
         64..95   result for the low half
         96..127  result for the high half
   The two results are adjacent, so a single 64-byte load from
   64(%rsp) yields the combined return value.  */
.macro WRAPPER_IMPL_AVX512 callee
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        andq      $-64, %rsp
        subq      $128, %rsp
        vmovups   %zmm0, (%rsp)           /* Spill the whole input.  */
        vmovupd   (%rsp), %ymm0           /* Low half.  */
        call      HIDDEN_JUMPTARGET(\callee)
        vmovupd   %ymm0, 64(%rsp)         /* Low result.  */
        vmovupd   32(%rsp), %ymm0         /* High half.  */
        call      HIDDEN_JUMPTARGET(\callee)
        vmovupd   %ymm0, 96(%rsp)         /* High result.  */
        vmovups   64(%rsp), %zmm0         /* Both results at once.  */
        movq      %rbp, %rsp
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret
.endm

/* 2 argument AVX512 ISA version as wrapper to AVX2 ISA version.

   Expands to a complete function body (it ends in ret).
   In:   zmm0, zmm1 = two 512-bit input vectors.
   Out:  zmm0 = {callee (low halves), callee (high halves)}.
   The callee takes two 256-bit vectors in ymm0 and ymm1 and returns
   one in ymm0.

   rbp is used as frame pointer so that rsp can be rounded down to a
   64-byte boundary.  Frame (192 bytes):
           0..63   spilled first input
          64..127  spilled second input
         128..159  result for the low halves
         160..191  result for the high halves  */
.macro WRAPPER_IMPL_AVX512_ff callee
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        andq      $-64, %rsp
        subq      $192, %rsp
        vmovups   %zmm0, (%rsp)           /* Spill first input.  */
        vmovups   %zmm1, 64(%rsp)         /* Spill second input.  */
        vmovupd   (%rsp), %ymm0           /* Low halves.  */
        vmovupd   64(%rsp), %ymm1
        call      HIDDEN_JUMPTARGET(\callee)
        vmovupd   %ymm0, 128(%rsp)        /* Low result.  */
        vmovupd   32(%rsp), %ymm0         /* High halves.  */
        vmovupd   96(%rsp), %ymm1
        call      HIDDEN_JUMPTARGET(\callee)
        vmovupd   %ymm0, 160(%rsp)        /* High result.  */
        vmovups   128(%rsp), %zmm0        /* Both results at once.  */
        movq      %rbp, %rsp
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret
.endm

/* 3 argument AVX512 ISA version as wrapper to AVX2 ISA version.
*/

/* WRAPPER_IMPL_AVX512_fFF callee

   Expands to a complete function body (it ends in ret).
   In:   zmm0 = 512-bit input vector,
         rdi, rsi = two output pointers.
   The first call handles the low 256 bits and is given the incoming
   rdi and rsi unchanged, so the callee stores through them directly.
   The second call handles the high 256 bits and is pointed at two
   32-byte temporaries in the frame; these are then copied to
   32 (incoming rdi) and 32 (incoming rsi) with unaligned stores.

   r12 and r13 carry the incoming rdi and rsi across the calls.  They
   are pushed BEFORE rsp is realigned so that they live at fixed
   offsets from rbp.  Once rbp is the CFA register the CFA offset must
   not be adjusted for further pushes, and a save slot below a
   realigned rsp cannot be expressed as a plain CFA offset.

   Frame below the realigned rsp (128 bytes, keeps rsp 64-byte
   aligned):
          0..63   spilled input vector
         64..95   temporary written through rdi by the second call
         96..127  temporary written through rsi by the second call  */
.macro WRAPPER_IMPL_AVX512_fFF callee
        pushq     %rbp
        cfi_adjust_cfa_offset (8)
        cfi_rel_offset (%rbp, 0)
        movq      %rsp, %rbp
        cfi_def_cfa_register (%rbp)
        pushq     %r12
        cfi_rel_offset (%r12, -8)         /* Saved at rbp - 8.  */
        pushq     %r13
        cfi_rel_offset (%r13, -16)        /* Saved at rbp - 16.  */
        andq      $-64, %rsp
        subq      $128, %rsp
        movq      %rsi, %r13
        vmovups   %zmm0, (%rsp)           /* Spill the whole input.  */
        movq      %rdi, %r12
        vmovupd   (%rsp), %ymm0           /* Low half.  */
        call      HIDDEN_JUMPTARGET(\callee)
        vmovupd   32(%rsp), %ymm0         /* High half.  */
        leaq      64(%rsp), %rdi
        leaq      96(%rsp), %rsi
        call      HIDDEN_JUMPTARGET(\callee)
        vmovupd   64(%rsp), %ymm0
        vmovupd   96(%rsp), %ymm1
        vmovupd   %ymm0, 32(%r12)
        vmovupd   %ymm1, 32(%r13)
        vzeroupper
        leaq      -16(%rbp), %rsp         /* rsp -> saved r13.  */
        popq      %r13
        cfi_restore (%r13)
        popq      %r12
        cfi_restore (%r12)
        /* rsp equals rbp again, so the CFA can be tracked via rsp.  */
        cfi_def_cfa_register (%rsp)
        popq      %rbp
        cfi_adjust_cfa_offset (-8)
        cfi_restore (%rbp)
        ret
.endm