/* Function sincosf vectorized with AVX-512. Wrapper to AVX2 version.
   Copyright (C) 2014-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_s_wrapper_impl.h"

	.text
ENTRY (_ZGVeN16vl4l4_sincosf)
WRAPPER_IMPL_AVX512_fFF _ZGVdN8vl4l4_sincosf
END (_ZGVeN16vl4l4_sincosf)

/* AVX512 ISA version as wrapper to AVX2 ISA version (for vector
   function declared with #pragma omp declare simd notinbranch).  */
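/* The vvv variant takes the 16 input floats in %zmm0 and the
   destination addresses as vectors of pointers: on LP64 the 16 sin
   pointers arrive in %zmm1/%zmm2 and the 16 cos pointers in
   %zmm3/%zmm4.  The wrapper below spills the input and the pointer
   vectors to a 64-byte aligned stack frame, calls the 8-lane AVX2
   callee twice (low half, then high half), and scatters the 32
   results through the saved pointers with scalar stores.

   For illustration only (hypothetical caller sketch; the actual SIMD
   declaration is provided by glibc's math headers), a loop such as

     #pragma omp declare simd notinbranch
     void sincosf (float, float *, float *);

     #pragma omp simd
     for (int i = 0; i < n; i++)
       sincosf (x[i], &s[i], &c[i]);

   may be vectorized into a call to this entry point when AVX-512
   code generation is enabled.  */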
.macro WRAPPER_IMPL_AVX512_fFF_vvv callee
#ifndef __ILP32__
	pushq	%rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq	%rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq	$-64, %rsp
	subq	$448, %rsp
	vmovups	%zmm0, 384(%rsp)
	lea	(%rsp), %rdi
	vmovups	%zmm1, 128(%rdi)
	vmovups	%zmm2, 192(%rdi)
	vmovups	%zmm3, 256(%rdi)
	vmovups	%zmm4, 320(%rdi)
	lea	64(%rsp), %rsi
	call	HIDDEN_JUMPTARGET(\callee)
	vmovdqu	416(%rsp), %ymm0
	lea	32(%rsp), %rdi
	lea	96(%rsp), %rsi
	call	HIDDEN_JUMPTARGET(\callee)
	movq	128(%rsp), %rdx
	movq	136(%rsp), %rsi
	movq	144(%rsp), %r8
	movq	152(%rsp), %r10
	movl	(%rsp), %eax
	movl	4(%rsp), %ecx
	movl	8(%rsp), %edi
	movl	12(%rsp), %r9d
	movl	%eax, (%rdx)
	movl	%ecx, (%rsi)
	movq	160(%rsp), %rax
	movq	168(%rsp), %rcx
	movl	%edi, (%r8)
	movl	%r9d, (%r10)
	movq	176(%rsp), %rdi
	movq	184(%rsp), %r9
	movl	16(%rsp), %r11d
	movl	20(%rsp), %edx
	movl	24(%rsp), %esi
	movl	28(%rsp), %r8d
	movl	%r11d, (%rax)
	movl	%edx, (%rcx)
	movq	192(%rsp), %r11
	movq	200(%rsp), %rdx
	movl	%esi, (%rdi)
	movl	%r8d, (%r9)
	movq	208(%rsp), %rsi
	movq	216(%rsp), %r8
	movl	32(%rsp), %r10d
	movl	36(%rsp), %eax
	movl	40(%rsp), %ecx
	movl	44(%rsp), %edi
	movl	%r10d, (%r11)
	movl	%eax, (%rdx)
	movq	224(%rsp), %r10
	movq	232(%rsp), %rax
	movl	%ecx, (%rsi)
	movl	%edi, (%r8)
	movq	240(%rsp), %rcx
	movq	248(%rsp), %rdi
	movl	48(%rsp), %r9d
	movl	52(%rsp), %r11d
	movl	56(%rsp), %edx
	movl	60(%rsp), %esi
	movl	%r9d, (%r10)
	movl	%r11d, (%rax)
	movq	256(%rsp), %r9
	movq	264(%rsp), %r11
	movl	%edx, (%rcx)
	movl	%esi, (%rdi)
	movq	272(%rsp), %rdx
	movq	280(%rsp), %rsi
	movl	64(%rsp), %r8d
	movl	68(%rsp), %r10d
	movl	72(%rsp), %eax
	movl	76(%rsp), %ecx
	movl	%r8d, (%r9)
	movl	%r10d, (%r11)
	movq	288(%rsp), %r8
	movq	296(%rsp), %r10
	movl	%eax, (%rdx)
	movl	%ecx, (%rsi)
	movq	304(%rsp), %rax
	movq	312(%rsp), %rcx
	movl	80(%rsp), %edi
	movl	84(%rsp), %r9d
	movl	88(%rsp), %r11d
	movl	92(%rsp), %edx
	movl	%edi, (%r8)
	movl	%r9d, (%r10)
	movq	320(%rsp), %rdi
	movq	328(%rsp), %r9
	movl	%r11d, (%rax)
	movl	%edx, (%rcx)
	movq	336(%rsp), %r11
	movq	344(%rsp), %rdx
	movl	96(%rsp), %esi
	movl	100(%rsp), %r8d
	movl	104(%rsp), %r10d
	movl	108(%rsp), %eax
	movl	%esi, (%rdi)
	movl	%r8d, (%r9)
	movq	352(%rsp), %rsi
	movq	360(%rsp), %r8
	movl	%r10d, (%r11)
	movl	%eax, (%rdx)
	movq	368(%rsp), %r10
	movq	376(%rsp), %rax
	movl	112(%rsp), %ecx
	movl	116(%rsp), %edi
	movl	120(%rsp), %r9d
	movl	124(%rsp), %r11d
	movl	%ecx, (%rsi)
	movl	%edi, (%r8)
	movl	%r9d, (%r10)
	movl	%r11d, (%rax)
	movq	%rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq	%rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
#else
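/* x32 (__ILP32__): pointers are 32 bits wide, so all 16 sin pointers
   fit in %zmm1 and all 16 cos pointers in %zmm2.  The frame below
   holds the input at -368(%ebp), the cos pointers at -304(%ebp), the
   sin pointers at -240(%ebp), the sin results at -176(%ebp) and the
   cos results at -112(%ebp); the results are then stored one lane at
   a time through the saved pointers.  */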
	leal	8(%rsp), %r10d
	.cfi_def_cfa 10, 0
	andl	$-64, %esp
	pushq	-8(%r10d)
	pushq	%rbp
	.cfi_escape 0x10,0x6,0x2,0x76,0
	movl	%esp, %ebp
	pushq	%r12
	leal	-112(%rbp), %esi
	pushq	%r10
	.cfi_escape 0xf,0x3,0x76,0x70,0x6
	.cfi_escape 0x10,0xc,0x2,0x76,0x78
	leal	-176(%rbp), %edi
	movq	%rsi, %r12
	pushq	%rbx
	.cfi_escape 0x10,0x3,0x2,0x76,0x68
	movq	%rdi, %rbx
	subl	$344, %esp
	vmovdqa64 %zmm1, -240(%ebp)
	vmovdqa64 %zmm2, -304(%ebp)
	vmovaps	%zmm0, -368(%ebp)
	call	HIDDEN_JUMPTARGET(\callee)
	leal	32(%r12), %esi
	vmovups	-336(%ebp), %ymm0
	leal	32(%rbx), %edi
	call	HIDDEN_JUMPTARGET(\callee)
	movl	-240(%ebp), %eax
	vmovss	-176(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-236(%ebp), %eax
	vmovss	-172(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-232(%ebp), %eax
	vmovss	-168(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-228(%ebp), %eax
	vmovss	-164(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-224(%ebp), %eax
	vmovss	-160(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-220(%ebp), %eax
	vmovss	-156(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-216(%ebp), %eax
	vmovss	-152(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-212(%ebp), %eax
	vmovss	-148(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-208(%ebp), %eax
	vmovss	-144(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-204(%ebp), %eax
	vmovss	-140(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-200(%ebp), %eax
	vmovss	-136(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-196(%ebp), %eax
	vmovss	-132(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-192(%ebp), %eax
	vmovss	-128(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-188(%ebp), %eax
	vmovss	-124(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-184(%ebp), %eax
	vmovss	-120(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-180(%ebp), %eax
	vmovss	-116(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-304(%ebp), %eax
	vmovss	-112(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-300(%ebp), %eax
	vmovss	-108(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-296(%ebp), %eax
	vmovss	-104(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-292(%ebp), %eax
	vmovss	-100(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-288(%ebp), %eax
	vmovss	-96(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-284(%ebp), %eax
	vmovss	-92(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-280(%ebp), %eax
	vmovss	-88(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-276(%ebp), %eax
	vmovss	-84(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-272(%ebp), %eax
	vmovss	-80(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-268(%ebp), %eax
	vmovss	-76(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-264(%ebp), %eax
	vmovss	-72(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-260(%ebp), %eax
	vmovss	-68(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-256(%ebp), %eax
	vmovss	-64(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-252(%ebp), %eax
	vmovss	-60(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-248(%ebp), %eax
	vmovss	-56(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	movl	-244(%ebp), %eax
	vmovss	-52(%ebp), %xmm0
	vmovss	%xmm0, (%eax)
	addl	$344, %esp
	popq	%rbx
	popq	%r10
	.cfi_def_cfa 10, 0
	popq	%r12
	popq	%rbp
	leal	-8(%r10), %esp
	.cfi_def_cfa 7, 8
	ret
#endif
.endm

ENTRY (_ZGVeN16vvv_sincosf)
WRAPPER_IMPL_AVX512_fFF_vvv _ZGVdN8vl4l4_sincosf
END (_ZGVeN16vvv_sincosf)