/* Function sincos vectorized with AVX-512.  Wrapper to AVX2 version.
   Copyright (C) 2014-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_d_wrapper_impl.h"

	.text
ENTRY (_ZGVeN8vl8l8_sincos)
WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos
END (_ZGVeN8vl8l8_sincos)

/* AVX512 ISA version as wrapper to AVX2 ISA version (for vector
   function declared with #pragma omp declare simd notinbranch).  */
.macro WRAPPER_IMPL_AVX512_fFF_vvv callee
#ifndef __ILP32__
	pushq	%rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq	%rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq	$-64, %rsp
	subq	$320, %rsp
	/* Scratch frame layout:
	     0(%rsp)    eight sine results,
	     64(%rsp)   eight cosine results,
	     128(%rsp)  eight sine destination pointers (from %zmm1),
	     192(%rsp)  eight cosine destination pointers (from %zmm2),
	     256(%rsp)  the eight double arguments (from %zmm0).  */
	vmovups	%zmm0, 256(%rsp)
	lea	(%rsp), %rdi
	vmovups	%zmm1, 128(%rdi)
	vmovups	%zmm2, 192(%rdi)
	lea	64(%rsp), %rsi
	/* Low four lanes: %ymm0 already holds the low half of %zmm0.  */
	call	HIDDEN_JUMPTARGET(\callee)
	/* High four lanes.  */
	vmovdqu	288(%rsp), %ymm0
	lea	32(%rsp), %rdi
	lea	96(%rsp), %rsi
	call	HIDDEN_JUMPTARGET(\callee)
	/* Scatter the sixteen results through the saved pointers.  */
	movq	128(%rsp), %rdx
	movq	192(%rsp), %rsi
	movq	136(%rsp), %r8
	movq	200(%rsp), %r10
	movq	(%rsp), %rax
	movq	64(%rsp), %rcx
	movq	8(%rsp), %rdi
	movq	72(%rsp), %r9
	movq	%rax, (%rdx)
	movq	%rcx, (%rsi)
	movq	144(%rsp), %rax
	movq	208(%rsp), %rcx
	movq	%rdi, (%r8)
	movq	%r9, (%r10)
	movq	152(%rsp), %rdi
	movq	216(%rsp), %r9
	movq	16(%rsp), %r11
	movq	80(%rsp), %rdx
	movq	24(%rsp), %rsi
	movq	88(%rsp), %r8
	movq	%r11, (%rax)
	movq	%rdx, (%rcx)
	movq	160(%rsp), %r11
	movq	224(%rsp), %rdx
	movq	%rsi, (%rdi)
	movq	%r8, (%r9)
	movq	168(%rsp), %rsi
	movq	232(%rsp), %r8
	movq	32(%rsp), %r10
	movq	96(%rsp), %rax
	movq	40(%rsp), %rcx
	movq	104(%rsp), %rdi
	movq	%r10, (%r11)
	movq	%rax, (%rdx)
	movq	176(%rsp), %r10
	movq	240(%rsp), %rax
	movq	%rcx, (%rsi)
	movq	%rdi, (%r8)
	movq	184(%rsp), %rcx
	movq	248(%rsp), %rdi
	movq	48(%rsp), %r9
	movq	112(%rsp), %r11
	movq	56(%rsp), %rdx
	movq	120(%rsp), %rsi
	movq	%r9, (%r10)
	movq	%r11, (%rax)
	movq	%rdx, (%rcx)
	movq	%rsi, (%rdi)
	movq	%rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq	%rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
#else
	leal	8(%rsp), %r10d
	.cfi_def_cfa 10, 0
	andl	$-64, %esp
	pushq	-8(%r10d)
	pushq	%rbp
	.cfi_escape 0x10,0x6,0x2,0x76,0
	movl	%esp, %ebp
	pushq	%r12
	leal	-112(%rbp), %esi
	pushq	%r10
	.cfi_escape 0xf,0x3,0x76,0x70,0x6
	.cfi_escape 0x10,0xc,0x2,0x76,0x78
	leal	-176(%rbp), %edi
	movq	%rsi, %r12
	pushq	%rbx
	.cfi_escape 0x10,0x3,0x2,0x76,0x68
	movq	%rdi, %rbx
	subl	$280, %esp
	/* With 32-bit (x32) pointers the eight sine/cosine destination
	   addresses arrive in %ymm1/%ymm2.  */
	vmovdqa	%ymm1, -208(%ebp)
	vmovdqa	%ymm2, -240(%ebp)
	vmovapd	%zmm0, -304(%ebp)
	/* Low four lanes: sines to -176(%ebp), cosines to -112(%ebp).  */
	call	HIDDEN_JUMPTARGET(\callee)
	/* High four lanes.  */
	leal	32(%r12), %esi
	vmovupd	-272(%ebp), %ymm0
	leal	32(%rbx), %edi
	call	HIDDEN_JUMPTARGET(\callee)
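	/* Scatter each lane's sine and cosine through the 32-bit
	   destination pointers saved from %ymm1 and %ymm2.  */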
	movl	-208(%ebp), %eax
	vmovsd	-176(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	movl	-204(%ebp), %eax
	vmovsd	-168(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	movl	-200(%ebp), %eax
	vmovsd	-160(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	movl	-196(%ebp), %eax
	vmovsd	-152(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	movl	-192(%ebp), %eax
	vmovsd	-144(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	movl	-188(%ebp), %eax
	vmovsd	-136(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	movl	-184(%ebp), %eax
	vmovsd	-128(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	movl	-180(%ebp), %eax
	vmovsd	-120(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	movl	-240(%ebp), %eax
	vmovsd	-112(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	movl	-236(%ebp), %eax
	vmovsd	-104(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	movl	-232(%ebp), %eax
	vmovsd	-96(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	movl	-228(%ebp), %eax
	vmovsd	-88(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	movl	-224(%ebp), %eax
	vmovsd	-80(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	movl	-220(%ebp), %eax
	vmovsd	-72(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	movl	-216(%ebp), %eax
	vmovsd	-64(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	movl	-212(%ebp), %eax
	vmovsd	-56(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	addl	$280, %esp
	popq	%rbx
	popq	%r10
	.cfi_def_cfa 10, 0
	popq	%r12
	popq	%rbp
	leal	-8(%r10), %esp
	.cfi_def_cfa 7, 8
	ret
#endif
.endm

ENTRY (_ZGVeN8vvv_sincos)
WRAPPER_IMPL_AVX512_fFF_vvv _ZGVdN4vl8l8_sincos
END (_ZGVeN8vvv_sincos)
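
/* A minimal caller sketch (illustration only), showing the register
   convention the LP64 path of the wrapper above relies on: eight double
   arguments in %zmm0, eight sine destination pointers in %zmm1 and
   eight cosine destination pointers in %zmm2.  This is the calling
   convention a compiler would use for a sincos declared with
   #pragma omp declare simd notinbranch and vectorized at zmm width.
   The labels args, sinres, cosres, sinptr, cosptr and do_sincos8 are
   hypothetical, it assumes AVX-512 is available, and the block is kept
   under #if 0 so it has no effect on the built object.  */
#if 0
	.data
	.align	64
args:	.double	0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7
sinres:	.zero	64
cosres:	.zero	64
sinptr:	.quad	sinres, sinres+8, sinres+16, sinres+24
	.quad	sinres+32, sinres+40, sinres+48, sinres+56
cosptr:	.quad	cosres, cosres+8, cosres+16, cosres+24
	.quad	cosres+32, cosres+40, cosres+48, cosres+56

	.text
do_sincos8:
	subq	$8, %rsp		/* keep the outgoing call 16-byte aligned */
	vmovupd	args(%rip), %zmm0	/* eight double arguments */
	vmovdqu64 sinptr(%rip), %zmm1	/* eight sine result pointers */
	vmovdqu64 cosptr(%rip), %zmm2	/* eight cosine result pointers */
	call	_ZGVeN8vvv_sincos	/* wrapper splits this into two AVX2 calls */
	addq	$8, %rsp
	ret
#endif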